From c2504931ee6bb19b4d38d0d654b02a6fbc797ebd Mon Sep 17 00:00:00 2001
From: Guido van Rossum
Date: Tue, 18 Sep 2007 19:42:40 +0000
Subject: Optimize unicode_hash() by not calling _PyUnicode_AsDefaultEncodedString() at all -- this saves two object allocations (three block allocations!) and lots of redundant work. By using the same hash algorithm as string_hash(), we maintain the invariant that the hash of an ASCII string is the same whether represented as a PyString or a PyUnicode.

---
 Objects/unicodeobject.c | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 140ffaf..2a6a087 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6587,21 +6587,27 @@ unicode_getitem(PyUnicodeObject *self, Py_ssize_t index)
     return (PyObject*) PyUnicode_FromUnicode(&self->str[index], 1);
 }
 
+/* Believe it or not, this produces the same value for ASCII strings
+   as string_hash(). */
 static long
 unicode_hash(PyUnicodeObject *self)
 {
-    if (self->hash != -1) {
-        return self->hash;
-    }
-    else {
-        /* Since Unicode objects compare equal to their UTF-8 string
-           counterparts, we hash the UTF-8 string. */
-        PyObject *v = _PyUnicode_AsDefaultEncodedString((PyObject*)self, NULL);
-        if (v == NULL)
-            return -1;
-        assert(PyString_CheckExact(v));
-        return self->hash = v->ob_type->tp_hash(v);
-    }
+    Py_ssize_t len;
+    Py_UNICODE *p;
+    long x;
+
+    if (self->hash != -1)
+        return self->hash;
+    len = Py_Size(self);
+    p = self->str;
+    x = *p << 7;
+    while (--len >= 0)
+        x = (1000003*x) ^ *p++;
+    x ^= Py_Size(self);
+    if (x == -1)
+        x = -2;
+    self->hash = x;
+    return x;
 }
 
 PyDoc_STRVAR(index__doc__,
-- 
cgit v0.12