diff options
author | Guido van Rossum <guido@python.org> | 2007-09-18 19:42:40 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2007-09-18 19:42:40 (GMT) |
commit | c2504931ee6bb19b4d38d0d654b02a6fbc797ebd (patch) | |
tree | ae0fac70cd3c00538e97315db099f26d3f634d35 | |
parent | e4a9e788d367b99162a0b584d23f4fd111bde1cf (diff) | |
download | cpython-c2504931ee6bb19b4d38d0d654b02a6fbc797ebd.zip cpython-c2504931ee6bb19b4d38d0d654b02a6fbc797ebd.tar.gz cpython-c2504931ee6bb19b4d38d0d654b02a6fbc797ebd.tar.bz2 |
Optimize unicode_hash() by not calling
_PyUnicode_AsDefaultEncodedString() at all -- this saves two object
allocations (three block allocations!) and lots of redundant work.
By using the same hash algorithm as string_hash(), we maintain the
invariant that the hash of an ASCII string is the same whether
represented as a PyString or a PyUnicode.
-rw-r--r-- | Objects/unicodeobject.c | 30 |
1 files changed, 18 insertions, 12 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 140ffaf..2a6a087 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6587,21 +6587,27 @@ unicode_getitem(PyUnicodeObject *self, Py_ssize_t index)
     return (PyObject*) PyUnicode_FromUnicode(&self->str[index], 1);
 }
 
+/* Believe it or not, this produces the same value for ASCII strings
+   as string_hash(). */
 static long
 unicode_hash(PyUnicodeObject *self)
 {
-    if (self->hash != -1) {
-        return self->hash;
-    }
-    else {
-        /* Since Unicode objects compare equal to their UTF-8 string
-           counterparts, we hash the UTF-8 string. */
-        PyObject *v = _PyUnicode_AsDefaultEncodedString((PyObject*)self, NULL);
-        if (v == NULL)
-            return -1;
-        assert(PyString_CheckExact(v));
-        return self->hash = v->ob_type->tp_hash(v);
-    }
+    Py_ssize_t len;
+    Py_UNICODE *p;
+    long x;
+
+    if (self->hash != -1)
+        return self->hash;
+    len = Py_Size(self);
+    p = self->str;
+    x = *p << 7;
+    while (--len >= 0)
+        x = (1000003*x) ^ *p++;
+    x ^= Py_Size(self);
+    if (x == -1)
+        x = -2;
+    self->hash = x;
+    return x;
 }
 
 PyDoc_STRVAR(index__doc__,