summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org> 2007-09-18 19:42:40 (GMT)
committer: Guido van Rossum <guido@python.org> 2007-09-18 19:42:40 (GMT)
commit: c2504931ee6bb19b4d38d0d654b02a6fbc797ebd (patch)
tree: ae0fac70cd3c00538e97315db099f26d3f634d35
parent: e4a9e788d367b99162a0b584d23f4fd111bde1cf (diff)
download: cpython-c2504931ee6bb19b4d38d0d654b02a6fbc797ebd.zip
cpython-c2504931ee6bb19b4d38d0d654b02a6fbc797ebd.tar.gz
cpython-c2504931ee6bb19b4d38d0d654b02a6fbc797ebd.tar.bz2
Optimize unicode_hash() by not calling _PyUnicode_AsDefaultEncodedString() at all -- this saves two object allocations (three block allocations!) and lots of redundant work. By using the same hash algorithm as string_hash(), we maintain the invariant that the hash of an ASCII string is the same whether represented as a PyString or a PyUnicode.
-rw-r--r-- Objects/unicodeobject.c 30
1 file changed, 18 insertions, 12 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 140ffaf..2a6a087 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6587,21 +6587,27 @@ unicode_getitem(PyUnicodeObject *self, Py_ssize_t index)
return (PyObject*) PyUnicode_FromUnicode(&self->str[index], 1);
}
+/* Believe it or not, this produces the same value for ASCII strings
+ as string_hash(). */
static long
unicode_hash(PyUnicodeObject *self)
{
- if (self->hash != -1) {
- return self->hash;
- }
- else {
- /* Since Unicode objects compare equal to their UTF-8 string
- counterparts, we hash the UTF-8 string. */
- PyObject *v = _PyUnicode_AsDefaultEncodedString((PyObject*)self, NULL);
- if (v == NULL)
- return -1;
- assert(PyString_CheckExact(v));
- return self->hash = v->ob_type->tp_hash(v);
- }
+ Py_ssize_t len;
+ Py_UNICODE *p;
+ long x;
+
+ if (self->hash != -1)
+ return self->hash;
+ len = Py_Size(self);
+ p = self->str;
+ x = *p << 7;
+ while (--len >= 0)
+ x = (1000003*x) ^ *p++;
+ x ^= Py_Size(self);
+ if (x == -1)
+ x = -2;
+ self->hash = x;
+ return x;
}
PyDoc_STRVAR(index__doc__,