summaryrefslogtreecommitdiffstats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>2007-05-04 04:17:33 (GMT)
committerGuido van Rossum <guido@python.org>2007-05-04 04:17:33 (GMT)
commit09dc34fc9c51f37f94cd9030ab8760677b360396 (patch)
tree264fb2452e8f27b0852350b363916ff7be5770c5 /Objects/unicodeobject.c
parentf15a29f975bbdef6de0aa19a19b176d1baf8f5ab (diff)
downloadcpython-09dc34fc9c51f37f94cd9030ab8760677b360396.zip
cpython-09dc34fc9c51f37f94cd9030ab8760677b360396.tar.gz
cpython-09dc34fc9c51f37f94cd9030ab8760677b360396.tar.bz2
Compare and hash unicode objects like their UTF-8 representations.
Accept Unicode characters < 256 for 'c' format.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r--Objects/unicodeobject.c74
1 files changed, 23 insertions, 51 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index db3f9c4..26d6fc6 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5406,33 +5406,23 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
int PyUnicode_Compare(PyObject *left,
PyObject *right)
{
- PyUnicodeObject *u = NULL, *v = NULL;
- int result;
-
- /* Coerce the two arguments */
- u = (PyUnicodeObject *)PyUnicode_FromObject(left);
- if (u == NULL)
- goto onError;
- v = (PyUnicodeObject *)PyUnicode_FromObject(right);
- if (v == NULL)
- goto onError;
-
- /* Shortcut for empty or interned objects */
- if (v == u) {
- Py_DECREF(u);
- Py_DECREF(v);
- return 0;
- }
-
- result = unicode_compare(u, v);
-
- Py_DECREF(u);
- Py_DECREF(v);
- return result;
-
-onError:
- Py_XDECREF(u);
- Py_XDECREF(v);
+ if (PyUnicode_Check(left) && PyUnicode_Check(right))
+ return unicode_compare((PyUnicodeObject *)left,
+ (PyUnicodeObject *)right);
+ if ((PyString_Check(left) && PyUnicode_Check(right)) ||
+ (PyUnicode_Check(left) && PyString_Check(right))) {
+ if (PyUnicode_Check(left))
+ left = _PyUnicode_AsDefaultEncodedString(left, NULL);
+ if (PyUnicode_Check(right))
+ right = _PyUnicode_AsDefaultEncodedString(right, NULL);
+ assert(PyString_Check(left));
+ assert(PyString_Check(right));
+ return PyObject_Compare(left, right);
+ }
+ PyErr_Format(PyExc_TypeError,
+ "Can't compare %.100s and %.100s",
+ left->ob_type->tp_name,
+ right->ob_type->tp_name);
return -1;
}
@@ -5802,30 +5792,12 @@ unicode_getitem(PyUnicodeObject *self, Py_ssize_t index)
}
static long
-unicode_hash(PyUnicodeObject *self)
-{
- /* Since Unicode objects compare equal to their ASCII string
- counterparts, they should use the individual character values
- as basis for their hash value. This is needed to assure that
- strings and Unicode objects behave in the same way as
- dictionary keys. */
-
- register Py_ssize_t len;
- register Py_UNICODE *p;
- register long x;
-
- if (self->hash != -1)
- return self->hash;
- len = PyUnicode_GET_SIZE(self);
- p = PyUnicode_AS_UNICODE(self);
- x = *p << 7;
- while (--len >= 0)
- x = (1000003*x) ^ *p++;
- x ^= PyUnicode_GET_SIZE(self);
- if (x == -1)
- x = -2;
- self->hash = x;
- return x;
+unicode_hash(PyObject *self)
+{
+ /* Since Unicode objects compare equal to their UTF-8 string
+ counterparts, we hash the UTF-8 string. */
+ PyObject *v = _PyUnicode_AsDefaultEncodedString(self, NULL);
+ return PyObject_Hash(v);
}
PyDoc_STRVAR(index__doc__,