summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Marc-André Lemburg <mal@egenix.com> 2000-07-04 09:51:07 (GMT)
committer: Marc-André Lemburg <mal@egenix.com> 2000-07-04 09:51:07 (GMT)
commit: 1e7205a62aaa5779824681407d753abed2d45b28 (patch)
tree: 60d6e3733f8ccc490ab35f5258b0bc8bf1e6ca5a
parent: 4b0200e322b333f5eac3a6b98b9ef527a596c856 (diff)
download: cpython-1e7205a62aaa5779824681407d753abed2d45b28.zip
cpython-1e7205a62aaa5779824681407d753abed2d45b28.tar.gz
cpython-1e7205a62aaa5779824681407d753abed2d45b28.tar.bz2
Bill Tutt:
Make unicode_compare a true UTF-16 compare function (includes support for surrogates).
-rw-r--r-- Objects/unicodeobject.c 35
1 files changed, 29 insertions, 6 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index b4096a0..59824c6 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3045,22 +3045,45 @@ unicode_center(PyUnicodeObject *self, PyObject *args)
return (PyObject*) pad(self, left, marg - left, ' ');
}
+/* speedy UTF-16 code point order comparison */
+/* gleaned from: */
+/* http://www-4.ibm.com/software/developer/library/utf16.html?dwzone=unicode */
+
+static unsigned short utf16Fixup[32] = /* offset table indexed by the top 5 bits (c >> 11) of a 16-bit code unit */
+{
+ 0, 0, 0, 0, 0, 0, 0, 0, /* indices 0-7 (0x0000-0x3FFF): no adjustment */
+ 0, 0, 0, 0, 0, 0, 0, 0, /* indices 8-15 (0x4000-0x7FFF): no adjustment */
+ 0, 0, 0, 0, 0, 0, 0, 0, /* indices 16-23 (0x8000-0xBFFF): no adjustment */
+ 0, 0, 0, 0x2000, 0xf800, 0xf800, 0xf800, 0xf800 /* index 27 (0xD800-0xDFFF, surrogates): +0x2000; indices 28-31 (0xE000-0xFFFF): +0xF800, which moves them below the shifted surrogates if the sum wraps at 16 bits (unicode_compare notes its operands are 16 bits) */
+};
+
static int
unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
{
int len1, len2;
+
Py_UNICODE *s1 = str1->str;
Py_UNICODE *s2 = str2->str;
len1 = str1->length;
len2 = str2->length;
-
+
while (len1 > 0 && len2 > 0) {
- int cmp = (*s1++) - (*s2++);
- if (cmp)
- /* This should make Christian happy! */
- return (cmp < 0) ? -1 : (cmp != 0);
- len1--, len2--;
+ unsigned short c1, c2; /* 16 bits */
+ int diff; /* 32 bits */
+
+ c1 = *s1++;
+ c2 = *s2++;
+ if (c1 > (1<<11) * 26)
+ c1 += utf16Fixup[c1>>11];
+ if (c2 > (1<<11) * 26)
+ c2 += utf16Fixup[c2>>11];
+
+ /* now c1 and c2 are in UTF-32-compatible order */
+ diff = (int)c1 - (int)c2;
+ if (diff)
+ return (diff < 0) ? -1 : (diff != 0);
+ len1--; len2--;
}
return (len1 < len2) ? -1 : (len1 != len2);