diff options
author | Marc-André Lemburg <mal@egenix.com> | 2000-07-04 09:51:07 (GMT) |
---|---|---|
committer | Marc-André Lemburg <mal@egenix.com> | 2000-07-04 09:51:07 (GMT) |
commit | 1e7205a62aaa5779824681407d753abed2d45b28 (patch) | |
tree | 60d6e3733f8ccc490ab35f5258b0bc8bf1e6ca5a | |
parent | 4b0200e322b333f5eac3a6b98b9ef527a596c856 (diff) | |
download | cpython-1e7205a62aaa5779824681407d753abed2d45b28.zip cpython-1e7205a62aaa5779824681407d753abed2d45b28.tar.gz cpython-1e7205a62aaa5779824681407d753abed2d45b28.tar.bz2 |
Bill Tutt:
Make unicode_compare a true UTF-16 compare function (includes
support for surrogates).
-rw-r--r-- | Objects/unicodeobject.c | 35 |
1 files changed, 29 insertions, 6 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b4096a0..59824c6 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3045,22 +3045,45 @@ unicode_center(PyUnicodeObject *self, PyObject *args) return (PyObject*) pad(self, left, marg - left, ' '); } +/* speedy UTF-16 code point order comparison */ +/* gleaned from: */ +/* http://www-4.ibm.com/software/developer/library/utf16.html?dwzone=unicode */ + +static unsigned short utf16Fixup[32] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0x2000, 0xf800, 0xf800, 0xf800, 0xf800 +}; + static int unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2) { int len1, len2; + Py_UNICODE *s1 = str1->str; Py_UNICODE *s2 = str2->str; len1 = str1->length; len2 = str2->length; - + while (len1 > 0 && len2 > 0) { - int cmp = (*s1++) - (*s2++); - if (cmp) - /* This should make Christian happy! */ - return (cmp < 0) ? -1 : (cmp != 0); - len1--, len2--; + unsigned short c1, c2; /* 16 bits */ + int diff; /* 32 bits */ + + c1 = *s1++; + c2 = *s2++; + if (c1 > (1<<11) * 26) + c1 += utf16Fixup[c1>>11]; + if (c2 > (1<<11) * 26) + c2 += utf16Fixup[c2>>11]; + + /* now c1 and c2 are in UTF-32-compatible order */ + diff = (int)c1 - (int)c2; + if (diff) + return (diff < 0) ? -1 : (diff != 0); + len1--; len2--; } return (len1 < len2) ? -1 : (len1 != len2); |