diff options
author | Fredrik Lundh <fredrik@pythonware.com> | 2001-06-26 16:39:36 (GMT) |
---|---|---|
committer | Fredrik Lundh <fredrik@pythonware.com> | 2001-06-26 16:39:36 (GMT) |
commit | 45714e9ecb97c53254d644ecee00d66d36a21449 (patch) | |
tree | c86c8f7e98e48d547f5a577fd42ec931477b0ce4 | |
parent | 3083163dc12024e9d46d4e2d752645256b7ba7c3 (diff) | |
download | cpython-45714e9ecb97c53254d644ecee00d66d36a21449.zip cpython-45714e9ecb97c53254d644ecee00d66d36a21449.tar.gz cpython-45714e9ecb97c53254d644ecee00d66d36a21449.tar.bz2 |
Experimental UCS-4 support: made compare a bit more robust, in case
sizeof(Py_UNICODE) >= sizeof(long). Also changed surrogate expansion
to work if sizeof(Py_UNICODE) > 2.
-rw-r--r-- | Objects/unicodeobject.c | 25 |
1 file changed, 14 insertions, 11 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ba606f5..c62f65b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -787,7 +787,7 @@ PyObject *PyUnicode_DecodeUTF8(const char *s, *p++ = (Py_UNICODE)(0xD800 + (ch >> 10)); /* low surrogate = bottom 10 bits added to DC00 */ - *p++ = (Py_UNICODE)(0xDC00 + (ch & ~0xFC00)); + *p++ = (Py_UNICODE)(0xDC00 + (ch & 0x03FF)); break; default: @@ -1274,7 +1274,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s, /* UCS-4 character. store as two surrogate characters */ chr -= 0x10000L; *p++ = 0xD800 + (Py_UNICODE) (chr >> 10); - *p++ = 0xDC00 + (Py_UNICODE) (chr & ~0xFC00); + *p++ = 0xDC00 + (Py_UNICODE) (chr & 0x03FF); } else { if (unicodeescape_decoding_error( &s, &x, errors, @@ -3260,19 +3260,19 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2) while (len1 > 0 && len2 > 0) { Py_UNICODE c1, c2; - long diff; c1 = *s1++; c2 = *s2++; + if (c1 > (1<<11) * 26) c1 += utf16Fixup[c1>>11]; if (c2 > (1<<11) * 26) c2 += utf16Fixup[c2>>11]; - /* now c1 and c2 are in UTF-32-compatible order */ - diff = (long)c1 - (long)c2; - if (diff) - return (diff < 0) ? -1 : (diff != 0); + + if (c1 != c2) + return (c1 < c2) ? -1 : 1; + len1--; len2--; } @@ -3293,11 +3293,14 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2) len2 = str2->length; while (len1 > 0 && len2 > 0) { - register long diff; + Py_UNICODE c1, c2; + + c1 = *s1++; + c2 = *s2++; + + if (c1 != c2) + return (c1 < c2) ? -1 : 1; - diff = (long)*s1++ - (long)*s2++; - if (diff) - return (diff < 0) ? -1 : (diff != 0); len1--; len2--; } |