summary | refs | log | tree | commit | diff | stats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
author: Fredrik Lundh <fredrik@pythonware.com> 2001-06-26 16:39:36 (GMT)
committer: Fredrik Lundh <fredrik@pythonware.com> 2001-06-26 16:39:36 (GMT)
commit: 45714e9ecb97c53254d644ecee00d66d36a21449 (patch)
tree: c86c8f7e98e48d547f5a577fd42ec931477b0ce4 /Objects/unicodeobject.c
parent: 3083163dc12024e9d46d4e2d752645256b7ba7c3 (diff)
download: cpython-45714e9ecb97c53254d644ecee00d66d36a21449.zip
cpython-45714e9ecb97c53254d644ecee00d66d36a21449.tar.gz
cpython-45714e9ecb97c53254d644ecee00d66d36a21449.tar.bz2
Experimental UCS-4 support: made compare a bit more robust, in case
sizeof(Py_UNICODE) >= sizeof(long). Also changed surrogate expansion to work if sizeof(Py_UNICODE) > 2.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c 25
1 file changed, 14 insertions, 11 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index ba606f5..c62f65b 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -787,7 +787,7 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
*p++ = (Py_UNICODE)(0xD800 + (ch >> 10));
/* low surrogate = bottom 10 bits added to DC00 */
- *p++ = (Py_UNICODE)(0xDC00 + (ch & ~0xFC00));
+ *p++ = (Py_UNICODE)(0xDC00 + (ch & 0x03FF));
break;
default:
@@ -1274,7 +1274,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
/* UCS-4 character. store as two surrogate characters */
chr -= 0x10000L;
*p++ = 0xD800 + (Py_UNICODE) (chr >> 10);
- *p++ = 0xDC00 + (Py_UNICODE) (chr & ~0xFC00);
+ *p++ = 0xDC00 + (Py_UNICODE) (chr & 0x03FF);
} else {
if (unicodeescape_decoding_error(
&s, &x, errors,
@@ -3260,19 +3260,19 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
while (len1 > 0 && len2 > 0) {
Py_UNICODE c1, c2;
- long diff;
c1 = *s1++;
c2 = *s2++;
+
if (c1 > (1<<11) * 26)
c1 += utf16Fixup[c1>>11];
if (c2 > (1<<11) * 26)
c2 += utf16Fixup[c2>>11];
-
/* now c1 and c2 are in UTF-32-compatible order */
- diff = (long)c1 - (long)c2;
- if (diff)
- return (diff < 0) ? -1 : (diff != 0);
+
+ if (c1 != c2)
+ return (c1 < c2) ? -1 : 1;
+
len1--; len2--;
}
@@ -3293,11 +3293,14 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
len2 = str2->length;
while (len1 > 0 && len2 > 0) {
- register long diff;
+ Py_UNICODE c1, c2;
+
+ c1 = *s1++;
+ c2 = *s2++;
+
+ if (c1 != c2)
+ return (c1 < c2) ? -1 : 1;
- diff = (long)*s1++ - (long)*s2++;
- if (diff)
- return (diff < 0) ? -1 : (diff != 0);
len1--; len2--;
}