summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Martin v. Löwis <martin@v.loewis.de> 2001-06-26 22:43:40 (GMT)
committer: Martin v. Löwis <martin@v.loewis.de> 2001-06-26 22:43:40 (GMT)
commit: ac93bc250128105ff4c5f2c9d30027dbb0486db7 (patch)
tree: 09cd0dbdeb1335aaeed2bffe3617ffb9dbd2fdf9
parent: 208efe56401ec6a1d7eef874fcc5848084d15692 (diff)
downloadcpython-ac93bc250128105ff4c5f2c9d30027dbb0486db7.zip
cpython-ac93bc250128105ff4c5f2c9d30027dbb0486db7.tar.gz
cpython-ac93bc250128105ff4c5f2c9d30027dbb0486db7.tar.bz2
When decoding UTF-16, don't assume that the buffer is in native endianness
when checking surrogates.
-rw-r--r-- Objects/unicodeobject.c 8
1 file changed, 4 insertions, 4 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 7dc370a..ffac371 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1065,16 +1065,16 @@ PyObject *PyUnicode_DecodeUTF16(const char *s,
errmsg = "unexpected end of data";
goto utf16Error;
}
- if (0xDC00 <= *q && *q <= 0xDFFF) {
+ if (0xD800 <= ch && ch <= 0xDBFF) {
Py_UCS2 ch2 = *q++;
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
if (bo == 1)
- ch = (ch >> 8) | (ch << 8);
+ ch2 = (ch2 >> 8) | (ch2 << 8);
#else
if (bo == -1)
- ch = (ch >> 8) | (ch << 8);
+ ch2 = (ch2 >> 8) | (ch2 << 8);
#endif
- if (0xD800 <= ch && ch <= 0xDBFF) {
+ if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
#if Py_UNICODE_SIZE == 2
/* This is valid data (a UTF-16 surrogate pair), but
we are not able to store this information since our