diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2001-06-26 22:43:40 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2001-06-26 22:43:40 (GMT) |
commit | ac93bc250128105ff4c5f2c9d30027dbb0486db7 (patch) | |
tree | 09cd0dbdeb1335aaeed2bffe3617ffb9dbd2fdf9 | |
parent | 208efe56401ec6a1d7eef874fcc5848084d15692 (diff) | |
download | cpython-ac93bc250128105ff4c5f2c9d30027dbb0486db7.zip cpython-ac93bc250128105ff4c5f2c9d30027dbb0486db7.tar.gz cpython-ac93bc250128105ff4c5f2c9d30027dbb0486db7.tar.bz2 |
When decoding UTF-16, don't assume that the buffer is in native endianness
when checking surrogates.
-rw-r--r-- | Objects/unicodeobject.c | 8 |
1 file changed, 4 insertions, 4 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 7dc370a..ffac371 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1065,16 +1065,16 @@ PyObject *PyUnicode_DecodeUTF16(const char *s,
                 errmsg = "unexpected end of data";
                 goto utf16Error;
             }
-            if (0xDC00 <= *q && *q <= 0xDFFF) {
+            if (0xD800 <= ch && ch <= 0xDBFF) {
                 Py_UCS2 ch2 = *q++;
 #ifdef BYTEORDER_IS_LITTLE_ENDIAN
                 if (bo == 1)
-                    ch = (ch >> 8) | (ch << 8);
+                    ch2 = (ch2 >> 8) | (ch2 << 8);
 #else
                 if (bo == -1)
-                    ch = (ch >> 8) | (ch << 8);
+                    ch2 = (ch2 >> 8) | (ch2 << 8);
 #endif
-                if (0xD800 <= ch && ch <= 0xDBFF) {
+                if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
 #if Py_UNICODE_SIZE == 2
                     /* This is valid data (a UTF-16 surrogate pair), but
                        we are not able to store this information since our