diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2012-09-23 17:55:21 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2012-09-23 17:55:21 (GMT) |
commit | 6f80f5d4446f06d15274ad519cae6929a3565cc0 (patch) | |
tree | 652d58b6404e41887d2acbe1fe538d3eb05267a1 /Objects | |
parent | 20b8d992b008672d52a84c8d35992033ccfc9d84 (diff) | |
download | cpython-6f80f5d4446f06d15274ad519cae6929a3565cc0.zip cpython-6f80f5d4446f06d15274ad519cae6929a3565cc0.tar.gz cpython-6f80f5d4446f06d15274ad519cae6929a3565cc0.tar.bz2 |
Issue #15379: Fix passing of non-BMP characters as integers for the charmap decoder (already working as unicode strings).
Patch by Serhiy Storchaka.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 28 |
1 file changed, 26 insertions, 2 deletions
```diff
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 8b782b4..f59db36 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5250,12 +5250,36 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
             /* Apply mapping */
             if (PyLong_Check(x)) {
                 long value = PyLong_AS_LONG(x);
-                if (value < 0 || value > 65535) {
+                if (value < 0 || value > 0x10FFFF) {
                     PyErr_SetString(PyExc_TypeError,
-                                    "character mapping must be in range(65536)");
+                                    "character mapping must be in range(0x110000)");
                     Py_DECREF(x);
                     goto onError;
                 }
+
+#ifndef Py_UNICODE_WIDE
+                if (value > 0xFFFF) {
+                    /* see the code for 1-n mapping below */
+                    if (extrachars < 2) {
+                        /* resize first */
+                        Py_ssize_t oldpos = p - PyUnicode_AS_UNICODE(v);
+                        Py_ssize_t needed = 10 - extrachars;
+                        extrachars += needed;
+                        /* XXX overflow detection missing */
+                        if (_PyUnicode_Resize(&v,
+                                              PyUnicode_GET_SIZE(v) + needed) < 0) {
+                            Py_DECREF(x);
+                            goto onError;
+                        }
+                        p = PyUnicode_AS_UNICODE(v) + oldpos;
+                    }
+                    value -= 0x10000;
+                    *p++ = 0xD800 | (value >> 10);
+                    *p++ = 0xDC00 | (value & 0x3FF);
+                    extrachars -= 2;
+                }
+                else
+#endif
                 *p++ = (Py_UNICODE)value;
             }
             else if (x == Py_None) {
```