diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2012-11-17 20:14:58 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2012-11-17 20:14:58 (GMT) |
commit | e3ae321222c32d64232c46e4062082effec79bcf (patch) | |
tree | 1afcaf1354d5d6e7ddb9d3fb3a534b43c37f1769 /Objects | |
parent | 1df43d33d2f17584639759519e290021b063485c (diff) | |
download | cpython-e3ae321222c32d64232c46e4062082effec79bcf.zip cpython-e3ae321222c32d64232c46e4062082effec79bcf.tar.gz cpython-e3ae321222c32d64232c46e4062082effec79bcf.tar.bz2 |
Issue #15379: Fix passing of non-BMP characters as integers for the charmap decoder (already working as unicode strings).
Patch by Serhiy Storchaka.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 28 |
1 files changed, 26 insertions, 2 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index a4c04f4..b4c37fb 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4127,12 +4127,36 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
         /* Apply mapping */
         if (PyInt_Check(x)) {
             long value = PyInt_AS_LONG(x);
-            if (value < 0 || value > 65535) {
+            if (value < 0 || value > 0x10FFFF) {
                 PyErr_SetString(PyExc_TypeError,
-                                "character mapping must be in range(65536)");
+                                "character mapping must be in range(0x110000)");
                 Py_DECREF(x);
                 goto onError;
             }
+
+#ifndef Py_UNICODE_WIDE
+            if (value > 0xFFFF) {
+                /* see the code for 1-n mapping below */
+                if (extrachars < 2) {
+                    /* resize first */
+                    Py_ssize_t oldpos = p - PyUnicode_AS_UNICODE(v);
+                    Py_ssize_t needed = 10 - extrachars;
+                    extrachars += needed;
+                    /* XXX overflow detection missing */
+                    if (_PyUnicode_Resize(&v,
+                                          PyUnicode_GET_SIZE(v) + needed) < 0) {
+                        Py_DECREF(x);
+                        goto onError;
+                    }
+                    p = PyUnicode_AS_UNICODE(v) + oldpos;
+                }
+                value -= 0x10000;
+                *p++ = 0xD800 | (value >> 10);
+                *p++ = 0xDC00 | (value & 0x3FF);
+                extrachars -= 2;
+            }
+            else
+#endif
             *p++ = (Py_UNICODE)value;
         }
         else if (x == Py_None) {