diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2013-01-15 12:43:21 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-01-15 12:43:21 (GMT) |
commit | 4fb8caee87fea1707f9a754365d5ec30c9220f6f (patch) | |
tree | c2242f4c4edc984b0d6affe182576d8f8e53efcc /Objects/unicodeobject.c | |
parent | ad1d5f908a51e1c6fd487e31d6f6aab98bae5c00 (diff) | |
download | cpython-4fb8caee87fea1707f9a754365d5ec30c9220f6f.zip cpython-4fb8caee87fea1707f9a754365d5ec30c9220f6f.tar.gz cpython-4fb8caee87fea1707f9a754365d5ec30c9220f6f.tar.bz2 |
Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping"
in any mapping, not only in a unicode string.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 46 |
1 files changed, 25 insertions, 21 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7f86bfd..e1df874 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5245,15 +5245,18 @@ PyObject *PyUnicode_DecodeCharmap(const char *s, if (PyErr_ExceptionMatches(PyExc_LookupError)) { /* No mapping found means: mapping is undefined. */ PyErr_Clear(); - x = Py_None; - Py_INCREF(x); + goto Undefined; } else goto onError; } /* Apply mapping */ + if (x == Py_None) + goto Undefined; if (PyLong_Check(x)) { long value = PyLong_AS_LONG(x); + if (value == 0xFFFE) + goto Undefined; if (value < 0 || value > 0x10FFFF) { PyErr_SetString(PyExc_TypeError, "character mapping must be in range(0x110000)"); @@ -5286,29 +5289,16 @@ PyObject *PyUnicode_DecodeCharmap(const char *s, #endif *p++ = (Py_UNICODE)value; } - else if (x == Py_None) { - /* undefined mapping */ - outpos = p-PyUnicode_AS_UNICODE(v); - startinpos = s-starts; - endinpos = startinpos+1; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "charmap", "character maps to <undefined>", - &starts, &e, &startinpos, &endinpos, &exc, &s, - &v, &outpos, &p)) { - Py_DECREF(x); - goto onError; - } - Py_DECREF(x); - continue; - } else if (PyUnicode_Check(x)) { Py_ssize_t targetsize = PyUnicode_GET_SIZE(x); - if (targetsize == 1) + if (targetsize == 1) { /* 1-1 mapping */ - *p++ = *PyUnicode_AS_UNICODE(x); - + Py_UNICODE value = *PyUnicode_AS_UNICODE(x); + if (value == 0xFFFE) + goto Undefined; + *p++ = value; + } else if (targetsize > 1) { /* 1-n mapping */ if (targetsize > extrachars) { @@ -5342,6 +5332,20 @@ PyObject *PyUnicode_DecodeCharmap(const char *s, } Py_DECREF(x); ++s; + continue; +Undefined: + /* undefined mapping */ + Py_XDECREF(x); + outpos = p-PyUnicode_AS_UNICODE(v); + startinpos = s-starts; + endinpos = startinpos+1; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "charmap", "character maps to <undefined>", + &starts, &e, &startinpos, &endinpos, &exc, &s, + &v, &outpos, &p)) { + goto 
onError; + } } } if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v)) |