diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2015-01-25 20:56:57 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2015-01-25 20:56:57 (GMT) |
commit | 07985ef387a87486a0e632844be03a8877e7f889 (patch) | |
tree | b89d636e8ba7b0cd170064698741aebb46951eb5 /Python/codecs.c | |
parent | 58f02019e0a772b5689b69182f4f162666657294 (diff) | |
download | cpython-07985ef387a87486a0e632844be03a8877e7f889.zip cpython-07985ef387a87486a0e632844be03a8877e7f889.tar.gz cpython-07985ef387a87486a0e632844be03a8877e7f889.tar.bz2 |
Issue #22286: The "backslashreplace" error handlers now works with
decoding and translating.
Diffstat (limited to 'Python/codecs.c')
-rw-r--r-- | Python/codecs.c | 144 |
1 files changed, 91 insertions, 53 deletions
diff --git a/Python/codecs.c b/Python/codecs.c index a0a5403..a558859 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -864,74 +864,112 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc) PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) { - if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { - PyObject *restuple; - PyObject *object; - Py_ssize_t i; - Py_ssize_t start; - Py_ssize_t end; - PyObject *res; - unsigned char *outp; - Py_ssize_t ressize; - Py_UCS4 c; - if (PyUnicodeEncodeError_GetStart(exc, &start)) + PyObject *object; + Py_ssize_t i; + Py_ssize_t start; + Py_ssize_t end; + PyObject *res; + unsigned char *outp; + int ressize; + Py_UCS4 c; + + if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) { + unsigned char *p; + if (PyUnicodeDecodeError_GetStart(exc, &start)) return NULL; - if (PyUnicodeEncodeError_GetEnd(exc, &end)) + if (PyUnicodeDecodeError_GetEnd(exc, &end)) return NULL; - if (!(object = PyUnicodeEncodeError_GetObject(exc))) + if (!(object = PyUnicodeDecodeError_GetObject(exc))) + return NULL; + if (!(p = (unsigned char*)PyBytes_AsString(object))) { + Py_DECREF(object); return NULL; - if (end - start > PY_SSIZE_T_MAX / (1+1+8)) - end = start + PY_SSIZE_T_MAX / (1+1+8); - for (i = start, ressize = 0; i < end; ++i) { - /* object is guaranteed to be "ready" */ - c = PyUnicode_READ_CHAR(object, i); - if (c >= 0x10000) { - ressize += 1+1+8; - } - else if (c >= 0x100) { - ressize += 1+1+4; - } - else - ressize += 1+1+2; } - res = PyUnicode_New(ressize, 127); + res = PyUnicode_New(4 * (end - start), 127); if (res == NULL) { Py_DECREF(object); return NULL; } - for (i = start, outp = PyUnicode_1BYTE_DATA(res); - i < end; ++i) { - c = PyUnicode_READ_CHAR(object, i); - *outp++ = '\\'; - if (c >= 0x00010000) { - *outp++ = 'U'; - *outp++ = Py_hexdigits[(c>>28)&0xf]; - *outp++ = Py_hexdigits[(c>>24)&0xf]; - *outp++ = Py_hexdigits[(c>>20)&0xf]; - *outp++ = Py_hexdigits[(c>>16)&0xf]; - *outp++ = Py_hexdigits[(c>>12)&0xf]; - *outp++ = Py_hexdigits[(c>>8)&0xf]; - } - else if (c >= 0x100) { - *outp++ = 'u'; - *outp++ = Py_hexdigits[(c>>12)&0xf]; - *outp++ = Py_hexdigits[(c>>8)&0xf]; - } - else - *outp++ = 'x'; - *outp++ = Py_hexdigits[(c>>4)&0xf]; - *outp++ = Py_hexdigits[c&0xf]; + outp = PyUnicode_1BYTE_DATA(res); + for (i = start; i < end; i++, outp += 4) { + unsigned char c = p[i]; + outp[0] = '\\'; + outp[1] = 'x'; + outp[2] = Py_hexdigits[(c>>4)&0xf]; + outp[3] = Py_hexdigits[c&0xf]; } assert(_PyUnicode_CheckConsistency(res, 1)); - restuple = Py_BuildValue("(Nn)", res, end); Py_DECREF(object); - return restuple; + return Py_BuildValue("(Nn)", res, end); + } + if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { + if (PyUnicodeEncodeError_GetStart(exc, &start)) + return NULL; + if (PyUnicodeEncodeError_GetEnd(exc, &end)) + return NULL; + if (!(object = PyUnicodeEncodeError_GetObject(exc))) + return NULL; + } + else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) { + if (PyUnicodeTranslateError_GetStart(exc, &start)) + return NULL; + if (PyUnicodeTranslateError_GetEnd(exc, &end)) + return NULL; + if (!(object = PyUnicodeTranslateError_GetObject(exc))) + return NULL; } else { wrong_exception_type(exc); return NULL; } + + if (end - start > PY_SSIZE_T_MAX / (1+1+8)) + end = start + PY_SSIZE_T_MAX / (1+1+8); + for (i = start, ressize = 0; i < end; ++i) { + /* object is guaranteed to be "ready" */ + c = PyUnicode_READ_CHAR(object, i); + if (c >= 0x10000) { + ressize += 1+1+8; + } + else if (c >= 0x100) { + ressize += 1+1+4; + } + else + ressize += 1+1+2; + } + res = PyUnicode_New(ressize, 127); + if (res == NULL) { + Py_DECREF(object); + return NULL; + } + outp = PyUnicode_1BYTE_DATA(res); + for (i = start; i < end; ++i) { + c = PyUnicode_READ_CHAR(object, i); + *outp++ = '\\'; + if (c >= 0x00010000) { + *outp++ = 'U'; + *outp++ = Py_hexdigits[(c>>28)&0xf]; + *outp++ = Py_hexdigits[(c>>24)&0xf]; + *outp++ = Py_hexdigits[(c>>20)&0xf]; + *outp++ = Py_hexdigits[(c>>16)&0xf]; + *outp++ = Py_hexdigits[(c>>12)&0xf]; + *outp++ = Py_hexdigits[(c>>8)&0xf]; + } + else if (c >= 0x100) { + *outp++ = 'u'; + *outp++ = Py_hexdigits[(c>>12)&0xf]; + *outp++ = Py_hexdigits[(c>>8)&0xf]; + } + else + *outp++ = 'x'; + *outp++ = Py_hexdigits[(c>>4)&0xf]; + *outp++ = Py_hexdigits[c&0xf]; + } + + assert(_PyUnicode_CheckConsistency(res, 1)); + Py_DECREF(object); + return Py_BuildValue("(Nn)", res, end); } static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL; @@ -1444,8 +1482,8 @@ static int _PyCodecRegistry_Init(void) backslashreplace_errors, METH_O, PyDoc_STR("Implements the 'backslashreplace' error handling, " - "which replaces an unencodable character with a " - "backslashed escape sequence.") + "which replaces malformed data with a backslashed " + "escape sequence.") } }, { |