summary | refs | log | tree | commit | diff | stats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@gmail.com>, 2015-09-21 23:29:33 (GMT)
committer: Victor Stinner <victor.stinner@gmail.com>, 2015-09-21 23:29:33 (GMT)
commit5ebae876281828c17f139ec063dae43a39fd7741 (patch)
tree830d5b3eb026658916895a93b54a822611a1d859 /Objects/unicodeobject.c
parent6174474bea9fe6f5f12f05a16004eabb817ce721 (diff)
downloadcpython-5ebae876281828c17f139ec063dae43a39fd7741.zip
cpython-5ebae876281828c17f139ec063dae43a39fd7741.tar.gz
cpython-5ebae876281828c17f139ec063dae43a39fd7741.tar.bz2
Issue #25207, #14626: Fix my commit.
It doesn't work to use "#define XXX defined(YYY)" and then "#ifdef XXX" to check YYY.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r--Objects/unicodeobject.c52
1 files changed, 43 insertions, 9 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index d0b285a..63a627f 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4709,8 +4709,9 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
Py_ssize_t startinpos;
Py_ssize_t endinpos;
const char *errmsg = "";
- PyObject *errorHandler = NULL;
+ PyObject *error_handler_obj = NULL;
PyObject *exc = NULL;
+ _Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
if (size == 0) {
if (consumed)
@@ -4773,24 +4774,57 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
continue;
}
- if (unicode_decode_call_errorhandler_writer(
- errors, &errorHandler,
- "utf-8", errmsg,
- &starts, &end, &startinpos, &endinpos, &exc, &s,
- &writer))
- goto onError;
+ /* undecodable byte: call the error handler */
+
+ if (error_handler == _Py_ERROR_UNKNOWN)
+ error_handler = get_error_handler(errors);
+
+ switch (error_handler)
+ {
+ case _Py_ERROR_REPLACE:
+ case _Py_ERROR_SURROGATEESCAPE:
+ {
+ unsigned char ch = (unsigned char)*s;
+
+ /* Fast-path: the error handler only writes one character,
+ but we may switch to UCS2 at the first write */
+ if (_PyUnicodeWriter_PrepareKind(&writer, PyUnicode_2BYTE_KIND) < 0)
+ goto onError;
+ kind = writer.kind;
+
+ if (error_handler == _Py_ERROR_REPLACE)
+ PyUnicode_WRITE(kind, writer.data, writer.pos, 0xfffd);
+ else
+ PyUnicode_WRITE(kind, writer.data, writer.pos, ch + 0xdc00);
+ writer.pos++;
+ ++s;
+ break;
+ }
+
+ case _Py_ERROR_IGNORE:
+ s++;
+ break;
+
+ default:
+ if (unicode_decode_call_errorhandler_writer(
+ errors, &error_handler_obj,
+ "utf-8", errmsg,
+ &starts, &end, &startinpos, &endinpos, &exc, &s,
+ &writer))
+ goto onError;
+ }
}
End:
if (consumed)
*consumed = s - starts;
- Py_XDECREF(errorHandler);
+ Py_XDECREF(error_handler_obj);
Py_XDECREF(exc);
return _PyUnicodeWriter_Finish(&writer);
onError:
- Py_XDECREF(errorHandler);
+ Py_XDECREF(error_handler_obj);
Py_XDECREF(exc);
_PyUnicodeWriter_Dealloc(&writer);
return NULL;