diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2015-10-08 23:39:28 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2015-10-08 23:39:28 (GMT) |
commit | e7bf86cd7d7c9a3924501875a08c4ef4a0063103 (patch) | |
tree | e0097b0f4fb0194d233d1d9f58bf8570815873c3 /Objects/stringlib | |
parent | fdfbf781140f22619b0ef6bfeac792496774bb69 (diff) | |
download | cpython-e7bf86cd7d7c9a3924501875a08c4ef4a0063103.zip cpython-e7bf86cd7d7c9a3924501875a08c4ef4a0063103.tar.gz cpython-e7bf86cd7d7c9a3924501875a08c4ef4a0063103.tar.bz2 |
Optimize backslashreplace error handler
Issue #25318: Optimize the backslashreplace and xmlcharrefreplace error handlers
in the UTF-8 encoder. Also optimize the backslashreplace error handler for the
ASCII and Latin1 encoders.
Use the new _PyBytesWriter API to optimize these error handlers for the
encoders. This avoids creating an exception and calling the slow implementation
of the error handler.
Diffstat (limited to 'Objects/stringlib')
-rw-r--r-- | Objects/stringlib/codecs.h | 18 |
1 files changed, 16 insertions, 2 deletions
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h index d7a9918..ae99d1a 100644 --- a/Objects/stringlib/codecs.h +++ b/Objects/stringlib/codecs.h @@ -334,7 +334,6 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, i += (endpos - startpos - 1); break; - case _Py_ERROR_SURROGATEPASS: for (k=startpos; k<endpos; k++) { ch = data[k]; @@ -345,6 +344,22 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, i += (endpos - startpos - 1); break; + case _Py_ERROR_BACKSLASHREPLACE: + p = backslashreplace(&writer, max_char_size, p, + unicode, startpos, endpos); + if (p == NULL) + goto error; + i += (endpos - startpos - 1); + break; + + case _Py_ERROR_XMLCHARREFREPLACE: + p = xmlcharrefreplace(&writer, max_char_size, p, + unicode, startpos, endpos); + if (p == NULL) + goto error; + i += (endpos - startpos - 1); + break; + case _Py_ERROR_SURROGATEESCAPE: for (k=startpos; k<endpos; k++) { ch = data[k]; @@ -359,7 +374,6 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, startpos = k; assert(startpos < endpos); /* fall through the default handler */ - default: rep = unicode_encode_call_errorhandler( errors, &error_handler_obj, "utf-8", "surrogates not allowed", |