summary refs log tree commit diff stats
path: root/Objects/stringlib
diff options
context:
space:
mode:
author Victor Stinner <victor.stinner@gmail.com> 2015-10-08 23:39:28 (GMT)
committer Victor Stinner <victor.stinner@gmail.com> 2015-10-08 23:39:28 (GMT)
commit e7bf86cd7d7c9a3924501875a08c4ef4a0063103 (patch)
tree e0097b0f4fb0194d233d1d9f58bf8570815873c3 /Objects/stringlib
parent fdfbf781140f22619b0ef6bfeac792496774bb69 (diff)
download cpython-e7bf86cd7d7c9a3924501875a08c4ef4a0063103.zip
cpython-e7bf86cd7d7c9a3924501875a08c4ef4a0063103.tar.gz
cpython-e7bf86cd7d7c9a3924501875a08c4ef4a0063103.tar.bz2
Optimize backslashreplace error handler
Issue #25318: Optimize the backslashreplace and xmlcharrefreplace error handlers in the UTF-8 encoder. Also optimize the backslashreplace error handler for the ASCII and Latin1 encoders. Use the new _PyBytesWriter API to optimize these error handlers for the encoders. This avoids creating an exception and calling the slow implementation of the error handler.
Diffstat (limited to 'Objects/stringlib')
-rw-r--r-- Objects/stringlib/codecs.h 18
1 files changed, 16 insertions, 2 deletions
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
index d7a9918..ae99d1a 100644
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -334,7 +334,6 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
i += (endpos - startpos - 1);
break;
-
case _Py_ERROR_SURROGATEPASS:
for (k=startpos; k<endpos; k++) {
ch = data[k];
@@ -345,6 +344,22 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
i += (endpos - startpos - 1);
break;
+ case _Py_ERROR_BACKSLASHREPLACE:
+ p = backslashreplace(&writer, max_char_size, p,
+ unicode, startpos, endpos);
+ if (p == NULL)
+ goto error;
+ i += (endpos - startpos - 1);
+ break;
+
+ case _Py_ERROR_XMLCHARREFREPLACE:
+ p = xmlcharrefreplace(&writer, max_char_size, p,
+ unicode, startpos, endpos);
+ if (p == NULL)
+ goto error;
+ i += (endpos - startpos - 1);
+ break;
+
case _Py_ERROR_SURROGATEESCAPE:
for (k=startpos; k<endpos; k++) {
ch = data[k];
@@ -359,7 +374,6 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
startpos = k;
assert(startpos < endpos);
/* fall through the default handler */
-
default:
rep = unicode_encode_call_errorhandler(
errors, &error_handler_obj, "utf-8", "surrogates not allowed",