summary refs log tree commit diff stats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
author Victor Stinner <victor.stinner@gmail.com> 2015-09-29 10:32:13 (GMT)
committer Victor Stinner <victor.stinner@gmail.com> 2015-09-29 10:32:13 (GMT)
commit c3713e9706e51bbd30958c27d35e7fda764b0c4a (patch)
tree 43a7def678412164cfe0fdcbc0ac1250d7d3ab10 /Objects/unicodeobject.c
parent 5fbeabcbb6ea1d4af91fea0bc96c3d01f47b728f (diff)
download cpython-c3713e9706e51bbd30958c27d35e7fda764b0c4a.zip
cpython-c3713e9706e51bbd30958c27d35e7fda764b0c4a.tar.gz
cpython-c3713e9706e51bbd30958c27d35e7fda764b0c4a.tar.bz2
Optimize ascii/latin1+surrogateescape encoders
Issue #25227: Optimize ASCII and latin1 encoders with the ``surrogateescape`` error handler: the encoders are now up to 3 times as fast. Initial patch written by Serhiy Storchaka.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c 16
1 files changed, 16 insertions, 0 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index da2aac7..6657cd4 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6532,6 +6532,22 @@ unicode_encode_ucs1(PyObject *unicode,
pos = collend;
break;
+ case _Py_ERROR_SURROGATEESCAPE:
+ for (i = collstart; i < collend; ++i) {
+ ch = PyUnicode_READ(kind, data, i);
+ if (ch < 0xdc80 || 0xdcff < ch) {
+ /* Not a UTF-8b surrogate */
+ break;
+ }
+ *str++ = (char)(ch - 0xdc00);
+ ++pos;
+ }
+ if (i >= collend)
+ break;
+ collstart = pos;
+ assert(collstart != collend);
+ /* fallback to general error handling */
+
default:
repunicode = unicode_encode_call_errorhandler(errors, &error_handler_obj,
encoding, reason, unicode, &exc,