Optimize ascii/latin1+surrogateescape encoders

Issue #25227: Optimize the ASCII and Latin-1 encoders when they are used with the ``surrogateescape`` error handler: the encoders are now up to 3 times as fast. Initial patch written by Serhiy Storchaka.
author: Victor Stinner <victor.stinner@gmail.com> 2015-09-29 10:32:13 (GMT)
committer: Victor Stinner <victor.stinner@gmail.com> 2015-09-29 10:32:13 (GMT)
commit: c3713e9706e51bbd30958c27d35e7fda764b0c4a (patch)
tree: 43a7def678412164cfe0fdcbc0ac1250d7d3ab10 /Objects/unicodeobject.c
parent: 5fbeabcbb6ea1d4af91fea0bc96c3d01f47b728f (diff)
download: cpython-c3713e9706e51bbd30958c27d35e7fda764b0c4a.zip
cpython-c3713e9706e51bbd30958c27d35e7fda764b0c4a.tar.gz
cpython-c3713e9706e51bbd30958c27d35e7fda764b0c4a.tar.bz2
1 files changed, 16 insertions, 0 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index da2aac7..6657cd4 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6532,6 +6532,22 @@ unicode_encode_ucs1(PyObject *unicode,
                 pos = collend;
                 break;
 
+            case _Py_ERROR_SURROGATEESCAPE:
+                for (i = collstart; i < collend; ++i) {
+                    ch = PyUnicode_READ(kind, data, i);
+                    if (ch < 0xdc80 || 0xdcff < ch) {
+                        /* Not a UTF-8b surrogate */
+                        break;
+                    }
+                    *str++ = (char)(ch - 0xdc00);
+                    ++pos;
+                }
+                if (i >= collend)
+                    break;
+                collstart = pos;
+                assert(collstart != collend);
+                /* fallback to general error handling */
+
             default:
                 repunicode = unicode_encode_call_errorhandler(errors, &error_handler_obj,
                                                               encoding, reason, unicode, &exc,
author	Victor Stinner <victor.stinner@gmail.com>	2015-09-29 10:32:13 (GMT)
committer	Victor Stinner <victor.stinner@gmail.com>	2015-09-29 10:32:13 (GMT)
commit	c3713e9706e51bbd30958c27d35e7fda764b0c4a (patch)
tree	43a7def678412164cfe0fdcbc0ac1250d7d3ab10 /Objects/unicodeobject.c
parent	5fbeabcbb6ea1d4af91fea0bc96c3d01f47b728f (diff)
download	cpython-c3713e9706e51bbd30958c27d35e7fda764b0c4a.zip cpython-c3713e9706e51bbd30958c27d35e7fda764b0c4a.tar.gz cpython-c3713e9706e51bbd30958c27d35e7fda764b0c4a.tar.bz2