Optimize error handlers of ASCII and Latin1 encoders when the replacement
string is pure ASCII: use _PyBytesWriter_WriteBytes(), don't check individual
characters.

Cleanup unicode_encode_ucs1():
* Rename repunicode to rep
* Clear rep object on error
* Factorize code between bytes and unicode path
author: Victor Stinner <victor.stinner@gmail.com> 2015-10-09 11:10:05 (GMT)
committer: Victor Stinner <victor.stinner@gmail.com> 2015-10-09 11:10:05 (GMT)
commit: 6bd525b656f75c9752d39d9c4be1e1b29fa67cdb (patch)
tree: 645853491c0ae3addc1f578dfe0b5345b3cd7b0f /Objects/stringlib
parent: ce179bf6baed91ba84cc3ff647e96287c3b8e2f2 (diff)
download: cpython-6bd525b656f75c9752d39d9c4be1e1b29fa67cdb.zip
cpython-6bd525b656f75c9752d39d9c4be1e1b29fa67cdb.tar.gz
cpython-6bd525b656f75c9752d39d9c4be1e1b29fa67cdb.tar.bz2
1 file changed, 7 insertions, 11 deletions
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
index 7e8d928..2beb604 100644
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -311,7 +311,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
 #if STRINGLIB_SIZEOF_CHAR > 1
         else if (Py_UNICODE_IS_SURROGATE(ch)) {
             Py_ssize_t startpos, endpos, newpos;
-            Py_ssize_t repsize, k;
+            Py_ssize_t k;
             if (error_handler == _Py_ERROR_UNKNOWN)
                 error_handler = get_error_handler(errors);
 
@@ -392,20 +392,12 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
                     p = _PyBytesWriter_WriteBytes(&writer, p,
                                                   PyBytes_AS_STRING(rep),
                                                   PyBytes_GET_SIZE(rep));
-                    if (p == NULL)
-                        goto error;
                 }
                 else {
                     /* rep is unicode */
                     if (PyUnicode_READY(rep) < 0)
                         goto error;
 
-                    repsize = PyUnicode_GET_LENGTH(rep);
-
-                    p = _PyBytesWriter_Prepare(&writer, p, repsize);
-                    if (p == NULL)
-                        goto error;
-
                     if (!PyUnicode_IS_ASCII(rep)) {
                         raise_encode_exception(&exc, "utf-8",
                                                unicode,
@@ -415,9 +407,13 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
                     }
 
                     assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
-                    memcpy(p, PyUnicode_DATA(rep), repsize);
-                    p += repsize;
+                    p = _PyBytesWriter_WriteBytes(&writer, p,
+                                                  PyUnicode_DATA(rep),
+                                                  PyUnicode_GET_LENGTH(rep));
                 }
+
+                if (p == NULL)
+                    goto error;
                 Py_CLEAR(rep);
 
                 i = newpos;
author	Victor Stinner <victor.stinner@gmail.com>	2015-10-09 11:10:05 (GMT)
committer	Victor Stinner <victor.stinner@gmail.com>	2015-10-09 11:10:05 (GMT)
commit	6bd525b656f75c9752d39d9c4be1e1b29fa67cdb (patch)
tree	645853491c0ae3addc1f578dfe0b5345b3cd7b0f /Objects/stringlib
parent	ce179bf6baed91ba84cc3ff647e96287c3b8e2f2 (diff)
download	cpython-6bd525b656f75c9752d39d9c4be1e1b29fa67cdb.zip cpython-6bd525b656f75c9752d39d9c4be1e1b29fa67cdb.tar.gz cpython-6bd525b656f75c9752d39d9c4be1e1b29fa67cdb.tar.bz2