Issue #28561: Clean up UTF-8 encoder: remove dead code, update comments, etc.

Patch by Xiang Zhang.
author: Serhiy Storchaka <storchaka@gmail.com> 2016-10-30 16:25:27 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2016-10-30 16:25:27 (GMT)
commit: 998c9cdd423409e2b40e02eb41614536f9d8005c (patch)
tree: 920f71e54681e719bfb9187052d26db97267e9c5 /Objects/stringlib
parent: b7d14a09c245f1e78911208b7f65bd09d7c03f2c (diff)
download: cpython-998c9cdd423409e2b40e02eb41614536f9d8005c.zip
cpython-998c9cdd423409e2b40e02eb41614536f9d8005c.tar.gz
cpython-998c9cdd423409e2b40e02eb41614536f9d8005c.tar.bz2
1 file changed, 4 insertions, 10 deletions
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
index a9d0a34..43f2f32 100644
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -262,9 +262,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
                         Py_ssize_t size,
                         const char *errors)
 {
-#define MAX_SHORT_UNICHARS 300  /* largest size we'll do on the stack */
-
-    Py_ssize_t i;                /* index into s of next input byte */
+    Py_ssize_t i;                /* index into data of next input character */
     char *p;                     /* next free byte in output buffer */
 #if STRINGLIB_SIZEOF_CHAR > 1
     PyObject *error_handler_obj = NULL;
@@ -389,7 +387,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
                     goto error;
 
                 /* subtract preallocated bytes */
-                writer.min_size -= max_char_size;
+                writer.min_size -= max_char_size * (newpos - startpos);
 
                 if (PyBytes_Check(rep)) {
                     p = _PyBytesWriter_WriteBytes(&writer, p,
@@ -402,14 +400,12 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
                         goto error;
 
                     if (!PyUnicode_IS_ASCII(rep)) {
-                        raise_encode_exception(&exc, "utf-8",
-                                               unicode,
-                                               i-1, i,
+                        raise_encode_exception(&exc, "utf-8", unicode,
+                                               startpos, endpos,
                                                "surrogates not allowed");
                         goto error;
                     }
 
-                    assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
                     p = _PyBytesWriter_WriteBytes(&writer, p,
                                                   PyUnicode_DATA(rep),
                                                   PyUnicode_GET_LENGTH(rep));
@@ -463,8 +459,6 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
     _PyBytesWriter_Dealloc(&writer);
     return NULL;
 #endif
-
-#undef MAX_SHORT_UNICHARS
 }
 
 /* The pattern for constructing UCS2-repeated masks. */
author	Serhiy Storchaka <storchaka@gmail.com>	2016-10-30 16:25:27 (GMT)
committer	Serhiy Storchaka <storchaka@gmail.com>	2016-10-30 16:25:27 (GMT)
commit	998c9cdd423409e2b40e02eb41614536f9d8005c (patch)
tree	920f71e54681e719bfb9187052d26db97267e9c5 /Objects/stringlib
parent	b7d14a09c245f1e78911208b7f65bd09d7c03f2c (diff)
download	cpython-998c9cdd423409e2b40e02eb41614536f9d8005c.zip cpython-998c9cdd423409e2b40e02eb41614536f9d8005c.tar.gz cpython-998c9cdd423409e2b40e02eb41614536f9d8005c.tar.bz2