diff options
| | | |
|---|---|---|
| author | Serhiy Storchaka <storchaka@gmail.com> | 2016-10-30 16:25:27 (GMT) |
| committer | Serhiy Storchaka <storchaka@gmail.com> | 2016-10-30 16:25:27 (GMT) |
| commit | 998c9cdd423409e2b40e02eb41614536f9d8005c (patch) | |
| tree | 920f71e54681e719bfb9187052d26db97267e9c5 /Objects/stringlib | |
| parent | b7d14a09c245f1e78911208b7f65bd09d7c03f2c (diff) | |
| download | cpython-998c9cdd423409e2b40e02eb41614536f9d8005c.zip cpython-998c9cdd423409e2b40e02eb41614536f9d8005c.tar.gz cpython-998c9cdd423409e2b40e02eb41614536f9d8005c.tar.bz2 | |
Issue #28561: Clean up UTF-8 encoder: remove dead code, update comments, etc.
Patch by Xiang Zhang.
Diffstat (limited to 'Objects/stringlib')
-rw-r--r-- | Objects/stringlib/codecs.h | 14 |
1 file changed, 4 insertions, 10 deletions
```diff
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
index a9d0a34..43f2f32 100644
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -262,9 +262,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
                         Py_ssize_t size,
                         const char *errors)
 {
-#define MAX_SHORT_UNICHARS 300  /* largest size we'll do on the stack */
-
-    Py_ssize_t i;                /* index into s of next input byte */
+    Py_ssize_t i;                /* index into data of next input character */
     char *p;                     /* next free byte in output buffer */
 #if STRINGLIB_SIZEOF_CHAR > 1
     PyObject *error_handler_obj = NULL;
@@ -389,7 +387,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
                     goto error;
 
                 /* subtract preallocated bytes */
-                writer.min_size -= max_char_size;
+                writer.min_size -= max_char_size * (newpos - startpos);
 
                 if (PyBytes_Check(rep)) {
                     p = _PyBytesWriter_WriteBytes(&writer, p,
@@ -402,14 +400,12 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
                         goto error;
 
                     if (!PyUnicode_IS_ASCII(rep)) {
-                        raise_encode_exception(&exc, "utf-8",
-                                               unicode,
-                                               i-1, i,
+                        raise_encode_exception(&exc, "utf-8", unicode,
+                                               startpos, endpos,
                                                "surrogates not allowed");
                         goto error;
                     }
 
-                    assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
                     p = _PyBytesWriter_WriteBytes(&writer, p,
                                                   PyUnicode_DATA(rep),
                                                   PyUnicode_GET_LENGTH(rep));
@@ -463,8 +459,6 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
     _PyBytesWriter_Dealloc(&writer);
     return NULL;
 #endif
-
-#undef MAX_SHORT_UNICHARS
 }
 
 /* The pattern for constructing UCS2-repeated masks. */
```