diff options
author | Inada Naoki <songofacandy@gmail.com> | 2020-02-27 04:48:59 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-02-27 04:48:59 (GMT) |
commit | 02a4d57263a9846de35b0db12763ff9e7326f62c (patch) | |
tree | 7055c08b72477a75014f9cc65f95ee5ec23d95da /Objects/stringlib | |
parent | 0c6e3aa67b84adb0fb7c272ae06b7ae77f832295 (diff) | |
download | cpython-02a4d57263a9846de35b0db12763ff9e7326f62c.zip cpython-02a4d57263a9846de35b0db12763ff9e7326f62c.tar.gz cpython-02a4d57263a9846de35b0db12763ff9e7326f62c.tar.bz2 |
bpo-39087: Optimize PyUnicode_AsUTF8AndSize() (GH-18327)
Avoid using temporary bytes object.
Diffstat (limited to 'Objects/stringlib')
-rw-r--r-- | Objects/stringlib/codecs.h | 35 |
1 files changed, 17 insertions, 18 deletions
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h index 269a558..eb42e07 100644 --- a/Objects/stringlib/codecs.h +++ b/Objects/stringlib/codecs.h @@ -256,8 +256,9 @@ InvalidContinuation3: /* UTF-8 encoder specialized for a Unicode kind to avoid the slow PyUnicode_READ() macro. Delete some parts of the code depending on the kind: UCS-1 strings don't need to handle surrogates for example. */ -Py_LOCAL_INLINE(PyObject *) -STRINGLIB(utf8_encoder)(PyObject *unicode, +Py_LOCAL_INLINE(char *) +STRINGLIB(utf8_encoder)(_PyBytesWriter *writer, + PyObject *unicode, STRINGLIB_CHAR *data, Py_ssize_t size, _Py_error_handler error_handler, @@ -277,17 +278,16 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, #else /* STRINGLIB_SIZEOF_CHAR == 4 */ const Py_ssize_t max_char_size = 4; #endif - _PyBytesWriter writer; assert(size >= 0); - _PyBytesWriter_Init(&writer); - if (size > PY_SSIZE_T_MAX / max_char_size) { /* integer overflow */ - return PyErr_NoMemory(); + PyErr_NoMemory(); + return NULL; } - p = _PyBytesWriter_Alloc(&writer, size * max_char_size); + _PyBytesWriter_Init(writer); + p = _PyBytesWriter_Alloc(writer, size * max_char_size); if (p == NULL) return NULL; @@ -323,7 +323,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, endpos++; /* Only overallocate the buffer if it's not the last write */ - writer.overallocate = (endpos < size); + writer->overallocate = (endpos < size); switch (error_handler) { @@ -347,8 +347,8 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, case _Py_ERROR_BACKSLASHREPLACE: /* subtract preallocated bytes */ - writer.min_size -= max_char_size * (endpos - startpos); - p = backslashreplace(&writer, p, + writer->min_size -= max_char_size * (endpos - startpos); + p = backslashreplace(writer, p, unicode, startpos, endpos); if (p == NULL) goto error; @@ -357,8 +357,8 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, case _Py_ERROR_XMLCHARREFREPLACE: /* subtract preallocated bytes */ - writer.min_size -= max_char_size * (endpos - startpos); - p = xmlcharrefreplace(&writer, p, + writer->min_size -= max_char_size * (endpos - startpos); + p = xmlcharrefreplace(writer, p, unicode, startpos, endpos); if (p == NULL) goto error; @@ -387,10 +387,10 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, goto error; /* subtract preallocated bytes */ - writer.min_size -= max_char_size * (newpos - startpos); + writer->min_size -= max_char_size * (newpos - startpos); if (PyBytes_Check(rep)) { - p = _PyBytesWriter_WriteBytes(&writer, p, + p = _PyBytesWriter_WriteBytes(writer, p, PyBytes_AS_STRING(rep), PyBytes_GET_SIZE(rep)); } @@ -406,7 +406,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, goto error; } - p = _PyBytesWriter_WriteBytes(&writer, p, + p = _PyBytesWriter_WriteBytes(writer, p, PyUnicode_DATA(rep), PyUnicode_GET_LENGTH(rep)); } @@ -420,7 +420,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, /* If overallocation was disabled, ensure that it was the last write. Otherwise, we missed an optimization */ - assert(writer.overallocate || i == size); + assert(writer->overallocate || i == size); } else #if STRINGLIB_SIZEOF_CHAR > 2 @@ -449,14 +449,13 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, Py_XDECREF(error_handler_obj); Py_XDECREF(exc); #endif - return _PyBytesWriter_Finish(&writer, p); + return p; #if STRINGLIB_SIZEOF_CHAR > 1 error: Py_XDECREF(rep); Py_XDECREF(error_handler_obj); Py_XDECREF(exc); - _PyBytesWriter_Dealloc(&writer); return NULL; #endif } |