diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2012-05-07 10:47:02 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2012-05-07 10:47:02 (GMT) |
commit | 202fdca133ce8f5b0c37cca1353070e0721c688d (patch) | |
tree | 7e6f1c58ca7b836f8fb8132dea7f85b08d403894 /Objects/unicodeobject.c | |
parent | 9fad1604110cd7a0bb32792aa6d6c6a63018d51e (diff) | |
download | cpython-202fdca133ce8f5b0c37cca1353070e0721c688d.zip cpython-202fdca133ce8f5b0c37cca1353070e0721c688d.tar.gz cpython-202fdca133ce8f5b0c37cca1353070e0721c688d.tar.bz2 |
Close #14716: str.format() now uses the new "unicode writer" API instead of the
PyAccu API. For example, this makes str.format() 25% to 30% faster on Linux.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 258 |
1 file changed, 129 insertions, 129 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 129a5fc..0722312 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13200,6 +13200,135 @@ unicode_endswith(PyObject *self, return PyBool_FromLong(result); } +typedef struct { + PyObject *buffer; + void *data; + enum PyUnicode_Kind kind; + Py_UCS4 maxchar; + Py_ssize_t pos; +} unicode_writer_t; + +Py_LOCAL_INLINE(void) +unicode_writer_update(unicode_writer_t *writer) +{ + writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer); + writer->data = PyUnicode_DATA(writer->buffer); + writer->kind = PyUnicode_KIND(writer->buffer); +} + +Py_LOCAL(int) +unicode_writer_init(unicode_writer_t *writer, + Py_ssize_t length, Py_UCS4 maxchar) +{ + writer->pos = 0; + writer->buffer = PyUnicode_New(length, maxchar); + if (writer->buffer == NULL) + return -1; + unicode_writer_update(writer); + return 0; +} + +Py_LOCAL_INLINE(int) +unicode_writer_prepare(unicode_writer_t *writer, + Py_ssize_t length, Py_UCS4 maxchar) +{ + Py_ssize_t newlen; + PyObject *newbuffer; + + if (length > PY_SSIZE_T_MAX - writer->pos) { + PyErr_NoMemory(); + return -1; + } + newlen = writer->pos + length; + + if (newlen > PyUnicode_GET_LENGTH(writer->buffer)) { + /* overallocate 25% to limit the number of resize */ + if (newlen <= (PY_SSIZE_T_MAX - newlen / 4)) + newlen += newlen / 4; + + if (maxchar > writer->maxchar) { + /* resize + widen */ + newbuffer = PyUnicode_New(newlen, maxchar); + if (newbuffer == NULL) + return -1; + PyUnicode_CopyCharacters(newbuffer, 0, + writer->buffer, 0, writer->pos); + Py_DECREF(writer->buffer); + } + else { + newbuffer = resize_compact(writer->buffer, newlen); + if (newbuffer == NULL) + return -1; + } + writer->buffer = newbuffer; + unicode_writer_update(writer); + } + else if (maxchar > writer->maxchar) { + if (unicode_widen(&writer->buffer, writer->pos, maxchar) < 0) + return -1; + unicode_writer_update(writer); + } + return 0; +} + +Py_LOCAL_INLINE(int) +unicode_writer_write_str( + 
unicode_writer_t *writer, + PyObject *str, Py_ssize_t start, Py_ssize_t length) +{ + Py_UCS4 maxchar; + + assert(str != NULL); + assert(PyUnicode_Check(str)); + if (PyUnicode_READY(str) == -1) + return -1; + + assert(0 <= start); + assert(0 <= length); + assert(start + length <= PyUnicode_GET_LENGTH(str)); + if (length == 0) + return 0; + + maxchar = _PyUnicode_FindMaxChar(str, start, start + length); + if (unicode_writer_prepare(writer, length, maxchar) == -1) + return -1; + + assert((writer->pos + length) <= PyUnicode_GET_LENGTH(writer->buffer)); + copy_characters(writer->buffer, writer->pos, + str, start, length); + writer->pos += length; + return 0; +} + +Py_LOCAL_INLINE(int) +unicode_writer_write_char( + unicode_writer_t *writer, + Py_UCS4 ch) +{ + if (unicode_writer_prepare(writer, 1, ch) == -1) + return -1; + assert((writer->pos + 1) <= PyUnicode_GET_LENGTH(writer->buffer)); + PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch); + writer->pos += 1; + return 0; +} + +Py_LOCAL(PyObject *) +unicode_writer_finish(unicode_writer_t *writer) +{ + if (PyUnicode_Resize(&writer->buffer, writer->pos) < 0) { + Py_DECREF(writer->buffer); + return NULL; + } + return writer->buffer; +} + +Py_LOCAL(void) +unicode_writer_dealloc(unicode_writer_t *writer) +{ + Py_CLEAR(writer->buffer); +} + #include "stringlib/unicode_format.h" PyDoc_STRVAR(format__doc__, @@ -13649,135 +13778,6 @@ formatchar(PyObject *v) return (Py_UCS4) -1; } -typedef struct { - PyObject *buffer; - void *data; - enum PyUnicode_Kind kind; - Py_UCS4 maxchar; - Py_ssize_t pos; -} unicode_writer_t; - -Py_LOCAL_INLINE(void) -unicode_writer_update(unicode_writer_t *writer) -{ - writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer); - writer->data = PyUnicode_DATA(writer->buffer); - writer->kind = PyUnicode_KIND(writer->buffer); -} - -Py_LOCAL(int) -unicode_writer_init(unicode_writer_t *writer, - Py_ssize_t length, Py_UCS4 maxchar) -{ - writer->pos = 0; - writer->buffer = PyUnicode_New(length, 
maxchar); - if (writer->buffer == NULL) - return -1; - unicode_writer_update(writer); - return 0; -} - -Py_LOCAL_INLINE(int) -unicode_writer_prepare(unicode_writer_t *writer, - Py_ssize_t length, Py_UCS4 maxchar) -{ - Py_ssize_t newlen; - PyObject *newbuffer; - - if (length > PY_SSIZE_T_MAX - writer->pos) { - PyErr_NoMemory(); - return -1; - } - newlen = writer->pos + length; - - if (newlen > PyUnicode_GET_LENGTH(writer->buffer)) { - /* overallocate 25% to limit the number of resize */ - if (newlen <= (PY_SSIZE_T_MAX - newlen / 4)) - newlen += newlen / 4; - - if (maxchar > writer->maxchar) { - /* resize + widen */ - newbuffer = PyUnicode_New(newlen, maxchar); - if (newbuffer == NULL) - return -1; - PyUnicode_CopyCharacters(newbuffer, 0, - writer->buffer, 0, writer->pos); - Py_DECREF(writer->buffer); - } - else { - newbuffer = resize_compact(writer->buffer, newlen); - if (newbuffer == NULL) - return -1; - } - writer->buffer = newbuffer; - unicode_writer_update(writer); - } - else if (maxchar > writer->maxchar) { - if (unicode_widen(&writer->buffer, writer->pos, maxchar) < 0) - return -1; - unicode_writer_update(writer); - } - return 0; -} - -Py_LOCAL_INLINE(int) -unicode_writer_write_str( - unicode_writer_t *writer, - PyObject *str, Py_ssize_t start, Py_ssize_t length) -{ - Py_UCS4 maxchar; - - assert(str != NULL); - assert(PyUnicode_Check(str)); - if (PyUnicode_READY(str) == -1) - return -1; - - assert(0 <= start); - assert(0 <= length); - assert(start + length <= PyUnicode_GET_LENGTH(str)); - if (length == 0) - return 0; - - maxchar = _PyUnicode_FindMaxChar(str, start, start + length); - if (unicode_writer_prepare(writer, length, maxchar) == -1) - return -1; - - assert((writer->pos + length) <= PyUnicode_GET_LENGTH(writer->buffer)); - copy_characters(writer->buffer, writer->pos, - str, start, length); - writer->pos += length; - return 0; -} - -Py_LOCAL_INLINE(int) -unicode_writer_write_char( - unicode_writer_t *writer, - Py_UCS4 ch) -{ - if 
(unicode_writer_prepare(writer, 1, ch) == -1) - return -1; - assert((writer->pos + 1) <= PyUnicode_GET_LENGTH(writer->buffer)); - PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch); - writer->pos += 1; - return 0; -} - -Py_LOCAL(PyObject *) -unicode_writer_finish(unicode_writer_t *writer) -{ - if (PyUnicode_Resize(&writer->buffer, writer->pos) < 0) { - Py_DECREF(writer->buffer); - return NULL; - } - return writer->buffer; -} - -Py_LOCAL(void) -unicode_writer_dealloc(unicode_writer_t *writer) -{ - Py_CLEAR(writer->buffer); -} - PyObject * PyUnicode_Format(PyObject *format, PyObject *args) { |