diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2013-04-02 23:48:39 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2013-04-02 23:48:39 (GMT) |
commit | cfc4c13b04223705a43595579b46020c9e876ac4 (patch) | |
tree | 238b3c5e9e2ac723e86fa2b3eb711cd5eb45eebf /Objects | |
parent | 4489e927a6e030f19fee77783ebb209119f4ad60 (diff) | |
download | cpython-cfc4c13b04223705a43595579b46020c9e876ac4.zip cpython-cfc4c13b04223705a43595579b46020c9e876ac4.tar.gz cpython-cfc4c13b04223705a43595579b46020c9e876ac4.tar.bz2 |
Add _PyUnicodeWriter_WriteSubstring() function
Write a function to enable more optimizations:
* If the substring is the whole string and overallocation is disabled, just
  keep a reference to the string instead of copying its characters.
* Avoid a call to the expensive _PyUnicode_FindMaxChar() function when
  possible.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/stringlib/unicode_format.h | 18 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 48 |
2 files changed, 45 insertions, 21 deletions
```diff
diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h
index e9be516..2f58946 100644
--- a/Objects/stringlib/unicode_format.h
+++ b/Objects/stringlib/unicode_format.h
@@ -869,25 +869,19 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
     SubString literal;
     SubString field_name;
     SubString format_spec;
-    Py_UCS4 conversion, maxchar;
-    Py_ssize_t sublen;
-    int err;
+    Py_UCS4 conversion;
 
     MarkupIterator_init(&iter, input->str, input->start, input->end);
     while ((result = MarkupIterator_next(&iter, &literal, &field_present,
                                          &field_name, &format_spec,
                                          &conversion,
                                          &format_spec_needs_expanding)) == 2) {
-        sublen = literal.end - literal.start;
-        if (sublen) {
-            maxchar = _PyUnicode_FindMaxChar(literal.str,
-                                             literal.start, literal.end);
-            err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar);
-            if (err == -1)
+        if (literal.end != literal.start) {
+            if (!field_present && iter.str.start == iter.str.end)
+                writer->overallocate = 0;
+            if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
+                                                literal.start, literal.end) < 0)
                 return 0;
-            _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
-                                          literal.str, literal.start, sublen);
-            writer->pos += sublen;
         }
 
         if (field_present) {
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 23e57f0..a926e37 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -12791,6 +12791,41 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
 }
 
 int
+_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
+                                Py_ssize_t start, Py_ssize_t end)
+{
+    Py_UCS4 maxchar;
+    Py_ssize_t len;
+
+    if (PyUnicode_READY(str) == -1)
+        return -1;
+
+    assert(0 <= start);
+    assert(end <= PyUnicode_GET_LENGTH(str));
+    assert(start <= end);
+
+    if (end == 0)
+        return 0;
+
+    if (start == 0 && end == PyUnicode_GET_LENGTH(str))
+        return _PyUnicodeWriter_WriteStr(writer, str);
+
+    if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar)
+        maxchar = _PyUnicode_FindMaxChar(str, start, end);
+    else
+        maxchar = writer->maxchar;
+    len = end - start;
+
+    if (_PyUnicodeWriter_Prepare(writer, len, maxchar) < 0)
+        return -1;
+
+    _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+                                  str, start, len);
+    writer->pos += len;
+    return 0;
+}
+
+int
 _PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer, const char *str, Py_ssize_t len)
 {
     Py_UCS4 maxchar;
@@ -13963,7 +13998,7 @@ PyUnicode_Format(PyObject *format, PyObject *args)
 
     while (--ctx.fmtcnt >= 0) {
         if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
-            Py_ssize_t nonfmtpos, sublen;
+            Py_ssize_t nonfmtpos;
             Py_UCS4 maxchar;
 
             nonfmtpos = ctx.fmtpos++;
@@ -13976,15 +14011,10 @@ PyUnicode_Format(PyObject *format, PyObject *args)
                 ctx.fmtpos--;
                 ctx.writer.overallocate = 0;
             }
-            sublen = ctx.fmtpos - nonfmtpos;
-            maxchar = _PyUnicode_FindMaxChar(ctx.fmtstr,
-                                             nonfmtpos, nonfmtpos + sublen);
-            if (_PyUnicodeWriter_Prepare(&ctx.writer, sublen, maxchar) == -1)
-                goto onError;
 
-            _PyUnicode_FastCopyCharacters(ctx.writer.buffer, ctx.writer.pos,
-                                          ctx.fmtstr, nonfmtpos, sublen);
-            ctx.writer.pos += sublen;
+            if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr,
+                                                nonfmtpos, ctx.fmtpos) < 0)
+                goto onError;
         }
         else {
             ctx.fmtpos++;
```