summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@gmail.com> 2013-04-02 23:48:39 (GMT)
committer: Victor Stinner <victor.stinner@gmail.com> 2013-04-02 23:48:39 (GMT)
commit: cfc4c13b04223705a43595579b46020c9e876ac4 (patch)
tree: 238b3c5e9e2ac723e86fa2b3eb711cd5eb45eebf /Objects
parent: 4489e927a6e030f19fee77783ebb209119f4ad60 (diff)
download: cpython-cfc4c13b04223705a43595579b46020c9e876ac4.zip
cpython-cfc4c13b04223705a43595579b46020c9e876ac4.tar.gz
cpython-cfc4c13b04223705a43595579b46020c9e876ac4.tar.bz2
Add _PyUnicodeWriter_WriteSubstring() function
Write a function to enable more optimizations:
* If the substring is the whole string and overallocation is disabled, just keep a reference to the string instead of copying its characters.
* Avoid a call to the expensive _PyUnicode_FindMaxChar() function when possible.
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/stringlib/unicode_format.h 18
-rw-r--r-- Objects/unicodeobject.c 48
2 files changed, 45 insertions, 21 deletions
diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h
index e9be516..2f58946 100644
--- a/Objects/stringlib/unicode_format.h
+++ b/Objects/stringlib/unicode_format.h
@@ -869,25 +869,19 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
SubString literal;
SubString field_name;
SubString format_spec;
- Py_UCS4 conversion, maxchar;
- Py_ssize_t sublen;
- int err;
+ Py_UCS4 conversion;
MarkupIterator_init(&iter, input->str, input->start, input->end);
while ((result = MarkupIterator_next(&iter, &literal, &field_present,
&field_name, &format_spec,
&conversion,
&format_spec_needs_expanding)) == 2) {
- sublen = literal.end - literal.start;
- if (sublen) {
- maxchar = _PyUnicode_FindMaxChar(literal.str,
- literal.start, literal.end);
- err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar);
- if (err == -1)
+ if (literal.end != literal.start) {
+ if (!field_present && iter.str.start == iter.str.end)
+ writer->overallocate = 0;
+ if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
+ literal.start, literal.end) < 0)
return 0;
- _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
- literal.str, literal.start, sublen);
- writer->pos += sublen;
}
if (field_present) {
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 23e57f0..a926e37 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -12791,6 +12791,41 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
}
int
+_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
+ Py_ssize_t start, Py_ssize_t end)
+{
+ Py_UCS4 maxchar;
+ Py_ssize_t len;
+
+ if (PyUnicode_READY(str) == -1)
+ return -1;
+
+ assert(0 <= start);
+ assert(end <= PyUnicode_GET_LENGTH(str));
+ assert(start <= end);
+
+ if (end == 0)
+ return 0;
+
+ if (start == 0 && end == PyUnicode_GET_LENGTH(str))
+ return _PyUnicodeWriter_WriteStr(writer, str);
+
+ if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar)
+ maxchar = _PyUnicode_FindMaxChar(str, start, end);
+ else
+ maxchar = writer->maxchar;
+ len = end - start;
+
+ if (_PyUnicodeWriter_Prepare(writer, len, maxchar) < 0)
+ return -1;
+
+ _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+ str, start, len);
+ writer->pos += len;
+ return 0;
+}
+
+int
_PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer, const char *str, Py_ssize_t len)
{
Py_UCS4 maxchar;
@@ -13963,7 +13998,7 @@ PyUnicode_Format(PyObject *format, PyObject *args)
while (--ctx.fmtcnt >= 0) {
if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
- Py_ssize_t nonfmtpos, sublen;
+ Py_ssize_t nonfmtpos;
Py_UCS4 maxchar;
nonfmtpos = ctx.fmtpos++;
@@ -13976,15 +14011,10 @@ PyUnicode_Format(PyObject *format, PyObject *args)
ctx.fmtpos--;
ctx.writer.overallocate = 0;
}
- sublen = ctx.fmtpos - nonfmtpos;
- maxchar = _PyUnicode_FindMaxChar(ctx.fmtstr,
- nonfmtpos, nonfmtpos + sublen);
- if (_PyUnicodeWriter_Prepare(&ctx.writer, sublen, maxchar) == -1)
- goto onError;
- _PyUnicode_FastCopyCharacters(ctx.writer.buffer, ctx.writer.pos,
- ctx.fmtstr, nonfmtpos, sublen);
- ctx.writer.pos += sublen;
+ if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr,
+ nonfmtpos, ctx.fmtpos) < 0)
+ goto onError;
}
else {
ctx.fmtpos++;