summaryrefslogtreecommitdiffstats
path: root/Objects/stringlib
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2012-05-29 10:57:52 (GMT)
committerVictor Stinner <victor.stinner@gmail.com>2012-05-29 10:57:52 (GMT)
commitd3f0882dfb3a15d604de1b1620b2bf8de9d643bb (patch)
tree16c78bd58f57ffce487f71bb075372d72cfdcbde /Objects/stringlib
parenta1b0c9fc4d68cd4e1103456d0cedf2ef3bbbfe9a (diff)
downloadcpython-d3f0882dfb3a15d604de1b1620b2bf8de9d643bb.zip
cpython-d3f0882dfb3a15d604de1b1620b2bf8de9d643bb.tar.gz
cpython-d3f0882dfb3a15d604de1b1620b2bf8de9d643bb.tar.bz2
Issue #14744: Use the new _PyUnicodeWriter internal API to speed up str%args and str.format(args)
* Formatting of string, int, float and complex now uses the _PyUnicodeWriter API. It avoids a temporary buffer in most cases.
* Add _PyUnicodeWriter_WriteStr() to restore the PyAccu optimization: just keep a reference to the string if the output is composed of only one string.
* Disable overallocation when formatting the last argument of str%args and str.format(args).
* Overallocation allocates at least 100 characters: add a min_length attribute to the _PyUnicodeWriter structure.
* Add new private functions: _PyUnicode_FastCopyCharacters(), _PyUnicode_FastFill() and _PyUnicode_FromASCII().

The speedup is around 20% on average.
Diffstat (limited to 'Objects/stringlib')
-rw-r--r--Objects/stringlib/asciilib.h2
-rw-r--r--Objects/stringlib/unicode_format.h46
2 files changed, 22 insertions, 26 deletions
diff --git a/Objects/stringlib/asciilib.h b/Objects/stringlib/asciilib.h
index ab5bae7..f62813d 100644
--- a/Objects/stringlib/asciilib.h
+++ b/Objects/stringlib/asciilib.h
@@ -18,7 +18,7 @@
#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
#define STRINGLIB_STR PyUnicode_1BYTE_DATA
#define STRINGLIB_LEN PyUnicode_GET_LENGTH
-#define STRINGLIB_NEW unicode_fromascii
+#define STRINGLIB_NEW(STR,LEN) _PyUnicode_FromASCII((char*)(STR),(LEN))
#define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h
index 9c0b0cf..d71cf44 100644
--- a/Objects/stringlib/unicode_format.h
+++ b/Objects/stringlib/unicode_format.h
@@ -499,26 +499,26 @@ render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *write
int ok = 0;
PyObject *result = NULL;
PyObject *format_spec_object = NULL;
- PyObject *(*formatter)(PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
- Py_ssize_t len;
+ int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
+ int err;
/* If we know the type exactly, skip the lookup of __format__ and just
call the formatter directly. */
if (PyUnicode_CheckExact(fieldobj))
- formatter = _PyUnicode_FormatAdvanced;
+ formatter = _PyUnicode_FormatAdvancedWriter;
else if (PyLong_CheckExact(fieldobj))
- formatter =_PyLong_FormatAdvanced;
+ formatter = _PyLong_FormatAdvancedWriter;
else if (PyFloat_CheckExact(fieldobj))
- formatter = _PyFloat_FormatAdvanced;
-
- /* XXX: for 2.6, convert format_spec to the appropriate type
- (unicode, str) */
+ formatter = _PyFloat_FormatAdvancedWriter;
+ else if (PyComplex_CheckExact(fieldobj))
+ formatter = _PyComplex_FormatAdvancedWriter;
if (formatter) {
/* we know exactly which formatter will be called when __format__ is
looked up, so call it directly, instead. */
- result = formatter(fieldobj, format_spec->str,
- format_spec->start, format_spec->end);
+ err = formatter(writer, fieldobj, format_spec->str,
+ format_spec->start, format_spec->end);
+ return (err == 0);
}
else {
/* We need to create an object out of the pointers we have, because
@@ -536,17 +536,11 @@ render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *write
}
if (result == NULL)
goto done;
- if (PyUnicode_READY(result) == -1)
- goto done;
- len = PyUnicode_GET_LENGTH(result);
- if (_PyUnicodeWriter_Prepare(writer,
- len, PyUnicode_MAX_CHAR_VALUE(result)) == -1)
+ if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
goto done;
- copy_characters(writer->buffer, writer->pos,
- result, 0, len);
- writer->pos += len;
ok = 1;
+
done:
Py_XDECREF(format_spec_object);
Py_XDECREF(result);
@@ -897,16 +891,19 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar);
if (err == -1)
return 0;
- copy_characters(writer->buffer, writer->pos,
- literal.str, literal.start, sublen);
+ _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+ literal.str, literal.start, sublen);
writer->pos += sublen;
}
- if (field_present)
+ if (field_present) {
+ if (iter.str.start == iter.str.end)
+ writer->flags.overallocate = 0;
if (!output_markup(&field_name, &format_spec,
format_spec_needs_expanding, conversion, writer,
args, kwargs, recursion_depth, auto_number))
return 0;
+ }
}
return result;
}
@@ -921,7 +918,7 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
int recursion_depth, AutoNumber *auto_number)
{
_PyUnicodeWriter writer;
- Py_ssize_t initlen;
+ Py_ssize_t minlen;
/* check the recursion level */
if (recursion_depth <= 0) {
@@ -930,9 +927,8 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
return NULL;
}
- initlen = PyUnicode_GET_LENGTH(input->str) + 100;
- if (_PyUnicodeWriter_Init(&writer, initlen, 127) == -1)
- return NULL;
+ minlen = PyUnicode_GET_LENGTH(input->str) + 100;
+ _PyUnicodeWriter_Init(&writer, minlen);
if (!do_markup(input, args, kwargs, &writer, recursion_depth,
auto_number)) {