summaryrefslogtreecommitdiffstats
path: root/Include
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2012-05-29 10:57:52 (GMT)
committerVictor Stinner <victor.stinner@gmail.com>2012-05-29 10:57:52 (GMT)
commitd3f0882dfb3a15d604de1b1620b2bf8de9d643bb (patch)
tree16c78bd58f57ffce487f71bb075372d72cfdcbde /Include
parenta1b0c9fc4d68cd4e1103456d0cedf2ef3bbbfe9a (diff)
downloadcpython-d3f0882dfb3a15d604de1b1620b2bf8de9d643bb.zip
cpython-d3f0882dfb3a15d604de1b1620b2bf8de9d643bb.tar.gz
cpython-d3f0882dfb3a15d604de1b1620b2bf8de9d643bb.tar.bz2
Issue #14744: Use the new _PyUnicodeWriter internal API to speed up str%args and str.format(args)
* Formatting string, int, float and complex use the _PyUnicodeWriter API. It avoids a temporary buffer in most cases. * Add _PyUnicodeWriter_WriteStr() to restore the PyAccu optimization: just keep a reference to the string if the output is only composed of one string * Disable overallocation when formatting the last argument of str%args and str.format(args) * Overallocation allocates at least 100 characters: add min_length attribute to the _PyUnicodeWriter structure * Add new private functions: _PyUnicode_FastCopyCharacters(), _PyUnicode_FastFill() and _PyUnicode_FromASCII() The speed up is around 20% in average.
Diffstat (limited to 'Include')
-rw-r--r--Include/complexobject.h10
-rw-r--r--Include/floatobject.h10
-rw-r--r--Include/longobject.h18
-rw-r--r--Include/unicodeobject.h95
4 files changed, 114 insertions, 19 deletions
diff --git a/Include/complexobject.h b/Include/complexobject.h
index 3e4ecff..1934f3b 100644
--- a/Include/complexobject.h
+++ b/Include/complexobject.h
@@ -63,10 +63,12 @@ PyAPI_FUNC(Py_complex) PyComplex_AsCComplex(PyObject *op);
/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject *) _PyComplex_FormatAdvanced(PyObject *obj,
- PyObject *format_spec,
- Py_ssize_t start,
- Py_ssize_t end);
+PyAPI_FUNC(int) _PyComplex_FormatAdvancedWriter(
+ _PyUnicodeWriter *writer,
+ PyObject *obj,
+ PyObject *format_spec,
+ Py_ssize_t start,
+ Py_ssize_t end);
#endif
#ifdef __cplusplus
diff --git a/Include/floatobject.h b/Include/floatobject.h
index 0ca4881..46ef6e6 100644
--- a/Include/floatobject.h
+++ b/Include/floatobject.h
@@ -112,10 +112,12 @@ PyAPI_FUNC(int) PyFloat_ClearFreeList(void);
/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
-PyAPI_FUNC(PyObject *) _PyFloat_FormatAdvanced(PyObject *obj,
- PyObject *format_spec,
- Py_ssize_t start,
- Py_ssize_t end);
+PyAPI_FUNC(int) _PyFloat_FormatAdvancedWriter(
+ _PyUnicodeWriter *writer,
+ PyObject *obj,
+ PyObject *format_spec,
+ Py_ssize_t start,
+ Py_ssize_t end);
#endif /* Py_LIMITED_API */
#ifdef __cplusplus
diff --git a/Include/longobject.h b/Include/longobject.h
index c58ddf4..d741f1b 100644
--- a/Include/longobject.h
+++ b/Include/longobject.h
@@ -151,14 +151,22 @@ PyAPI_FUNC(int) _PyLong_AsByteArray(PyLongObject* v,
/* _PyLong_Format: Convert the long to a string object with given base,
appending a base prefix of 0[box] if base is 2, 8 or 16. */
-PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *aa, int base);
+PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *obj, int base);
+
+PyAPI_FUNC(int) _PyLong_FormatWriter(
+ _PyUnicodeWriter *writer,
+ PyObject *obj,
+ int base,
+ int alternate);
/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
-PyAPI_FUNC(PyObject *) _PyLong_FormatAdvanced(PyObject *obj,
- PyObject *format_spec,
- Py_ssize_t start,
- Py_ssize_t end);
+PyAPI_FUNC(int) _PyLong_FormatAdvancedWriter(
+ _PyUnicodeWriter *writer,
+ PyObject *obj,
+ PyObject *format_spec,
+ Py_ssize_t start,
+ Py_ssize_t end);
#endif /* Py_LIMITED_API */
/* These aren't really part of the long object, but they're handy. The
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 486d4fa..99ea48b 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -648,8 +648,20 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
Py_ssize_t from_start,
Py_ssize_t how_many
);
+
+/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
+ may crash if parameters are invalid (e.g. if the output string
+ is too short). */
+PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
+ PyObject *to,
+ Py_ssize_t to_start,
+ PyObject *from,
+ Py_ssize_t from_start,
+ Py_ssize_t how_many
+ );
#endif
+#ifndef Py_LIMITED_API
/* Fill a string with a character: write fill_char into
unicode[start:start+length].
@@ -658,13 +670,21 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
Return the number of written character, or return -1 and raise an exception
on error. */
-#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
PyObject *unicode,
Py_ssize_t start,
Py_ssize_t length,
Py_UCS4 fill_char
);
+
+/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
+ if parameters are invalid (e.g. if length is longer than the string). */
+PyAPI_FUNC(void) _PyUnicode_FastFill(
+ PyObject *unicode,
+ Py_ssize_t start,
+ Py_ssize_t length,
+ Py_UCS4 fill_char
+ );
#endif
/* Create a Unicode Object from the Py_UNICODE buffer u of the given
@@ -696,13 +716,19 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromString(
const char *u /* UTF-8 encoded string */
);
+#ifndef Py_LIMITED_API
/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
Scan the string to find the maximum character. */
-#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
int kind,
const void *buffer,
Py_ssize_t size);
+
+/* Create a new string from a buffer of ASCII characters.
+ WARNING: Don't check if the string contains any non-ASCII character. */
+PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
+ const char *buffer,
+ Py_ssize_t size);
#endif
PyAPI_FUNC(PyObject*) PyUnicode_Substring(
@@ -865,12 +891,69 @@ PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
);
#ifndef Py_LIMITED_API
+typedef struct {
+ PyObject *buffer;
+ void *data;
+ enum PyUnicode_Kind kind;
+ Py_UCS4 maxchar;
+ Py_ssize_t size;
+ Py_ssize_t pos;
+ /* minimum length of the buffer when overallocation is enabled,
+ see _PyUnicodeWriter_Init() */
+ Py_ssize_t min_length;
+ struct {
+ unsigned char overallocate:1;
+ /* If readonly is 1, buffer is a shared string (cannot be modified)
+ and size is set to 0. */
+ unsigned char readonly:1;
+ } flags;
+} _PyUnicodeWriter ;
+
+/* Initialize a Unicode writer.
+
+ If min_length is greater than zero, _PyUnicodeWriter_Prepare()
+ overallocates the buffer and min_length is the minimum length in characters
+ of the buffer. */
+PyAPI_FUNC(void)
+_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length);
+
+/* Prepare the buffer to write 'length' characters
+ with the specified maximum character.
+
+ Return 0 on success, raise an exception and return -1 on error. */
+#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
+ (((MAXCHAR) <= (WRITER)->maxchar \
+ && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
+ ? 0 \
+ : (((LENGTH) == 0) \
+ ? 0 \
+ : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
+
+/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
+ instead. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
+ Py_ssize_t length, Py_UCS4 maxchar);
+
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str);
+
+PyAPI_FUNC(PyObject *)
+_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
+
+PyAPI_FUNC(void)
+_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
+#endif
+
+#ifndef Py_LIMITED_API
/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
-PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj,
- PyObject *format_spec,
- Py_ssize_t start,
- Py_ssize_t end);
+PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
+ _PyUnicodeWriter *writer,
+ PyObject *obj,
+ PyObject *format_spec,
+ Py_ssize_t start,
+ Py_ssize_t end);
#endif
PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);