summaryrefslogtreecommitdiffstats
path: root/Include/unicodeobject.h
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2012-05-29 10:57:52 (GMT)
committerVictor Stinner <victor.stinner@gmail.com>2012-05-29 10:57:52 (GMT)
commitd3f0882dfb3a15d604de1b1620b2bf8de9d643bb (patch)
tree16c78bd58f57ffce487f71bb075372d72cfdcbde /Include/unicodeobject.h
parenta1b0c9fc4d68cd4e1103456d0cedf2ef3bbbfe9a (diff)
downloadcpython-d3f0882dfb3a15d604de1b1620b2bf8de9d643bb.zip
cpython-d3f0882dfb3a15d604de1b1620b2bf8de9d643bb.tar.gz
cpython-d3f0882dfb3a15d604de1b1620b2bf8de9d643bb.tar.bz2
Issue #14744: Use the new _PyUnicodeWriter internal API to speed up str%args and str.format(args)
* Formatting string, int, float and complex use the _PyUnicodeWriter API. It avoids a temporary buffer in most cases. * Add _PyUnicodeWriter_WriteStr() to restore the PyAccu optimization: just keep a reference to the string if the output is only composed of one string * Disable overallocation when formatting the last argument of str%args and str.format(args) * Overallocation allocates at least 100 characters: add min_length attribute to the _PyUnicodeWriter structure * Add new private functions: _PyUnicode_FastCopyCharacters(), _PyUnicode_FastFill() and _PyUnicode_FromASCII() The speed up is around 20% in average.
Diffstat (limited to 'Include/unicodeobject.h')
-rw-r--r--Include/unicodeobject.h95
1 files changed, 89 insertions, 6 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 486d4fa..99ea48b 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -648,8 +648,20 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
Py_ssize_t from_start,
Py_ssize_t how_many
);
+
+/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
+ may crash if parameters are invalid (e.g. if the output string
+ is too short). */
+PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
+ PyObject *to,
+ Py_ssize_t to_start,
+ PyObject *from,
+ Py_ssize_t from_start,
+ Py_ssize_t how_many
+ );
#endif
+#ifndef Py_LIMITED_API
/* Fill a string with a character: write fill_char into
unicode[start:start+length].
@@ -658,13 +670,21 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
Return the number of written character, or return -1 and raise an exception
on error. */
-#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
PyObject *unicode,
Py_ssize_t start,
Py_ssize_t length,
Py_UCS4 fill_char
);
+
+/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
+ if parameters are invalid (e.g. if length is longer than the string). */
+PyAPI_FUNC(void) _PyUnicode_FastFill(
+ PyObject *unicode,
+ Py_ssize_t start,
+ Py_ssize_t length,
+ Py_UCS4 fill_char
+ );
#endif
/* Create a Unicode Object from the Py_UNICODE buffer u of the given
@@ -696,13 +716,19 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromString(
const char *u /* UTF-8 encoded string */
);
+#ifndef Py_LIMITED_API
/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
Scan the string to find the maximum character. */
-#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
int kind,
const void *buffer,
Py_ssize_t size);
+
+/* Create a new string from a buffer of ASCII characters.
+ WARNING: Don't check if the string contains any non-ASCII character. */
+PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
+ const char *buffer,
+ Py_ssize_t size);
#endif
PyAPI_FUNC(PyObject*) PyUnicode_Substring(
@@ -865,12 +891,69 @@ PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
);
#ifndef Py_LIMITED_API
+typedef struct {
+ PyObject *buffer;
+ void *data;
+ enum PyUnicode_Kind kind;
+ Py_UCS4 maxchar;
+ Py_ssize_t size;
+ Py_ssize_t pos;
+ /* minimum length of the buffer when overallocation is enabled,
+ see _PyUnicodeWriter_Init() */
+ Py_ssize_t min_length;
+ struct {
+ unsigned char overallocate:1;
+ /* If readonly is 1, buffer is a shared string (cannot be modified)
+ and size is set to 0. */
+ unsigned char readonly:1;
+ } flags;
+} _PyUnicodeWriter ;
+
+/* Initialize a Unicode writer.
+
+ If min_length is greater than zero, _PyUnicodeWriter_Prepare()
+ overallocates the buffer and min_length is the minimum length in characters
+ of the buffer. */
+PyAPI_FUNC(void)
+_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length);
+
+/* Prepare the buffer to write 'length' characters
+ with the specified maximum character.
+
+ Return 0 on success, raise an exception and return -1 on error. */
+#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
+ (((MAXCHAR) <= (WRITER)->maxchar \
+ && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
+ ? 0 \
+ : (((LENGTH) == 0) \
+ ? 0 \
+ : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
+
+/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
+ instead. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
+ Py_ssize_t length, Py_UCS4 maxchar);
+
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str);
+
+PyAPI_FUNC(PyObject *)
+_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
+
+PyAPI_FUNC(void)
+_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
+#endif
+
+#ifndef Py_LIMITED_API
/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
-PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj,
- PyObject *format_spec,
- Py_ssize_t start,
- Py_ssize_t end);
+PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
+ _PyUnicodeWriter *writer,
+ PyObject *obj,
+ PyObject *format_spec,
+ Py_ssize_t start,
+ Py_ssize_t end);
#endif
PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);