diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2012-05-29 10:57:52 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2012-05-29 10:57:52 (GMT) |
commit | d3f0882dfb3a15d604de1b1620b2bf8de9d643bb (patch) | |
tree | 16c78bd58f57ffce487f71bb075372d72cfdcbde /Include | |
parent | a1b0c9fc4d68cd4e1103456d0cedf2ef3bbbfe9a (diff) | |
download | cpython-d3f0882dfb3a15d604de1b1620b2bf8de9d643bb.zip cpython-d3f0882dfb3a15d604de1b1620b2bf8de9d643bb.tar.gz cpython-d3f0882dfb3a15d604de1b1620b2bf8de9d643bb.tar.bz2 |
Issue #14744: Use the new _PyUnicodeWriter internal API to speed up str%args and str.format(args)
* Formatting string, int, float and complex use the _PyUnicodeWriter API. It
avoids a temporary buffer in most cases.
* Add _PyUnicodeWriter_WriteStr() to restore the PyAccu optimization: just
keep a reference to the string if the output is only composed of one string
* Disable overallocation when formatting the last argument of str%args and
str.format(args)
* Overallocation allocates at least 100 characters: add min_length attribute
to the _PyUnicodeWriter structure
* Add new private functions: _PyUnicode_FastCopyCharacters(),
_PyUnicode_FastFill() and _PyUnicode_FromASCII()
The speed up is around 20% in average.
Diffstat (limited to 'Include')
-rw-r--r-- | Include/complexobject.h | 10 | ||||
-rw-r--r-- | Include/floatobject.h | 10 | ||||
-rw-r--r-- | Include/longobject.h | 18 | ||||
-rw-r--r-- | Include/unicodeobject.h | 95 |
4 files changed, 114 insertions, 19 deletions
diff --git a/Include/complexobject.h b/Include/complexobject.h index 3e4ecff..1934f3b 100644 --- a/Include/complexobject.h +++ b/Include/complexobject.h @@ -63,10 +63,12 @@ PyAPI_FUNC(Py_complex) PyComplex_AsCComplex(PyObject *op); /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). */ #ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject *) _PyComplex_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); +PyAPI_FUNC(int) _PyComplex_FormatAdvancedWriter( + _PyUnicodeWriter *writer, + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end); #endif #ifdef __cplusplus diff --git a/Include/floatobject.h b/Include/floatobject.h index 0ca4881..46ef6e6 100644 --- a/Include/floatobject.h +++ b/Include/floatobject.h @@ -112,10 +112,12 @@ PyAPI_FUNC(int) PyFloat_ClearFreeList(void); /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). */ -PyAPI_FUNC(PyObject *) _PyFloat_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); +PyAPI_FUNC(int) _PyFloat_FormatAdvancedWriter( + _PyUnicodeWriter *writer, + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end); #endif /* Py_LIMITED_API */ #ifdef __cplusplus diff --git a/Include/longobject.h b/Include/longobject.h index c58ddf4..d741f1b 100644 --- a/Include/longobject.h +++ b/Include/longobject.h @@ -151,14 +151,22 @@ PyAPI_FUNC(int) _PyLong_AsByteArray(PyLongObject* v, /* _PyLong_Format: Convert the long to a string object with given base, appending a base prefix of 0[box] if base is 2, 8 or 16. */ -PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *aa, int base); +PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *obj, int base); + +PyAPI_FUNC(int) _PyLong_FormatWriter( + _PyUnicodeWriter *writer, + PyObject *obj, + int base, + int alternate); /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). */ -PyAPI_FUNC(PyObject *) _PyLong_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); +PyAPI_FUNC(int) _PyLong_FormatAdvancedWriter( + _PyUnicodeWriter *writer, + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end); #endif /* Py_LIMITED_API */ /* These aren't really part of the long object, but they're handy. The diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 486d4fa..99ea48b 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -648,8 +648,20 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters( Py_ssize_t from_start, Py_ssize_t how_many ); + +/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so + may crash if parameters are invalid (e.g. if the output string + is too short). */ +PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters( + PyObject *to, + Py_ssize_t to_start, + PyObject *from, + Py_ssize_t from_start, + Py_ssize_t how_many + ); #endif +#ifndef Py_LIMITED_API /* Fill a string with a character: write fill_char into unicode[start:start+length]. @@ -658,13 +670,21 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters( Return the number of written character, or return -1 and raise an exception on error. */ -#ifndef Py_LIMITED_API PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill( PyObject *unicode, Py_ssize_t start, Py_ssize_t length, Py_UCS4 fill_char ); + +/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash + if parameters are invalid (e.g. if length is longer than the string). */ +PyAPI_FUNC(void) _PyUnicode_FastFill( + PyObject *unicode, + Py_ssize_t start, + Py_ssize_t length, + Py_UCS4 fill_char + ); #endif /* Create a Unicode Object from the Py_UNICODE buffer u of the given @@ -696,13 +716,19 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromString( const char *u /* UTF-8 encoded string */ ); +#ifndef Py_LIMITED_API /* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters. Scan the string to find the maximum character. */ -#ifndef Py_LIMITED_API PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData( int kind, const void *buffer, Py_ssize_t size); + +/* Create a new string from a buffer of ASCII characters. + WARNING: Don't check if the string contains any non-ASCII character. */ +PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII( + const char *buffer, + Py_ssize_t size); #endif PyAPI_FUNC(PyObject*) PyUnicode_Substring( @@ -865,12 +891,69 @@ PyAPI_FUNC(PyObject *) PyUnicode_FromFormat( ); #ifndef Py_LIMITED_API +typedef struct { + PyObject *buffer; + void *data; + enum PyUnicode_Kind kind; + Py_UCS4 maxchar; + Py_ssize_t size; + Py_ssize_t pos; + /* minimum length of the buffer when overallocation is enabled, + see _PyUnicodeWriter_Init() */ + Py_ssize_t min_length; + struct { + unsigned char overallocate:1; + /* If readonly is 1, buffer is a shared string (cannot be modified) + and size is set to 0. */ + unsigned char readonly:1; + } flags; +} _PyUnicodeWriter ; + +/* Initialize a Unicode writer. + + If min_length is greater than zero, _PyUnicodeWriter_Prepare() + overallocates the buffer and min_length is the minimum length in characters + of the buffer. */ +PyAPI_FUNC(void) +_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length); + +/* Prepare the buffer to write 'length' characters + with the specified maximum character. + + Return 0 on success, raise an exception and return -1 on error. */ +#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \ + (((MAXCHAR) <= (WRITER)->maxchar \ + && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \ + ? 0 \ + : (((LENGTH) == 0) \ + ? 0 \ + : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR)))) + +/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro + instead. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar); + +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str); + +PyAPI_FUNC(PyObject *) +_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer); + +PyAPI_FUNC(void) +_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer); +#endif + +#ifndef Py_LIMITED_API /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). */ -PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); +PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter( + _PyUnicodeWriter *writer, + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end); #endif PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **); |