diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2012-05-29 10:57:52 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2012-05-29 10:57:52 (GMT) |
commit | d3f0882dfb3a15d604de1b1620b2bf8de9d643bb (patch) | |
tree | 16c78bd58f57ffce487f71bb075372d72cfdcbde /Include/unicodeobject.h | |
parent | a1b0c9fc4d68cd4e1103456d0cedf2ef3bbbfe9a (diff) | |
download | cpython-d3f0882dfb3a15d604de1b1620b2bf8de9d643bb.zip cpython-d3f0882dfb3a15d604de1b1620b2bf8de9d643bb.tar.gz cpython-d3f0882dfb3a15d604de1b1620b2bf8de9d643bb.tar.bz2 |
Issue #14744: Use the new _PyUnicodeWriter internal API to speed up str%args and str.format(args)
* Formatting of string, int, float and complex values now uses the
_PyUnicodeWriter API, which avoids a temporary buffer in most cases.
* Add _PyUnicodeWriter_WriteStr() to restore the PyAccu optimization: just
keep a reference to the string if the output consists of a single string.
* Disable overallocation when formatting the last argument of str%args and
str.format(args)
* Overallocation allocates at least 100 characters: add a min_length attribute
to the _PyUnicodeWriter structure
* Add new private functions: _PyUnicode_FastCopyCharacters(),
_PyUnicode_FastFill() and _PyUnicode_FromASCII()
The speedup is around 20% on average.
Diffstat (limited to 'Include/unicodeobject.h')
-rw-r--r-- | Include/unicodeobject.h | 95 |
1 files changed, 89 insertions, 6 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 486d4fa..99ea48b 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -648,8 +648,20 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters( Py_ssize_t from_start, Py_ssize_t how_many ); + +/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so + may crash if parameters are invalid (e.g. if the output string + is too short). */ +PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters( + PyObject *to, + Py_ssize_t to_start, + PyObject *from, + Py_ssize_t from_start, + Py_ssize_t how_many + ); #endif +#ifndef Py_LIMITED_API /* Fill a string with a character: write fill_char into unicode[start:start+length]. @@ -658,13 +670,21 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters( Return the number of written character, or return -1 and raise an exception on error. */ -#ifndef Py_LIMITED_API PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill( PyObject *unicode, Py_ssize_t start, Py_ssize_t length, Py_UCS4 fill_char ); + +/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash + if parameters are invalid (e.g. if length is longer than the string). */ +PyAPI_FUNC(void) _PyUnicode_FastFill( + PyObject *unicode, + Py_ssize_t start, + Py_ssize_t length, + Py_UCS4 fill_char + ); #endif /* Create a Unicode Object from the Py_UNICODE buffer u of the given @@ -696,13 +716,19 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromString( const char *u /* UTF-8 encoded string */ ); +#ifndef Py_LIMITED_API /* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters. Scan the string to find the maximum character. */ -#ifndef Py_LIMITED_API PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData( int kind, const void *buffer, Py_ssize_t size); + +/* Create a new string from a buffer of ASCII characters. + WARNING: Don't check if the string contains any non-ASCII character. 
*/ +PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII( + const char *buffer, + Py_ssize_t size); #endif PyAPI_FUNC(PyObject*) PyUnicode_Substring( @@ -865,12 +891,69 @@ PyAPI_FUNC(PyObject *) PyUnicode_FromFormat( ); #ifndef Py_LIMITED_API +typedef struct { + PyObject *buffer; + void *data; + enum PyUnicode_Kind kind; + Py_UCS4 maxchar; + Py_ssize_t size; + Py_ssize_t pos; + /* minimum length of the buffer when overallocation is enabled, + see _PyUnicodeWriter_Init() */ + Py_ssize_t min_length; + struct { + unsigned char overallocate:1; + /* If readonly is 1, buffer is a shared string (cannot be modified) + and size is set to 0. */ + unsigned char readonly:1; + } flags; +} _PyUnicodeWriter ; + +/* Initialize a Unicode writer. + + If min_length is greater than zero, _PyUnicodeWriter_Prepare() + overallocates the buffer and min_length is the minimum length in characters + of the buffer. */ +PyAPI_FUNC(void) +_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length); + +/* Prepare the buffer to write 'length' characters + with the specified maximum character. + + Return 0 on success, raise an exception and return -1 on error. */ +#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \ + (((MAXCHAR) <= (WRITER)->maxchar \ + && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \ + ? 0 \ + : (((LENGTH) == 0) \ + ? 0 \ + : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR)))) + +/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro + instead. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar); + +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str); + +PyAPI_FUNC(PyObject *) +_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer); + +PyAPI_FUNC(void) +_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer); +#endif + +#ifndef Py_LIMITED_API /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). 
*/ -PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); +PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter( + _PyUnicodeWriter *writer, + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end); #endif PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **); |