diff options
author | Victor Stinner <vstinner@python.org> | 2020-06-25 12:07:40 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-25 12:07:40 (GMT) |
commit | 91698d8caa4b5bb6e8dbb64b156e8afe9e32cac1 (patch) | |
tree | afe2af94e5cf72cc28d405ae2c30dcb2210e52ea /Objects | |
parent | 0f8ec1fff01173803645ad6a8aea24997bf66fc1 (diff) | |
download | cpython-91698d8caa4b5bb6e8dbb64b156e8afe9e32cac1.zip cpython-91698d8caa4b5bb6e8dbb64b156e8afe9e32cac1.tar.gz cpython-91698d8caa4b5bb6e8dbb64b156e8afe9e32cac1.tar.bz2 |
bpo-40521: Optimize PyBytes_FromStringAndSize(str, 0) (GH-21142)
Always create the empty bytes string singleton.
Optimize PyBytes_FromStringAndSize(str, 0): it no longer has to check
whether the empty bytes string singleton has been created, because the
singleton is now always available.
Add functions:
* _PyBytes_Init()
* bytes_get_empty(), bytes_new_empty()
* bytes_create_empty_string_singleton()
* unicode_create_empty_string_singleton()
_Py_unicode_state: rename empty structure member to empty_string.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/bytesobject.c | 91 |
-rw-r--r-- | Objects/unicodeobject.c | 59 |
2 files changed, 99 insertions, 51 deletions
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index ce006e1..782bc8e 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -4,8 +4,9 @@ #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() -#include "pycore_bytes_methods.h" -#include "pycore_object.h" +#include "pycore_bytes_methods.h" // _Py_bytes_startswith() +#include "pycore_initconfig.h" // _PyStatus_OK() +#include "pycore_object.h" // _PyObject_GC_TRACK #include "pycore_pymem.h" // PYMEM_CLEANBYTE #include "pystrhex.h" @@ -41,6 +42,44 @@ get_bytes_state(void) } +// Return a borrowed reference to the empty bytes string singleton. +static inline PyObject* bytes_get_empty(void) +{ + struct _Py_bytes_state *state = get_bytes_state(); + // bytes_get_empty() must not be called before _PyBytes_Init() + // or after _PyBytes_Fini() + assert(state->empty_string != NULL); + return state->empty_string; +} + + +// Return a strong reference to the empty bytes string singleton. +static inline PyObject* bytes_new_empty(void) +{ + PyObject *empty = bytes_get_empty(); + Py_INCREF(empty); + return (PyObject *)empty; +} + + +static int +bytes_create_empty_string_singleton(struct _Py_bytes_state *state) +{ + // Create the empty bytes string singleton + PyBytesObject *op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE); + if (op == NULL) { + return -1; + } + _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, 0); + op->ob_shash = -1; + op->ob_sval[0] = '\0'; + + assert(state->empty_string == NULL); + state->empty_string = (PyObject *)op; + return 0; +} + + /* For PyBytes_FromString(), the parameter `str' points to a null-terminated string containing exactly `size' bytes. 
@@ -70,12 +109,7 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc) assert(size >= 0); if (size == 0) { - struct _Py_bytes_state *state = get_bytes_state(); - op = state->empty_string; - if (op != NULL) { - Py_INCREF(op); - return (PyObject *)op; - } + return bytes_new_empty(); } if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) { @@ -94,13 +128,8 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc) } _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size); op->ob_shash = -1; - if (!use_calloc) + if (!use_calloc) { op->ob_sval[size] = '\0'; - /* empty byte string singleton */ - if (size == 0) { - struct _Py_bytes_state *state = get_bytes_state(); - Py_INCREF(op); - state->empty_string = op; } return (PyObject *) op; } @@ -122,6 +151,9 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) return (PyObject *)op; } } + if (size == 0) { + return bytes_new_empty(); + } op = (PyBytesObject *)_PyBytes_FromSize(size, 0); if (op == NULL) @@ -155,11 +187,7 @@ PyBytes_FromString(const char *str) struct _Py_bytes_state *state = get_bytes_state(); if (size == 0) { - op = state->empty_string; - if (op != NULL) { - Py_INCREF(op); - return (PyObject *)op; - } + return bytes_new_empty(); } else if (size == 1) { op = state->characters[*str & UCHAR_MAX]; @@ -178,11 +206,8 @@ PyBytes_FromString(const char *str) op->ob_shash = -1; memcpy(op->ob_sval, str, size+1); /* share short strings */ - if (size == 0) { - Py_INCREF(op); - state->empty_string = op; - } - else if (size == 1) { + if (size == 1) { + assert(state->characters[*str & UCHAR_MAX] == NULL); Py_INCREF(op); state->characters[*str & UCHAR_MAX] = op; } @@ -1272,7 +1297,7 @@ PyBytes_AsStringAndSize(PyObject *obj, /* -------------------------------------------------------------------- */ /* Methods */ -#define STRINGLIB_GET_EMPTY() get_bytes_state()->empty_string +#define STRINGLIB_GET_EMPTY() bytes_get_empty() #include "stringlib/stringdefs.h" @@ -3053,9 +3078,9 @@ _PyBytes_Resize(PyObject **pv, 
Py_ssize_t newsize) goto error; } if (newsize == 0) { - *pv = _PyBytes_FromSize(0, 0); + *pv = bytes_new_empty(); Py_DECREF(v); - return (*pv == NULL) ? -1 : 0; + return 0; } /* XXX UNREF/NEWREF interface should be more symmetrical */ #ifdef Py_REF_DEBUG @@ -3084,6 +3109,18 @@ error: return -1; } + +PyStatus +_PyBytes_Init(PyThreadState *tstate) +{ + struct _Py_bytes_state *state = &tstate->interp->bytes; + if (bytes_create_empty_string_singleton(state) < 0) { + return _PyStatus_NO_MEMORY(); + } + return _PyStatus_OK(); +} + + void _PyBytes_Fini(PyThreadState *tstate) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5ba9951..55c8867 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -41,16 +41,15 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #define PY_SSIZE_T_CLEAN #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() -#include "pycore_bytes_methods.h" -#include "pycore_fileutils.h" -#include "pycore_initconfig.h" +#include "pycore_bytes_methods.h" // _Py_bytes_lower() +#include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // PyInterpreterState.fs_codec -#include "pycore_object.h" -#include "pycore_pathconfig.h" -#include "pycore_pylifecycle.h" +#include "pycore_object.h" // _PyObject_GC_TRACK() +#include "pycore_pathconfig.h" // _Py_DumpPathConfig() +#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding() #include "pycore_pystate.h" // _PyInterpreterState_GET() -#include "ucnhash.h" -#include "stringlib/eq.h" +#include "ucnhash.h" // _PyUnicode_Name_CAPI +#include "stringlib/eq.h" // unicode_eq() #ifdef MS_WINDOWS #include <windows.h> @@ -236,10 +235,12 @@ static inline PyObject* unicode_get_empty(void) struct _Py_unicode_state *state = get_unicode_state(); // unicode_get_empty() must not be called before _PyUnicode_Init() // or after _PyUnicode_Fini() - assert(state->empty != NULL); - return state->empty; + assert(state->empty_string != NULL); + return 
state->empty_string; } + +// Return a strong reference to the empty string singleton. static inline PyObject* unicode_new_empty(void) { PyObject *empty = unicode_get_empty(); @@ -1385,6 +1386,26 @@ _PyUnicode_Dump(PyObject *op) } #endif +static int +unicode_create_empty_string_singleton(struct _Py_unicode_state *state) +{ + // Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be + // optimized to always use state->empty_string without having to check if + // it is NULL or not. + PyObject *empty = PyUnicode_New(1, 0); + if (empty == NULL) { + return -1; + } + PyUnicode_1BYTE_DATA(empty)[0] = 0; + _PyUnicode_LENGTH(empty) = 0; + assert(_PyUnicode_CheckConsistency(empty, 1)); + + assert(state->empty_string == NULL); + state->empty_string = empty; + return 0; +} + + PyObject * PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) { @@ -1972,7 +1993,7 @@ static int unicode_is_singleton(PyObject *unicode) { struct _Py_unicode_state *state = get_unicode_state(); - if (unicode == state->empty) { + if (unicode == state->empty_string) { return 1; } PyASCIIObject *ascii = (PyASCIIObject *)unicode; @@ -15542,20 +15563,10 @@ _PyUnicode_Init(PyThreadState *tstate) 0x2029, /* PARAGRAPH SEPARATOR */ }; - // Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be - // optimized to always use state->empty without having to check if it is - // NULL or not. 
- PyObject *empty = PyUnicode_New(1, 0); - if (empty == NULL) { + struct _Py_unicode_state *state = &tstate->interp->unicode; + if (unicode_create_empty_string_singleton(state) < 0) { return _PyStatus_NO_MEMORY(); } - PyUnicode_1BYTE_DATA(empty)[0] = 0; - _PyUnicode_LENGTH(empty) = 0; - assert(_PyUnicode_CheckConsistency(empty, 1)); - - struct _Py_unicode_state *state = &tstate->interp->unicode; - assert(state->empty == NULL); - state->empty = empty; if (_Py_IsMainInterpreter(tstate)) { /* initialize the linebreak bloom filter */ @@ -16223,7 +16234,7 @@ _PyUnicode_Fini(PyThreadState *tstate) #endif /* __INSURE__ */ } - Py_CLEAR(state->empty); + Py_CLEAR(state->empty_string); for (Py_ssize_t i = 0; i < 256; i++) { Py_CLEAR(state->latin1[i]); |