diff options
-rw-r--r-- | Include/internal/pycore_interp.h | 4 | ||||
-rw-r--r-- | Include/internal/pycore_pylifecycle.h | 1 | ||||
-rw-r--r-- | Objects/bytesobject.c | 91 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 59 | ||||
-rw-r--r-- | Python/pylifecycle.c | 5 |
5 files changed, 107 insertions, 53 deletions
diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index bf1769e..cfc2747 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -66,13 +66,13 @@ struct _Py_unicode_fs_codec { }; struct _Py_bytes_state { + PyObject *empty_string; PyBytesObject *characters[256]; - PyBytesObject *empty_string; }; struct _Py_unicode_state { // The empty Unicode object is a singleton to improve performance. - PyObject *empty; + PyObject *empty_string; /* Single character Unicode strings in the Latin-1 range are being shared as well. */ PyObject *latin1[256]; diff --git a/Include/internal/pycore_pylifecycle.h b/Include/internal/pycore_pylifecycle.h index 3b21737..bffc95b 100644 --- a/Include/internal/pycore_pylifecycle.h +++ b/Include/internal/pycore_pylifecycle.h @@ -32,6 +32,7 @@ PyAPI_FUNC(int) _Py_IsLocaleCoercionTarget(const char *ctype_loc); /* Various one-time initializers */ extern PyStatus _PyUnicode_Init(PyThreadState *tstate); +extern PyStatus _PyBytes_Init(PyThreadState *tstate); extern int _PyStructSequence_Init(void); extern int _PyLong_Init(PyThreadState *tstate); extern PyStatus _PyTuple_Init(PyThreadState *tstate); diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index ce006e1..782bc8e 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -4,8 +4,9 @@ #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() -#include "pycore_bytes_methods.h" -#include "pycore_object.h" +#include "pycore_bytes_methods.h" // _Py_bytes_startswith() +#include "pycore_initconfig.h" // _PyStatus_OK() +#include "pycore_object.h" // _PyObject_GC_TRACK #include "pycore_pymem.h" // PYMEM_CLEANBYTE #include "pystrhex.h" @@ -41,6 +42,44 @@ get_bytes_state(void) } +// Return a borrowed reference to the empty bytes string singleton. +static inline PyObject* bytes_get_empty(void) +{ + struct _Py_bytes_state *state = get_bytes_state(); + // bytes_get_empty() must not be called before _PyBytes_Init() + // or after _PyBytes_Fini() + assert(state->empty_string != NULL); + return state->empty_string; +} + + +// Return a strong reference to the empty bytes string singleton. +static inline PyObject* bytes_new_empty(void) +{ + PyObject *empty = bytes_get_empty(); + Py_INCREF(empty); + return (PyObject *)empty; +} + + +static int +bytes_create_empty_string_singleton(struct _Py_bytes_state *state) +{ + // Create the empty bytes string singleton + PyBytesObject *op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE); + if (op == NULL) { + return -1; + } + _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, 0); + op->ob_shash = -1; + op->ob_sval[0] = '\0'; + + assert(state->empty_string == NULL); + state->empty_string = (PyObject *)op; + return 0; +} + + /* For PyBytes_FromString(), the parameter `str' points to a null-terminated string containing exactly `size' bytes. @@ -70,12 +109,7 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc) assert(size >= 0); if (size == 0) { - struct _Py_bytes_state *state = get_bytes_state(); - op = state->empty_string; - if (op != NULL) { - Py_INCREF(op); - return (PyObject *)op; - } + return bytes_new_empty(); } if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) { @@ -94,13 +128,8 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc) } _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size); op->ob_shash = -1; - if (!use_calloc) + if (!use_calloc) { op->ob_sval[size] = '\0'; - /* empty byte string singleton */ - if (size == 0) { - struct _Py_bytes_state *state = get_bytes_state(); - Py_INCREF(op); - state->empty_string = op; } return (PyObject *) op; } @@ -122,6 +151,9 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) return (PyObject *)op; } } + if (size == 0) { + return bytes_new_empty(); + } op = (PyBytesObject *)_PyBytes_FromSize(size, 0); if (op == NULL) @@ -155,11 +187,7 @@ PyBytes_FromString(const char *str) struct _Py_bytes_state *state = get_bytes_state(); if (size == 0) { - op = state->empty_string; - if (op != NULL) { - Py_INCREF(op); - return (PyObject *)op; - } + return bytes_new_empty(); } else if (size == 1) { op = state->characters[*str & UCHAR_MAX]; @@ -178,11 +206,8 @@ PyBytes_FromString(const char *str) op->ob_shash = -1; memcpy(op->ob_sval, str, size+1); /* share short strings */ - if (size == 0) { - Py_INCREF(op); - state->empty_string = op; - } - else if (size == 1) { + if (size == 1) { + assert(state->characters[*str & UCHAR_MAX] == NULL); Py_INCREF(op); state->characters[*str & UCHAR_MAX] = op; } @@ -1272,7 +1297,7 @@ PyBytes_AsStringAndSize(PyObject *obj, /* -------------------------------------------------------------------- */ /* Methods */ -#define STRINGLIB_GET_EMPTY() get_bytes_state()->empty_string +#define STRINGLIB_GET_EMPTY() bytes_get_empty() #include "stringlib/stringdefs.h" @@ -3053,9 +3078,9 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize) goto error; } if (newsize == 0) { - *pv = _PyBytes_FromSize(0, 0); + *pv = bytes_new_empty(); Py_DECREF(v); - return (*pv == NULL) ? -1 : 0; + return 0; } /* XXX UNREF/NEWREF interface should be more symmetrical */ #ifdef Py_REF_DEBUG @@ -3084,6 +3109,18 @@ error: return -1; } + +PyStatus +_PyBytes_Init(PyThreadState *tstate) +{ + struct _Py_bytes_state *state = &tstate->interp->bytes; + if (bytes_create_empty_string_singleton(state) < 0) { + return _PyStatus_NO_MEMORY(); + } + return _PyStatus_OK(); +} + + void _PyBytes_Fini(PyThreadState *tstate) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5ba9951..55c8867 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -41,16 +41,15 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #define PY_SSIZE_T_CLEAN #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() -#include "pycore_bytes_methods.h" -#include "pycore_fileutils.h" -#include "pycore_initconfig.h" +#include "pycore_bytes_methods.h" // _Py_bytes_lower() +#include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // PyInterpreterState.fs_codec -#include "pycore_object.h" -#include "pycore_pathconfig.h" -#include "pycore_pylifecycle.h" +#include "pycore_object.h" // _PyObject_GC_TRACK() +#include "pycore_pathconfig.h" // _Py_DumpPathConfig() +#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding() #include "pycore_pystate.h" // _PyInterpreterState_GET() -#include "ucnhash.h" -#include "stringlib/eq.h" +#include "ucnhash.h" // _PyUnicode_Name_CAPI +#include "stringlib/eq.h" // unicode_eq() #ifdef MS_WINDOWS #include <windows.h> @@ -236,10 +235,12 @@ static inline PyObject* unicode_get_empty(void) struct _Py_unicode_state *state = get_unicode_state(); // unicode_get_empty() must not be called before _PyUnicode_Init() // or after _PyUnicode_Fini() - assert(state->empty != NULL); - return state->empty; + assert(state->empty_string != NULL); + return state->empty_string; } + +// Return a strong reference to the empty string singleton. static inline PyObject* unicode_new_empty(void) { PyObject *empty = unicode_get_empty(); @@ -1385,6 +1386,26 @@ _PyUnicode_Dump(PyObject *op) } #endif +static int +unicode_create_empty_string_singleton(struct _Py_unicode_state *state) +{ + // Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be + // optimized to always use state->empty_string without having to check if + // it is NULL or not. + PyObject *empty = PyUnicode_New(1, 0); + if (empty == NULL) { + return -1; + } + PyUnicode_1BYTE_DATA(empty)[0] = 0; + _PyUnicode_LENGTH(empty) = 0; + assert(_PyUnicode_CheckConsistency(empty, 1)); + + assert(state->empty_string == NULL); + state->empty_string = empty; + return 0; +} + + PyObject * PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) { @@ -1972,7 +1993,7 @@ static int unicode_is_singleton(PyObject *unicode) { struct _Py_unicode_state *state = get_unicode_state(); - if (unicode == state->empty) { + if (unicode == state->empty_string) { return 1; } PyASCIIObject *ascii = (PyASCIIObject *)unicode; @@ -15542,20 +15563,10 @@ _PyUnicode_Init(PyThreadState *tstate) 0x2029, /* PARAGRAPH SEPARATOR */ }; - // Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be - // optimized to always use state->empty without having to check if it is - // NULL or not. - PyObject *empty = PyUnicode_New(1, 0); - if (empty == NULL) { + struct _Py_unicode_state *state = &tstate->interp->unicode; + if (unicode_create_empty_string_singleton(state) < 0) { return _PyStatus_NO_MEMORY(); } - PyUnicode_1BYTE_DATA(empty)[0] = 0; - _PyUnicode_LENGTH(empty) = 0; - assert(_PyUnicode_CheckConsistency(empty, 1)); - - struct _Py_unicode_state *state = &tstate->interp->unicode; - assert(state->empty == NULL); - state->empty = empty; if (_Py_IsMainInterpreter(tstate)) { /* initialize the linebreak bloom filter */ @@ -16223,7 +16234,7 @@ _PyUnicode_Fini(PyThreadState *tstate) #endif /* __INSURE__ */ } - Py_CLEAR(state->empty); + Py_CLEAR(state->empty_string); for (Py_ssize_t i = 0; i < 256; i++) { Py_CLEAR(state->latin1[i]); diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 4b658f8..cd993ea 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -607,6 +607,11 @@ pycore_init_types(PyThreadState *tstate) return status; } + status = _PyBytes_Init(tstate); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + status = _PyExc_Init(tstate); if (_PyStatus_EXCEPTION(status)) { return status; |