From c41eed1a874e2f22bde45c3c89418414b7a37f46 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 23 Jun 2020 15:54:35 +0200 Subject: bpo-40521: Make bytes singletons per interpreter (GH-21074) Each interpreter now has its own empty bytes string and single byte character singletons. Replace STRINGLIB_EMPTY macro with STRINGLIB_GET_EMPTY() macro. --- Include/internal/pycore_interp.h | 6 ++ Include/internal/pycore_pylifecycle.h | 2 +- .../2020-05-20-01-17-34.bpo-40521.wvAehI.rst | 14 ++-- Objects/bytesobject.c | 82 +++++++++++++++------- Objects/stringlib/README.txt | 6 +- Objects/stringlib/asciilib.h | 2 +- Objects/stringlib/partition.h | 20 +++--- Objects/stringlib/stringdefs.h | 5 +- Objects/stringlib/ucs1lib.h | 2 +- Objects/stringlib/ucs2lib.h | 2 +- Objects/stringlib/ucs4lib.h | 2 +- Objects/stringlib/unicodedefs.h | 2 +- Python/pylifecycle.c | 4 +- 13 files changed, 96 insertions(+), 53 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 697d97a..64e891f 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -65,6 +65,11 @@ struct _Py_unicode_fs_codec { _Py_error_handler error_handler; }; +struct _Py_bytes_state { + PyBytesObject *characters[256]; + PyBytesObject *empty_string; +}; + struct _Py_unicode_state { struct _Py_unicode_fs_codec fs_codec; }; @@ -233,6 +238,7 @@ struct _is { */ PyLongObject* small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS]; #endif + struct _Py_bytes_state bytes; struct _Py_unicode_state unicode; struct _Py_float_state float_state; /* Using a cache is very effective since typically only a single slice is diff --git a/Include/internal/pycore_pylifecycle.h b/Include/internal/pycore_pylifecycle.h index 83ce1d2..9a3063a 100644 --- a/Include/internal/pycore_pylifecycle.h +++ b/Include/internal/pycore_pylifecycle.h @@ -63,7 +63,7 @@ extern void _PyDict_Fini(PyThreadState *tstate); extern void _PyTuple_Fini(PyThreadState *tstate); extern void _PyList_Fini(PyThreadState *tstate); extern void _PySet_Fini(PyThreadState *tstate); -extern void _PyBytes_Fini(void); +extern void _PyBytes_Fini(PyThreadState *tstate); extern void _PyFloat_Fini(PyThreadState *tstate); extern void _PySlice_Fini(PyThreadState *tstate); extern void _PyAsyncGen_Fini(PyThreadState *tstate); diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-05-20-01-17-34.bpo-40521.wvAehI.rst b/Misc/NEWS.d/next/Core and Builtins/2020-05-20-01-17-34.bpo-40521.wvAehI.rst index 24fd437..95fab36 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2020-05-20-01-17-34.bpo-40521.wvAehI.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2020-05-20-01-17-34.bpo-40521.wvAehI.rst @@ -1,5 +1,9 @@ -The tuple free lists, the empty tuple singleton, the list free list, the empty -frozenset singleton, the float free list, the slice cache, the dict free lists, -the frame free list, the asynchronous generator free lists, and the context -free list are no longer shared by all interpreters: each interpreter now its -has own free lists and caches. +Each interpreter now its has own free lists, singletons and caches: + +* Free lists: float, tuple, list, dict, frame, context, + asynchronous generator. +* Singletons: empty tuple, empty frozenset, empty bytes string, + single byte character. +* Slice cache. + +They are no longer shared by all interpreters. diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index d397214..ce006e1 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -18,9 +18,6 @@ class bytes "PyBytesObject *" "&PyBytes_Type" #include "clinic/bytesobject.c.h" -static PyBytesObject *characters[UCHAR_MAX + 1]; -static PyBytesObject *nullstring; - _Py_IDENTIFIER(__bytes__); /* PyBytesObject_SIZE gives the basic size of a string; any memory allocation @@ -35,6 +32,15 @@ _Py_IDENTIFIER(__bytes__); Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str); + +static struct _Py_bytes_state* +get_bytes_state(void) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + return &interp->bytes; +} + + /* For PyBytes_FromString(), the parameter `str' points to a null-terminated string containing exactly `size' bytes. @@ -63,9 +69,13 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc) PyBytesObject *op; assert(size >= 0); - if (size == 0 && (op = nullstring) != NULL) { - Py_INCREF(op); - return (PyObject *)op; + if (size == 0) { + struct _Py_bytes_state *state = get_bytes_state(); + op = state->empty_string; + if (op != NULL) { + Py_INCREF(op); + return (PyObject *)op; + } } if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) { @@ -88,8 +98,9 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc) op->ob_sval[size] = '\0'; /* empty byte string singleton */ if (size == 0) { - nullstring = op; + struct _Py_bytes_state *state = get_bytes_state(); Py_INCREF(op); + state->empty_string = op; } return (PyObject *) op; } @@ -103,11 +114,13 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) "Negative size passed to PyBytes_FromStringAndSize"); return NULL; } - if (size == 1 && str != NULL && - (op = characters[*str & UCHAR_MAX]) != NULL) - { - Py_INCREF(op); - return (PyObject *)op; + if (size == 1 && str != NULL) { + struct _Py_bytes_state *state = get_bytes_state(); + op = state->characters[*str & UCHAR_MAX]; + if (op != NULL) { + Py_INCREF(op); + return (PyObject *)op; + } } op = (PyBytesObject *)_PyBytes_FromSize(size, 0); @@ -119,8 +132,9 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) memcpy(op->ob_sval, str, size); /* share short strings */ if (size == 1) { - characters[*str & UCHAR_MAX] = op; + struct _Py_bytes_state *state = get_bytes_state(); Py_INCREF(op); + state->characters[*str & UCHAR_MAX] = op; } return (PyObject *) op; } @@ -138,13 +152,21 @@ PyBytes_FromString(const char *str) "byte string is too long"); return NULL; } - if (size == 0 && (op = nullstring) != NULL) { - Py_INCREF(op); - return (PyObject *)op; + + struct _Py_bytes_state *state = get_bytes_state(); + if (size == 0) { + op = state->empty_string; + if (op != NULL) { + Py_INCREF(op); + return (PyObject *)op; + } } - if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) { - Py_INCREF(op); - return (PyObject *)op; + else if (size == 1) { + op = state->characters[*str & UCHAR_MAX]; + if (op != NULL) { + Py_INCREF(op); + return (PyObject *)op; + } } /* Inline PyObject_NewVar */ @@ -157,11 +179,12 @@ PyBytes_FromString(const char *str) memcpy(op->ob_sval, str, size+1); /* share short strings */ if (size == 0) { - nullstring = op; Py_INCREF(op); - } else if (size == 1) { - characters[*str & UCHAR_MAX] = op; + state->empty_string = op; + } + else if (size == 1) { Py_INCREF(op); + state->characters[*str & UCHAR_MAX] = op; } return (PyObject *) op; } @@ -1249,6 +1272,8 @@ PyBytes_AsStringAndSize(PyObject *obj, /* -------------------------------------------------------------------- */ /* Methods */ +#define STRINGLIB_GET_EMPTY() get_bytes_state()->empty_string + #include "stringlib/stringdefs.h" #include "stringlib/fastsearch.h" @@ -1261,6 +1286,8 @@ PyBytes_AsStringAndSize(PyObject *obj, #include "stringlib/transmogrify.h" +#undef STRINGLIB_GET_EMPTY + PyObject * PyBytes_Repr(PyObject *obj, int smartquotes) { @@ -3058,12 +3085,13 @@ error: } void -_PyBytes_Fini(void) +_PyBytes_Fini(PyThreadState *tstate) { - int i; - for (i = 0; i < UCHAR_MAX + 1; i++) - Py_CLEAR(characters[i]); - Py_CLEAR(nullstring); + struct _Py_bytes_state* state = &tstate->interp->bytes; + for (int i = 0; i < UCHAR_MAX + 1; i++) { + Py_CLEAR(state->characters[i]); + } + Py_CLEAR(state->empty_string); } /*********************** Bytes Iterator ****************************/ diff --git a/Objects/stringlib/README.txt b/Objects/stringlib/README.txt index 8ff6ad8..e1e3292 100644 --- a/Objects/stringlib/README.txt +++ b/Objects/stringlib/README.txt @@ -11,10 +11,10 @@ STRINGLIB_CHAR the type used to hold a character (char or Py_UNICODE) -STRINGLIB_EMPTY +STRINGLIB_GET_EMPTY() - a PyObject representing the empty string, only to be used if - STRINGLIB_MUTABLE is 0 + returns a PyObject representing the empty string, only to be used if + STRINGLIB_MUTABLE is 0. It must not be NULL. Py_ssize_t STRINGLIB_LEN(PyObject*) diff --git a/Objects/stringlib/asciilib.h b/Objects/stringlib/asciilib.h index e69a2c0..8599d38 100644 --- a/Objects/stringlib/asciilib.h +++ b/Objects/stringlib/asciilib.h @@ -11,7 +11,7 @@ #define STRINGLIB_CHAR Py_UCS1 #define STRINGLIB_TYPE_NAME "unicode" #define STRINGLIB_PARSE_CODE "U" -#define STRINGLIB_EMPTY unicode_empty +#define STRINGLIB_GET_EMPTY() unicode_empty #define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE #define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK #define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL diff --git a/Objects/stringlib/partition.h b/Objects/stringlib/partition.h index ed32a6f..3731df5 100644 --- a/Objects/stringlib/partition.h +++ b/Objects/stringlib/partition.h @@ -37,10 +37,12 @@ STRINGLIB(partition)(PyObject* str_obj, #else Py_INCREF(str_obj); PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj); - Py_INCREF(STRINGLIB_EMPTY); - PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); - Py_INCREF(STRINGLIB_EMPTY); - PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY); + PyObject *empty = (PyObject*)STRINGLIB_GET_EMPTY(); + assert(empty != NULL); + Py_INCREF(empty); + PyTuple_SET_ITEM(out, 1, empty); + Py_INCREF(empty); + PyTuple_SET_ITEM(out, 2, empty); #endif return out; } @@ -90,10 +92,12 @@ STRINGLIB(rpartition)(PyObject* str_obj, return NULL; } #else - Py_INCREF(STRINGLIB_EMPTY); - PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY); - Py_INCREF(STRINGLIB_EMPTY); - PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); + PyObject *empty = (PyObject*)STRINGLIB_GET_EMPTY(); + assert(empty != NULL); + Py_INCREF(empty); + PyTuple_SET_ITEM(out, 0, empty); + Py_INCREF(empty); + PyTuple_SET_ITEM(out, 1, empty); Py_INCREF(str_obj); PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj); #endif diff --git a/Objects/stringlib/stringdefs.h b/Objects/stringlib/stringdefs.h index ce27f3e..c12ecc5 100644 --- a/Objects/stringlib/stringdefs.h +++ b/Objects/stringlib/stringdefs.h @@ -1,6 +1,10 @@ #ifndef STRINGLIB_STRINGDEFS_H #define STRINGLIB_STRINGDEFS_H +#ifndef STRINGLIB_GET_EMPTY +# error "STRINGLIB_GET_EMPTY macro must be defined" +#endif + /* this is sort of a hack. there's at least one place (formatting floats) where some stringlib code takes a different path if it's compiled as unicode. */ @@ -13,7 +17,6 @@ #define STRINGLIB_CHAR char #define STRINGLIB_TYPE_NAME "string" #define STRINGLIB_PARSE_CODE "S" -#define STRINGLIB_EMPTY nullstring #define STRINGLIB_ISSPACE Py_ISSPACE #define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r')) #define STRINGLIB_ISDECIMAL(x) ((x >= '0') && (x <= '9')) diff --git a/Objects/stringlib/ucs1lib.h b/Objects/stringlib/ucs1lib.h index bc4b104..bdf3035 100644 --- a/Objects/stringlib/ucs1lib.h +++ b/Objects/stringlib/ucs1lib.h @@ -11,7 +11,7 @@ #define STRINGLIB_CHAR Py_UCS1 #define STRINGLIB_TYPE_NAME "unicode" #define STRINGLIB_PARSE_CODE "U" -#define STRINGLIB_EMPTY unicode_empty +#define STRINGLIB_GET_EMPTY() unicode_empty #define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE #define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK #define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL diff --git a/Objects/stringlib/ucs2lib.h b/Objects/stringlib/ucs2lib.h index 86a1dff..9d68888 100644 --- a/Objects/stringlib/ucs2lib.h +++ b/Objects/stringlib/ucs2lib.h @@ -11,7 +11,7 @@ #define STRINGLIB_CHAR Py_UCS2 #define STRINGLIB_TYPE_NAME "unicode" #define STRINGLIB_PARSE_CODE "U" -#define STRINGLIB_EMPTY unicode_empty +#define STRINGLIB_GET_EMPTY() unicode_empty #define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE #define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK #define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL diff --git a/Objects/stringlib/ucs4lib.h b/Objects/stringlib/ucs4lib.h index 3c32a93..c7dfa52 100644 --- a/Objects/stringlib/ucs4lib.h +++ b/Objects/stringlib/ucs4lib.h @@ -11,7 +11,7 @@ #define STRINGLIB_CHAR Py_UCS4 #define STRINGLIB_TYPE_NAME "unicode" #define STRINGLIB_PARSE_CODE "U" -#define STRINGLIB_EMPTY unicode_empty +#define STRINGLIB_GET_EMPTY() unicode_empty #define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE #define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK #define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h index 3db5629..e4d4163 100644 --- a/Objects/stringlib/unicodedefs.h +++ b/Objects/stringlib/unicodedefs.h @@ -13,7 +13,7 @@ #define STRINGLIB_CHAR Py_UNICODE #define STRINGLIB_TYPE_NAME "unicode" #define STRINGLIB_PARSE_CODE "U" -#define STRINGLIB_EMPTY unicode_empty +#define STRINGLIB_GET_EMPTY() unicode_empty #define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE #define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK #define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index aaea045..4bb32ab 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1262,9 +1262,7 @@ finalize_interp_types(PyThreadState *tstate, int is_main_interp) _PySlice_Fini(tstate); - if (is_main_interp) { - _PyBytes_Fini(); - } + _PyBytes_Fini(tstate); _PyUnicode_Fini(tstate); _PyFloat_Fini(tstate); _PyLong_Fini(tstate); -- cgit v0.12