diff options
author | Eric Snow <ericsnowcurrently@gmail.com> | 2022-02-08 20:39:07 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-02-08 20:39:07 (GMT) |
commit | 81c72044a181dbbfbf689d7a977d0d99090f26a8 (patch) | |
tree | 14329746bd6f179cf2ae7c9818e1ae881eb46360 /Objects/unicodeobject.c | |
parent | c018d3037b5b62e6d48d5985d1a37b91762fbffb (diff) | |
download | cpython-81c72044a181dbbfbf689d7a977d0d99090f26a8.zip cpython-81c72044a181dbbfbf689d7a977d0d99090f26a8.tar.gz cpython-81c72044a181dbbfbf689d7a977d0d99090f26a8.tar.bz2 |
bpo-46541: Replace core use of _Py_IDENTIFIER() with statically initialized global objects. (gh-30928)
We're no longer using _Py_IDENTIFIER() (or _Py_static_string()) in any core CPython code. It is still used in a number of non-builtin stdlib modules.
The replacement is: PyUnicodeObject (not pointer) fields under _PyRuntimeState, statically initialized as part of _PyRuntime. A new _Py_GET_GLOBAL_IDENTIFIER() macro facilitates lookup of the fields (along with _Py_GET_GLOBAL_STRING() for non-identifier strings).
https://bugs.python.org/issue46541#msg411799 explains the rationale for this change.
The core of the change is in:
* (new) Include/internal/pycore_global_strings.h - the declarations for the global strings, along with the macros
* Include/internal/pycore_runtime_init.h - added the static initializers for the global strings
* Include/internal/pycore_global_objects.h - where the struct in pycore_global_strings.h is hooked into _PyRuntimeState
* Tools/scripts/generate_global_objects.py - added generation of the global string declarations and static initializers
I've also added a --check flag to generate_global_objects.py (along with make check-global-objects) to check for unused global strings. That check is added to the PR CI config.
The remainder of this change updates the core code to use _Py_GET_GLOBAL_IDENTIFIER() instead of _Py_IDENTIFIER() and the related _Py*Id functions (likewise for _Py_GET_GLOBAL_STRING() instead of _Py_static_string()). This includes adding a few functions where there wasn't already an alternative to _Py*Id(), replacing the _Py_Identifier * parameter with PyObject *.
The following are not done (yet):
* stop using _Py_IDENTIFIER() in the stdlib modules
* (maybe) get rid of _Py_IDENTIFIER(), etc. entirely -- this may not be doable as at least one package on PyPI is using this (private) API
* (maybe) intern the strings during runtime init
https://bugs.python.org/issue46541
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 45 |
1 files changed, 12 insertions, 33 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 888871a..908ad51 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -260,11 +260,7 @@ get_unicode_state(void) // Return a borrowed reference to the empty string singleton. static inline PyObject* unicode_get_empty(void) { - struct _Py_unicode_state *state = get_unicode_state(); - // unicode_get_empty() must not be called before _PyUnicode_Init() - // or after _PyUnicode_Fini() - assert(state->empty_string != NULL); - return state->empty_string; + return &_Py_STR(empty); } @@ -1388,25 +1384,6 @@ _PyUnicode_Dump(PyObject *op) } #endif -static int -unicode_create_empty_string_singleton(struct _Py_unicode_state *state) -{ - // Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be - // optimized to always use state->empty_string without having to check if - // it is NULL or not. - PyObject *empty = PyUnicode_New(1, 0); - if (empty == NULL) { - return -1; - } - PyUnicode_1BYTE_DATA(empty)[0] = 0; - _PyUnicode_LENGTH(empty) = 0; - assert(_PyUnicode_CheckConsistency(empty, 1)); - - assert(state->empty_string == NULL); - state->empty_string = empty; - return 0; -} - PyObject * PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) @@ -2009,10 +1986,11 @@ unicode_dealloc(PyObject *unicode) static int unicode_is_singleton(PyObject *unicode) { - struct _Py_unicode_state *state = get_unicode_state(); - if (unicode == state->empty_string) { + if (unicode == &_Py_STR(empty)) { return 1; } + + struct _Py_unicode_state *state = get_unicode_state(); PyASCIIObject *ascii = (PyASCIIObject *)unicode; if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1) { Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0); @@ -15551,11 +15529,14 @@ _PyUnicode_InitState(PyInterpreterState *interp) PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *interp) { - struct _Py_unicode_state *state = &interp->unicode; - if (unicode_create_empty_string_singleton(state) < 0) { - return 
_PyStatus_NO_MEMORY(); + if (!_Py_IsMainInterpreter(interp)) { + return _PyStatus_OK(); } +#ifdef Py_DEBUG + assert(_PyUnicode_CheckConsistency(&_Py_STR(empty), 1)); +#endif + return _PyStatus_OK(); } @@ -15798,15 +15779,14 @@ PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list( static PyObject * unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored)) { - _Py_IDENTIFIER(iter); if (it->it_seq != NULL) { - return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter), + return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)), it->it_seq, it->it_index); } else { PyObject *u = (PyObject *)_PyUnicode_New(0); if (u == NULL) return NULL; - return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), u); + return Py_BuildValue("N(N)", _PyEval_GetBuiltin(&_Py_ID(iter)), u); } } @@ -16137,7 +16117,6 @@ _PyUnicode_Fini(PyInterpreterState *interp) for (Py_ssize_t i = 0; i < 256; i++) { Py_CLEAR(state->latin1[i]); } - Py_CLEAR(state->empty_string); } |