diff options
author | Eric Snow <ericsnowcurrently@gmail.com> | 2023-03-28 18:52:28 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-03-28 18:52:28 (GMT) |
commit | ba65a065cf07a7a9f53be61057a090f7311a5ad7 (patch) | |
tree | 279e23bf7b8b1cfbf9a1a976a086ca129bbbe5cf /Objects/unicodeobject.c | |
parent | 7703def37e4fa7d25c3d23756de8f527daa4e165 (diff) | |
download | cpython-ba65a065cf07a7a9f53be61057a090f7311a5ad7.zip cpython-ba65a065cf07a7a9f53be61057a090f7311a5ad7.tar.gz cpython-ba65a065cf07a7a9f53be61057a090f7311a5ad7.tar.bz2 |
gh-100227: Move the Dict of Interned Strings to PyInterpreterState (gh-102339)
We can revisit the options for keeping the dict of interned strings global later, if desired. For now that approach seems quite complex, so in the meantime we've gone with the simpler solution of isolating the dict per interpreter.
https://github.com/python/cpython/issues/100227
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 98 |
1 file changed, 56 insertions, 42 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b9fb531..85e5ae7 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -231,14 +231,32 @@ static inline PyObject* unicode_new_empty(void) Another way to look at this is that to say that the actual reference count of a string is: s->ob_refcnt + (s->state ? 2 : 0) */ -static inline PyObject *get_interned_dict(void) +static inline PyObject *get_interned_dict(PyInterpreterState *interp) { - return _Py_CACHED_OBJECT(interned_strings); + return _Py_INTERP_CACHED_OBJECT(interp, interned_strings); } -static inline void set_interned_dict(PyObject *dict) +static int +init_interned_dict(PyInterpreterState *interp) +{ + assert(get_interned_dict(interp) == NULL); + PyObject *interned = interned = PyDict_New(); + if (interned == NULL) { + return -1; + } + _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned; + return 0; +} + +static void +clear_interned_dict(PyInterpreterState *interp) { - _Py_CACHED_OBJECT(interned_strings) = dict; + PyObject *interned = get_interned_dict(interp); + if (interned != NULL) { + PyDict_Clear(interned); + Py_DECREF(interned); + _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL; + } } #define _Py_RETURN_UNICODE_EMPTY() \ @@ -1520,12 +1538,12 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end, static void unicode_dealloc(PyObject *unicode) { + PyInterpreterState *interp = _PyInterpreterState_GET(); #ifdef Py_DEBUG if (!unicode_is_finalizing() && unicode_is_singleton(unicode)) { _Py_FatalRefcountError("deallocating an Unicode singleton"); } #endif - PyObject *interned = get_interned_dict(); if (PyUnicode_CHECK_INTERNED(unicode)) { /* Revive the dead object temporarily. PyDict_DelItem() removes two references (key and value) which were ignored by @@ -1534,6 +1552,8 @@ unicode_dealloc(PyObject *unicode) PyDict_DelItem(). 
*/ assert(Py_REFCNT(unicode) == 0); Py_SET_REFCNT(unicode, 3); + PyObject *interned = get_interned_dict(interp); + assert(interned != NULL); if (PyDict_DelItem(interned, unicode) != 0) { _PyErr_WriteUnraisableMsg("deletion of interned string failed", NULL); @@ -14529,34 +14549,29 @@ _PyUnicode_InitState(PyInterpreterState *interp) PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *interp) { - if (!_Py_IsMainInterpreter(interp)) { - return _PyStatus_OK(); - } - // Initialize the global interned dict - PyObject *interned = PyDict_New(); - if (interned == NULL) { + if (init_interned_dict(interp)) { PyErr_Clear(); return _PyStatus_ERR("failed to create interned dict"); } - set_interned_dict(interned); - - /* Intern statically allocated string identifiers and deepfreeze strings. - * This must be done before any module initialization so that statically - * allocated string identifiers are used instead of heap allocated strings. - * Deepfreeze uses the interned identifiers if present to save space - * else generates them and they are interned to speed up dict lookups. - */ - _PyUnicode_InitStaticStrings(); + if (_Py_IsMainInterpreter(interp)) { + /* Intern statically allocated string identifiers and deepfreeze strings. + * This must be done before any module initialization so that statically + * allocated string identifiers are used instead of heap allocated strings. + * Deepfreeze uses the interned identifiers if present to save space + * else generates them and they are interned to speed up dict lookups. 
+ */ + _PyUnicode_InitStaticStrings(interp); #ifdef Py_DEBUG - assert(_PyUnicode_CheckConsistency(&_Py_STR(empty), 1)); + assert(_PyUnicode_CheckConsistency(&_Py_STR(empty), 1)); - for (int i = 0; i < 256; i++) { - assert(_PyUnicode_CheckConsistency(LATIN1(i), 1)); - } + for (int i = 0; i < 256; i++) { + assert(_PyUnicode_CheckConsistency(LATIN1(i), 1)); + } #endif + } return _PyStatus_OK(); } @@ -14586,7 +14601,7 @@ error: void -PyUnicode_InternInPlace(PyObject **p) +_PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p) { PyObject *s = *p; #ifdef Py_DEBUG @@ -14608,7 +14623,7 @@ PyUnicode_InternInPlace(PyObject **p) return; } - PyObject *interned = get_interned_dict(); + PyObject *interned = get_interned_dict(interp); assert(interned != NULL); PyObject *t = PyDict_SetDefault(interned, s, s); @@ -14629,6 +14644,13 @@ PyUnicode_InternInPlace(PyObject **p) _PyUnicode_STATE(s).interned = 1; } +void +PyUnicode_InternInPlace(PyObject **p) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_InternInPlace(interp, p); +} + // Function kept for the stable ABI. 
PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **); void @@ -14653,12 +14675,7 @@ PyUnicode_InternFromString(const char *cp) void _PyUnicode_ClearInterned(PyInterpreterState *interp) { - if (!_Py_IsMainInterpreter(interp)) { - // interned dict is shared by all interpreters - return; - } - - PyObject *interned = get_interned_dict(); + PyObject *interned = get_interned_dict(interp); if (interned == NULL) { return; } @@ -14693,9 +14710,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) total_length); #endif - PyDict_Clear(interned); - Py_DECREF(interned); - set_interned_dict(NULL); + clear_interned_dict(interp); } @@ -15108,7 +15123,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void) static inline int unicode_is_finalizing(void) { - return (get_interned_dict() == NULL); + return (get_interned_dict(_PyInterpreterState_Main()) == NULL); } #endif @@ -15131,14 +15146,13 @@ _PyUnicode_Fini(PyInterpreterState *interp) { struct _Py_unicode_state *state = &interp->unicode; - if (_Py_IsMainInterpreter(interp)) { - // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() - assert(get_interned_dict() == NULL); - // bpo-47182: force a unicodedata CAPI capsule re-import on - // subsequent initialization of main interpreter. - } + // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() + assert(get_interned_dict(interp) == NULL); _PyUnicode_FiniEncodings(&state->fs_codec); + + // bpo-47182: force a unicodedata CAPI capsule re-import on + // subsequent initialization of interpreter. interp->unicode.ucnhash_capi = NULL; unicode_clear_identifiers(state); |