summary | refs | log | tree | commit | diff | stats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
author: Eric Snow <ericsnowcurrently@gmail.com>, 2023-03-28 18:52:28 (GMT)
committer: GitHub <noreply@github.com>, 2023-03-28 18:52:28 (GMT)
commit: ba65a065cf07a7a9f53be61057a090f7311a5ad7 (patch)
tree: 279e23bf7b8b1cfbf9a1a976a086ca129bbbe5cf /Objects/unicodeobject.c
parent: 7703def37e4fa7d25c3d23756de8f527daa4e165 (diff)
download: cpython-ba65a065cf07a7a9f53be61057a090f7311a5ad7.zip
cpython-ba65a065cf07a7a9f53be61057a090f7311a5ad7.tar.gz
cpython-ba65a065cf07a7a9f53be61057a090f7311a5ad7.tar.bz2
gh-100227: Move the Dict of Interned Strings to PyInterpreterState (gh-102339)
We can revisit the options for keeping it global later, if desired. For now the approach seems quite complex, so we've gone with the simpler isolation solution in the meantime. https://github.com/python/cpython/issues/100227
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c 98
1 files changed, 56 insertions, 42 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index b9fb531..85e5ae7 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -231,14 +231,32 @@ static inline PyObject* unicode_new_empty(void)
Another way to look at this is that to say that the actual reference
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
*/
-static inline PyObject *get_interned_dict(void)
+static inline PyObject *get_interned_dict(PyInterpreterState *interp)
{
- return _Py_CACHED_OBJECT(interned_strings);
+ return _Py_INTERP_CACHED_OBJECT(interp, interned_strings);
}
-static inline void set_interned_dict(PyObject *dict)
+static int
+init_interned_dict(PyInterpreterState *interp)  /* returns 0 on success, -1 if the dict cannot be created */
+{
+ assert(get_interned_dict(interp) == NULL);  /* the slot must still be empty */
+ PyObject *interned = PyDict_New();
+ if (interned == NULL) {
+ return -1;
+ }
+ _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned;  /* store the new dict in this interpreter's slot */
+ return 0;
+}
+
+static void
+clear_interned_dict(PyInterpreterState *interp)
{
- _Py_CACHED_OBJECT(interned_strings) = dict;
+ PyObject *interned = get_interned_dict(interp);
+ if (interned != NULL) {
+ PyDict_Clear(interned);
+ Py_DECREF(interned);
+ _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL;
+ }
}
#define _Py_RETURN_UNICODE_EMPTY() \
@@ -1520,12 +1538,12 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end,
static void
unicode_dealloc(PyObject *unicode)
{
+ PyInterpreterState *interp = _PyInterpreterState_GET();
#ifdef Py_DEBUG
if (!unicode_is_finalizing() && unicode_is_singleton(unicode)) {
_Py_FatalRefcountError("deallocating an Unicode singleton");
}
#endif
- PyObject *interned = get_interned_dict();
if (PyUnicode_CHECK_INTERNED(unicode)) {
/* Revive the dead object temporarily. PyDict_DelItem() removes two
references (key and value) which were ignored by
@@ -1534,6 +1552,8 @@ unicode_dealloc(PyObject *unicode)
PyDict_DelItem(). */
assert(Py_REFCNT(unicode) == 0);
Py_SET_REFCNT(unicode, 3);
+ PyObject *interned = get_interned_dict(interp);
+ assert(interned != NULL);
if (PyDict_DelItem(interned, unicode) != 0) {
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
NULL);
@@ -14529,34 +14549,29 @@ _PyUnicode_InitState(PyInterpreterState *interp)
PyStatus
_PyUnicode_InitGlobalObjects(PyInterpreterState *interp)
{
- if (!_Py_IsMainInterpreter(interp)) {
- return _PyStatus_OK();
- }
-
// Initialize the global interned dict
- PyObject *interned = PyDict_New();
- if (interned == NULL) {
+ if (init_interned_dict(interp)) {
PyErr_Clear();
return _PyStatus_ERR("failed to create interned dict");
}
- set_interned_dict(interned);
-
- /* Intern statically allocated string identifiers and deepfreeze strings.
- * This must be done before any module initialization so that statically
- * allocated string identifiers are used instead of heap allocated strings.
- * Deepfreeze uses the interned identifiers if present to save space
- * else generates them and they are interned to speed up dict lookups.
- */
- _PyUnicode_InitStaticStrings();
+ if (_Py_IsMainInterpreter(interp)) {
+ /* Intern statically allocated string identifiers and deepfreeze strings.
+ * This must be done before any module initialization so that statically
+ * allocated string identifiers are used instead of heap allocated strings.
+ * Deepfreeze uses the interned identifiers if present to save space
+ * else generates them and they are interned to speed up dict lookups.
+ */
+ _PyUnicode_InitStaticStrings(interp);
#ifdef Py_DEBUG
- assert(_PyUnicode_CheckConsistency(&_Py_STR(empty), 1));
+ assert(_PyUnicode_CheckConsistency(&_Py_STR(empty), 1));
- for (int i = 0; i < 256; i++) {
- assert(_PyUnicode_CheckConsistency(LATIN1(i), 1));
- }
+ for (int i = 0; i < 256; i++) {
+ assert(_PyUnicode_CheckConsistency(LATIN1(i), 1));
+ }
#endif
+ }
return _PyStatus_OK();
}
@@ -14586,7 +14601,7 @@ error:
void
-PyUnicode_InternInPlace(PyObject **p)
+_PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p)
{
PyObject *s = *p;
#ifdef Py_DEBUG
@@ -14608,7 +14623,7 @@ PyUnicode_InternInPlace(PyObject **p)
return;
}
- PyObject *interned = get_interned_dict();
+ PyObject *interned = get_interned_dict(interp);
assert(interned != NULL);
PyObject *t = PyDict_SetDefault(interned, s, s);
@@ -14629,6 +14644,13 @@ PyUnicode_InternInPlace(PyObject **p)
_PyUnicode_STATE(s).interned = 1;
}
+void
+PyUnicode_InternInPlace(PyObject **p)
+{
+ PyInterpreterState *interp = _PyInterpreterState_GET();
+ _PyUnicode_InternInPlace(interp, p);
+}
+
// Function kept for the stable ABI.
PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
void
@@ -14653,12 +14675,7 @@ PyUnicode_InternFromString(const char *cp)
void
_PyUnicode_ClearInterned(PyInterpreterState *interp)
{
- if (!_Py_IsMainInterpreter(interp)) {
- // interned dict is shared by all interpreters
- return;
- }
-
- PyObject *interned = get_interned_dict();
+ PyObject *interned = get_interned_dict(interp);
if (interned == NULL) {
return;
}
@@ -14693,9 +14710,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
total_length);
#endif
- PyDict_Clear(interned);
- Py_DECREF(interned);
- set_interned_dict(NULL);
+ clear_interned_dict(interp);
}
@@ -15108,7 +15123,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
static inline int
unicode_is_finalizing(void)
{
- return (get_interned_dict() == NULL);
+ return (get_interned_dict(_PyInterpreterState_Main()) == NULL);
}
#endif
@@ -15131,14 +15146,13 @@ _PyUnicode_Fini(PyInterpreterState *interp)
{
struct _Py_unicode_state *state = &interp->unicode;
- if (_Py_IsMainInterpreter(interp)) {
- // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
- assert(get_interned_dict() == NULL);
- // bpo-47182: force a unicodedata CAPI capsule re-import on
- // subsequent initialization of main interpreter.
- }
+ // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
+ assert(get_interned_dict(interp) == NULL);
_PyUnicode_FiniEncodings(&state->fs_codec);
+
+ // bpo-47182: force a unicodedata CAPI capsule re-import on
+ // subsequent initialization of interpreter.
interp->unicode.ucnhash_capi = NULL;
unicode_clear_identifiers(state);