diff options
author | Petr Viktorin <encukou@gmail.com> | 2024-06-24 18:24:19 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-24 18:24:19 (GMT) |
commit | 9769b7ae064a0546a98cbcbec2561dbaba20cd23 (patch) | |
tree | 1b953e866faca7527099a68daf362e29501a7680 /Objects | |
parent | 447e07ab3d569bb4b2209ccfe3889fafa3ad6693 (diff) | |
download | cpython-9769b7ae064a0546a98cbcbec2561dbaba20cd23.zip cpython-9769b7ae064a0546a98cbcbec2561dbaba20cd23.tar.gz cpython-9769b7ae064a0546a98cbcbec2561dbaba20cd23.tar.bz2 |
[3.13] gh-113993: Allow interned strings to be mortal, and fix related issues (GH-120520) (GH-120945)
* Add an InternalDocs file describing how interning should work and how to use it.
* Add internal functions to *explicitly* request what kind of interning is done:
- `_PyUnicode_InternMortal`
- `_PyUnicode_InternImmortal`
- `_PyUnicode_InternStatic`
* Switch uses of `PyUnicode_InternInPlace` to those.
* Disallow using `_Py_SetImmortal` on strings directly.
You should use `_PyUnicode_InternImmortal` instead:
- Strings should be interned before immortalization, otherwise you're possibly
interning a immortalizing copy.
- `_Py_SetImmortal` doesn't handle the `SSTATE_INTERNED_MORTAL` to
`SSTATE_INTERNED_IMMORTAL` update, and those flags can't be changed in
backports, as they are now part of public API and version-specific ABI.
* Add private `_only_immortal` argument for `sys.getunicodeinternedsize`, used in refleak test machinery.
* Make sure the statically allocated string singletons are unique. This means these sets are now disjoint:
- `_Py_ID`
- `_Py_STR` (including the empty string)
- one-character latin-1 singletons
Now, when you intern a singleton, that exact singleton will be interned.
* Add a `_Py_LATIN1_CHR` macro, use it instead of `_Py_ID`/`_Py_STR` for one-character latin-1 singletons everywhere (including Clinic).
* Intern `_Py_STR` singletons at startup.
* For free-threaded builds, intern `_Py_LATIN1_CHR` singletons at startup.
* Beef up the tests. Cover internal details (marked with `@cpython_only`).
* Add lots of assertions
Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/codeobject.c | 14 | ||||
-rw-r--r-- | Objects/dictobject.c | 3 | ||||
-rw-r--r-- | Objects/object.c | 10 | ||||
-rw-r--r-- | Objects/typeobject.c | 10 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 544 |
5 files changed, 454 insertions, 127 deletions
diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 55b512b..7b1244a 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -137,6 +137,7 @@ static PyObject *intern_one_constant(PyObject *op); static int intern_strings(PyObject *tuple) { + PyInterpreterState *interp = _PyInterpreterState_GET(); Py_ssize_t i; for (i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { @@ -146,7 +147,7 @@ intern_strings(PyObject *tuple) "non-string found in code slot"); return -1; } - PyUnicode_InternInPlace(&_PyTuple_ITEMS(tuple)[i]); + _PyUnicode_InternMortal(interp, &_PyTuple_ITEMS(tuple)[i]); } return 0; } @@ -157,12 +158,13 @@ intern_strings(PyObject *tuple) static int intern_constants(PyObject *tuple, int *modified) { + PyInterpreterState *interp = _PyInterpreterState_GET(); for (Py_ssize_t i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); if (PyUnicode_CheckExact(v)) { if (should_intern_string(v)) { PyObject *w = v; - PyUnicode_InternInPlace(&v); + _PyUnicode_InternMortal(interp, &v); if (w != v) { PyTuple_SET_ITEM(tuple, i, v); if (modified) { @@ -458,12 +460,13 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) con->stacksize = 1; } + PyInterpreterState *interp = _PyInterpreterState_GET(); co->co_filename = Py_NewRef(con->filename); co->co_name = Py_NewRef(con->name); co->co_qualname = Py_NewRef(con->qualname); - PyUnicode_InternInPlace(&co->co_filename); - PyUnicode_InternInPlace(&co->co_name); - PyUnicode_InternInPlace(&co->co_qualname); + _PyUnicode_InternMortal(interp, &co->co_filename); + _PyUnicode_InternMortal(interp, &co->co_name); + _PyUnicode_InternMortal(interp, &co->co_qualname); co->co_flags = con->flags; co->co_firstlineno = con->firstlineno; @@ -489,7 +492,6 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) co->co_framesize = nlocalsplus + con->stacksize + FRAME_SPECIALS_SIZE; co->co_ncellvars = ncellvars; co->co_nfreevars = nfreevars; - PyInterpreterState *interp = _PyInterpreterState_GET(); #ifdef Py_GIL_DISABLED PyMutex_Lock(&interp->func_state.mutex); #endif diff --git a/Objects/dictobject.c b/Objects/dictobject.c index c7ea6bf..fa9d8ab 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -4910,7 +4910,8 @@ PyDict_SetItemString(PyObject *v, const char *key, PyObject *item) kv = PyUnicode_FromString(key); if (kv == NULL) return -1; - PyUnicode_InternInPlace(&kv); /* XXX Should we really? */ + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_InternImmortal(interp, &kv); /* XXX Should we really? */ err = PyDict_SetItem(v, kv, item); Py_DECREF(kv); return err; diff --git a/Objects/object.c b/Objects/object.c index 0355303..fcd81b8 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1326,7 +1326,8 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) } Py_INCREF(name); - PyUnicode_InternInPlace(&name); + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_InternMortal(interp, &name); if (tp->tp_setattro != NULL) { err = (*tp->tp_setattro)(v, name, value); Py_DECREF(name); @@ -2409,6 +2410,13 @@ _Py_NewReferenceNoTotal(PyObject *op) void _Py_SetImmortalUntracked(PyObject *op) { +#ifdef Py_DEBUG + // For strings, use _PyUnicode_InternImmortal instead. + if (PyUnicode_CheckExact(op)) { + assert(PyUnicode_CHECK_INTERNED(op) == SSTATE_INTERNED_IMMORTAL + || PyUnicode_CHECK_INTERNED(op) == SSTATE_INTERNED_IMMORTAL_STATIC); + } +#endif #ifdef Py_GIL_DISABLED op->ob_tid = _Py_UNOWNED_TID; op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 79085b6..964e0cd 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1322,8 +1322,10 @@ type_module(PyTypeObject *type) if (s != NULL) { mod = PyUnicode_FromStringAndSize( type->tp_name, (Py_ssize_t)(s - type->tp_name)); - if (mod != NULL) - PyUnicode_InternInPlace(&mod); + if (mod != NULL) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_InternMortal(interp, &mod); + } } else { mod = &_Py_ID(builtins); @@ -5490,9 +5492,9 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) if (name == NULL) return -1; } - /* bpo-40521: Interned strings are shared by all subinterpreters */ if (!PyUnicode_CHECK_INTERNED(name)) { - PyUnicode_InternInPlace(&name); + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_InternMortal(interp, &name); if (!PyUnicode_CHECK_INTERNED(name)) { PyErr_SetString(PyExc_MemoryError, "Out of memory interning an attribute name"); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 59ca344..b21886e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -177,10 +177,7 @@ NOTE: In the interpreter's initialization phase, some globals are currently *_to++ = (to_type) *_iter++; \ } while (0) -#define LATIN1(ch) \ - (ch < 128 \ - ? (PyObject*)&_Py_SINGLETON(strings).ascii[ch] \ - : (PyObject*)&_Py_SINGLETON(strings).latin1[ch - 128]) +#define LATIN1 _Py_LATIN1_CHR #ifdef MS_WINDOWS /* On Windows, overallocate by 50% is the best factor */ @@ -215,18 +212,20 @@ static inline PyObject* unicode_get_empty(void) return &_Py_STR(empty); } -/* This dictionary holds all interned unicode strings. Note that references - to strings in this dictionary are *not* counted in the string's ob_refcnt. - When the interned string reaches a refcnt of 0 the string deallocation - function will delete the reference from this dictionary. -*/ +/* This dictionary holds per-interpreter interned strings. + * See InternalDocs/string_interning.md for details. + */ static inline PyObject *get_interned_dict(PyInterpreterState *interp) { return _Py_INTERP_CACHED_OBJECT(interp, interned_strings); } +/* This hashtable holds statically allocated interned strings. + * See InternalDocs/string_interning.md for details. + */ #define INTERNED_STRINGS _PyRuntime.cached_objects.interned_strings +/* Get number of all interned strings for the current interpreter. */ Py_ssize_t _PyUnicode_InternedSize(void) { @@ -234,6 +233,27 @@ _PyUnicode_InternedSize(void) return _Py_hashtable_len(INTERNED_STRINGS) + PyDict_GET_SIZE(dict); } +/* Get number of immortal interned strings for the current interpreter. */ +Py_ssize_t +_PyUnicode_InternedSize_Immortal(void) +{ + PyObject *dict = get_interned_dict(_PyInterpreterState_GET()); + PyObject *key, *value; + Py_ssize_t pos = 0; + Py_ssize_t count = 0; + + // It's tempting to keep a count and avoid a loop here. But, this function + // is intended for refleak tests. It spends extra work to report the true + // value, to help detect bugs in optimizations. + + while (PyDict_Next(dict, &pos, &key, &value)) { + if (_Py_IsImmortal(key)) { + count++; + } + } + return _Py_hashtable_len(INTERNED_STRINGS) + count; +} + static Py_hash_t unicode_hash(PyObject *); static int unicode_compare_eq(PyObject *, PyObject *); @@ -259,20 +279,6 @@ hashtable_unicode_compare(const void *key1, const void *key2) static int init_interned_dict(PyInterpreterState *interp) { - if (_Py_IsMainInterpreter(interp)) { - assert(INTERNED_STRINGS == NULL); - _Py_hashtable_allocator_t hashtable_alloc = {PyMem_RawMalloc, PyMem_RawFree}; - INTERNED_STRINGS = _Py_hashtable_new_full( - hashtable_unicode_hash, - hashtable_unicode_compare, - NULL, - NULL, - &hashtable_alloc - ); - if (INTERNED_STRINGS == NULL) { - return -1; - } - } assert(get_interned_dict(interp) == NULL); PyObject *interned = interned = PyDict_New(); if (interned == NULL) { @@ -291,7 +297,57 @@ clear_interned_dict(PyInterpreterState *interp) Py_DECREF(interned); _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL; } - if (_Py_IsMainInterpreter(interp) && INTERNED_STRINGS != NULL) { +} + +static PyStatus +init_global_interned_strings(PyInterpreterState *interp) +{ + assert(INTERNED_STRINGS == NULL); + _Py_hashtable_allocator_t hashtable_alloc = {PyMem_RawMalloc, PyMem_RawFree}; + + INTERNED_STRINGS = _Py_hashtable_new_full( + hashtable_unicode_hash, + hashtable_unicode_compare, + // Objects stored here are immortal and statically allocated, + // so we don't need key_destroy_func & value_destroy_func: + NULL, + NULL, + &hashtable_alloc + ); + if (INTERNED_STRINGS == NULL) { + PyErr_Clear(); + return _PyStatus_ERR("failed to create global interned dict"); + } + + /* Intern statically allocated string identifiers and deepfreeze strings. + * This must be done before any module initialization so that statically + * allocated string identifiers are used instead of heap allocated strings. + * Deepfreeze uses the interned identifiers if present to save space + * else generates them and they are interned to speed up dict lookups. + */ + _PyUnicode_InitStaticStrings(interp); + +#ifdef Py_GIL_DISABLED +// In the free-threaded build, intern the 1-byte strings as well + for (int i = 0; i < 256; i++) { + PyObject *s = LATIN1(i); + _PyUnicode_InternStatic(interp, &s); + assert(s == LATIN1(i)); + } +#endif +#ifdef Py_DEBUG + assert(_PyUnicode_CheckConsistency(&_Py_STR(empty), 1)); + + for (int i = 0; i < 256; i++) { + assert(_PyUnicode_CheckConsistency(LATIN1(i), 1)); + } +#endif + return _PyStatus_OK(); +} + +static void clear_global_interned_strings(void) +{ + if (INTERNED_STRINGS != NULL) { _Py_hashtable_destroy(INTERNED_STRINGS); INTERNED_STRINGS = NULL; } @@ -624,6 +680,39 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) } CHECK(PyUnicode_READ(kind, data, ascii->length) == 0); } + + /* Check interning state */ +#ifdef Py_DEBUG + switch (PyUnicode_CHECK_INTERNED(op)) { + case SSTATE_NOT_INTERNED: + if (ascii->state.statically_allocated) { + CHECK(_Py_IsImmortal(op)); + // This state is for two exceptions: + // - strings are currently checked before they're interned + // - the 256 one-latin1-character strings + // are static but use SSTATE_NOT_INTERNED + } + else { + CHECK(!_Py_IsImmortal(op)); + } + break; + case SSTATE_INTERNED_MORTAL: + CHECK(!ascii->state.statically_allocated); + CHECK(!_Py_IsImmortal(op)); + break; + case SSTATE_INTERNED_IMMORTAL: + CHECK(!ascii->state.statically_allocated); + CHECK(_Py_IsImmortal(op)); + break; + case SSTATE_INTERNED_IMMORTAL_STATIC: + CHECK(ascii->state.statically_allocated); + CHECK(_Py_IsImmortal(op)); + break; + default: + Py_UNREACHABLE(); + } +#endif + return 1; #undef CHECK @@ -1580,16 +1669,74 @@ unicode_dealloc(PyObject *unicode) _Py_FatalRefcountError("deallocating an Unicode singleton"); } #endif - /* This should never get called, but we also don't want to SEGV if - * we accidentally decref an immortal string out of existence. Since - * the string is an immortal object, just re-set the reference count. - */ - if (PyUnicode_CHECK_INTERNED(unicode) - || _PyUnicode_STATE(unicode).statically_allocated) - { + if (_PyUnicode_STATE(unicode).statically_allocated) { + /* This should never get called, but we also don't want to SEGV if + * we accidentally decref an immortal string out of existence. Since + * the string is an immortal object, just re-set the reference count. + */ +#ifdef Py_DEBUG + Py_UNREACHABLE(); +#endif _Py_SetImmortal(unicode); return; } + switch (_PyUnicode_STATE(unicode).interned) { + case SSTATE_NOT_INTERNED: + break; + case SSTATE_INTERNED_MORTAL: + /* Remove the object from the intern dict. + * Before doing so, we set the refcount to 2: the key and value + * in the interned_dict. + */ + assert(Py_REFCNT(unicode) == 0); + Py_SET_REFCNT(unicode, 2); +#ifdef Py_REF_DEBUG + /* let's be pedantic with the ref total */ + _Py_IncRefTotal(_PyThreadState_GET()); + _Py_IncRefTotal(_PyThreadState_GET()); +#endif + PyInterpreterState *interp = _PyInterpreterState_GET(); + PyObject *interned = get_interned_dict(interp); + assert(interned != NULL); + PyObject *popped; + int r = PyDict_Pop(interned, unicode, &popped); + if (r == -1) { + PyErr_WriteUnraisable(unicode); + // We don't know what happened to the string. It's probably + // best to leak it: + // - if it was popped, there are no more references to it + // so it can't cause trouble (except wasted memory) + // - if it wasn't popped, it'll remain interned + _Py_SetImmortal(unicode); + _PyUnicode_STATE(unicode).interned = SSTATE_INTERNED_IMMORTAL; + return; + } + if (r == 0) { + // The interned string was not found in the interned_dict. +#ifdef Py_DEBUG + Py_UNREACHABLE(); +#endif + _Py_SetImmortal(unicode); + return; + } + // Successfully popped. + assert(popped == unicode); + // Only our `popped` reference should be left; remove it too. + assert(Py_REFCNT(unicode) == 1); + Py_SET_REFCNT(unicode, 0); +#ifdef Py_REF_DEBUG + /* let's be pedantic with the ref total */ + _Py_DecRefTotal(_PyThreadState_GET()); +#endif + break; + default: + // As with `statically_allocated` above. +#ifdef Py_REF_DEBUG + Py_UNREACHABLE(); +#endif + _Py_SetImmortal(unicode); + return; + } if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { PyMem_Free(_PyUnicode_UTF8(unicode)); } @@ -1934,7 +2081,7 @@ _PyUnicode_FromId(_Py_Identifier *id) if (!obj) { goto end; } - PyUnicode_InternInPlace(&obj); + _PyUnicode_InternImmortal(interp, &obj); if (index >= ids->size) { // Overallocate to reduce the number of realloc @@ -10816,8 +10963,10 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right) if (left == right_uni) return 1; - if (PyUnicode_CHECK_INTERNED(left)) + assert(PyUnicode_CHECK_INTERNED(right_uni)); + if (PyUnicode_CHECK_INTERNED(left)) { return 0; + } Py_hash_t right_hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(right_uni)); assert(right_hash != -1); @@ -14856,30 +15005,19 @@ _PyUnicode_InitState(PyInterpreterState *interp) PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *interp) { - // Initialize the global interned dict + if (_Py_IsMainInterpreter(interp)) { + PyStatus status = init_global_interned_strings(interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + } + assert(INTERNED_STRINGS); + if (init_interned_dict(interp)) { PyErr_Clear(); return _PyStatus_ERR("failed to create interned dict"); } - if (_Py_IsMainInterpreter(interp)) { - /* Intern statically allocated string identifiers and deepfreeze strings. - * This must be done before any module initialization so that statically - * allocated string identifiers are used instead of heap allocated strings. - * Deepfreeze uses the interned identifiers if present to save space - * else generates them and they are interned to speed up dict lookups. - */ - _PyUnicode_InitStaticStrings(interp); - -#ifdef Py_DEBUG - assert(_PyUnicode_CheckConsistency(&_Py_STR(empty), 1)); - - for (int i = 0; i < 256; i++) { - assert(_PyUnicode_CheckConsistency(LATIN1(i), 1)); - } -#endif - } - return _PyStatus_OK(); } @@ -14902,106 +15040,267 @@ error: return _PyStatus_ERR("Can't initialize unicode types"); } +static /* non-null */ PyObject* +intern_static(PyInterpreterState *interp, PyObject *s /* stolen */) +{ + // Note that this steals a reference to `s`, but in many cases that + // stolen ref is returned, requiring no decref/incref. + + assert(s != NULL); + assert(_PyUnicode_CHECK(s)); + assert(_PyUnicode_STATE(s).statically_allocated); + assert(_Py_IsImmortal(s)); + + switch (PyUnicode_CHECK_INTERNED(s)) { + case SSTATE_NOT_INTERNED: + break; + case SSTATE_INTERNED_IMMORTAL_STATIC: + return s; + default: + Py_FatalError("_PyUnicode_InternStatic called on wrong string"); + } + +#ifdef Py_DEBUG + /* We must not add process-global interned string if there's already a + * per-interpreter interned_dict, which might contain duplicates. + * Except "short string" singletons: those are special-cased. */ + PyObject *interned = get_interned_dict(interp); + assert(interned == NULL || unicode_is_singleton(s)); +#ifdef Py_GIL_DISABLED + // In the free-threaded build, don't allow even the short strings. + assert(interned == NULL); +#endif +#endif + + /* Look in the global cache first. */ + PyObject *r = (PyObject *)_Py_hashtable_get(INTERNED_STRINGS, s); + /* We should only init each string once */ + assert(r == NULL); + /* but just in case (for the non-debug build), handle this */ + if (r != NULL && r != s) { + assert(_PyUnicode_STATE(r).interned == SSTATE_INTERNED_IMMORTAL_STATIC); + assert(_PyUnicode_CHECK(r)); + Py_DECREF(s); + return Py_NewRef(r); + } + + if (_Py_hashtable_set(INTERNED_STRINGS, s, s) < -1) { + Py_FatalError("failed to intern static string"); + } + + _PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL_STATIC; + return s; +} void -_PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p) +_PyUnicode_InternStatic(PyInterpreterState *interp, PyObject **p) { - PyObject *s = *p; + // This should only be called as part of runtime initialization + assert(!Py_IsInitialized()); + + *p = intern_static(interp, *p); + assert(*p); +} + +static void +immortalize_interned(PyObject *s) +{ + assert(PyUnicode_CHECK_INTERNED(s) == SSTATE_INTERNED_MORTAL); + assert(!_Py_IsImmortal(s)); +#ifdef Py_REF_DEBUG + /* The reference count value should be excluded from the RefTotal. + The decrements to these objects will not be registered so they + need to be accounted for in here. */ + for (Py_ssize_t i = 0; i < Py_REFCNT(s); i++) { + _Py_DecRefTotal(_PyThreadState_GET()); + } +#endif + _PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL; + _Py_SetImmortal(s); +} + +static /* non-null */ PyObject* +intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, + bool immortalize) +{ + // Note that this steals a reference to `s`, but in many cases that + // stolen ref is returned, requiring no decref/incref. + #ifdef Py_DEBUG assert(s != NULL); assert(_PyUnicode_CHECK(s)); #else if (s == NULL || !PyUnicode_Check(s)) { - return; + return s; } #endif /* If it's a subclass, we don't really know what putting it in the interned dict might do. */ if (!PyUnicode_CheckExact(s)) { - return; + return s; } - if (PyUnicode_CHECK_INTERNED(s)) { - return; + /* Handle statically allocated strings. */ + if (_PyUnicode_STATE(s).statically_allocated) { + return intern_static(interp, s); } - /* Look in the global cache first. */ - PyObject *r = (PyObject *)_Py_hashtable_get(INTERNED_STRINGS, s); - if (r != NULL && r != s) { - Py_SETREF(*p, Py_NewRef(r)); - return; + /* Is it already interned? */ + switch (PyUnicode_CHECK_INTERNED(s)) { + case SSTATE_NOT_INTERNED: + // no, go on + break; + case SSTATE_INTERNED_MORTAL: + // yes but we might need to make it immortal + if (immortalize) { + immortalize_interned(s); + } + return s; + default: + // all done + return s; } - /* Handle statically allocated strings. */ - if (_PyUnicode_STATE(s).statically_allocated) { - assert(_Py_IsImmortal(s)); - if (_Py_hashtable_set(INTERNED_STRINGS, s, s) == 0) { - _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL_STATIC; +#if Py_GIL_DISABLED + /* In the free-threaded build, all interned strings are immortal */ + immortalize = 1; +#endif + + /* If it's already immortal, intern it as such */ + if (_Py_IsImmortal(s)) { + immortalize = 1; + } + + /* if it's a short string, get the singleton -- and intern it */ + if (PyUnicode_GET_LENGTH(s) == 1 && + PyUnicode_KIND(s) == PyUnicode_1BYTE_KIND) { + PyObject *r = LATIN1(*(unsigned char*)PyUnicode_DATA(s)); + if (!PyUnicode_CHECK_INTERNED(r)) { + r = intern_static(interp, r); } - return; + Py_DECREF(s); + return r; } +#ifdef Py_DEBUG + assert(!unicode_is_singleton(s)); +#endif - /* Look in the per-interpreter cache. */ + /* Look in the global cache now. */ + { + PyObject *r = (PyObject *)_Py_hashtable_get(INTERNED_STRINGS, s); + if (r != NULL) { + assert(_Py_IsImmortal(r)); + assert(r != s); // r must be statically_allocated; s is not + Py_DECREF(s); + return Py_NewRef(r); + } + } + + /* Do a setdefault on the per-interpreter cache. */ PyObject *interned = get_interned_dict(interp); assert(interned != NULL); PyObject *t; - int res = PyDict_SetDefaultRef(interned, s, s, &t); - if (res < 0) { - PyErr_Clear(); - return; - } - else if (res == 1) { - // value was already present (not inserted) - Py_SETREF(*p, t); - return; + { + int res = PyDict_SetDefaultRef(interned, s, s, &t); + if (res < 0) { + PyErr_Clear(); + return s; + } + else if (res == 1) { + // value was already present (not inserted) + Py_DECREF(s); + if (immortalize && + PyUnicode_CHECK_INTERNED(t) == SSTATE_INTERNED_MORTAL) { + immortalize_interned(t); + } + return t; + } + else { + // value was newly inserted + assert (s == t); + Py_DECREF(t); + } } - Py_DECREF(t); - if (_Py_IsImmortal(s)) { - // XXX Restrict this to the main interpreter? - _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL_STATIC; - return; - } + /* NOT_INTERNED -> INTERNED_MORTAL */ + + assert(_PyUnicode_STATE(s).interned == SSTATE_NOT_INTERNED); + if (!_Py_IsImmortal(s)) { + /* The two references in interned dict (key and value) are not counted. + unicode_dealloc() and _PyUnicode_ClearInterned() take care of this. */ + Py_SET_REFCNT(s, Py_REFCNT(s) - 2); #ifdef Py_REF_DEBUG - /* The reference count value excluding the 2 references from the - interned dictionary should be excluded from the RefTotal. The - decrements to these objects will not be registered so they - need to be accounted for in here. */ - for (Py_ssize_t i = 0; i < Py_REFCNT(s) - 2; i++) { + /* let's be pedantic with the ref total */ _Py_DecRefTotal(_PyThreadState_GET()); + _Py_DecRefTotal(_PyThreadState_GET()); +#endif + } + _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL; + + /* INTERNED_MORTAL -> INTERNED_IMMORTAL (if needed) */ + +#ifdef Py_DEBUG + if (_Py_IsImmortal(s)) { + assert(immortalize); } #endif - _Py_SetImmortal(s); - _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL; + if (immortalize) { + immortalize_interned(s); + } + + return s; +} + +void +_PyUnicode_InternImmortal(PyInterpreterState *interp, PyObject **p) +{ + *p = intern_common(interp, *p, 1); + assert(*p); +} + +void +_PyUnicode_InternMortal(PyInterpreterState *interp, PyObject **p) +{ + *p = intern_common(interp, *p, 0); + assert(*p); +} + + +void +_PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p) +{ + _PyUnicode_InternImmortal(interp, p); + return; } void PyUnicode_InternInPlace(PyObject **p) { PyInterpreterState *interp = _PyInterpreterState_GET(); - _PyUnicode_InternInPlace(interp, p); + _PyUnicode_InternImmortal(interp, p); } -// Function kept for the stable ABI. +// Public-looking name kept for the stable ABI; user should not call this: PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **); void PyUnicode_InternImmortal(PyObject **p) { - PyUnicode_InternInPlace(p); - // Leak a reference on purpose - Py_INCREF(*p); + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_InternImmortal(interp, p); } PyObject * PyUnicode_InternFromString(const char *cp) { PyObject *s = PyUnicode_FromString(cp); - if (s == NULL) + if (s == NULL) { return NULL; - PyUnicode_InternInPlace(&s); + } + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_InternMortal(interp, &s); return s; } @@ -15015,20 +15314,6 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) } assert(PyDict_CheckExact(interned)); - /* TODO: - * Currently, the runtime is not able to guarantee that it can exit without - * allocations that carry over to a future initialization of Python within - * the same process. i.e: - * ./python -X showrefcount -c 'import itertools' - * [237 refs, 237 blocks] - * - * Therefore, this should remain disabled for until there is a strict guarantee - * that no memory will be left after `Py_Finalize`. - */ -#ifdef Py_DEBUG - /* For all non-singleton interned strings, restore the two valid references - to that instance from within the intern string dictionary and let the - normal reference counting process clean up these instances. */ #ifdef INTERNED_STATS fprintf(stderr, "releasing %zd interned strings\n", PyDict_GET_SIZE(interned)); @@ -15042,13 +15327,32 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) int shared = 0; switch (PyUnicode_CHECK_INTERNED(s)) { case SSTATE_INTERNED_IMMORTAL: + /* Make immortal interned strings mortal again. + * + * Currently, the runtime is not able to guarantee that it can exit + * without allocations that carry over to a future initialization + * of Python within the same process. i.e: + * ./python -X showrefcount -c 'import itertools' + * [237 refs, 237 blocks] + * + * This should remain disabled (`Py_DEBUG` only) until there is a + * strict guarantee that no memory will be left after + * `Py_Finalize`. + */ +#ifdef Py_DEBUG // Skip the Immortal Instance check and restore // the two references (key and value) ignored // by PyUnicode_InternInPlace(). _Py_SetMortal(s, 2); +#ifdef Py_REF_DEBUG + /* let's be pedantic with the ref total */ + _Py_IncRefTotal(_PyThreadState_GET()); + _Py_IncRefTotal(_PyThreadState_GET()); +#endif #ifdef INTERNED_STATS total_length += PyUnicode_GET_LENGTH(s); #endif +#endif // Py_DEBUG break; case SSTATE_INTERNED_IMMORTAL_STATIC: /* It is shared between interpreters, so we should unmark it @@ -15061,7 +15365,15 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) } break; case SSTATE_INTERNED_MORTAL: - /* fall through */ + // Restore 2 references held by the interned dict; these will + // be decref'd by clear_interned_dict's PyDict_Clear. + Py_SET_REFCNT(s, Py_REFCNT(s) + 2); +#ifdef Py_REF_DEBUG + /* let's be pedantic with the ref total */ + _Py_IncRefTotal(_PyThreadState_GET()); + _Py_IncRefTotal(_PyThreadState_GET()); +#endif + break; case SSTATE_NOT_INTERNED: /* fall through */ default: @@ -15082,8 +15394,10 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) for (Py_ssize_t i=0; i < ids->size; i++) { Py_XINCREF(ids->array[i]); } -#endif /* Py_DEBUG */ clear_interned_dict(interp); + if (_Py_IsMainInterpreter(interp)) { + clear_global_interned_strings(); + } } |