From 059b5baf98c9503d9d59c79fba117826caa5a3e1 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 13 May 2022 13:40:22 +0200 Subject: gh-85858: Remove PyUnicode_InternImmortal() function (#92579) Remove the PyUnicode_InternImmortal() function and the SSTATE_INTERNED_IMMORTAL macro. The PyUnicode_InternImmortal() function is still exported in the stable ABI. The function is removed from the API. PyASCIIObject.state.interned size is now a single bit, rather than 2 bits. Keep SSTATE_NOT_INTERNED and SSTATE_INTERNED_MORTAL macros for backward compatibility, but no longer use them internally since the interned member is now a single bit and so can only have two values (interned or not interned). Update stats of _PyUnicode_ClearInterned(). --- Doc/data/stable_abi.dat | 1 - Doc/whatsnew/3.12.rst | 4 ++ Include/cpython/unicodeobject.h | 13 +--- Include/unicodeobject.h | 4 -- .../2022-05-09-23-16-38.gh-issue-85858.VIcNDL.rst | 2 + Misc/stable_abi.toml | 1 + Objects/unicodeobject.c | 69 ++++++---------------- 7 files changed, 27 insertions(+), 67 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2022-05-09-23-16-38.gh-issue-85858.VIcNDL.rst diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index 3486f33..3912a7c 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -762,7 +762,6 @@ function,PyUnicode_FromWideChar,3.2,, function,PyUnicode_GetDefaultEncoding,3.2,, function,PyUnicode_GetLength,3.7,, function,PyUnicode_InternFromString,3.2,, -function,PyUnicode_InternImmortal,3.2,, function,PyUnicode_InternInPlace,3.2,, function,PyUnicode_IsIdentifier,3.2,, function,PyUnicode_Join,3.2,, diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index d5017c0..70b26ba 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -174,3 +174,7 @@ Removed * :c:func:`PyUnicode_GET_SIZE` * :c:func:`PyUnicode_GetSize` * :c:func:`PyUnicode_GET_DATA_SIZE` + +* Remove the ``PyUnicode_InternImmortal()`` function and the + ``SSTATE_INTERNED_IMMORTAL`` macro. + (Contributed by Victor Stinner in :gh:`85858`.) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 16db2cb..37bb13c 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -98,15 +98,9 @@ typedef struct { Py_ssize_t length; /* Number of code points in the string */ Py_hash_t hash; /* Hash value; -1 if not set */ struct { - /* - SSTATE_NOT_INTERNED (0) - SSTATE_INTERNED_MORTAL (1) - SSTATE_INTERNED_IMMORTAL (2) - - If interned != SSTATE_NOT_INTERNED, the two references from the - dictionary to this object are *not* counted in ob_refcnt. - */ - unsigned int interned:2; + /* If interned is set, the two references from the + dictionary to this object are *not* counted in ob_refcnt. */ + unsigned int interned:1; /* Character size: - PyUnicode_1BYTE_KIND (1): @@ -189,7 +183,6 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency( /* Interning state. */ #define SSTATE_NOT_INTERNED 0 #define SSTATE_INTERNED_MORTAL 1 -#define SSTATE_INTERNED_IMMORTAL 2 /* Use only if you know it's a string */ static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) { diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index f71f379..ed3e8d2 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -256,10 +256,6 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString( const char *u /* UTF-8 encoded string */ ); -// PyUnicode_InternImmortal() is deprecated since Python 3.10 -// and will be removed in Python 3.12. Use PyUnicode_InternInPlace() instead. -Py_DEPRECATED(3.10) PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **); - /* --- wchar_t support for platforms which support it --------------------- */ #ifdef HAVE_WCHAR_H diff --git a/Misc/NEWS.d/next/C API/2022-05-09-23-16-38.gh-issue-85858.VIcNDL.rst b/Misc/NEWS.d/next/C API/2022-05-09-23-16-38.gh-issue-85858.VIcNDL.rst new file mode 100644 index 0000000..c175d1e --- /dev/null +++ b/Misc/NEWS.d/next/C API/2022-05-09-23-16-38.gh-issue-85858.VIcNDL.rst @@ -0,0 +1,2 @@ +Remove the ``PyUnicode_InternImmortal()`` function and the +``SSTATE_INTERNED_IMMORTAL`` macro. Patch by Victor Stinner. diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index 07cce36..d848f18 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -1563,6 +1563,7 @@ added = '3.2' [function.PyUnicode_InternImmortal] added = '3.2' + abi_only = true [function.PyUnicode_InternInPlace] added = '3.2' [data.PyUnicode_Type] diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ee32757..e935829 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1516,13 +1516,8 @@ unicode_dealloc(PyObject *unicode) } #endif - switch (PyUnicode_CHECK_INTERNED(unicode)) { - case SSTATE_NOT_INTERNED: - break; - - case SSTATE_INTERNED_MORTAL: - { #ifdef INTERNED_STRINGS + if (PyUnicode_CHECK_INTERNED(unicode)) { /* Revive the dead object temporarily. PyDict_DelItem() removes two references (key and value) which were ignored by PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2 @@ -1536,17 +1531,8 @@ unicode_dealloc(PyObject *unicode) } assert(Py_REFCNT(unicode) == 1); Py_SET_REFCNT(unicode, 0); -#endif - break; - } - - case SSTATE_INTERNED_IMMORTAL: - _PyObject_ASSERT_FAILED_MSG(unicode, "Immortal interned string died"); - break; - - default: - Py_UNREACHABLE(); } +#endif if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { PyObject_Free(_PyUnicode_UTF8(unicode)); @@ -14674,7 +14660,7 @@ PyUnicode_InternInPlace(PyObject **p) refcnt. unicode_dealloc() and _PyUnicode_ClearInterned() take care of this. */ Py_SET_REFCNT(s, Py_REFCNT(s) - 2); - _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL; + _PyUnicode_STATE(s).interned = 1; #else // PyDict expects that interned strings have their hash // (PyASCIIObject.hash) already computed. @@ -14682,23 +14668,14 @@ PyUnicode_InternInPlace(PyObject **p) #endif } +// Function kept for the stable ABI. +PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **); void PyUnicode_InternImmortal(PyObject **p) { - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PyUnicode_InternImmortal() is deprecated; " - "use PyUnicode_InternInPlace() instead", 1) < 0) - { - // The function has no return value, the exception cannot - // be reported to the caller, so just log it. - PyErr_WriteUnraisable(NULL); - } - PyUnicode_InternInPlace(p); - if (PyUnicode_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) { - _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL; - Py_INCREF(*p); - } + // Leak a reference on purpose + Py_INCREF(*p); } PyObject * @@ -14733,37 +14710,25 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) fprintf(stderr, "releasing %zd interned strings\n", PyDict_GET_SIZE(interned)); - Py_ssize_t immortal_size = 0, mortal_size = 0; + Py_ssize_t total_length = 0; #endif Py_ssize_t pos = 0; PyObject *s, *ignored_value; while (PyDict_Next(interned, &pos, &s, &ignored_value)) { - switch (PyUnicode_CHECK_INTERNED(s)) { - case SSTATE_INTERNED_IMMORTAL: - Py_SET_REFCNT(s, Py_REFCNT(s) + 1); -#ifdef INTERNED_STATS - immortal_size += PyUnicode_GET_LENGTH(s); -#endif - break; - case SSTATE_INTERNED_MORTAL: - // Restore the two references (key and value) ignored - // by PyUnicode_InternInPlace(). - Py_SET_REFCNT(s, Py_REFCNT(s) + 2); + assert(PyUnicode_CHECK_INTERNED(s)); + // Restore the two references (key and value) ignored + // by PyUnicode_InternInPlace(). + Py_SET_REFCNT(s, Py_REFCNT(s) + 2); #ifdef INTERNED_STATS - mortal_size += PyUnicode_GET_LENGTH(s); + total_length += PyUnicode_GET_LENGTH(s); #endif - break; - case SSTATE_NOT_INTERNED: - /* fall through */ - default: - Py_UNREACHABLE(); - } - _PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED; + + _PyUnicode_STATE(s).interned = 0; } #ifdef INTERNED_STATS fprintf(stderr, - "total size of all interned strings: %zd/%zd mortal/immortal\n", - mortal_size, immortal_size); + "total length of all interned strings: %zd characters\n", + total_length); #endif PyDict_Clear(interned); -- cgit v0.12