diff options
author | Eddie Elizondo <eduardo.elizondorueda@gmail.com> | 2023-04-22 19:39:37 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-22 19:39:37 (GMT) |
commit | ea2c0016502472aa8baa3149050ada776d17a009 (patch) | |
tree | e9e3935a9b71a1f88ac18c24fe512d199880ff90 /Objects | |
parent | 916de04fd1838530096336aadb3b94b774ed6c90 (diff) | |
download | cpython-ea2c0016502472aa8baa3149050ada776d17a009.zip cpython-ea2c0016502472aa8baa3149050ada776d17a009.tar.gz cpython-ea2c0016502472aa8baa3149050ada776d17a009.tar.bz2 |
gh-84436: Implement Immortal Objects (gh-19474)
This is the implementation of PEP 683.
Motivation:
The PR introduces the ability to immortalize instances in CPython, which bypasses reference counting. Tagging objects as immortal allows us to skip certain operations when we know that the object will be around for the entire execution of the runtime.
Note that this by itself will bring a performance regression to the runtime due to the extra reference count checks. However, this brings the ability to have truly immutable objects that are useful in other contexts, such as immutable data sharing between sub-interpreters.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/boolobject.c | 10 | ||||
-rw-r--r-- | Objects/bytes_methods.c | 9 | ||||
-rw-r--r-- | Objects/longobject.c | 26 | ||||
-rw-r--r-- | Objects/object.c | 32 | ||||
-rw-r--r-- | Objects/setobject.c | 5 | ||||
-rw-r--r-- | Objects/sliceobject.c | 15 | ||||
-rw-r--r-- | Objects/typeobject.c | 16 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 101 |
8 files changed, 138 insertions, 76 deletions
diff --git a/Objects/boolobject.c b/Objects/boolobject.c index 9d8e956..597a76f 100644 --- a/Objects/boolobject.c +++ b/Objects/boolobject.c @@ -145,10 +145,14 @@ static PyNumberMethods bool_as_number = { 0, /* nb_index */ }; -static void _Py_NO_RETURN -bool_dealloc(PyObject* Py_UNUSED(ignore)) +static void +bool_dealloc(PyObject *boolean) { - _Py_FatalRefcountError("deallocating True or False"); + /* This should never get called, but we also don't want to SEGV if + * we accidentally decref Booleans out of existence. Instead, + * since bools are immortal, re-set the reference count. + */ + _Py_SetImmortal(boolean); } /* The type object for bool. Note that this cannot be subclassed! */ diff --git a/Objects/bytes_methods.c b/Objects/bytes_methods.c index ef9e65e..33aa9c3 100644 --- a/Objects/bytes_methods.c +++ b/Objects/bytes_methods.c @@ -258,9 +258,12 @@ _Py_bytes_istitle(const char *cptr, Py_ssize_t len) const unsigned char *e; int cased, previous_is_cased; - /* Shortcut for single character strings */ - if (len == 1) - return PyBool_FromLong(Py_ISUPPER(*p)); + if (len == 1) { + if (Py_ISUPPER(*p)) { + Py_RETURN_TRUE; + } + Py_RETURN_FALSE; + } /* Special case for empty strings */ if (len == 0) diff --git a/Objects/longobject.c b/Objects/longobject.c index bb4eac0..d98bbbb 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -52,8 +52,7 @@ static PyObject * get_small_int(sdigit ival) { assert(IS_SMALL_INT(ival)); - PyObject *v = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + ival]; - return Py_NewRef(v); + return (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + ival]; } static PyLongObject * @@ -3271,6 +3270,27 @@ long_richcompare(PyObject *self, PyObject *other, int op) Py_RETURN_RICHCOMPARE(result, 0, op); } +static void +long_dealloc(PyObject *self) +{ + /* This should never get called, but we also don't want to SEGV if + * we accidentally decref small Ints out of existence. 
Instead, + * since small Ints are immortal, re-set the reference count. + */ + PyLongObject *pylong = (PyLongObject*)self; + if (pylong && _PyLong_IsCompact(pylong)) { + stwodigits ival = medium_value(pylong); + if (IS_SMALL_INT(ival)) { + PyLongObject *small_pylong = (PyLongObject *)get_small_int((sdigit)ival); + if (pylong == small_pylong) { + _Py_SetImmortal(self); + return; + } + } + } + Py_TYPE(self)->tp_free(self); +} + static Py_hash_t long_hash(PyLongObject *v) { @@ -6233,7 +6253,7 @@ PyTypeObject PyLong_Type = { "int", /* tp_name */ offsetof(PyLongObject, long_value.ob_digit), /* tp_basicsize */ sizeof(digit), /* tp_itemsize */ - 0, /* tp_dealloc */ + long_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ diff --git a/Objects/object.c b/Objects/object.c index e26f737..e508881 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1754,10 +1754,14 @@ none_repr(PyObject *op) return PyUnicode_FromString("None"); } -static void _Py_NO_RETURN -none_dealloc(PyObject* Py_UNUSED(ignore)) +static void +none_dealloc(PyObject* none) { - _Py_FatalRefcountError("deallocating None"); + /* This should never get called, but we also don't want to SEGV if + * we accidentally decref None out of existence. Instead, + * since None is an immortal object, re-set the reference count. 
+ */ + _Py_SetImmortal(none); } static PyObject * @@ -1823,7 +1827,7 @@ PyTypeObject _PyNone_Type = { "NoneType", 0, 0, - none_dealloc, /*tp_dealloc*/ /*never called*/ + none_dealloc, /*tp_dealloc*/ 0, /*tp_vectorcall_offset*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ @@ -1860,8 +1864,9 @@ PyTypeObject _PyNone_Type = { }; PyObject _Py_NoneStruct = { - _PyObject_EXTRA_INIT - 1, &_PyNone_Type + _PyObject_EXTRA_INIT + { _Py_IMMORTAL_REFCNT }, + &_PyNone_Type }; /* NotImplemented is an object that can be used to signal that an @@ -1894,13 +1899,14 @@ notimplemented_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) Py_RETURN_NOTIMPLEMENTED; } -static void _Py_NO_RETURN -notimplemented_dealloc(PyObject* ignore) +static void +notimplemented_dealloc(PyObject *notimplemented) { /* This should never get called, but we also don't want to SEGV if - * we accidentally decref NotImplemented out of existence. + * we accidentally decref NotImplemented out of existence. Instead, + * since Notimplemented is an immortal object, re-set the reference count. 
*/ - Py_FatalError("deallocating NotImplemented"); + _Py_SetImmortal(notimplemented); } static int @@ -1962,7 +1968,8 @@ PyTypeObject _PyNotImplemented_Type = { PyObject _Py_NotImplementedStruct = { _PyObject_EXTRA_INIT - 1, &_PyNotImplemented_Type + { _Py_IMMORTAL_REFCNT }, + &_PyNotImplemented_Type }; extern PyTypeObject _Py_GenericAliasIterType; @@ -2143,7 +2150,8 @@ new_reference(PyObject *op) if (_PyRuntime.tracemalloc.config.tracing) { _PyTraceMalloc_NewReference(op); } - Py_SET_REFCNT(op, 1); + // Skip the immortal object check in Py_SET_REFCNT; always set refcnt to 1 + op->ob_refcnt = 1; #ifdef Py_TRACE_REFS _Py_AddToAllObjects(op, 1); #endif diff --git a/Objects/setobject.c b/Objects/setobject.c index fcdda2a..58f0ae7 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -2543,6 +2543,7 @@ static PyTypeObject _PySetDummy_Type = { }; static PyObject _dummy_struct = { - _PyObject_EXTRA_INIT - 2, &_PySetDummy_Type + _PyObject_EXTRA_INIT + { _Py_IMMORTAL_REFCNT }, + &_PySetDummy_Type }; diff --git a/Objects/sliceobject.c b/Objects/sliceobject.c index 584ebce..e6776ac 100644 --- a/Objects/sliceobject.c +++ b/Objects/sliceobject.c @@ -29,6 +29,16 @@ ellipsis_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) return Py_NewRef(Py_Ellipsis); } +static void +ellipsis_dealloc(PyObject *ellipsis) +{ + /* This should never get called, but we also don't want to SEGV if + * we accidentally decref Ellipsis out of existence. Instead, + * since Ellipsis is an immortal object, re-set the reference count. 
+ */ + _Py_SetImmortal(ellipsis); +} + static PyObject * ellipsis_repr(PyObject *op) { @@ -51,7 +61,7 @@ PyTypeObject PyEllipsis_Type = { "ellipsis", /* tp_name */ 0, /* tp_basicsize */ 0, /* tp_itemsize */ - 0, /*never called*/ /* tp_dealloc */ + ellipsis_dealloc, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ @@ -89,7 +99,8 @@ PyTypeObject PyEllipsis_Type = { PyObject _Py_EllipsisObject = { _PyObject_EXTRA_INIT - 1, &PyEllipsis_Type + { _Py_IMMORTAL_REFCNT }, + &PyEllipsis_Type }; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 9ea458f..85bcd05 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -318,27 +318,11 @@ _PyType_InitCache(PyInterpreterState *interp) entry->version = 0; // Set to None so _PyType_Lookup() can use Py_SETREF(), // rather than using slower Py_XSETREF(). - // (See _PyType_FixCacheRefcounts() about the refcount.) entry->name = Py_None; entry->value = NULL; } } -// This is the temporary fix used by pycore_create_interpreter(), -// in pylifecycle.c. _PyType_InitCache() is called before the GIL -// has been created (for the main interpreter) and without the -// "current" thread state set. This causes crashes when the -// reftotal is updated, so we don't modify the refcount in -// _PyType_InitCache(), and instead do it later by calling -// _PyType_FixCacheRefcounts(). -// XXX This workaround should be removed once we have immortal -// objects (PEP 683). -void -_PyType_FixCacheRefcounts(void) -{ - _Py_RefcntAdd(Py_None, (1 << MCACHE_SIZE_EXP)); -} - static unsigned int _PyType_ClearCache(PyInterpreterState *interp) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 85e5ae7..fd056e3 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -228,14 +228,18 @@ static inline PyObject* unicode_new_empty(void) to strings in this dictionary are *not* counted in the string's ob_refcnt. 
When the interned string reaches a refcnt of 0 the string deallocation function will delete the reference from this dictionary. - Another way to look at this is that to say that the actual reference - count of a string is: s->ob_refcnt + (s->state ? 2 : 0) */ static inline PyObject *get_interned_dict(PyInterpreterState *interp) { return _Py_INTERP_CACHED_OBJECT(interp, interned_strings); } +Py_ssize_t +_PyUnicode_InternedSize() +{ + return PyObject_Length(get_interned_dict(_PyInterpreterState_GET())); +} + static int init_interned_dict(PyInterpreterState *interp) { @@ -1538,30 +1542,19 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end, static void unicode_dealloc(PyObject *unicode) { - PyInterpreterState *interp = _PyInterpreterState_GET(); #ifdef Py_DEBUG if (!unicode_is_finalizing() && unicode_is_singleton(unicode)) { _Py_FatalRefcountError("deallocating an Unicode singleton"); } #endif + /* This should never get called, but we also don't want to SEGV if + * we accidentally decref an immortal string out of existence. Since + * the string is an immortal object, just re-set the reference count. + */ if (PyUnicode_CHECK_INTERNED(unicode)) { - /* Revive the dead object temporarily. PyDict_DelItem() removes two - references (key and value) which were ignored by - PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2 - to prevent calling unicode_dealloc() again. Adjust refcnt after - PyDict_DelItem(). 
*/ - assert(Py_REFCNT(unicode) == 0); - Py_SET_REFCNT(unicode, 3); - PyObject *interned = get_interned_dict(interp); - assert(interned != NULL); - if (PyDict_DelItem(interned, unicode) != 0) { - _PyErr_WriteUnraisableMsg("deletion of interned string failed", - NULL); - } - assert(Py_REFCNT(unicode) == 1); - Py_SET_REFCNT(unicode, 0); + _Py_SetImmortal(unicode); + return; } - if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { PyObject_Free(_PyUnicode_UTF8(unicode)); } @@ -14637,11 +14630,21 @@ _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p) return; } - /* The two references in interned dict (key and value) are not counted by - refcnt. unicode_dealloc() and _PyUnicode_ClearInterned() take care of - this. */ - Py_SET_REFCNT(s, Py_REFCNT(s) - 2); - _PyUnicode_STATE(s).interned = 1; + if (_Py_IsImmortal(s)) { + _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL_STATIC; + return; + } +#ifdef Py_REF_DEBUG + /* The reference count value excluding the 2 references from the + interned dictionary should be excluded from the RefTotal. The + decrements to these objects will not be registered so they + need to be accounted for in here. */ + for (Py_ssize_t i = 0; i < Py_REFCNT(s) - 2; i++) { + _Py_DecRefTotal(_PyInterpreterState_GET()); + } +#endif + _Py_SetImmortal(s); + _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL; } void @@ -14681,10 +14684,20 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) } assert(PyDict_CheckExact(interned)); - /* Interned unicode strings are not forcibly deallocated; rather, we give - them their stolen references back, and then clear and DECREF the - interned dict. */ - + /* TODO: + * Currently, the runtime is not able to guarantee that it can exit without + * allocations that carry over to a future initialization of Python within + * the same process. 
i.e: + * ./python -X showrefcount -c 'import itertools' + * [237 refs, 237 blocks] + * + * Therefore, this should remain disabled for until there is a strict guarantee + * that no memory will be left after `Py_Finalize`. + */ +#ifdef Py_DEBUG + /* For all non-singleton interned strings, restore the two valid references + to that instance from within the intern string dictionary and let the + normal reference counting process clean up these instances. */ #ifdef INTERNED_STATS fprintf(stderr, "releasing %zd interned strings\n", PyDict_GET_SIZE(interned)); @@ -14694,15 +14707,27 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) Py_ssize_t pos = 0; PyObject *s, *ignored_value; while (PyDict_Next(interned, &pos, &s, &ignored_value)) { - assert(PyUnicode_CHECK_INTERNED(s)); - // Restore the two references (key and value) ignored - // by PyUnicode_InternInPlace(). - Py_SET_REFCNT(s, Py_REFCNT(s) + 2); + assert(PyUnicode_IS_READY(s)); + switch (PyUnicode_CHECK_INTERNED(s)) { + case SSTATE_INTERNED_IMMORTAL: + // Skip the Immortal Instance check and restore + // the two references (key and value) ignored + // by PyUnicode_InternInPlace(). + s->ob_refcnt = 2; #ifdef INTERNED_STATS - total_length += PyUnicode_GET_LENGTH(s); + total_length += PyUnicode_GET_LENGTH(s); #endif - - _PyUnicode_STATE(s).interned = 0; + break; + case SSTATE_INTERNED_IMMORTAL_STATIC: + break; + case SSTATE_INTERNED_MORTAL: + /* fall through */ + case SSTATE_NOT_INTERNED: + /* fall through */ + default: + Py_UNREACHABLE(); + } + _PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED; } #ifdef INTERNED_STATS fprintf(stderr, @@ -14710,6 +14735,12 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) total_length); #endif + struct _Py_unicode_state *state = &interp->unicode; + struct _Py_unicode_ids *ids = &state->ids; + for (Py_ssize_t i=0; i < ids->size; i++) { + Py_XINCREF(ids->array[i]); + } +#endif /* Py_DEBUG */ clear_interned_dict(interp); } |