From 1e703a473343ed198c9a06a876b25d7d69d4bbd0 Mon Sep 17 00:00:00 2001 From: Carl Meyer Date: Tue, 7 Mar 2023 17:10:58 -0700 Subject: gh-102381: don't call watcher callback with dead object (#102382) Co-authored-by: T. Wouters --- Doc/c-api/code.rst | 17 +++++++++--- Doc/c-api/dict.rst | 21 ++++++++++++--- Doc/c-api/function.rst | 17 +++++++++--- Include/cpython/code.h | 13 +++++++--- Include/cpython/dictobject.h | 21 +++++++++------ Include/cpython/funcobject.h | 16 ++++++------ Include/internal/pycore_dict.h | 1 + Lib/test/test_capi/test_watchers.py | 52 +++++++++++++++++++++++++++++++++++-- Modules/_testcapi/watchers.c | 13 +++++++++- Objects/codeobject.c | 38 ++++++++++++++++++++++++++- Objects/dictobject.c | 34 +++++++++++++++++++++--- Objects/funcobject.c | 38 ++++++++++++++++++++++++++- 12 files changed, 243 insertions(+), 38 deletions(-) diff --git a/Doc/c-api/code.rst b/Doc/c-api/code.rst index 062ef3a..a99de99 100644 --- a/Doc/c-api/code.rst +++ b/Doc/c-api/code.rst @@ -172,6 +172,11 @@ bound into a function. before the destruction of *co* takes place, so the prior state of *co* can be inspected. + If *event* is ``PY_CODE_EVENT_DESTROY``, taking a reference in the callback + to the about-to-be-destroyed code object will resurrect it and prevent it + from being freed at this time. When the resurrected object is destroyed + later, any watcher callbacks active at that time will be called again. + Users of this API should not rely on internal runtime implementation details. Such details may include, but are not limited to, the exact order and timing of creation and destruction of code objects. While @@ -179,9 +184,15 @@ bound into a function. (including whether a callback is invoked or not), it does not change the semantics of the Python code being executed. - If the callback returns with an exception set, it must return ``-1``; this - exception will be printed as an unraisable exception using - :c:func:`PyErr_WriteUnraisable`. Otherwise it should return ``0``. + If the callback sets an exception, it must return ``-1``; this exception will + be printed as an unraisable exception using :c:func:`PyErr_WriteUnraisable`. + Otherwise it should return ``0``. + + There may already be a pending exception set on entry to the callback. In + this case, the callback should return ``0`` with the same exception still + set. This means the callback may not call any other API that can set an + exception unless it saves and clears the exception state first, and restores + it before returning. .. versionadded:: 3.12 diff --git a/Doc/c-api/dict.rst b/Doc/c-api/dict.rst index 34106ee..b9f84ce 100644 --- a/Doc/c-api/dict.rst +++ b/Doc/c-api/dict.rst @@ -298,13 +298,26 @@ Dictionary Objects dictionary. The callback may inspect but must not modify *dict*; doing so could have - unpredictable effects, including infinite recursion. + unpredictable effects, including infinite recursion. Do not trigger Python + code execution in the callback, as it could modify the dict as a side effect. + + If *event* is ``PyDict_EVENT_DEALLOCATED``, taking a new reference in the + callback to the about-to-be-destroyed dictionary will resurrect it and + prevent it from being freed at this time. When the resurrected object is + destroyed later, any watcher callbacks active at that time will be called + again. Callbacks occur before the notified modification to *dict* takes place, so the prior state of *dict* can be inspected. - If the callback returns with an exception set, it must return ``-1``; this - exception will be printed as an unraisable exception using - :c:func:`PyErr_WriteUnraisable`. Otherwise it should return ``0``. + If the callback sets an exception, it must return ``-1``; this exception will + be printed as an unraisable exception using :c:func:`PyErr_WriteUnraisable`. + Otherwise it should return ``0``. + + There may already be a pending exception set on entry to the callback. In + this case, the callback should return ``0`` with the same exception still + set. This means the callback may not call any other API that can set an + exception unless it saves and clears the exception state first, and restores + it before returning. .. versionadded:: 3.12 diff --git a/Doc/c-api/function.rst b/Doc/c-api/function.rst index bc7569d..947ed70 100644 --- a/Doc/c-api/function.rst +++ b/Doc/c-api/function.rst @@ -173,8 +173,19 @@ There are a few functions specific to Python functions. runtime behavior depending on optimization decisions, it does not change the semantics of the Python code being executed. - If the callback returns with an exception set, it must return ``-1``; this - exception will be printed as an unraisable exception using - :c:func:`PyErr_WriteUnraisable`. Otherwise it should return ``0``. + If *event* is ``PyFunction_EVENT_DESTROY``, Taking a reference in the + callback to the about-to-be-destroyed function will resurrect it, preventing + it from being freed at this time. When the resurrected object is destroyed + later, any watcher callbacks active at that time will be called again. + + If the callback sets an exception, it must return ``-1``; this exception will + be printed as an unraisable exception using :c:func:`PyErr_WriteUnraisable`. + Otherwise it should return ``0``. + + There may already be a pending exception set on entry to the callback. In + this case, the callback should return ``0`` with the same exception still + set. This means the callback may not call any other API that can set an + exception unless it saves and clears the exception state first, and restores + it before returning. .. versionadded:: 3.12 diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 0e4bd8a..abcf125 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -224,9 +224,14 @@ PyAPI_FUNC(int) PyCode_Addr2Line(PyCodeObject *, int); PyAPI_FUNC(int) PyCode_Addr2Location(PyCodeObject *, int, int *, int *, int *, int *); -typedef enum PyCodeEvent { - PY_CODE_EVENT_CREATE, - PY_CODE_EVENT_DESTROY +#define PY_FOREACH_CODE_EVENT(V) \ + V(CREATE) \ + V(DESTROY) + +typedef enum { + #define PY_DEF_EVENT(op) PY_CODE_EVENT_##op, + PY_FOREACH_CODE_EVENT(PY_DEF_EVENT) + #undef PY_DEF_EVENT } PyCodeEvent; @@ -236,7 +241,7 @@ typedef enum PyCodeEvent { * The callback is invoked with a borrowed reference to co, after it is * created and before it is destroyed. * - * If the callback returns with an exception set, it must return -1. Otherwise + * If the callback sets an exception, it must return -1. Otherwise * it should return 0. */ typedef int (*PyCode_WatchCallback)( diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h index 5001f35..ddada92 100644 --- a/Include/cpython/dictobject.h +++ b/Include/cpython/dictobject.h @@ -16,11 +16,11 @@ typedef struct { /* Dictionary version: globally unique, value change each time the dictionary is modified */ -#ifdef Py_BUILD_CORE +#ifdef Py_BUILD_CORE uint64_t ma_version_tag; #else Py_DEPRECATED(3.12) uint64_t ma_version_tag; -#endif +#endif PyDictKeysObject *ma_keys; @@ -90,13 +90,18 @@ PyAPI_FUNC(PyObject *) _PyDictView_Intersect(PyObject* self, PyObject *other); /* Dictionary watchers */ +#define PY_FOREACH_DICT_EVENT(V) \ + V(ADDED) \ + V(MODIFIED) \ + V(DELETED) \ + V(CLONED) \ + V(CLEARED) \ + V(DEALLOCATED) + typedef enum { - PyDict_EVENT_ADDED, - PyDict_EVENT_MODIFIED, - PyDict_EVENT_DELETED, - PyDict_EVENT_CLONED, - PyDict_EVENT_CLEARED, - PyDict_EVENT_DEALLOCATED, + #define PY_DEF_EVENT(EVENT) PyDict_EVENT_##EVENT, + PY_FOREACH_DICT_EVENT(PY_DEF_EVENT) + #undef PY_DEF_EVENT } PyDict_WatchEvent; // Callback to be invoked when a watched dict is cleared, dealloced, or modified. diff --git a/Include/cpython/funcobject.h b/Include/cpython/funcobject.h index 5979feb..c716330 100644 --- a/Include/cpython/funcobject.h +++ b/Include/cpython/funcobject.h @@ -131,17 +131,17 @@ PyAPI_DATA(PyTypeObject) PyStaticMethod_Type; PyAPI_FUNC(PyObject *) PyClassMethod_New(PyObject *); PyAPI_FUNC(PyObject *) PyStaticMethod_New(PyObject *); -#define FOREACH_FUNC_EVENT(V) \ - V(CREATE) \ - V(DESTROY) \ - V(MODIFY_CODE) \ - V(MODIFY_DEFAULTS) \ +#define PY_FOREACH_FUNC_EVENT(V) \ + V(CREATE) \ + V(DESTROY) \ + V(MODIFY_CODE) \ + V(MODIFY_DEFAULTS) \ V(MODIFY_KWDEFAULTS) typedef enum { - #define DEF_EVENT(EVENT) PyFunction_EVENT_##EVENT, - FOREACH_FUNC_EVENT(DEF_EVENT) - #undef DEF_EVENT + #define PY_DEF_EVENT(EVENT) PyFunction_EVENT_##EVENT, + PY_FOREACH_FUNC_EVENT(PY_DEF_EVENT) + #undef PY_DEF_EVENT } PyFunction_WatchEvent; /* diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index c74a343..1af5e59 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -164,6 +164,7 @@ _PyDict_NotifyEvent(PyDict_WatchEvent event, PyObject *key, PyObject *value) { + assert(Py_REFCNT((PyObject*)mp) > 0); int watcher_bits = mp->ma_version_tag & DICT_VERSION_MASK; if (watcher_bits) { _PyDict_SendEvent(watcher_bits, event, mp, key, value); diff --git a/Lib/test/test_capi/test_watchers.py b/Lib/test/test_capi/test_watchers.py index 1922614..93f6ef7 100644 --- a/Lib/test/test_capi/test_watchers.py +++ b/Lib/test/test_capi/test_watchers.py @@ -109,10 +109,21 @@ class TestDictWatchers(unittest.TestCase): self.watch(wid, d) with catch_unraisable_exception() as cm: d["foo"] = "bar" - self.assertIs(cm.unraisable.object, d) + self.assertIn( + "PyDict_EVENT_ADDED watcher callback for 0); PyInterpreterState *interp = _PyInterpreterState_GET(); assert(interp->_initialized); uint8_t bits = interp->active_code_watchers; @@ -25,7 +40,21 @@ notify_code_watchers(PyCodeEvent event, PyCodeObject *co) // callback must be non-null if the watcher bit is set assert(cb != NULL); if (cb(event, co) < 0) { - PyErr_WriteUnraisable((PyObject *) co); + // Don't risk resurrecting the object if an unraisablehook keeps + // a reference; pass a string as context. + PyObject *context = NULL; + PyObject *repr = code_repr(co); + if (repr) { + context = PyUnicode_FromFormat( + "%s watcher callback for %U", + code_event_name(event), repr); + Py_DECREF(repr); + } + if (context == NULL) { + context = Py_NewRef(Py_None); + } + PyErr_WriteUnraisable(context); + Py_DECREF(context); } } i++; @@ -1667,7 +1696,14 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount, static void code_dealloc(PyCodeObject *co) { + assert(Py_REFCNT(co) == 0); + Py_SET_REFCNT(co, 1); notify_code_watchers(PY_CODE_EVENT_DESTROY, co); + if (Py_REFCNT(co) > 1) { + Py_SET_REFCNT(co, Py_REFCNT(co) - 1); + return; + } + Py_SET_REFCNT(co, 0); if (co->co_extra != NULL) { PyInterpreterState *interp = _PyInterpreterState_GET(); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index fc658ca..e3795e7 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2308,7 +2308,14 @@ Fail: static void dict_dealloc(PyDictObject *mp) { + assert(Py_REFCNT(mp) == 0); + Py_SET_REFCNT(mp, 1); _PyDict_NotifyEvent(PyDict_EVENT_DEALLOCATED, mp, NULL, NULL); + if (Py_REFCNT(mp) > 1) { + Py_SET_REFCNT(mp, Py_REFCNT(mp) - 1); + return; + } + Py_SET_REFCNT(mp, 0); PyDictValues *values = mp->ma_values; PyDictKeysObject *keys = mp->ma_keys; Py_ssize_t i, n; @@ -5732,6 +5739,18 @@ PyDict_ClearWatcher(int watcher_id) return 0; } +static const char * +dict_event_name(PyDict_WatchEvent event) { + switch (event) { + #define CASE(op) \ + case PyDict_EVENT_##op: \ + return "PyDict_EVENT_" #op; + PY_FOREACH_DICT_EVENT(CASE) + #undef CASE + } + Py_UNREACHABLE(); +} + void _PyDict_SendEvent(int watcher_bits, PyDict_WatchEvent event, @@ -5744,9 +5763,18 @@ _PyDict_SendEvent(int watcher_bits, if (watcher_bits & 1) { PyDict_WatchCallback cb = interp->dict_state.watchers[i]; if (cb && (cb(event, (PyObject*)mp, key, value) < 0)) { - // some dict modification paths (e.g. PyDict_Clear) can't raise, so we - // can't propagate exceptions from dict watchers. - PyErr_WriteUnraisable((PyObject *)mp); + // We don't want to resurrect the dict by potentially having an + // unraisablehook keep a reference to it, so we don't pass the + // dict as context, just an informative string message. Dict + // repr can call arbitrary code, so we invent a simpler version. + PyObject *context = PyUnicode_FromFormat( + "%s watcher callback for ", + dict_event_name(event), mp); + if (context == NULL) { + context = Py_NewRef(Py_None); + } + PyErr_WriteUnraisable(context); + Py_DECREF(context); } } watcher_bits >>= 1; diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 91a6b3d..99048ea 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -8,6 +8,20 @@ #include "pycore_pyerrors.h" // _PyErr_Occurred() #include "structmember.h" // PyMemberDef +static PyObject* func_repr(PyFunctionObject *op); + +static const char * +func_event_name(PyFunction_WatchEvent event) { + switch (event) { + #define CASE(op) \ + case PyFunction_EVENT_##op: \ + return "PyFunction_EVENT_" #op; + PY_FOREACH_FUNC_EVENT(CASE) + #undef CASE + } + Py_UNREACHABLE(); +} + static void notify_func_watchers(PyInterpreterState *interp, PyFunction_WatchEvent event, PyFunctionObject *func, PyObject *new_value) @@ -21,7 +35,21 @@ notify_func_watchers(PyInterpreterState *interp, PyFunction_WatchEvent event, // callback must be non-null if the watcher bit is set assert(cb != NULL); if (cb(event, func, new_value) < 0) { - PyErr_WriteUnraisable((PyObject *) func); + // Don't risk resurrecting the func if an unraisablehook keeps a + // reference; pass a string as context. + PyObject *context = NULL; + PyObject *repr = func_repr(func); + if (repr != NULL) { + context = PyUnicode_FromFormat( + "%s watcher callback for %U", + func_event_name(event), repr); + Py_DECREF(repr); + } + if (context == NULL) { + context = Py_NewRef(Py_None); + } + PyErr_WriteUnraisable(context); + Py_DECREF(context); } } i++; @@ -33,6 +61,7 @@ static inline void handle_func_event(PyFunction_WatchEvent event, PyFunctionObject *func, PyObject *new_value) { + assert(Py_REFCNT(func) > 0); PyInterpreterState *interp = _PyInterpreterState_GET(); assert(interp->_initialized); if (interp->active_func_watchers) { @@ -766,7 +795,14 @@ func_clear(PyFunctionObject *op) static void func_dealloc(PyFunctionObject *op) { + assert(Py_REFCNT(op) == 0); + Py_SET_REFCNT(op, 1); handle_func_event(PyFunction_EVENT_DESTROY, op, NULL); + if (Py_REFCNT(op) > 1) { + Py_SET_REFCNT(op, Py_REFCNT(op) - 1); + return; + } + Py_SET_REFCNT(op, 0); _PyObject_GC_UNTRACK(op); if (op->func_weakreflist != NULL) { PyObject_ClearWeakRefs((PyObject *) op); -- cgit v0.12