author    Dino Viehland <dinoviehland@meta.com>  2024-11-21 16:41:19 (GMT)
committer GitHub <noreply@github.com>            2024-11-21 16:41:19 (GMT)
commit    bf542f8bb9f12f0df9481f2222b21545806dd9e1 (patch)
tree      41db15281aa5a0e25e63a46e80cbe50ef80cb7f6
parent    3926842117feffe5d2c9727e1899bea5ae2adb28 (diff)
download  cpython-bf542f8bb9f12f0df9481f2222b21545806dd9e1.zip
          cpython-bf542f8bb9f12f0df9481f2222b21545806dd9e1.tar.gz
          cpython-bf542f8bb9f12f0df9481f2222b21545806dd9e1.tar.bz2
gh-124470: Fix crash when reading from object instance dictionary while replacing it (#122489)
Delay freeing the dictionary when replacing it
-rw-r--r--  Include/internal/pycore_gc.h                                                        18
-rw-r--r--  Include/internal/pycore_pymem.h                                                     16
-rw-r--r--  Lib/test/test_free_threading/test_dict.py                                           64
-rw-r--r--  Misc/NEWS.d/next/Core_and_Builtins/2024-09-25-21-50-23.gh-issue-124470.pFr3_d.rst    1
-rw-r--r--  Objects/dictobject.c                                                               164
-rw-r--r--  Objects/obmalloc.c                                                                  62
-rw-r--r--  Python/gc_free_threading.c                                                          49
7 files changed, 289 insertions(+), 85 deletions(-)
diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h
index 38a1c56..479fe10 100644
--- a/Include/internal/pycore_gc.h
+++ b/Include/internal/pycore_gc.h
@@ -50,7 +50,6 @@ static inline PyObject* _Py_FROM_GC(PyGC_Head *gc) {
# define _PyGC_BITS_UNREACHABLE (4)
# define _PyGC_BITS_FROZEN (8)
# define _PyGC_BITS_SHARED (16)
-# define _PyGC_BITS_SHARED_INLINE (32)
# define _PyGC_BITS_DEFERRED (64) // Use deferred reference counting
#endif
@@ -119,23 +118,6 @@ static inline void _PyObject_GC_SET_SHARED(PyObject *op) {
}
#define _PyObject_GC_SET_SHARED(op) _PyObject_GC_SET_SHARED(_Py_CAST(PyObject*, op))
-/* True if the memory of the object is shared between multiple
- * threads and needs special purpose when freeing due to
- * the possibility of in-flight lock-free reads occurring.
- * Objects with this bit that are GC objects will automatically
- * delay-freed by PyObject_GC_Del. */
-static inline int _PyObject_GC_IS_SHARED_INLINE(PyObject *op) {
- return _PyObject_HAS_GC_BITS(op, _PyGC_BITS_SHARED_INLINE);
-}
-#define _PyObject_GC_IS_SHARED_INLINE(op) \
- _PyObject_GC_IS_SHARED_INLINE(_Py_CAST(PyObject*, op))
-
-static inline void _PyObject_GC_SET_SHARED_INLINE(PyObject *op) {
- _PyObject_SET_GC_BITS(op, _PyGC_BITS_SHARED_INLINE);
-}
-#define _PyObject_GC_SET_SHARED_INLINE(op) \
- _PyObject_GC_SET_SHARED_INLINE(_Py_CAST(PyObject*, op))
-
#endif
/* Bit flags for _gc_prev */
diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h
index dd6b076..5bb3400 100644
--- a/Include/internal/pycore_pymem.h
+++ b/Include/internal/pycore_pymem.h
@@ -120,11 +120,25 @@ extern int _PyMem_DebugEnabled(void);
extern void _PyMem_FreeDelayed(void *ptr);
// Enqueue an object to be freed possibly after some delay
-extern void _PyObject_FreeDelayed(void *ptr);
+#ifdef Py_GIL_DISABLED
+extern void _PyObject_XDecRefDelayed(PyObject *obj);
+#else
+static inline void _PyObject_XDecRefDelayed(PyObject *obj)
+{
+ Py_XDECREF(obj);
+}
+#endif
// Periodically process delayed free requests.
extern void _PyMem_ProcessDelayed(PyThreadState *tstate);
+
+// Periodically process delayed free requests when the world is stopped.
+// Notify of any objects which should be freed.
+typedef void (*delayed_dealloc_cb)(PyObject *, void *);
+extern void _PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate,
+ delayed_dealloc_cb cb, void *state);
+
// Abandon all thread-local delayed free requests and push them to the
// interpreter's queue.
extern void _PyMem_AbandonDelayed(PyThreadState *tstate);
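
For context, the call pattern this new API is designed for is the one the Objects/dictobject.c change below wraps in its decref_maybe_delay() helper: drop the reference immediately when no lock-free reader can still be holding the pointer, and defer it otherwise. A minimal caller-side sketch (drop_possibly_shared_ref is a hypothetical name, not part of this patch):

static void
drop_possibly_shared_ref(PyObject *op, int may_have_lockfree_readers)
{
    if (may_have_lockfree_readers) {
        // Free-threaded build: queue the decref so it only runs once every
        // thread has passed a QSBR quiescent state; in-flight lock-free
        // readers of 'op' therefore never see freed memory.
        _PyObject_XDecRefDelayed(op);
    }
    else {
        // No concurrent readers are possible: drop the reference now.
        Py_XDECREF(op);
    }
}

On non-free-threaded builds the inline fallback above makes _PyObject_XDecRefDelayed() identical to Py_XDECREF(), so callers need no #ifdef of their own.
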
diff --git a/Lib/test/test_free_threading/test_dict.py b/Lib/test/test_free_threading/test_dict.py
index 80daf0d..13717cb 100644
--- a/Lib/test/test_free_threading/test_dict.py
+++ b/Lib/test/test_free_threading/test_dict.py
@@ -142,6 +142,70 @@ class TestDict(TestCase):
for ref in thread_list:
self.assertIsNone(ref())
+ def test_racing_set_object_dict(self):
+ """Races assigning to __dict__ should be thread safe"""
+ class C: pass
+ class MyDict(dict): pass
+ for cyclic in (False, True):
+ f = C()
+ f.__dict__ = {"foo": 42}
+ THREAD_COUNT = 10
+
+ def writer_func(l):
+ for i in range(1000):
+ if cyclic:
+ other_d = {}
+ d = MyDict({"foo": 100})
+ if cyclic:
+ d["x"] = other_d
+ other_d["bar"] = d
+ l.append(weakref.ref(d))
+ f.__dict__ = d
+
+ def reader_func():
+ for i in range(1000):
+ f.foo
+
+ lists = []
+ readers = []
+ writers = []
+ for x in range(THREAD_COUNT):
+ thread_list = []
+ lists.append(thread_list)
+ writer = Thread(target=partial(writer_func, thread_list))
+ writers.append(writer)
+
+ for x in range(THREAD_COUNT):
+ reader = Thread(target=partial(reader_func))
+ readers.append(reader)
+
+ for writer in writers:
+ writer.start()
+ for reader in readers:
+ reader.start()
+
+ for writer in writers:
+ writer.join()
+
+ for reader in readers:
+ reader.join()
+
+ f.__dict__ = {}
+ gc.collect()
+ gc.collect()
+
+ count = 0
+ ids = set()
+ for thread_list in lists:
+                for ref in thread_list:
+                    if ref() is None:
+                        continue
+                    count += 1
+                    ids.add(id(ref()))
+
+ self.assertEqual(count, 0)
+
if __name__ == "__main__":
unittest.main()
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-25-21-50-23.gh-issue-124470.pFr3_d.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-25-21-50-23.gh-issue-124470.pFr3_d.rst
new file mode 100644
index 0000000..8f2f371
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-25-21-50-23.gh-issue-124470.pFr3_d.rst
@@ -0,0 +1 @@
+Fix crash in free-threaded builds when an object's dictionary was replaced while another thread was reading an attribute through it.
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 23616b3..393e9f9 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -7087,51 +7087,146 @@ set_dict_inline_values(PyObject *obj, PyDictObject *new_dict)
}
}
-int
-_PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict)
+#ifdef Py_GIL_DISABLED
+
+// Tries to set the dictionary for an object in the easy case: when the
+// current dictionary is either not materialized at all or is a dictionary
+// which does not point at the inline values.
+static bool
+try_set_dict_inline_only_or_other_dict(PyObject *obj, PyObject *new_dict, PyDictObject **cur_dict)
+{
+ bool replaced = false;
+ Py_BEGIN_CRITICAL_SECTION(obj);
+
+ PyDictObject *dict = *cur_dict = _PyObject_GetManagedDict(obj);
+ if (dict == NULL) {
+ // We only have inline values, we can just completely replace them.
+ set_dict_inline_values(obj, (PyDictObject *)new_dict);
+ replaced = true;
+ goto exit_lock;
+ }
+
+ if (FT_ATOMIC_LOAD_PTR_RELAXED(dict->ma_values) != _PyObject_InlineValues(obj)) {
+        // We have a materialized dict which doesn't point at the inline
+        // values, so we can simply swap in the new dictionary and free the
+        // old one.
+ FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict,
+ (PyDictObject *)Py_XNewRef(new_dict));
+ replaced = true;
+ goto exit_lock;
+ }
+ else {
+ // We have inline values, we need to lock the dict and the object
+ // at the same time to safely dematerialize them. To do that while releasing
+ // the object lock we need a strong reference to the current dictionary.
+ Py_INCREF(dict);
+ }
+exit_lock:
+ Py_END_CRITICAL_SECTION();
+ return replaced;
+}
+
+// Replaces a dictionary that is probably the materialized one pointing at
+// the inline values. Another thread may have raced with us, though, and
+// already replaced it with a different dictionary.
+static int
+replace_dict_probably_inline_materialized(PyObject *obj, PyDictObject *inline_dict,
+ PyDictObject *cur_dict, PyObject *new_dict)
+{
+ _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(obj);
+
+ if (cur_dict == inline_dict) {
+ assert(FT_ATOMIC_LOAD_PTR_RELAXED(inline_dict->ma_values) == _PyObject_InlineValues(obj));
+
+ int err = _PyDict_DetachFromObject(inline_dict, obj);
+ if (err != 0) {
+ assert(new_dict == NULL);
+ return err;
+ }
+ }
+
+ FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict,
+ (PyDictObject *)Py_XNewRef(new_dict));
+ return 0;
+}
+
+#endif
+
+static void
+decref_maybe_delay(PyObject *obj, bool delay)
+{
+ if (delay) {
+ _PyObject_XDecRefDelayed(obj);
+ }
+ else {
+ Py_XDECREF(obj);
+ }
+}
+
+static int
+set_or_clear_managed_dict(PyObject *obj, PyObject *new_dict, bool clear)
{
assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
+#ifndef NDEBUG
+ Py_BEGIN_CRITICAL_SECTION(obj);
assert(_PyObject_InlineValuesConsistencyCheck(obj));
+ Py_END_CRITICAL_SECTION();
+#endif
int err = 0;
PyTypeObject *tp = Py_TYPE(obj);
if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) {
- PyDictObject *dict = _PyObject_GetManagedDict(obj);
- if (dict == NULL) {
#ifdef Py_GIL_DISABLED
- Py_BEGIN_CRITICAL_SECTION(obj);
+ PyDictObject *prev_dict;
+ if (!try_set_dict_inline_only_or_other_dict(obj, new_dict, &prev_dict)) {
+ // We had a materialized dictionary which pointed at the inline
+ // values. We need to lock both the object and the dict at the
+        // same time to safely replace it. We can't merely lock the dictionary
+        // while the object is locked, because acquiring the dict lock can
+        // suspend (and thereby release) the object's critical section.
+ PyDictObject *cur_dict;
- dict = _PyObject_ManagedDictPointer(obj)->dict;
- if (dict == NULL) {
- set_dict_inline_values(obj, (PyDictObject *)new_dict);
- }
+ assert(prev_dict != NULL);
+ Py_BEGIN_CRITICAL_SECTION2(obj, prev_dict);
- Py_END_CRITICAL_SECTION();
+        // Another thread could have raced with us between the call to
+        // try_set_dict_inline_only_or_other_dict, where we locked the object,
+        // and this point, where we re-locked the object and the dictionary.
+ cur_dict = _PyObject_GetManagedDict(obj);
- if (dict == NULL) {
- return 0;
+ err = replace_dict_probably_inline_materialized(obj, prev_dict,
+ cur_dict, new_dict);
+
+ Py_END_CRITICAL_SECTION2();
+
+ // Decref for the dictionary we incref'd in try_set_dict_inline_only_or_other_dict
+ // while the object was locked
+ decref_maybe_delay((PyObject *)prev_dict,
+ !clear && prev_dict != cur_dict);
+ if (err != 0) {
+ return err;
}
-#else
- set_dict_inline_values(obj, (PyDictObject *)new_dict);
- return 0;
-#endif
- }
- Py_BEGIN_CRITICAL_SECTION2(dict, obj);
+ prev_dict = cur_dict;
+ }
- // We've locked dict, but the actual dict could have changed
- // since we locked it.
- dict = _PyObject_ManagedDictPointer(obj)->dict;
- err = _PyDict_DetachFromObject(dict, obj);
- assert(err == 0 || new_dict == NULL);
- if (err == 0) {
- FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict,
- (PyDictObject *)Py_XNewRef(new_dict));
+ if (prev_dict != NULL) {
+ // decref for the dictionary that we replaced
+ decref_maybe_delay((PyObject *)prev_dict, !clear);
}
- Py_END_CRITICAL_SECTION2();
- if (err == 0) {
- Py_XDECREF(dict);
+ return 0;
+#else
+ PyDictObject *dict = _PyObject_GetManagedDict(obj);
+ if (dict == NULL) {
+ set_dict_inline_values(obj, (PyDictObject *)new_dict);
+ return 0;
+ }
+ if (_PyDict_DetachFromObject(dict, obj) == 0) {
+ _PyObject_ManagedDictPointer(obj)->dict = (PyDictObject *)Py_XNewRef(new_dict);
+ Py_DECREF(dict);
+ return 0;
}
+ assert(new_dict == NULL);
+ return -1;
+#endif
}
else {
PyDictObject *dict;
@@ -7144,17 +7239,22 @@ _PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict)
(PyDictObject *)Py_XNewRef(new_dict));
Py_END_CRITICAL_SECTION();
-
- Py_XDECREF(dict);
+ decref_maybe_delay((PyObject *)dict, !clear);
}
assert(_PyObject_InlineValuesConsistencyCheck(obj));
return err;
}
+int
+_PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict)
+{
+ return set_or_clear_managed_dict(obj, new_dict, false);
+}
+
void
PyObject_ClearManagedDict(PyObject *obj)
{
- if (_PyObject_SetManagedDict(obj, NULL) < 0) {
+ if (set_or_clear_managed_dict(obj, NULL, true) < 0) {
/* Must be out of memory */
assert(PyErr_Occurred() == PyExc_MemoryError);
PyErr_WriteUnraisable(NULL);
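
The motivation for the delayed decref above: on free-threaded builds the attribute-read fast path loads the managed-dict pointer and reads from the dictionary without locking the object, so the memory of a just-replaced dictionary has to stay valid until every such reader has finished. A simplified sketch of the racing reader (illustrative only, not the real lookup code):

static PyObject *
read_attr_snapshot(PyObject *obj, PyObject *name)
{
    // Snapshot the dict pointer without holding the object's lock.
    PyDictObject *dict = _PyObject_GetManagedDict(obj);
    if (dict == NULL) {
        return NULL;
    }
    // Between the load above and the read below, another thread may have
    // swapped in a new __dict__. Delay-freeing the old dictionary is what
    // keeps this read from becoming a use-after-free.
    return PyDict_GetItemWithError((PyObject *)dict, name);
}
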
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index dfeccfa..3d6b1ab 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -1093,10 +1093,24 @@ struct _mem_work_chunk {
};
static void
-free_work_item(uintptr_t ptr)
+free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state)
{
if (ptr & 0x01) {
- PyObject_Free((char *)(ptr - 1));
+ PyObject *obj = (PyObject *)(ptr - 1);
+#ifdef Py_GIL_DISABLED
+ if (cb == NULL) {
+ assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped);
+ Py_DECREF(obj);
+ return;
+ }
+
+ Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
+ if (refcount == 0) {
+ cb(obj, state);
+ }
+#else
+ Py_DECREF(obj);
+#endif
}
else {
PyMem_Free((void *)ptr);
@@ -1107,7 +1121,7 @@ static void
free_delayed(uintptr_t ptr)
{
#ifndef Py_GIL_DISABLED
- free_work_item(ptr);
+ free_work_item(ptr, NULL, NULL);
#else
PyInterpreterState *interp = _PyInterpreterState_GET();
if (_PyInterpreterState_GetFinalizing(interp) != NULL ||
@@ -1115,7 +1129,8 @@ free_delayed(uintptr_t ptr)
{
// Free immediately during interpreter shutdown or if the world is
// stopped.
- free_work_item(ptr);
+ assert(!interp->stoptheworld.world_stopped || !(ptr & 0x01));
+ free_work_item(ptr, NULL, NULL);
return;
}
@@ -1142,7 +1157,8 @@ free_delayed(uintptr_t ptr)
if (buf == NULL) {
// failed to allocate a buffer, free immediately
_PyEval_StopTheWorld(tstate->base.interp);
- free_work_item(ptr);
+ // TODO: Fix me
+ free_work_item(ptr, NULL, NULL);
_PyEval_StartTheWorld(tstate->base.interp);
return;
}
@@ -1166,12 +1182,16 @@ _PyMem_FreeDelayed(void *ptr)
free_delayed((uintptr_t)ptr);
}
+#ifdef Py_GIL_DISABLED
void
-_PyObject_FreeDelayed(void *ptr)
+_PyObject_XDecRefDelayed(PyObject *ptr)
{
assert(!((uintptr_t)ptr & 0x01));
- free_delayed(((uintptr_t)ptr)|0x01);
+ if (ptr != NULL) {
+ free_delayed(((uintptr_t)ptr)|0x01);
+ }
}
+#endif
static struct _mem_work_chunk *
work_queue_first(struct llist_node *head)
@@ -1181,7 +1201,7 @@ work_queue_first(struct llist_node *head)
static void
process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr,
- bool keep_empty)
+ bool keep_empty, delayed_dealloc_cb cb, void *state)
{
while (!llist_empty(head)) {
struct _mem_work_chunk *buf = work_queue_first(head);
@@ -1192,7 +1212,7 @@ process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr,
return;
}
- free_work_item(item->ptr);
+ free_work_item(item->ptr, cb, state);
buf->rd_idx++;
}
@@ -1210,7 +1230,8 @@ process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr,
static void
process_interp_queue(struct _Py_mem_interp_free_queue *queue,
- struct _qsbr_thread_state *qsbr)
+ struct _qsbr_thread_state *qsbr, delayed_dealloc_cb cb,
+ void *state)
{
if (!_Py_atomic_load_int_relaxed(&queue->has_work)) {
return;
@@ -1218,7 +1239,7 @@ process_interp_queue(struct _Py_mem_interp_free_queue *queue,
// Try to acquire the lock, but don't block if it's already held.
if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) {
- process_queue(&queue->head, qsbr, false);
+ process_queue(&queue->head, qsbr, false, cb, state);
int more_work = !llist_empty(&queue->head);
_Py_atomic_store_int_relaxed(&queue->has_work, more_work);
@@ -1234,10 +1255,23 @@ _PyMem_ProcessDelayed(PyThreadState *tstate)
_PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
// Process thread-local work
- process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true);
+ process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true, NULL, NULL);
+
+ // Process shared interpreter work
+ process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, NULL, NULL);
+}
+
+void
+_PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, delayed_dealloc_cb cb, void *state)
+{
+ PyInterpreterState *interp = tstate->interp;
+ _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
+
+ // Process thread-local work
+ process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true, cb, state);
// Process shared interpreter work
- process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr);
+ process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, cb, state);
}
void
@@ -1279,7 +1313,7 @@ _PyMem_FiniDelayed(PyInterpreterState *interp)
// Free the remaining items immediately. There should be no other
// threads accessing the memory at this point during shutdown.
struct _mem_work_item *item = &buf->array[buf->rd_idx];
- free_work_item(item->ptr);
+ free_work_item(item->ptr, NULL, NULL);
buf->rd_idx++;
}
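
The 0x01 tag bit explains the (ptr - 1) arithmetic in free_work_item() above: work-queue entries are bare uintptr_t values, and because objects are at least two-byte aligned, bit 0 of a valid pointer is always clear and can encode the entry's kind — set for "PyObject to decref", clear for "raw memory for PyMem_Free". A toy illustration of the scheme (helper names are hypothetical):

#include <assert.h>
#include <stdint.h>

// Tag a pointer as an object entry; alignment guarantees bit 0 is free.
static inline uintptr_t
tag_object(void *obj)
{
    assert(((uintptr_t)obj & 0x01) == 0);
    return (uintptr_t)obj | 0x01;
}

// True if the work item is an object to decref, false if raw memory.
static inline int
is_object_entry(uintptr_t work)
{
    return (work & 0x01) != 0;
}

// Recover the untagged pointer (equivalent to the 'ptr - 1' above).
static inline void *
untag(uintptr_t work)
{
    return (void *)(work & ~(uintptr_t)0x01);
}
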
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index a6e0022..0920c61 100644
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@@ -394,6 +394,23 @@ gc_visit_thread_stacks(PyInterpreterState *interp)
}
static void
+queue_untracked_obj_decref(PyObject *op, struct collection_state *state)
+{
+ if (!_PyObject_GC_IS_TRACKED(op)) {
+ // GC objects with zero refcount are handled subsequently by the
+ // GC as if they were cyclic trash, but we have to handle dead
+ // non-GC objects here. Add one to the refcount so that we can
+ // decref and deallocate the object once we start the world again.
+ op->ob_ref_shared += (1 << _Py_REF_SHARED_SHIFT);
+#ifdef Py_REF_DEBUG
+ _Py_IncRefTotal(_PyThreadState_GET());
+#endif
+ worklist_push(&state->objs_to_decref, op);
+ }
+}
+
+static void
merge_queued_objects(_PyThreadStateImpl *tstate, struct collection_state *state)
{
struct _brc_thread_state *brc = &tstate->brc;
@@ -404,22 +421,20 @@ merge_queued_objects(_PyThreadStateImpl *tstate, struct collection_state *state)
// Subtract one when merging because the queue had a reference.
Py_ssize_t refcount = merge_refcount(op, -1);
- if (!_PyObject_GC_IS_TRACKED(op) && refcount == 0) {
- // GC objects with zero refcount are handled subsequently by the
- // GC as if they were cyclic trash, but we have to handle dead
- // non-GC objects here. Add one to the refcount so that we can
- // decref and deallocate the object once we start the world again.
- op->ob_ref_shared += (1 << _Py_REF_SHARED_SHIFT);
-#ifdef Py_REF_DEBUG
- _Py_IncRefTotal(_PyThreadState_GET());
-#endif
- worklist_push(&state->objs_to_decref, op);
+ if (refcount == 0) {
+ queue_untracked_obj_decref(op, state);
}
}
}
static void
-process_delayed_frees(PyInterpreterState *interp)
+queue_freed_object(PyObject *obj, void *arg)
+{
+ queue_untracked_obj_decref(obj, arg);
+}
+
+static void
+process_delayed_frees(PyInterpreterState *interp, struct collection_state *state)
{
// While we are in a "stop the world" pause, we can observe the latest
// write sequence by advancing the write sequence immediately.
@@ -438,7 +453,7 @@ process_delayed_frees(PyInterpreterState *interp)
}
HEAD_UNLOCK(&_PyRuntime);
- _PyMem_ProcessDelayed((PyThreadState *)current_tstate);
+ _PyMem_ProcessDelayedNoDealloc((PyThreadState *)current_tstate, queue_freed_object, state);
}
// Subtract an incoming reference from the computed "gc_refs" refcount.
@@ -1231,7 +1246,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
}
HEAD_UNLOCK(&_PyRuntime);
- process_delayed_frees(interp);
+ process_delayed_frees(interp, state);
// Find unreachable objects
int err = deduce_unreachable_heap(interp, state);
@@ -1910,13 +1925,7 @@ PyObject_GC_Del(void *op)
}
record_deallocation(_PyThreadState_GET());
- PyObject *self = (PyObject *)op;
- if (_PyObject_GC_IS_SHARED_INLINE(self)) {
- _PyObject_FreeDelayed(((char *)op)-presize);
- }
- else {
- PyObject_Free(((char *)op)-presize);
- }
+ PyObject_Free(((char *)op)-presize);
}
int