diff options
| author | mpage <mpage@meta.com> | 2024-03-16 12:56:30 (GMT) |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-03-16 12:56:30 (GMT) |
| commit | 33da0e844c922b3dcded75fbb9b7be67cb013a17 (patch) | |
| tree | 940ddc29bb00e450f9a984e01bff7674f929543e /Lib/test/test_threading.py | |
| parent | 86bc40dd414bceb3f93382cc9f670936de9d68be (diff) | |
| download | cpython-33da0e844c922b3dcded75fbb9b7be67cb013a17.zip cpython-33da0e844c922b3dcded75fbb9b7be67cb013a17.tar.gz cpython-33da0e844c922b3dcded75fbb9b7be67cb013a17.tar.bz2 | |
gh-114271: Fix race in `Thread.join()` (#114839)
There is a race between when `Thread._tstate_lock` is released[^1] in `Thread._wait_for_tstate_lock()`
and when `Thread._stop()` asserts[^2] that it is unlocked. Consider the following execution
involving threads A, B, and C:
1. A starts.
2. B joins A, blocking on its `_tstate_lock`.
3. C joins A, blocking on its `_tstate_lock`.
4. A finishes and releases its `_tstate_lock`.
5. B acquires A's `_tstate_lock` in `_wait_for_tstate_lock()`, releases it, but is swapped
out before calling `_stop()`.
6. C is scheduled, acquires A's `_tstate_lock` in `_wait_for_tstate_lock()` but is swapped
out before releasing it.
7. B is scheduled, calls `_stop()`, which asserts that A's `_tstate_lock` is not held.
However, C holds it, so the assertion fails.
The race can be reproduced[^3] by inserting sleeps at the appropriate points in
the threading code. To do so, run the `repro_join_race.py` from the linked repo.
There are two main parts to this PR:
1. `_tstate_lock` is replaced with an event that is attached to `PyThreadState`.
The event is set by the runtime prior to the thread being cleared (in the same
place that `_tstate_lock` was released). `Thread.join()` blocks waiting for the
event to be set.
2. `_PyInterpreterState_WaitForThreads()` provides the ability to wait for all
non-daemon threads to exit. To do so, an `is_daemon` predicate was added to
`PyThreadState`. This field is set each time a thread is created. `threading._shutdown()`
now calls into `_PyInterpreterState_WaitForThreads()` instead of waiting on
`_tstate_lock`s.
[^1]: https://github.com/python/cpython/blob/441affc9e7f419ef0b68f734505fa2f79fe653c7/Lib/threading.py#L1201
[^2]: https://github.com/python/cpython/blob/441affc9e7f419ef0b68f734505fa2f79fe653c7/Lib/threading.py#L1115
[^3]: https://github.com/mpage/cpython/commit/81946532792f938cd6f6ab4c4ff92a4edf61314f
---------
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Diffstat (limited to 'Lib/test/test_threading.py')
| -rw-r--r-- | Lib/test/test_threading.py | 61 |
1 files changed, 1 insertions, 60 deletions
diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 8868666..f1dc129 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -408,7 +408,7 @@ class ThreadTests(BaseTestCase): def test_limbo_cleanup(self): # Issue 7481: Failure to start thread should cleanup the limbo map. - def fail_new_thread(*args): + def fail_new_thread(*args, **kwargs): raise threading.ThreadError() _start_joinable_thread = threading._start_joinable_thread threading._start_joinable_thread = fail_new_thread @@ -912,41 +912,6 @@ class ThreadTests(BaseTestCase): rc, out, err = assert_python_ok("-c", code) self.assertEqual(err, b"") - def test_tstate_lock(self): - # Test an implementation detail of Thread objects. - started = _thread.allocate_lock() - finish = _thread.allocate_lock() - started.acquire() - finish.acquire() - def f(): - started.release() - finish.acquire() - time.sleep(0.01) - # The tstate lock is None until the thread is started - t = threading.Thread(target=f) - self.assertIs(t._tstate_lock, None) - t.start() - started.acquire() - self.assertTrue(t.is_alive()) - # The tstate lock can't be acquired when the thread is running - # (or suspended). - tstate_lock = t._tstate_lock - self.assertFalse(tstate_lock.acquire(timeout=0), False) - finish.release() - # When the thread ends, the state_lock can be successfully - # acquired. - self.assertTrue(tstate_lock.acquire(timeout=support.SHORT_TIMEOUT), False) - # But is_alive() is still True: we hold _tstate_lock now, which - # prevents is_alive() from knowing the thread's end-of-life C code - # is done. - self.assertTrue(t.is_alive()) - # Let is_alive() find out the C code is done. - tstate_lock.release() - self.assertFalse(t.is_alive()) - # And verify the thread disposed of _tstate_lock. - self.assertIsNone(t._tstate_lock) - t.join() - def test_repr_stopped(self): # Verify that "stopped" shows up in repr(Thread) appropriately. started = _thread.allocate_lock() @@ -1112,30 +1077,6 @@ class ThreadTests(BaseTestCase): self.assertEqual(threading.getprofile(), old_profile) self.assertEqual(sys.getprofile(), old_profile) - @cpython_only - def test_shutdown_locks(self): - for daemon in (False, True): - with self.subTest(daemon=daemon): - event = threading.Event() - thread = threading.Thread(target=event.wait, daemon=daemon) - - # Thread.start() must add lock to _shutdown_locks, - # but only for non-daemon thread - thread.start() - tstate_lock = thread._tstate_lock - if not daemon: - self.assertIn(tstate_lock, threading._shutdown_locks) - else: - self.assertNotIn(tstate_lock, threading._shutdown_locks) - - # unblock the thread and join it - event.set() - thread.join() - - # Thread._stop() must remove tstate_lock from _shutdown_locks. - # Daemon threads must never add it to _shutdown_locks. - self.assertNotIn(tstate_lock, threading._shutdown_locks) - def test_locals_at_exit(self): # bpo-19466: thread locals must not be deleted before destructors # are called |
