bpo-6721: Hold logging locks across fork() (GH-4071)

bpo-6721: When os.fork() was called while another thread holds a logging lock, the child process may deadlock when it tries to log. This fixes that by acquiring all logging locks before fork and releasing them afterwards. A regression test that fails before this change is included. Within the new unittest itself: There is a small _potential_ due to mixing of fork and a thread in the child process if the parent's thread happened to hold a non-reentrant library call lock (malloc?) when the os.fork() happens. buildbots and time will tell if this actually manifests itself in this test or not. :/ A functionality test that avoids that would be a challenge. An alternate test that isn't trying to produce the deadlock itself but just checking that the release and acquire calls are made would be the next best alternative if so.
author: Gregory P. Smith <greg@krypto.org> 2018-09-14 05:08:31 (GMT)
committer: GitHub <noreply@github.com> 2018-09-14 05:08:31 (GMT)
commit: 19003841e965bbf56fd06824d6093620c1b66f9e (patch)
tree: 626ed792088f60c7191d4815e847ad0925593b7d /Lib/test/test_logging.py
parent: ea13740a37347d68d096b11b87c9167917ccfc22 (diff)
download: cpython-19003841e965bbf56fd06824d6093620c1b66f9e.zip
cpython-19003841e965bbf56fd06824d6093620c1b66f9e.tar.gz
cpython-19003841e965bbf56fd06824d6093620c1b66f9e.tar.bz2
1 files changed, 67 insertions, 0 deletions
diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py
index f7232f6..b9dad64 100644
--- a/Lib/test/test_logging.py
+++ b/Lib/test/test_logging.py
@@ -35,6 +35,7 @@ import os
 import queue
 import random
 import re
+import signal
 import socket
 import struct
 import sys
@@ -666,6 +667,72 @@ class HandlerTest(BaseTest):
                 if os.path.exists(fn):
                     os.unlink(fn)
 
+    # The implementation relies on os.register_at_fork existing, but we test
+    # based on os.fork existing because that is what users and this test use.
+    # This helps ensure that when fork exists (the important concept) that the
+    # register_at_fork mechanism is also present and used.
+    @unittest.skipIf(not hasattr(os, 'fork'), 'Test requires os.fork().')
+    def test_post_fork_child_no_deadlock(self):
+        """Ensure forked child logging locks are not held; bpo-6721."""
+        refed_h = logging.Handler()
+        refed_h.name = 'because we need at least one for this test'
+        self.assertGreater(len(logging._handlers), 0)
+
+        locks_held__ready_to_fork = threading.Event()
+        fork_happened__release_locks_and_end_thread = threading.Event()
+
+        def lock_holder_thread_fn():
+            logging._acquireLock()
+            try:
+                refed_h.acquire()
+                try:
+                    # Tell the main thread to do the fork.
+                    locks_held__ready_to_fork.set()
+
+                    # If the deadlock bug exists, the fork will happen
+                    # without dealing with the locks we hold, deadlocking
+                    # the child.
+
+                    # Wait for a successful fork or an unreasonable amount of
+                    # time before releasing our locks.  To avoid a timing based
+                    # test we'd need communication from os.fork() as to when it
+                    # has actually happened.  Given this is a regression test
+                    # for a fixed issue, potentially less reliably detecting
+                    # regression via timing is acceptable for simplicity.
+                    # The test will always take at least this long. :(
+                    fork_happened__release_locks_and_end_thread.wait(0.5)
+                finally:
+                    refed_h.release()
+            finally:
+                logging._releaseLock()
+
+        lock_holder_thread = threading.Thread(
+                target=lock_holder_thread_fn,
+                name='test_post_fork_child_no_deadlock lock holder')
+        lock_holder_thread.start()
+
+        locks_held__ready_to_fork.wait()
+        pid = os.fork()
+        if pid == 0:  # Child.
+            logging.error(r'Child process did not deadlock. \o/')
+            os._exit(0)
+        else:  # Parent.
+            fork_happened__release_locks_and_end_thread.set()
+            lock_holder_thread.join()
+            start_time = time.monotonic()
+            while True:
+                waited_pid, status = os.waitpid(pid, os.WNOHANG)
+                if waited_pid == pid:
+                    break  # child process exited.
+                if time.monotonic() - start_time > 7:
+                    break  # so long? implies child deadlock.
+                time.sleep(0.05)
+            if waited_pid != pid:
+                os.kill(pid, signal.SIGKILL)
+                waited_pid, status = os.waitpid(pid, 0)
+                self.fail("child process deadlocked.")
+            self.assertEqual(status, 0, msg="child process error")
+
 
 class BadStream(object):
     def write(self, data):
author	Gregory P. Smith <greg@krypto.org>	2018-09-14 05:08:31 (GMT)
committer	GitHub <noreply@github.com>	2018-09-14 05:08:31 (GMT)
commit	19003841e965bbf56fd06824d6093620c1b66f9e (patch)
tree	626ed792088f60c7191d4815e847ad0925593b7d /Lib/test/test_logging.py
parent	ea13740a37347d68d096b11b87c9167917ccfc22 (diff)
download	cpython-19003841e965bbf56fd06824d6093620c1b66f9e.zip cpython-19003841e965bbf56fd06824d6093620c1b66f9e.tar.gz cpython-19003841e965bbf56fd06824d6093620c1b66f9e.tar.bz2