[3.6] bpo-30703: Improve signal delivery (GH-2415) (#2527)

* [3.6] bpo-30703: Improve signal delivery (GH-2415) * Improve signal delivery Avoid using Py_AddPendingCall from signal handler, to avoid calling signal-unsafe functions. * Remove unused function * Improve comments * Add stress test * Adapt for --without-threads * Add second stress test * Add NEWS blurb * Address comments @haypo. (cherry picked from commit c08177a1ccad2ed0d50898c2731b518c631aed14) * bpo-30796: Fix failures in signal delivery stress test (#2488) * bpo-30796: Fix failures in signal delivery stress test setitimer() can have a poor minimum resolution on some machines, this would make the test reach its deadline (and a stray signal could then kill a subsequent test). * Make sure to clear the itimer after the test
author: Antoine Pitrou <pitrou@free.fr> 2017-07-01 17:12:05 (GMT)
committer: GitHub <noreply@github.com> 2017-07-01 17:12:05 (GMT)
commit: 3024c0529077f5cff0b32dc84b5923c8fba99a87 (patch)
tree: ede686f31152576c3fb678df98dca62ae21447ce
parent: 48290c1c3023b2386b229f133b8629ffe5e7dd47 (diff)
download: cpython-3024c0529077f5cff0b32dc84b5923c8fba99a87.zip
cpython-3024c0529077f5cff0b32dc84b5923c8fba99a87.tar.gz
cpython-3024c0529077f5cff0b32dc84b5923c8fba99a87.tar.bz2
5 files changed, 207 insertions, 38 deletions
diff --git a/Include/ceval.h b/Include/ceval.h
index 1e48272..38d4709 100644
--- a/Include/ceval.h
+++ b/Include/ceval.h
@@ -46,6 +46,7 @@ PyAPI_FUNC(int) PyEval_MergeCompilerFlags(PyCompilerFlags *cf);
 #endif
 
 PyAPI_FUNC(int) Py_AddPendingCall(int (*func)(void *), void *arg);
+PyAPI_FUNC(void) _PyEval_SignalReceived(void);
 PyAPI_FUNC(int) Py_MakePendingCalls(void);
 
 /* Protection against deeply nested recursive calls
diff --git a/Lib/test/test_signal.py b/Lib/test/test_signal.py
index aa2b8bb..8d63867 100644
--- a/Lib/test/test_signal.py
+++ b/Lib/test/test_signal.py
@@ -3,11 +3,13 @@ from test import support
 from contextlib import closing
 import enum
 import gc
+import os
 import pickle
+import random
 import select
 import signal
 import socket
-import struct
+import statistics
 import subprocess
 import traceback
 import sys, os, time, errno
@@ -955,6 +957,135 @@ class PendingSignalsTests(unittest.TestCase):
                                 (exitcode, stdout))
 
 
+class StressTest(unittest.TestCase):
+    """
+    Stress signal delivery, especially when a signal arrives in
+    the middle of recomputing the signal state or executing
+    previously tripped signal handlers.
+    """
+
+    def setsig(self, signum, handler):
+        old_handler = signal.signal(signum, handler)
+        self.addCleanup(signal.signal, signum, old_handler)
+
+    def measure_itimer_resolution(self):
+        N = 20
+        times = []
+
+        def handler(signum=None, frame=None):
+            if len(times) < N:
+                times.append(time.perf_counter())
+                # 1 µs is the smallest possible timer interval,
+                # we want to measure what the concrete duration
+                # will be on this platform
+                signal.setitimer(signal.ITIMER_REAL, 1e-6)
+
+        self.addCleanup(signal.setitimer, signal.ITIMER_REAL, 0)
+        self.setsig(signal.SIGALRM, handler)
+        handler()
+        while len(times) < N:
+            time.sleep(1e-3)
+
+        durations = [times[i+1] - times[i] for i in range(len(times) - 1)]
+        med = statistics.median(durations)
+        if support.verbose:
+            print("detected median itimer() resolution: %.6f s." % (med,))
+        return med
+
+    def decide_itimer_count(self):
+        # Some systems have poor setitimer() resolution (for example
+        # measured around 20 ms. on FreeBSD 9), so decide on a reasonable
+        # number of sequential timers based on that.
+        reso = self.measure_itimer_resolution()
+        if reso <= 1e-4:
+            return 10000
+        elif reso <= 1e-2:
+            return 100
+        else:
+            self.skipTest("detected itimer resolution (%.3f s.) too high "
+                          "(> 10 ms.) on this platform (or system too busy)"
+                          % (reso,))
+
+    @unittest.skipUnless(hasattr(signal, "setitimer"),
+                         "test needs setitimer()")
+    def test_stress_delivery_dependent(self):
+        """
+        This test uses dependent signal handlers.
+        """
+        N = self.decide_itimer_count()
+        sigs = []
+
+        def first_handler(signum, frame):
+            # 1e-6 is the minimum non-zero value for `setitimer()`.
+            # Choose a random delay so as to improve chances of
+            # triggering a race condition.  Ideally the signal is received
+            # when inside critical signal-handling routines such as
+            # Py_MakePendingCalls().
+            signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5)
+
+        def second_handler(signum=None, frame=None):
+            sigs.append(signum)
+
+        # Here on Linux, SIGPROF > SIGALRM > SIGUSR1.  By using both
+        # ascending and descending sequences (SIGUSR1 then SIGALRM,
+        # SIGPROF then SIGALRM), we maximize chances of hitting a bug.
+        self.setsig(signal.SIGPROF, first_handler)
+        self.setsig(signal.SIGUSR1, first_handler)
+        self.setsig(signal.SIGALRM, second_handler)  # for ITIMER_REAL
+
+        expected_sigs = 0
+        deadline = time.time() + 15.0
+
+        while expected_sigs < N:
+            os.kill(os.getpid(), signal.SIGPROF)
+            expected_sigs += 1
+            # Wait for handlers to run to avoid signal coalescing
+            while len(sigs) < expected_sigs and time.time() < deadline:
+                time.sleep(1e-5)
+
+            os.kill(os.getpid(), signal.SIGUSR1)
+            expected_sigs += 1
+            while len(sigs) < expected_sigs and time.time() < deadline:
+                time.sleep(1e-5)
+
+        # All ITIMER_REAL signals should have been delivered to the
+        # Python handler
+        self.assertEqual(len(sigs), N, "Some signals were lost")
+
+    @unittest.skipUnless(hasattr(signal, "setitimer"),
+                         "test needs setitimer()")
+    def test_stress_delivery_simultaneous(self):
+        """
+        This test uses simultaneous signal handlers.
+        """
+        N = self.decide_itimer_count()
+        sigs = []
+
+        def handler(signum, frame):
+            sigs.append(signum)
+
+        self.setsig(signal.SIGUSR1, handler)
+        self.setsig(signal.SIGALRM, handler)  # for ITIMER_REAL
+
+        expected_sigs = 0
+        deadline = time.time() + 15.0
+
+        while expected_sigs < N:
+            # Hopefully the SIGALRM will be received somewhere during
+            # initial processing of SIGUSR1.
+            signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5)
+            os.kill(os.getpid(), signal.SIGUSR1)
+
+            expected_sigs += 2
+            # Wait for handlers to run to avoid signal coalescing
+            while len(sigs) < expected_sigs and time.time() < deadline:
+                time.sleep(1e-5)
+
+        # All ITIMER_REAL signals should have been delivered to the
+        # Python handler
+        self.assertEqual(len(sigs), N, "Some signals were lost")
+
+
 def tearDownModule():
     support.reap_children()
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-06-28-21-07-32.bpo-30703.ULCdFp.rst b/Misc/NEWS.d/next/Core and Builtins/2017-06-28-21-07-32.bpo-30703.ULCdFp.rst
new file mode 100644
index 0000000..9adeb45
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2017-06-28-21-07-32.bpo-30703.ULCdFp.rst
@@ -0,0 +1,6 @@
+Improve signal delivery.
+
+Avoid using Py_AddPendingCall from signal handler, to avoid calling signal-
+unsafe functions. The tests I'm adding here fail without the rest of the
+patch, on Linux and OS X. This means our signal delivery logic had defects
+(some signals could be lost).
diff --git a/Modules/signalmodule.c b/Modules/signalmodule.c
index 55e4c2a..30990f8 100644
--- a/Modules/signalmodule.c
+++ b/Modules/signalmodule.c
@@ -193,12 +193,6 @@ It raises KeyboardInterrupt.");
 
 
 static int
-checksignals_witharg(void * unused)
-{
-    return PyErr_CheckSignals();
-}
-
-static int
 report_wakeup_write_error(void *data)
 {
     int save_errno = errno;
@@ -248,17 +242,15 @@ trip_signal(int sig_num)
 
     Handlers[sig_num].tripped = 1;
 
-    if (!is_tripped) {
-        /* Set is_tripped after setting .tripped, as it gets
-           cleared in PyErr_CheckSignals() before .tripped. */
-        is_tripped = 1;
-        Py_AddPendingCall(checksignals_witharg, NULL);
-    }
+    /* Set is_tripped after setting .tripped, as it gets
+       cleared in PyErr_CheckSignals() before .tripped. */
+    is_tripped = 1;
+    _PyEval_SignalReceived();
 
     /* And then write to the wakeup fd *after* setting all the globals and
-       doing the Py_AddPendingCall. We used to write to the wakeup fd and then
-       set the flag, but this allowed the following sequence of events
-       (especially on windows, where trip_signal runs in a new thread):
+       doing the _PyEval_SignalReceived. We used to write to the wakeup fd
+       and then set the flag, but this allowed the following sequence of events
+       (especially on windows, where trip_signal may run in a new thread):
 
        - main thread blocks on select([wakeup_fd], ...)
        - signal arrives
@@ -293,6 +285,8 @@ trip_signal(int sig_num)
                 wakeup.send_err_set = 1;
                 wakeup.send_errno = errno;
                 wakeup.send_win_error = GetLastError();
+                /* Py_AddPendingCall() isn't signal-safe, but we
+                   still use it for this exceptional case. */
                 Py_AddPendingCall(report_wakeup_send_error, NULL);
             }
         }
@@ -306,6 +300,8 @@ trip_signal(int sig_num)
             rc = _Py_write_noraise(fd, &byte, 1);
 
             if (rc < 0) {
+                /* Py_AddPendingCall() isn't signal-safe, but we
+                   still use it for this exceptional case. */
                 Py_AddPendingCall(report_wakeup_write_error,
                                   (void *)(intptr_t)errno);
             }
@@ -1555,8 +1551,10 @@ PyErr_CheckSignals(void)
                                            arglist);
                 Py_DECREF(arglist);
             }
-            if (!result)
+            if (!result) {
+                is_tripped = 1;
                 return -1;
+            }
 
             Py_DECREF(result);
         }
diff --git a/Python/ceval.c b/Python/ceval.c
index ea79f5f..bf9103d 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -195,6 +195,15 @@ PyEval_GetCallStats(PyObject *self)
     do { pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0)
 
 
+/* This single variable consolidates all requests to break out of the fast path
+   in the eval loop. */
+static _Py_atomic_int eval_breaker = {0};
+/* Request for running pending calls. */
+static _Py_atomic_int pendingcalls_to_do = {0};
+/* Request for looking at the `async_exc` field of the current thread state.
+   Guarded by the GIL. */
+static int pending_async_exc = 0;
+
 #ifdef WITH_THREAD
 
 #ifdef HAVE_ERRNO_H
@@ -204,16 +213,8 @@ PyEval_GetCallStats(PyObject *self)
 
 static PyThread_type_lock pending_lock = 0; /* for pending calls */
 static long main_thread = 0;
-/* This single variable consolidates all requests to break out of the fast path
-   in the eval loop. */
-static _Py_atomic_int eval_breaker = {0};
 /* Request for dropping the GIL */
 static _Py_atomic_int gil_drop_request = {0};
-/* Request for running pending calls. */
-static _Py_atomic_int pendingcalls_to_do = {0};
-/* Request for looking at the `async_exc` field of the current thread state.
-   Guarded by the GIL. */
-static int pending_async_exc = 0;
 
 #include "ceval_gil.h"
 
@@ -326,9 +327,6 @@ PyEval_ReInitThreads(void)
     _PyThreadState_DeleteExcept(current_tstate);
 }
 
-#else
-static _Py_atomic_int eval_breaker = {0};
-static int pending_async_exc = 0;
 #endif /* WITH_THREAD */
 
 /* This function is used to signal that async exceptions are waiting to be
@@ -403,6 +401,15 @@ PyEval_RestoreThread(PyThreadState *tstate)
 #endif
 */
 
+void
+_PyEval_SignalReceived(void)
+{
+    /* bpo-30703: Function called when the C signal handler of Python gets a
+       signal. We cannot queue a callback using Py_AddPendingCall() since
+       that function is not async-signal-safe. */
+    SIGNAL_PENDING_CALLS();
+}
+
 #ifdef WITH_THREAD
 
 /* The WITH_THREAD implementation is thread-safe.  It allows
@@ -467,6 +474,8 @@ Py_MakePendingCalls(void)
     int i;
     int r = 0;
 
+    assert(PyGILState_Check());
+
     if (!pending_lock) {
         /* initial allocation of the lock */
         pending_lock = PyThread_allocate_lock();
@@ -481,6 +490,16 @@ Py_MakePendingCalls(void)
     if (busy)
         return 0;
     busy = 1;
+    /* unsignal before starting to call callbacks, so that any callback
+       added in-between re-signals */
+    UNSIGNAL_PENDING_CALLS();
+
+    /* Python signal handler doesn't really queue a callback: it only signals
+       that a signal was received, see _PyEval_SignalReceived(). */
+    if (PyErr_CheckSignals() < 0) {
+        goto error;
+    }
+
     /* perform a bounded number of calls, in case of recursion */
     for (i=0; i<NPENDINGCALLS; i++) {
         int j;
@@ -497,20 +516,23 @@ Py_MakePendingCalls(void)
             arg = pendingcalls[j].arg;
             pendingfirst = (j + 1) % NPENDINGCALLS;
         }
-        if (pendingfirst != pendinglast)
-            SIGNAL_PENDING_CALLS();
-        else
-            UNSIGNAL_PENDING_CALLS();
         PyThread_release_lock(pending_lock);
         /* having released the lock, perform the callback */
         if (func == NULL)
             break;
         r = func(arg);
-        if (r)
-            break;
+        if (r) {
+            goto error;
+        }
     }
+
     busy = 0;
     return r;
+
+error:
+    busy = 0;
+    SIGNAL_PENDING_CALLS(); /* We're not done yet */
+    return -1;
 }
 
 #else /* if ! defined WITH_THREAD */
@@ -545,7 +567,6 @@ static struct {
 } pendingcalls[NPENDINGCALLS];
 static volatile int pendingfirst = 0;
 static volatile int pendinglast = 0;
-static _Py_atomic_int pendingcalls_to_do = {0};
 
 int
 Py_AddPendingCall(int (*func)(void *), void *arg)
@@ -579,7 +600,16 @@ Py_MakePendingCalls(void)
     if (busy)
         return 0;
     busy = 1;
+
+    /* unsignal before starting to call callbacks, so that any callback
+       added in-between re-signals */
     UNSIGNAL_PENDING_CALLS();
+    /* Python signal handler doesn't really queue a callback: it only signals
+       that a signal was received, see _PyEval_SignalReceived(). */
+    if (PyErr_CheckSignals() < 0) {
+        goto error;
+    }
+
     for (;;) {
         int i;
         int (*func)(void *);
@@ -591,13 +621,16 @@ Py_MakePendingCalls(void)
         arg = pendingcalls[i].arg;
         pendingfirst = (i + 1) % NPENDINGCALLS;
         if (func(arg) < 0) {
-            busy = 0;
-            SIGNAL_PENDING_CALLS(); /* We're not done yet */
-            return -1;
+            goto error:
         }
     }
     busy = 0;
     return 0;
+
+error:
+    busy = 0;
+    SIGNAL_PENDING_CALLS(); /* We're not done yet */
+    return -1;
 }
 
 #endif /* WITH_THREAD */
author	Antoine Pitrou <pitrou@free.fr>	2017-07-01 17:12:05 (GMT)
committer	GitHub <noreply@github.com>	2017-07-01 17:12:05 (GMT)
commit	3024c0529077f5cff0b32dc84b5923c8fba99a87 (patch)
tree	ede686f31152576c3fb678df98dca62ae21447ce
parent	48290c1c3023b2386b229f133b8629ffe5e7dd47 (diff)
download	cpython-3024c0529077f5cff0b32dc84b5923c8fba99a87.zip cpython-3024c0529077f5cff0b32dc84b5923c8fba99a87.tar.gz cpython-3024c0529077f5cff0b32dc84b5923c8fba99a87.tar.bz2