diff options
author | Antoine Pitrou <pitrou@free.fr> | 2017-07-01 17:12:05 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-07-01 17:12:05 (GMT) |
commit | 3024c0529077f5cff0b32dc84b5923c8fba99a87 (patch) | |
tree | ede686f31152576c3fb678df98dca62ae21447ce | |
parent | 48290c1c3023b2386b229f133b8629ffe5e7dd47 (diff) | |
download | cpython-3024c0529077f5cff0b32dc84b5923c8fba99a87.zip cpython-3024c0529077f5cff0b32dc84b5923c8fba99a87.tar.gz cpython-3024c0529077f5cff0b32dc84b5923c8fba99a87.tar.bz2 |
[3.6] bpo-30703: Improve signal delivery (GH-2415) (#2527)
* [3.6] bpo-30703: Improve signal delivery (GH-2415)
* Improve signal delivery
Avoid using Py_AddPendingCall from signal handler, to avoid calling signal-unsafe functions.
* Remove unused function
* Improve comments
* Add stress test
* Adapt for --without-threads
* Add second stress test
* Add NEWS blurb
* Address comments @haypo.
(cherry picked from commit c08177a1ccad2ed0d50898c2731b518c631aed14)
* bpo-30796: Fix failures in signal delivery stress test (#2488)
* bpo-30796: Fix failures in signal delivery stress test
setitimer() can have a poor minimum resolution on some machines,
this would make the test reach its deadline (and a stray signal
could then kill a subsequent test).
* Make sure to clear the itimer after the test
-rw-r--r-- | Include/ceval.h | 1 | ||||
-rw-r--r-- | Lib/test/test_signal.py | 133 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2017-06-28-21-07-32.bpo-30703.ULCdFp.rst | 6 | ||||
-rw-r--r-- | Modules/signalmodule.c | 30 | ||||
-rw-r--r-- | Python/ceval.c | 75 |
5 files changed, 207 insertions, 38 deletions
diff --git a/Include/ceval.h b/Include/ceval.h index 1e48272..38d4709 100644 --- a/Include/ceval.h +++ b/Include/ceval.h @@ -46,6 +46,7 @@ PyAPI_FUNC(int) PyEval_MergeCompilerFlags(PyCompilerFlags *cf); #endif PyAPI_FUNC(int) Py_AddPendingCall(int (*func)(void *), void *arg); +PyAPI_FUNC(void) _PyEval_SignalReceived(void); PyAPI_FUNC(int) Py_MakePendingCalls(void); /* Protection against deeply nested recursive calls diff --git a/Lib/test/test_signal.py b/Lib/test/test_signal.py index aa2b8bb..8d63867 100644 --- a/Lib/test/test_signal.py +++ b/Lib/test/test_signal.py @@ -3,11 +3,13 @@ from test import support from contextlib import closing import enum import gc +import os import pickle +import random import select import signal import socket -import struct +import statistics import subprocess import traceback import sys, os, time, errno @@ -955,6 +957,135 @@ class PendingSignalsTests(unittest.TestCase): (exitcode, stdout)) +class StressTest(unittest.TestCase): + """ + Stress signal delivery, especially when a signal arrives in + the middle of recomputing the signal state or executing + previously tripped signal handlers. + """ + + def setsig(self, signum, handler): + old_handler = signal.signal(signum, handler) + self.addCleanup(signal.signal, signum, old_handler) + + def measure_itimer_resolution(self): + N = 20 + times = [] + + def handler(signum=None, frame=None): + if len(times) < N: + times.append(time.perf_counter()) + # 1 µs is the smallest possible timer interval, + # we want to measure what the concrete duration + # will be on this platform + signal.setitimer(signal.ITIMER_REAL, 1e-6) + + self.addCleanup(signal.setitimer, signal.ITIMER_REAL, 0) + self.setsig(signal.SIGALRM, handler) + handler() + while len(times) < N: + time.sleep(1e-3) + + durations = [times[i+1] - times[i] for i in range(len(times) - 1)] + med = statistics.median(durations) + if support.verbose: + print("detected median itimer() resolution: %.6f s." % (med,)) + return med + + def decide_itimer_count(self): + # Some systems have poor setitimer() resolution (for example + # measured around 20 ms. on FreeBSD 9), so decide on a reasonable + # number of sequential timers based on that. + reso = self.measure_itimer_resolution() + if reso <= 1e-4: + return 10000 + elif reso <= 1e-2: + return 100 + else: + self.skipTest("detected itimer resolution (%.3f s.) too high " + "(> 10 ms.) on this platform (or system too busy)" + % (reso,)) + + @unittest.skipUnless(hasattr(signal, "setitimer"), + "test needs setitimer()") + def test_stress_delivery_dependent(self): + """ + This test uses dependent signal handlers. + """ + N = self.decide_itimer_count() + sigs = [] + + def first_handler(signum, frame): + # 1e-6 is the minimum non-zero value for `setitimer()`. + # Choose a random delay so as to improve chances of + # triggering a race condition. Ideally the signal is received + # when inside critical signal-handling routines such as + # Py_MakePendingCalls(). + signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5) + + def second_handler(signum=None, frame=None): + sigs.append(signum) + + # Here on Linux, SIGPROF > SIGALRM > SIGUSR1. By using both + # ascending and descending sequences (SIGUSR1 then SIGALRM, + # SIGPROF then SIGALRM), we maximize chances of hitting a bug. + self.setsig(signal.SIGPROF, first_handler) + self.setsig(signal.SIGUSR1, first_handler) + self.setsig(signal.SIGALRM, second_handler) # for ITIMER_REAL + + expected_sigs = 0 + deadline = time.time() + 15.0 + + while expected_sigs < N: + os.kill(os.getpid(), signal.SIGPROF) + expected_sigs += 1 + # Wait for handlers to run to avoid signal coalescing + while len(sigs) < expected_sigs and time.time() < deadline: + time.sleep(1e-5) + + os.kill(os.getpid(), signal.SIGUSR1) + expected_sigs += 1 + while len(sigs) < expected_sigs and time.time() < deadline: + time.sleep(1e-5) + + # All ITIMER_REAL signals should have been delivered to the + # Python handler + self.assertEqual(len(sigs), N, "Some signals were lost") + + @unittest.skipUnless(hasattr(signal, "setitimer"), + "test needs setitimer()") + def test_stress_delivery_simultaneous(self): + """ + This test uses simultaneous signal handlers. + """ + N = self.decide_itimer_count() + sigs = [] + + def handler(signum, frame): + sigs.append(signum) + + self.setsig(signal.SIGUSR1, handler) + self.setsig(signal.SIGALRM, handler) # for ITIMER_REAL + + expected_sigs = 0 + deadline = time.time() + 15.0 + + while expected_sigs < N: + # Hopefully the SIGALRM will be received somewhere during + # initial processing of SIGUSR1. + signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5) + os.kill(os.getpid(), signal.SIGUSR1) + + expected_sigs += 2 + # Wait for handlers to run to avoid signal coalescing + while len(sigs) < expected_sigs and time.time() < deadline: + time.sleep(1e-5) + + # All ITIMER_REAL signals should have been delivered to the + # Python handler + self.assertEqual(len(sigs), N, "Some signals were lost") + + def tearDownModule(): support.reap_children() diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-06-28-21-07-32.bpo-30703.ULCdFp.rst b/Misc/NEWS.d/next/Core and Builtins/2017-06-28-21-07-32.bpo-30703.ULCdFp.rst new file mode 100644 index 0000000..9adeb45 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2017-06-28-21-07-32.bpo-30703.ULCdFp.rst @@ -0,0 +1,6 @@ +Improve signal delivery. + +Avoid using Py_AddPendingCall from signal handler, to avoid calling signal- +unsafe functions. The tests I'm adding here fail without the rest of the +patch, on Linux and OS X. This means our signal delivery logic had defects +(some signals could be lost). diff --git a/Modules/signalmodule.c b/Modules/signalmodule.c index 55e4c2a..30990f8 100644 --- a/Modules/signalmodule.c +++ b/Modules/signalmodule.c @@ -193,12 +193,6 @@ It raises KeyboardInterrupt."); static int -checksignals_witharg(void * unused) -{ - return PyErr_CheckSignals(); -} - -static int report_wakeup_write_error(void *data) { int save_errno = errno; @@ -248,17 +242,15 @@ trip_signal(int sig_num) Handlers[sig_num].tripped = 1; - if (!is_tripped) { - /* Set is_tripped after setting .tripped, as it gets - cleared in PyErr_CheckSignals() before .tripped. */ - is_tripped = 1; - Py_AddPendingCall(checksignals_witharg, NULL); - } + /* Set is_tripped after setting .tripped, as it gets + cleared in PyErr_CheckSignals() before .tripped. */ + is_tripped = 1; + _PyEval_SignalReceived(); /* And then write to the wakeup fd *after* setting all the globals and - doing the Py_AddPendingCall. We used to write to the wakeup fd and then - set the flag, but this allowed the following sequence of events - (especially on windows, where trip_signal runs in a new thread): + doing the _PyEval_SignalReceived. We used to write to the wakeup fd + and then set the flag, but this allowed the following sequence of events + (especially on windows, where trip_signal may run in a new thread): - main thread blocks on select([wakeup_fd], ...) - signal arrives @@ -293,6 +285,8 @@ trip_signal(int sig_num) wakeup.send_err_set = 1; wakeup.send_errno = errno; wakeup.send_win_error = GetLastError(); + /* Py_AddPendingCall() isn't signal-safe, but we + still use it for this exceptional case. */ Py_AddPendingCall(report_wakeup_send_error, NULL); } } @@ -306,6 +300,8 @@ trip_signal(int sig_num) rc = _Py_write_noraise(fd, &byte, 1); if (rc < 0) { + /* Py_AddPendingCall() isn't signal-safe, but we + still use it for this exceptional case. */ Py_AddPendingCall(report_wakeup_write_error, (void *)(intptr_t)errno); } @@ -1555,8 +1551,10 @@ PyErr_CheckSignals(void) arglist); Py_DECREF(arglist); } - if (!result) + if (!result) { + is_tripped = 1; return -1; + } Py_DECREF(result); } diff --git a/Python/ceval.c b/Python/ceval.c index ea79f5f..bf9103d 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -195,6 +195,15 @@ PyEval_GetCallStats(PyObject *self) do { pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0) +/* This single variable consolidates all requests to break out of the fast path + in the eval loop. */ +static _Py_atomic_int eval_breaker = {0}; +/* Request for running pending calls. */ +static _Py_atomic_int pendingcalls_to_do = {0}; +/* Request for looking at the `async_exc` field of the current thread state. + Guarded by the GIL. */ +static int pending_async_exc = 0; + #ifdef WITH_THREAD #ifdef HAVE_ERRNO_H @@ -204,16 +213,8 @@ PyEval_GetCallStats(PyObject *self) static PyThread_type_lock pending_lock = 0; /* for pending calls */ static long main_thread = 0; -/* This single variable consolidates all requests to break out of the fast path - in the eval loop. */ -static _Py_atomic_int eval_breaker = {0}; /* Request for dropping the GIL */ static _Py_atomic_int gil_drop_request = {0}; -/* Request for running pending calls. */ -static _Py_atomic_int pendingcalls_to_do = {0}; -/* Request for looking at the `async_exc` field of the current thread state. - Guarded by the GIL. */ -static int pending_async_exc = 0; #include "ceval_gil.h" @@ -326,9 +327,6 @@ PyEval_ReInitThreads(void) _PyThreadState_DeleteExcept(current_tstate); } -#else -static _Py_atomic_int eval_breaker = {0}; -static int pending_async_exc = 0; #endif /* WITH_THREAD */ /* This function is used to signal that async exceptions are waiting to be @@ -403,6 +401,15 @@ PyEval_RestoreThread(PyThreadState *tstate) #endif */ +void +_PyEval_SignalReceived(void) +{ + /* bpo-30703: Function called when the C signal handler of Python gets a + signal. We cannot queue a callback using Py_AddPendingCall() since + that function is not async-signal-safe. */ + SIGNAL_PENDING_CALLS(); +} + #ifdef WITH_THREAD /* The WITH_THREAD implementation is thread-safe. It allows @@ -467,6 +474,8 @@ Py_MakePendingCalls(void) int i; int r = 0; + assert(PyGILState_Check()); + if (!pending_lock) { /* initial allocation of the lock */ pending_lock = PyThread_allocate_lock(); @@ -481,6 +490,16 @@ Py_MakePendingCalls(void) if (busy) return 0; busy = 1; + /* unsignal before starting to call callbacks, so that any callback + added in-between re-signals */ + UNSIGNAL_PENDING_CALLS(); + + /* Python signal handler doesn't really queue a callback: it only signals + that a signal was received, see _PyEval_SignalReceived(). */ + if (PyErr_CheckSignals() < 0) { + goto error; + } + /* perform a bounded number of calls, in case of recursion */ for (i=0; i<NPENDINGCALLS; i++) { int j; @@ -497,20 +516,23 @@ Py_MakePendingCalls(void) arg = pendingcalls[j].arg; pendingfirst = (j + 1) % NPENDINGCALLS; } - if (pendingfirst != pendinglast) - SIGNAL_PENDING_CALLS(); - else - UNSIGNAL_PENDING_CALLS(); PyThread_release_lock(pending_lock); /* having released the lock, perform the callback */ if (func == NULL) break; r = func(arg); - if (r) - break; + if (r) { + goto error; + } } + busy = 0; return r; + +error: + busy = 0; + SIGNAL_PENDING_CALLS(); /* We're not done yet */ + return -1; } #else /* if ! defined WITH_THREAD */ @@ -545,7 +567,6 @@ static struct { } pendingcalls[NPENDINGCALLS]; static volatile int pendingfirst = 0; static volatile int pendinglast = 0; -static _Py_atomic_int pendingcalls_to_do = {0}; int Py_AddPendingCall(int (*func)(void *), void *arg) @@ -579,7 +600,16 @@ Py_MakePendingCalls(void) if (busy) return 0; busy = 1; + + /* unsignal before starting to call callbacks, so that any callback + added in-between re-signals */ UNSIGNAL_PENDING_CALLS(); + /* Python signal handler doesn't really queue a callback: it only signals + that a signal was received, see _PyEval_SignalReceived(). */ + if (PyErr_CheckSignals() < 0) { + goto error; + } + for (;;) { int i; int (*func)(void *); @@ -591,13 +621,16 @@ Py_MakePendingCalls(void) arg = pendingcalls[i].arg; pendingfirst = (i + 1) % NPENDINGCALLS; if (func(arg) < 0) { - busy = 0; - SIGNAL_PENDING_CALLS(); /* We're not done yet */ - return -1; + goto error: } } busy = 0; return 0; + +error: + busy = 0; + SIGNAL_PENDING_CALLS(); /* We're not done yet */ + return -1; } #endif /* WITH_THREAD */ |