summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Misc/NEWS.d/next/Library/2020-10-16-07-45-35.bpo-35823.SNQo56.rst2
-rw-r--r--Modules/_posixsubprocess.c224
-rwxr-xr-xconfigure2
-rw-r--r--configure.ac2
-rw-r--r--pyconfig.h.in3
5 files changed, 204 insertions, 29 deletions
diff --git a/Misc/NEWS.d/next/Library/2020-10-16-07-45-35.bpo-35823.SNQo56.rst b/Misc/NEWS.d/next/Library/2020-10-16-07-45-35.bpo-35823.SNQo56.rst
new file mode 100644
index 0000000..cd428d3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-10-16-07-45-35.bpo-35823.SNQo56.rst
@@ -0,0 +1,2 @@
+Use ``vfork()`` instead of ``fork()`` for :func:`subprocess.Popen` on Linux
+to improve performance in cases where it is deemed safe.
diff --git a/Modules/_posixsubprocess.c b/Modules/_posixsubprocess.c
index d08c479..ed49857 100644
--- a/Modules/_posixsubprocess.c
+++ b/Modules/_posixsubprocess.c
@@ -36,6 +36,12 @@
# define SYS_getdents64 __NR_getdents64
#endif
+#if defined(__linux__) && defined(HAVE_VFORK) && defined(HAVE_SIGNAL_H) && \
+ defined(HAVE_PTHREAD_SIGMASK) && !defined(HAVE_BROKEN_PTHREAD_SIGMASK)
+# include <signal.h>
+# define VFORK_USABLE 1
+#endif
+
#if defined(__sun) && defined(__SVR4)
/* readdir64 is used to work around Solaris 9 bug 6395699. */
# define readdir readdir64
@@ -407,9 +413,53 @@ _close_open_fds_maybe_unsafe(long start_fd, PyObject* py_fds_to_keep)
#endif /* else NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */
+#ifdef VFORK_USABLE
+/* Reset dispositions for all signals to SIG_DFL except for ignored
+ * signals. This way we ensure that no signal handlers can run
+ * after we unblock signals in a child created by vfork().
+ */
+static void
+reset_signal_handlers(const sigset_t *child_sigmask)
+{
+ struct sigaction sa_dfl = {.sa_handler = SIG_DFL};
+ for (int sig = 1; sig < _NSIG; sig++) {
+ /* Dispositions for SIGKILL and SIGSTOP can't be changed. */
+ if (sig == SIGKILL || sig == SIGSTOP) {
+ continue;
+ }
+
+ /* There is no need to reset the disposition of signals that will
+ * remain blocked across execve() since the kernel will do it. */
+ if (sigismember(child_sigmask, sig) == 1) {
+ continue;
+ }
+
+ struct sigaction sa;
+ /* C libraries usually return EINVAL for signals used
+ * internally (e.g. for thread cancellation), so simply
+ * skip errors here. */
+ if (sigaction(sig, NULL, &sa) == -1) {
+ continue;
+ }
+
+ /* void *h works as these fields are both pointer types already. */
+ void *h = (sa.sa_flags & SA_SIGINFO ? (void *)sa.sa_sigaction :
+ (void *)sa.sa_handler);
+ if (h == SIG_IGN || h == SIG_DFL) {
+ continue;
+ }
+
+ /* This call can't reasonably fail, but if it does, terminating
+ * the child seems to be too harsh, so ignore errors. */
+ (void) sigaction(sig, &sa_dfl, NULL);
+ }
+}
+#endif /* VFORK_USABLE */
+
+
/*
- * This function is code executed in the child process immediately after fork
- * to set things up and call exec().
+ * This function is code executed in the child process immediately after
+ * (v)fork to set things up and call exec().
*
* All of the code in this function must only use async-signal-safe functions,
* listed at `man 7 signal` or
@@ -417,8 +467,28 @@ _close_open_fds_maybe_unsafe(long start_fd, PyObject* py_fds_to_keep)
*
* This restriction is documented at
* http://www.opengroup.org/onlinepubs/009695399/functions/fork.html.
+ *
+ * If this function is called after vfork(), even more care must be taken.
+ * The lack of preparations that C libraries normally take on fork(),
+ * as well as sharing the address space with the parent, might make even
+ * async-signal-safe functions vfork-unsafe. In particular, on Linux,
+ * set*id() and setgroups() library functions must not be called, since
+ * they have to interact with the library-level thread list and send
+ * library-internal signals to implement per-process credentials semantics
+ * required by POSIX but not supported natively on Linux. Another reason to
+ * avoid this family of functions is that sharing an address space between
+ * processes running with different privileges is inherently insecure.
+ * See bpo-35823 for further discussion and references.
+ *
+ * In some C libraries, setrlimit() has the same thread list/signalling
+ * behavior since resource limits were per-thread attributes before
+ * Linux 2.6.10. Musl, as of 1.2.1, is known to have this issue
+ * (https://www.openwall.com/lists/musl/2020/10/15/6).
+ *
+ * If vfork-unsafe functionality is desired after vfork(), consider using
+ * syscall() to obtain it.
*/
-static void
+_Py_NO_INLINE static void
child_exec(char *const exec_array[],
char *const argv[],
char *const envp[],
@@ -432,6 +502,7 @@ child_exec(char *const exec_array[],
int call_setgid, gid_t gid,
int call_setgroups, size_t groups_size, const gid_t *groups,
int call_setuid, uid_t uid, int child_umask,
+ const void *child_sigmask,
PyObject *py_fds_to_keep,
PyObject *preexec_fn,
PyObject *preexec_fn_args_tuple)
@@ -507,6 +578,13 @@ child_exec(char *const exec_array[],
if (restore_signals)
_Py_RestoreSignals();
+#ifdef VFORK_USABLE
+ if (child_sigmask) {
+ reset_signal_handlers(child_sigmask);
+ POSIX_CALL(pthread_sigmask(SIG_SETMASK, child_sigmask, NULL));
+ }
+#endif
+
#ifdef HAVE_SETSID
if (call_setsid)
POSIX_CALL(setsid());
@@ -599,6 +677,81 @@ error:
}
+/* The main purpose of this wrapper function is to isolate vfork() from both
+ * subprocess_fork_exec() and child_exec(). A child process created via
+ * vfork() executes on the same stack as the parent process while the latter is
+ * suspended, so this function should not be inlined to avoid compiler bugs
+ * that might clobber data needed by the parent later. Additionally,
+ * child_exec() should not be inlined to avoid spurious -Wclobber warnings from
+ * GCC (see bpo-35823).
+ */
+_Py_NO_INLINE static pid_t
+do_fork_exec(char *const exec_array[],
+ char *const argv[],
+ char *const envp[],
+ const char *cwd,
+ int p2cread, int p2cwrite,
+ int c2pread, int c2pwrite,
+ int errread, int errwrite,
+ int errpipe_read, int errpipe_write,
+ int close_fds, int restore_signals,
+ int call_setsid,
+ int call_setgid, gid_t gid,
+ int call_setgroups, size_t groups_size, const gid_t *groups,
+ int call_setuid, uid_t uid, int child_umask,
+ const void *child_sigmask,
+ PyObject *py_fds_to_keep,
+ PyObject *preexec_fn,
+ PyObject *preexec_fn_args_tuple)
+{
+
+ pid_t pid;
+
+#ifdef VFORK_USABLE
+ if (child_sigmask) {
+ /* These are checked by our caller; verify them in debug builds. */
+ assert(!call_setsid);
+ assert(!call_setuid);
+ assert(!call_setgid);
+ assert(!call_setgroups);
+ assert(preexec_fn == Py_None);
+
+ pid = vfork();
+ } else
+#endif
+ {
+ pid = fork();
+ }
+
+ if (pid != 0) {
+ return pid;
+ }
+
+ /* Child process.
+ * See the comment above child_exec() for restrictions imposed on
+ * the code below.
+ */
+
+ if (preexec_fn != Py_None) {
+ /* We'll be calling back into Python later so we need to do this.
+ * This call may not be async-signal-safe but neither is calling
+ * back into Python. The user asked us to use hope as a strategy
+ * to avoid deadlock... */
+ PyOS_AfterFork_Child();
+ }
+
+ child_exec(exec_array, argv, envp, cwd,
+ p2cread, p2cwrite, c2pread, c2pwrite,
+ errread, errwrite, errpipe_read, errpipe_write,
+ close_fds, restore_signals, call_setsid,
+ call_setgid, gid, call_setgroups, groups_size, groups,
+ call_setuid, uid, child_umask, child_sigmask,
+ py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);
+ _exit(255);
+ return 0; /* Dead code to avoid a potential compiler warning. */
+}
+
+
static PyObject *
subprocess_fork_exec(PyObject* self, PyObject *args)
{
@@ -836,39 +989,56 @@ subprocess_fork_exec(PyObject* self, PyObject *args)
need_after_fork = 1;
}
- pid = fork();
- if (pid == 0) {
- /* Child process */
- /*
- * Code from here to _exit() must only use async-signal-safe functions,
- * listed at `man 7 signal` or
- * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html.
+ /* NOTE: When old_sigmask is non-NULL, do_fork_exec() may use vfork(). */
+ const void *old_sigmask = NULL;
+#ifdef VFORK_USABLE
+ /* Use vfork() only if it's safe. See the comment above child_exec(). */
+ sigset_t old_sigs;
+ if (preexec_fn == Py_None &&
+ !call_setuid && !call_setgid && !call_setgroups && !call_setsid) {
+ /* Block all signals to ensure that no signal handlers are run in the
+ * child process while it shares memory with us. Note that signals
+ * used internally by C libraries won't be blocked by
+ * pthread_sigmask(), but signal handlers installed by C libraries
+ * normally service only signals originating from *within the process*,
+ * so it should be sufficient to consider any library function that
+ * might send such a signal to be vfork-unsafe and do not call it in
+ * the child.
*/
+ sigset_t all_sigs;
+ sigfillset(&all_sigs);
+ pthread_sigmask(SIG_BLOCK, &all_sigs, &old_sigs);
+ old_sigmask = &old_sigs;
+ }
+#endif
- if (preexec_fn != Py_None) {
- /* We'll be calling back into Python later so we need to do this.
- * This call may not be async-signal-safe but neither is calling
- * back into Python. The user asked us to use hope as a strategy
- * to avoid deadlock... */
- PyOS_AfterFork_Child();
- }
+ pid = do_fork_exec(exec_array, argv, envp, cwd,
+ p2cread, p2cwrite, c2pread, c2pwrite,
+ errread, errwrite, errpipe_read, errpipe_write,
+ close_fds, restore_signals, call_setsid,
+ call_setgid, gid, call_setgroups, num_groups, groups,
+ call_setuid, uid, child_umask, old_sigmask,
+ py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);
- child_exec(exec_array, argv, envp, cwd,
- p2cread, p2cwrite, c2pread, c2pwrite,
- errread, errwrite, errpipe_read, errpipe_write,
- close_fds, restore_signals, call_setsid,
- call_setgid, gid, call_setgroups, num_groups, groups,
- call_setuid, uid, child_umask,
- py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);
- _exit(255);
- return NULL; /* Dead code to avoid a potential compiler warning. */
- }
/* Parent (original) process */
if (pid == -1) {
/* Capture errno for the exception. */
saved_errno = errno;
}
+#ifdef VFORK_USABLE
+ if (old_sigmask) {
+ /* vfork() semantics guarantees that the parent is blocked
+ * until the child performs _exit() or execve(), so it is safe
+ * to unblock signals once we're here.
+ * Note that in environments where vfork() is implemented as fork(),
+ * such as QEMU user-mode emulation, the parent won't be blocked,
+ * but it won't share the address space with the child,
+ * so it's still safe to unblock the signals. */
+ pthread_sigmask(SIG_SETMASK, old_sigmask, NULL);
+ }
+#endif
+
Py_XDECREF(cwd_obj2);
if (need_after_fork)
diff --git a/configure b/configure
index 29f33b5..bc87485 100755
--- a/configure
+++ b/configure
@@ -11732,7 +11732,7 @@ for ac_func in alarm accept4 setitimer getitimer bind_textdomain_codeset chown \
sigaction sigaltstack sigfillset siginterrupt sigpending sigrelse \
sigtimedwait sigwait sigwaitinfo snprintf strftime strlcpy strsignal symlinkat sync \
sysconf tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \
- truncate uname unlinkat utimensat utimes waitid waitpid wait3 wait4 \
+ truncate uname unlinkat utimensat utimes vfork waitid waitpid wait3 wait4 \
wcscoll wcsftime wcsxfrm wmemcmp writev _getpty rtpSpawn
do :
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
diff --git a/configure.ac b/configure.ac
index 9698c3c..49ed09a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -3690,7 +3690,7 @@ AC_CHECK_FUNCS(alarm accept4 setitimer getitimer bind_textdomain_codeset chown \
sigaction sigaltstack sigfillset siginterrupt sigpending sigrelse \
sigtimedwait sigwait sigwaitinfo snprintf strftime strlcpy strsignal symlinkat sync \
sysconf tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \
- truncate uname unlinkat utimensat utimes waitid waitpid wait3 wait4 \
+ truncate uname unlinkat utimensat utimes vfork waitid waitpid wait3 wait4 \
wcscoll wcsftime wcsxfrm wmemcmp writev _getpty rtpSpawn)
# Force lchmod off for Linux. Linux disallows changing the mode of symbolic
diff --git a/pyconfig.h.in b/pyconfig.h.in
index 298cb4f..af8a3d6 100644
--- a/pyconfig.h.in
+++ b/pyconfig.h.in
@@ -1301,6 +1301,9 @@
/* Define to 1 if you have the <uuid/uuid.h> header file. */
#undef HAVE_UUID_UUID_H
+/* Define to 1 if you have the `vfork' function. */
+#undef HAVE_VFORK
+
/* Define to 1 if you have the `wait3' function. */
#undef HAVE_WAIT3