summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Nick Coghlan <ncoghlan@gmail.com> 2013-12-17 12:17:26 (GMT)
committer Nick Coghlan <ncoghlan@gmail.com> 2013-12-17 12:17:26 (GMT)
commit 9a76735b1dd427f4b7d6712ca3bb2b8958e689e8 (patch)
tree cf7c4136675644172eb366d5ee6468b399c9880c
parent 7cff4cd7e9f45cc4d2a1ee469c133b09e9ed7633 (diff)
download cpython-9a76735b1dd427f4b7d6712ca3bb2b8958e689e8.zip
cpython-9a76735b1dd427f4b7d6712ca3bb2b8958e689e8.tar.gz
cpython-9a76735b1dd427f4b7d6712ca3bb2b8958e689e8.tar.bz2
Close #19946: use runpy as needed in multiprocessing
- handles main files without a suffix
- handles main submodules properly
- adds test cases for the various kinds of __main__
-rw-r--r-- Doc/whatsnew/3.4.rst 15
-rw-r--r-- Lib/multiprocessing/spawn.py 121
-rw-r--r-- Lib/test/test_multiprocessing_main_handling.py 287
-rw-r--r-- Misc/NEWS 6
4 files changed, 375 insertions, 54 deletions
diff --git a/Doc/whatsnew/3.4.rst b/Doc/whatsnew/3.4.rst
index 4cbc1e4..5a4fa86 100644
--- a/Doc/whatsnew/3.4.rst
+++ b/Doc/whatsnew/3.4.rst
@@ -624,13 +624,22 @@ mmap objects can now be weakref'ed.
multiprocessing
---------------
-On Unix two new *start methods* have been added for starting processes
-using :mod:`multiprocessing`. These make the mixing of processes with
-threads more robust. See :issue:`8713`.
+On Unix, two new *start methods* (``spawn`` and ``forkserver``) have been
+added for starting processes using :mod:`multiprocessing`. These make
+the mixing of processes with threads more robust, and the ``spawn``
+method matches the semantics that multiprocessing has always used on
+Windows. (Contributed by Richard Oudkerk in :issue:`8713`.)
Also, except when using the old *fork* start method, child processes
will no longer inherit unneeded handles/file descriptors from their parents.
+:mod:`multiprocessing` now relies on :mod:`runpy` (which implements the
+``-m`` switch) to initialise ``__main__`` appropriately in child processes
+when using the ``spawn`` or ``forkserver`` start methods. This resolves some
+edge cases where combining multiprocessing, the ``-m`` command line switch
+and explicit relative imports could cause obscure failures in child
+processes. (Contributed by Nick Coghlan in :issue:`19946`.)
+
os
--
diff --git a/Lib/multiprocessing/spawn.py b/Lib/multiprocessing/spawn.py
index c3adfc1..c8297f3 100644
--- a/Lib/multiprocessing/spawn.py
+++ b/Lib/multiprocessing/spawn.py
@@ -11,6 +11,8 @@
import os
import pickle
import sys
+import runpy
+import types
from . import get_start_method, set_start_method
from . import process
@@ -157,15 +159,19 @@ def get_preparation_data(name):
start_method=get_start_method(),
)
- if sys.platform != 'win32' or (not WINEXE and not WINSERVICE):
- main_path = getattr(sys.modules['__main__'], '__file__', None)
- if not main_path and sys.argv[0] not in ('', '-c'):
- main_path = sys.argv[0]
+ # Figure out whether to initialise main in the subprocess as a module
+ # or through direct execution (or to leave it alone entirely)
+ main_module = sys.modules['__main__']
+ main_mod_name = getattr(main_module.__spec__, "name", None)
+ if main_mod_name is not None:
+ d['init_main_from_name'] = main_mod_name
+ elif sys.platform != 'win32' or (not WINEXE and not WINSERVICE):
+ main_path = getattr(main_module, '__file__', None)
if main_path is not None:
if (not os.path.isabs(main_path) and
process.ORIGINAL_DIR is not None):
main_path = os.path.join(process.ORIGINAL_DIR, main_path)
- d['main_path'] = os.path.normpath(main_path)
+ d['init_main_from_path'] = os.path.normpath(main_path)
return d
@@ -206,55 +212,68 @@ def prepare(data):
if 'start_method' in data:
set_start_method(data['start_method'])
- if 'main_path' in data:
- import_main_path(data['main_path'])
+ if 'init_main_from_name' in data:
+ _fixup_main_from_name(data['init_main_from_name'])
+ elif 'init_main_from_path' in data:
+ _fixup_main_from_path(data['init_main_from_path'])
+
+# Multiprocessing module helpers to fix up the main module in
+# spawned subprocesses
+def _fixup_main_from_name(mod_name):
+ # __main__.py files for packages, directories, zip archives, etc, run
+ # their "main only" code unconditionally, so we don't even try to
+ # populate anything in __main__, nor do we make any changes to
+ # __main__ attributes
+ current_main = sys.modules['__main__']
+ if mod_name == "__main__" or mod_name.endswith(".__main__"):
+ return
+
+ # If this process was forked, __main__ may already be populated
+ if getattr(current_main.__spec__, "name", None) == mod_name:
+ return
+
+ # Otherwise, __main__ may contain some non-main code where we need to
+ # support unpickling it properly. We rerun it as __mp_main__ and make
+ # the normal __main__ an alias to that
+ old_main_modules.append(current_main)
+ main_module = types.ModuleType("__mp_main__")
+ main_content = runpy.run_module(mod_name,
+ run_name="__mp_main__",
+ alter_sys=True)
+ main_module.__dict__.update(main_content)
+ sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module
+
+
+def _fixup_main_from_path(main_path):
+ # If this process was forked, __main__ may already be populated
+ current_main = sys.modules['__main__']
+
+ # Unfortunately, the main ipython launch script historically had no
+ # "if __name__ == '__main__'" guard, so we work around that
+ # by treating it like a __main__.py file
+ # See https://github.com/ipython/ipython/issues/4698
+ main_name = os.path.splitext(os.path.basename(main_path))[0]
+ if main_name == 'ipython':
+ return
+
+ # Otherwise, if __file__ already has the setting we expect,
+ # there's nothing more to do
+ if getattr(current_main, '__file__', None) == main_path:
+ return
+
+ # If the parent process has sent a path through rather than a module
+ # name we assume it is an executable script that may contain
+ # non-main code that needs to be executed
+ old_main_modules.append(current_main)
+ main_module = types.ModuleType("__mp_main__")
+ main_content = runpy.run_path(main_path,
+ run_name="__mp_main__")
+ main_module.__dict__.update(main_content)
+ sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module
def import_main_path(main_path):
'''
Set sys.modules['__main__'] to module at main_path
'''
- # XXX (ncoghlan): The following code makes several bogus
- # assumptions regarding the relationship between __file__
- # and a module's real name. See PEP 302 and issue #10845
- if getattr(sys.modules['__main__'], '__file__', None) == main_path:
- return
-
- main_name = os.path.splitext(os.path.basename(main_path))[0]
- if main_name == '__init__':
- main_name = os.path.basename(os.path.dirname(main_path))
-
- if main_name == '__main__':
- main_module = sys.modules['__main__']
- main_module.__file__ = main_path
- elif main_name != 'ipython':
- # Main modules not actually called __main__.py may
- # contain additional code that should still be executed
- import importlib
- import types
-
- if main_path is None:
- dirs = None
- elif os.path.basename(main_path).startswith('__init__.py'):
- dirs = [os.path.dirname(os.path.dirname(main_path))]
- else:
- dirs = [os.path.dirname(main_path)]
-
- assert main_name not in sys.modules, main_name
- sys.modules.pop('__mp_main__', None)
- # We should not try to load __main__
- # since that would execute 'if __name__ == "__main__"'
- # clauses, potentially causing a psuedo fork bomb.
- main_module = types.ModuleType(main_name)
- # XXX Use a target of main_module?
- spec = importlib.find_spec(main_name, path=dirs)
- if spec is None:
- raise ImportError(name=main_name)
- methods = importlib._bootstrap._SpecMethods(spec)
- methods.init_module_attrs(main_module)
- main_module.__name__ = '__mp_main__'
- code = spec.loader.get_code(main_name)
- exec(code, main_module.__dict__)
-
- old_main_modules.append(sys.modules['__main__'])
- sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module
+ _fixup_main_from_path(main_path)
diff --git a/Lib/test/test_multiprocessing_main_handling.py b/Lib/test/test_multiprocessing_main_handling.py
new file mode 100644
index 0000000..007fc54
--- /dev/null
+++ b/Lib/test/test_multiprocessing_main_handling.py
@@ -0,0 +1,287 @@
+# tests __main__ module handling in multiprocessing
+
+import importlib
+import importlib.machinery
+import zipimport
+import unittest
+import sys
+import os
+import os.path
+import py_compile
+
+from test import support
+from test.script_helper import (
+ make_pkg, make_script, make_zip_pkg, make_zip_script,
+ assert_python_ok, assert_python_failure, temp_dir,
+ spawn_python, kill_python)
+
+# We look inside the context module to find out which
+# start methods we can check
+from multiprocessing.context import _concrete_contexts
+
+verbose = support.verbose
+
+test_source = """\
+# multiprocessing includes all sorts of shenanigans to make __main__
+# attributes accessible in the subprocess in a pickle compatible way.
+
+# We run the "doesn't work in the interactive interpreter" example from
+# the docs to make sure it *does* work from an executed __main__,
+# regardless of the invocation mechanism
+
+import sys
+import time
+from multiprocessing import Pool, set_start_method
+
+# We use this __main__ defined function in the map call below in order to
+# check that multiprocessing is correctly running the unguarded
+# code in child processes and then making it available as __main__
+def f(x):
+ return x*x
+
+# Check explicit relative imports
+if "check_sibling" in __file__:
+ # We're inside a package and not in a __main__.py file
+ # so make sure explicit relative imports work correctly
+ from . import sibling
+
+if __name__ == '__main__':
+ start_method = sys.argv[1]
+ set_start_method(start_method)
+ p = Pool(5)
+ results = []
+ p.map_async(f, [1, 2, 3], callback=results.extend)
+ deadline = time.time() + 2 # up to 2 s to report the results
+ while not results:
+ time.sleep(0.05)
+ if time.time() > deadline:
+ raise RuntimeError("Timed out waiting for results")
+ results.sort()
+ print(start_method, "->", results)
+"""
+
+test_source_main_skipped_in_children = """\
+# __main__.py files have an implied "if __name__ == '__main__'" so
+# multiprocessing should always skip running them in child processes
+
+# This means we can't use __main__ defined functions in child processes,
+# so we just use "int" as a passthrough operation below
+
+if __name__ != "__main__":
+ raise RuntimeError("Should only be called as __main__!")
+
+import sys
+import time
+from multiprocessing import Pool, set_start_method
+
+start_method = sys.argv[1]
+set_start_method(start_method)
+p = Pool(5)
+results = []
+p.map_async(int, [1, 4, 9], callback=results.extend)
+deadline = time.time() + 2 # up to 2 s to report the results
+while not results:
+ time.sleep(0.05)
+ if time.time() > deadline:
+ raise RuntimeError("Timed out waiting for results")
+results.sort()
+print(start_method, "->", results)
+"""
+
+# These helpers were copied from test_cmd_line_script & tweaked a bit...
+
+def _make_test_script(script_dir, script_basename,
+ source=test_source, omit_suffix=False):
+ to_return = make_script(script_dir, script_basename,
+ source, omit_suffix)
+ # Hack to check explicit relative imports
+ if script_basename == "check_sibling":
+ make_script(script_dir, "sibling", "")
+ importlib.invalidate_caches()
+ return to_return
+
+def _make_test_zip_pkg(zip_dir, zip_basename, pkg_name, script_basename,
+ source=test_source, depth=1):
+ to_return = make_zip_pkg(zip_dir, zip_basename, pkg_name, script_basename,
+ source, depth)
+ importlib.invalidate_caches()
+ return to_return
+
+# There's no easy way to pass the script directory in to get
+# -m to work (avoiding that is the whole point of making
+# directories and zipfiles executable!)
+# So we fake it for testing purposes with a custom launch script
+launch_source = """\
+import sys, os.path, runpy
+sys.path.insert(0, %s)
+runpy._run_module_as_main(%r)
+"""
+
+def _make_launch_script(script_dir, script_basename, module_name, path=None):
+ if path is None:
+ path = "os.path.dirname(__file__)"
+ else:
+ path = repr(path)
+ source = launch_source % (path, module_name)
+ to_return = make_script(script_dir, script_basename, source)
+ importlib.invalidate_caches()
+ return to_return
+
+class MultiProcessingCmdLineMixin():
+ maxDiff = None # Show full tracebacks on subprocess failure
+
+ def setupClass(cls):
+ if cls.start_method not in _concrete_contexts:
+ raise unittest.SkipTest("%r start method not available" %
+ cls.start_method)
+
+ def _check_output(self, script_name, exit_code, out, err):
+ if verbose > 1:
+ print("Output from test script %r:" % script_name)
+ print(out)
+ self.assertEqual(exit_code, 0)
+ self.assertEqual(err.decode('utf-8'), '')
+ expected_results = "%s -> [1, 4, 9]" % self.start_method
+ self.assertEqual(out.decode('utf-8').strip(), expected_results)
+
+ def _check_script(self, script_name, *cmd_line_switches):
+ if not __debug__:
+ cmd_line_switches += ('-' + 'O' * sys.flags.optimize,)
+ run_args = cmd_line_switches + (script_name, self.start_method)
+ rc, out, err = assert_python_ok(*run_args, __isolated=False)
+ self._check_output(script_name, rc, out, err)
+
+ def test_basic_script(self):
+ with temp_dir() as script_dir:
+ script_name = _make_test_script(script_dir, 'script')
+ self._check_script(script_name)
+
+ def test_basic_script_no_suffix(self):
+ with temp_dir() as script_dir:
+ script_name = _make_test_script(script_dir, 'script',
+ omit_suffix=True)
+ self._check_script(script_name)
+
+ def test_ipython_workaround(self):
+ # Some versions of the IPython launch script are missing the
+ # __name__ == "__main__" guard, and multiprocessing has long had
+ # a workaround for that case
+ # See https://github.com/ipython/ipython/issues/4698
+ source = test_source_main_skipped_in_children
+ with temp_dir() as script_dir:
+ script_name = _make_test_script(script_dir, 'ipython',
+ source=source)
+ self._check_script(script_name)
+ script_no_suffix = _make_test_script(script_dir, 'ipython',
+ source=source,
+ omit_suffix=True)
+ self._check_script(script_no_suffix)
+
+ def test_script_compiled(self):
+ with temp_dir() as script_dir:
+ script_name = _make_test_script(script_dir, 'script')
+ py_compile.compile(script_name, doraise=True)
+ os.remove(script_name)
+ pyc_file = support.make_legacy_pyc(script_name)
+ self._check_script(pyc_file)
+
+ def test_directory(self):
+ source = self.main_in_children_source
+ with temp_dir() as script_dir:
+ script_name = _make_test_script(script_dir, '__main__',
+ source=source)
+ self._check_script(script_dir)
+
+ def test_directory_compiled(self):
+ source = self.main_in_children_source
+ with temp_dir() as script_dir:
+ script_name = _make_test_script(script_dir, '__main__',
+ source=source)
+ py_compile.compile(script_name, doraise=True)
+ os.remove(script_name)
+ pyc_file = support.make_legacy_pyc(script_name)
+ self._check_script(script_dir)
+
+ def test_zipfile(self):
+ source = self.main_in_children_source
+ with temp_dir() as script_dir:
+ script_name = _make_test_script(script_dir, '__main__',
+ source=source)
+ zip_name, run_name = make_zip_script(script_dir, 'test_zip', script_name)
+ self._check_script(zip_name)
+
+ def test_zipfile_compiled(self):
+ source = self.main_in_children_source
+ with temp_dir() as script_dir:
+ script_name = _make_test_script(script_dir, '__main__',
+ source=source)
+ compiled_name = py_compile.compile(script_name, doraise=True)
+ zip_name, run_name = make_zip_script(script_dir, 'test_zip', compiled_name)
+ self._check_script(zip_name)
+
+ def test_module_in_package(self):
+ with temp_dir() as script_dir:
+ pkg_dir = os.path.join(script_dir, 'test_pkg')
+ make_pkg(pkg_dir)
+ script_name = _make_test_script(pkg_dir, 'check_sibling')
+ launch_name = _make_launch_script(script_dir, 'launch',
+ 'test_pkg.check_sibling')
+ self._check_script(launch_name)
+
+ def test_module_in_package_in_zipfile(self):
+ with temp_dir() as script_dir:
+ zip_name, run_name = _make_test_zip_pkg(script_dir, 'test_zip', 'test_pkg', 'script')
+ launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg.script', zip_name)
+ self._check_script(launch_name)
+
+ def test_module_in_subpackage_in_zipfile(self):
+ with temp_dir() as script_dir:
+ zip_name, run_name = _make_test_zip_pkg(script_dir, 'test_zip', 'test_pkg', 'script', depth=2)
+ launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg.test_pkg.script', zip_name)
+ self._check_script(launch_name)
+
+ def test_package(self):
+ source = self.main_in_children_source
+ with temp_dir() as script_dir:
+ pkg_dir = os.path.join(script_dir, 'test_pkg')
+ make_pkg(pkg_dir)
+ script_name = _make_test_script(pkg_dir, '__main__',
+ source=source)
+ launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg')
+ self._check_script(launch_name)
+
+ def test_package_compiled(self):
+ source = self.main_in_children_source
+ with temp_dir() as script_dir:
+ pkg_dir = os.path.join(script_dir, 'test_pkg')
+ make_pkg(pkg_dir)
+ script_name = _make_test_script(pkg_dir, '__main__',
+ source=source)
+ compiled_name = py_compile.compile(script_name, doraise=True)
+ os.remove(script_name)
+ pyc_file = support.make_legacy_pyc(script_name)
+ launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg')
+ self._check_script(launch_name)
+
+# Test all supported start methods (setupClass skips as appropriate)
+
+class SpawnCmdLineTest(MultiProcessingCmdLineMixin, unittest.TestCase):
+ start_method = 'spawn'
+ main_in_children_source = test_source_main_skipped_in_children
+
+class ForkCmdLineTest(MultiProcessingCmdLineMixin, unittest.TestCase):
+ start_method = 'fork'
+ main_in_children_source = test_source
+
+class ForkServerCmdLineTest(MultiProcessingCmdLineMixin, unittest.TestCase):
+ start_method = 'forkserver'
+ main_in_children_source = test_source_main_skipped_in_children
+
+def test_main():
+ support.run_unittest(SpawnCmdLineTest,
+ ForkCmdLineTest,
+ ForkServerCmdLineTest)
+ support.reap_children()
+
+if __name__ == '__main__':
+ test_main()
diff --git a/Misc/NEWS b/Misc/NEWS
index ee3c793..e8ab0e7 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -44,6 +44,12 @@ Core and Builtins
Library
-------
+- Issue #19946: multiprocessing now uses runpy to initialize __main__ in
+ child processes when necessary, allowing it to correctly handle scripts
+ without suffixes and submodules that use explicit relative imports or
+ otherwise rely on parent modules being correctly imported prior to
+ execution.
+
- Issue #19921: When Path.mkdir() is called with parents=True, any missing
parent is created with the default permissions, ignoring the mode argument
(mimicking the POSIX "mkdir -p" command).