From 81ee0260912dc4b55410f3c6ad755b5c4da82f4a Mon Sep 17 00:00:00 2001 From: pan324 <103143968+pan324@users.noreply.github.com> Date: Tue, 5 Dec 2023 09:11:44 +0100 Subject: gh-82300: Add track parameter to multiprocessing.shared_memory (#110778) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a track parameter to shared memory to allow resource tracking via the side-launched resource tracker process to be disabled on platforms that use it (POSIX). This allows people who do not want automated cleanup at process exit because they are using the shared memory with processes not participating in Python's resource tracking to use the shared_memory API. Co-authored-by: Ɓukasz Langa Co-authored-by: Guido van Rossum Co-authored-by: Antoine Pitrou Co-authored-by: Gregory P. Smith --- Doc/library/multiprocessing.shared_memory.rst | 51 ++++++++++++++------- Lib/multiprocessing/shared_memory.py | 24 +++++++--- Lib/test/_test_multiprocessing.py | 53 ++++++++++++++++++++++ .../2023-10-12-18-19-47.gh-issue-82300.P8-O38.rst | 1 + 4 files changed, 106 insertions(+), 23 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-10-12-18-19-47.gh-issue-82300.P8-O38.rst diff --git a/Doc/library/multiprocessing.shared_memory.rst b/Doc/library/multiprocessing.shared_memory.rst index f453e64..671130d 100644 --- a/Doc/library/multiprocessing.shared_memory.rst +++ b/Doc/library/multiprocessing.shared_memory.rst @@ -36,7 +36,7 @@ or other communications requiring the serialization/deserialization and copying of data. -.. class:: SharedMemory(name=None, create=False, size=0) +.. class:: SharedMemory(name=None, create=False, size=0, *, track=True) Creates a new shared memory block or attaches to an existing shared memory block. Each shared memory block is assigned a unique name. @@ -64,26 +64,45 @@ copying of data. memory block may be larger or equal to the size requested. When attaching to an existing shared memory block, the ``size`` parameter is ignored. + *track*, when enabled, registers the shared memory block with a resource + tracker process on platforms where the OS does not do this automatically. + The resource tracker ensures proper cleanup of the shared memory even + if all other processes with access to the memory exit without doing so. + Python processes created from a common ancestor using :mod:`multiprocessing` + facilities share a single resource tracker process, and the lifetime of + shared memory segments is handled automatically among these processes. + Python processes created in any other way will receive their own + resource tracker when accessing shared memory with *track* enabled. + This will cause the shared memory to be deleted by the resource tracker + of the first process that terminates. + To avoid this issue, users of :mod:`subprocess` or standalone Python + processes should set *track* to ``False`` when there is already another + process in place that does the bookkeeping. + *track* is ignored on Windows, which has its own tracking and + automatically deletes shared memory when all handles to it have been closed. + + .. versionchanged:: 3.13 Added *track* parameter. + .. method:: close() - Closes access to the shared memory from this instance. In order to - ensure proper cleanup of resources, all instances should call - ``close()`` once the instance is no longer needed. Note that calling - ``close()`` does not cause the shared memory block itself to be - destroyed. + Closes the file descriptor/handle to the shared memory from this + instance. :meth:`close()` should be called once access to the shared + memory block from this instance is no longer needed. Depending + on operating system, the underlying memory may or may not be freed + even if all handles to it have been closed. To ensure proper cleanup, + use the :meth:`unlink()` method. .. method:: unlink() - Requests that the underlying shared memory block be destroyed. In - order to ensure proper cleanup of resources, ``unlink()`` should be - called once (and only once) across all processes which have need - for the shared memory block. After requesting its destruction, a - shared memory block may or may not be immediately destroyed and - this behavior may differ across platforms. Attempts to access data - inside the shared memory block after ``unlink()`` has been called may - result in memory access errors. Note: the last process relinquishing - its hold on a shared memory block may call ``unlink()`` and - :meth:`close()` in either order. + Deletes the underlying shared memory block. This should be called only + once per shared memory block regardless of the number of handles to it, + even in other processes. + :meth:`unlink()` and :meth:`close()` can be called in any order, but + trying to access data inside a shared memory block after :meth:`unlink()` + may result in memory access errors, depending on platform. + + This method has no effect on Windows, where the only way to delete a + shared memory block is to close all handles. .. attribute:: buf diff --git a/Lib/multiprocessing/shared_memory.py b/Lib/multiprocessing/shared_memory.py index 9a1e5aa..67e70fd 100644 --- a/Lib/multiprocessing/shared_memory.py +++ b/Lib/multiprocessing/shared_memory.py @@ -71,8 +71,9 @@ class SharedMemory: _flags = os.O_RDWR _mode = 0o600 _prepend_leading_slash = True if _USE_POSIX else False + _track = True - def __init__(self, name=None, create=False, size=0): + def __init__(self, name=None, create=False, size=0, *, track=True): if not size >= 0: raise ValueError("'size' must be a positive integer") if create: @@ -82,6 +83,7 @@ class SharedMemory: if name is None and not self._flags & os.O_EXCL: raise ValueError("'name' can only be None if create=True") + self._track = track if _USE_POSIX: # POSIX Shared Memory @@ -116,8 +118,8 @@ class SharedMemory: except OSError: self.unlink() raise - - resource_tracker.register(self._name, "shared_memory") + if self._track: + resource_tracker.register(self._name, "shared_memory") else: @@ -236,12 +238,20 @@ class SharedMemory: def unlink(self): """Requests that the underlying shared memory block be destroyed. - In order to ensure proper cleanup of resources, unlink should be - called once (and only once) across all processes which have access - to the shared memory block.""" + Unlink should be called once (and only once) across all handles + which have access to the shared memory block, even if these + handles belong to different processes. Closing and unlinking may + happen in any order, but trying to access data inside a shared + memory block after unlinking may result in memory errors, + depending on platform. + + This method has no effect on Windows, where the only way to + delete a shared memory block is to close all handles.""" + if _USE_POSIX and self._name: _posixshmem.shm_unlink(self._name) - resource_tracker.unregister(self._name, "shared_memory") + if self._track: + resource_tracker.unregister(self._name, "shared_memory") _encoding = "utf8" diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index ec003d8..a94eb6c 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -4455,6 +4455,59 @@ class _TestSharedMemory(BaseTestCase): "resource_tracker: There appear to be 1 leaked " "shared_memory objects to clean up at shutdown", err) + @unittest.skipIf(os.name != "posix", "resource_tracker is posix only") + def test_shared_memory_untracking(self): + # gh-82300: When a separate Python process accesses shared memory + # with track=False, it must not cause the memory to be deleted + # when terminating. + cmd = '''if 1: + import sys + from multiprocessing.shared_memory import SharedMemory + mem = SharedMemory(create=False, name=sys.argv[1], track=False) + mem.close() + ''' + mem = shared_memory.SharedMemory(create=True, size=10) + # The resource tracker shares pipes with the subprocess, and so + # err existing means that the tracker process has terminated now. + try: + rc, out, err = script_helper.assert_python_ok("-c", cmd, mem.name) + self.assertNotIn(b"resource_tracker", err) + self.assertEqual(rc, 0) + mem2 = shared_memory.SharedMemory(create=False, name=mem.name) + mem2.close() + finally: + try: + mem.unlink() + except OSError: + pass + mem.close() + + @unittest.skipIf(os.name != "posix", "resource_tracker is posix only") + def test_shared_memory_tracking(self): + # gh-82300: When a separate Python process accesses shared memory + # with track=True, it must cause the memory to be deleted when + # terminating. + cmd = '''if 1: + import sys + from multiprocessing.shared_memory import SharedMemory + mem = SharedMemory(create=False, name=sys.argv[1], track=True) + mem.close() + ''' + mem = shared_memory.SharedMemory(create=True, size=10) + try: + rc, out, err = script_helper.assert_python_ok("-c", cmd, mem.name) + self.assertEqual(rc, 0) + self.assertIn( + b"resource_tracker: There appear to be 1 leaked " + b"shared_memory objects to clean up at shutdown", err) + finally: + try: + mem.unlink() + except OSError: + pass + resource_tracker.unregister(mem._name, "shared_memory") + mem.close() + # # Test to verify that `Finalize` works. # diff --git a/Misc/NEWS.d/next/Library/2023-10-12-18-19-47.gh-issue-82300.P8-O38.rst b/Misc/NEWS.d/next/Library/2023-10-12-18-19-47.gh-issue-82300.P8-O38.rst new file mode 100644 index 0000000..d7e6b22 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-10-12-18-19-47.gh-issue-82300.P8-O38.rst @@ -0,0 +1 @@ +Add ``track`` parameter to :class:`multiprocessing.shared_memory.SharedMemory` that allows using shared memory blocks without having to register with the POSIX resource tracker that automatically releases them upon process exit. -- cgit v0.12