diff options
author | Adam Simpkins <adam@adamsimpkins.net> | 2025-02-23 19:42:04 (GMT) |
---|---|---|
committer | Adam Simpkins <adam@adamsimpkins.net> | 2025-02-23 22:15:39 (GMT) |
commit | a6af8994c21c9b499f438574ac74d0749bfdac61 (patch) | |
tree | bd41693355879b13cd5cacbe1c2a2bd571ea1466 | |
parent | a6abb0fff2e8b836dfd58efec752def52faf16b8 (diff) | |
download | SCons-a6af8994c21c9b499f438574ac74d0749bfdac61.zip SCons-a6af8994c21c9b499f438574ac74d0749bfdac61.tar.gz SCons-a6af8994c21c9b499f438574ac74d0749bfdac61.tar.bz2 |
Fix infinite hang in wait_for_process_to_die() on Windows
The ninja generator uses wait_for_process_to_die() to wait for a
previous scons daemon process to terminate. It previously had 3
separate implemementations: one using psutil if that module was not
available, plus a fallback implementation for Windows, and one for
non-Windows platforms.
I was encountering problems with the fallback implementation on Windows
hanging forever, and not being interruptible even with Ctrl-C.
Apparently the win32 OpenProcess() function can return a valid process
handle even for already terminated processes.
This change adds an extra call to GetExitCodeProcess() to avoid
infinitely looping if the process has in fact already exited. I also
added a timeout so that this function will eventually fail rather than
hanging forever if a process does not exit.
I also removed the psutil implementation: it seemed simpler to me to
avoid having multiple separate implementations with different behaviors.
This appeared to be the only place in scons that depends on psutil
outside of tests. Also note that this wait_for_process_to_die()
function is only used by the ninja tool.
I have added unit tests checking the behavior of
wait_for_process_to_die(). Without the `GetExitCodeProcess()` check
added by this PR, the simple code in
`test_wait_for_process_to_die_success()` would hang forever.
-rw-r--r-- | SCons/Tool/ninja_tool/NinjaState.py | 5 | ||||
-rw-r--r-- | SCons/Util/UtilTests.py | 23 | ||||
-rw-r--r-- | SCons/Util/__init__.py | 76 |
3 files changed, 73 insertions, 31 deletions
diff --git a/SCons/Tool/ninja_tool/NinjaState.py b/SCons/Tool/ninja_tool/NinjaState.py index ef11b50..be64a13 100644 --- a/SCons/Tool/ninja_tool/NinjaState.py +++ b/SCons/Tool/ninja_tool/NinjaState.py @@ -694,9 +694,8 @@ class NinjaState: pass # wait for the server process to fully killed - # TODO: update wait_for_process_to_die() to handle timeout and then catch exception - # here and do something smart. - wait_for_process_to_die(pid) + # TODO: catch TimeoutException here and possibly do something smart. + wait_for_process_to_die(pid, timeout=10) if os.path.exists(scons_daemon_dirty): os.unlink(scons_daemon_dirty) diff --git a/SCons/Util/UtilTests.py b/SCons/Util/UtilTests.py index 0f457c3..57ea2c2 100644 --- a/SCons/Util/UtilTests.py +++ b/SCons/Util/UtilTests.py @@ -27,6 +27,7 @@ import functools import hashlib import io import os +import subprocess import sys import unittest import unittest.mock @@ -73,6 +74,7 @@ from SCons.Util import ( to_String, to_bytes, to_str, + wait_for_process_to_die, ) from SCons.Util.envs import is_valid_construction_var from SCons.Util.hashes import ( @@ -847,6 +849,27 @@ bling s4 = silent_intern("spam") assert id(s1) == id(s4) + def test_wait_for_process_to_die_success(self) -> None: + cmd = [sys.executable, "-c", ""] + p = subprocess.Popen(cmd) + p.wait() + wait_for_process_to_die(p.pid, timeout=10.0) + + def test_wait_for_process_to_die_timeout(self) -> None: + # Run a python script that will keep running until we close it's stdin + cmd = [sys.executable, "-c", "import sys; data = sys.stdin.read()"] + p = subprocess.Popen(cmd, stdin=subprocess.PIPE) + + # wait_for_process_to_die() should time out while the process is running + with self.assertRaises(TimeoutError): + wait_for_process_to_die(p.pid, timeout=0.2) + + p.stdin.close() + p.wait() + + # wait_for_process_to_die() should complete normally now + wait_for_process_to_die(p.pid, timeout=10.0) + class HashTestCase(unittest.TestCase): diff --git a/SCons/Util/__init__.py b/SCons/Util/__init__.py index 0398c1f..5bd84dd 100644 --- a/SCons/Util/__init__.py +++ b/SCons/Util/__init__.py @@ -1297,36 +1297,56 @@ def print_time(): return print_time -def wait_for_process_to_die(pid) -> None: +def wait_for_process_to_die(pid: int, timeout: float = 60.0) -> None: """ - Wait for specified process to die, or alternatively kill it - NOTE: This function operates best with psutil pypi package - TODO: Add timeout which raises exception + Wait for the specified process to die. + + A TimeoutError will be thrown if the timeout is hit before the process terminates. + Specifying a negative timeout will cause wait_for_process_to_die() to wait + indefinitely, and never throw TimeoutError. """ - # wait for the process to fully killed - try: - import psutil # pylint: disable=import-outside-toplevel - while True: - if pid not in [proc.pid for proc in psutil.process_iter()]: - break - time.sleep(0.1) - except ImportError: - # if psutil is not installed we can do this the hard way - while True: - if sys.platform == 'win32': - import ctypes # pylint: disable=import-outside-toplevel - PROCESS_QUERY_INFORMATION = 0x1000 - processHandle = ctypes.windll.kernel32.OpenProcess(PROCESS_QUERY_INFORMATION, 0, pid) - if processHandle == 0: - break - ctypes.windll.kernel32.CloseHandle(processHandle) - time.sleep(0.1) - else: - try: - os.kill(pid, 0) - except OSError: - break - time.sleep(0.1) + start_time = time.time() + while True: + if not _is_process_alive(pid): + break + if timeout >= 0.0 and time.time() - start_time > timeout: + raise TimeoutError(f"timed out waiting for process {pid}") + time.sleep(0.1) + + +if sys.platform == 'win32': + def _is_process_alive(pid: int) -> bool: + import ctypes # pylint: disable=import-outside-toplevel + PROCESS_QUERY_INFORMATION = 0x1000 + STILL_ACTIVE = 259 + + processHandle = ctypes.windll.kernel32.OpenProcess(PROCESS_QUERY_INFORMATION, 0, pid) + if processHandle == 0: + return False + + # OpenProcess() may successfully return a handle even for terminated + # processes when something else in the system is still holding a + # reference to their handle. Call GetExitCodeProcess() to check if the + # process has already exited. + try: + exit_code = ctypes.c_ulong() + success = ctypes.windll.kernel32.GetExitCodeProcess( + processHandle, ctypes.byref(exit_code)) + if success: + return exit_code.value == STILL_ACTIVE + finally: + ctypes.windll.kernel32.CloseHandle(processHandle) + + return True + +else: + def _is_process_alive(pid: int) -> bool: + try: + os.kill(pid, 0) + return True + except OSError: + return False + # From: https://stackoverflow.com/questions/1741972/how-to-use-different-formatters-with-the-same-logging-handler-in-python class DispatchingFormatter(Formatter): |