summaryrefslogtreecommitdiffstats
path: root/Lib/profiling/sampling/_child_monitor.py
blob: e06c550d938b132249c3da71fea771746072ed6e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
"""
Child process monitoring for the sampling profiler.

This module monitors a target process for child process creation and spawns
separate profiler instances for each discovered child.
"""

import subprocess
import sys
import threading
import time

import _remote_debugging

# Seconds the monitor thread waits between scans for new child processes
_CHILD_POLL_INTERVAL_SEC = 0.1

# Default timeout (seconds) used by ChildProcessMonitor.wait_for_profilers()
_DEFAULT_WAIT_TIMEOUT = 30.0

# Maximum number of child profilers tracked at once; once reached, further
# children are skipped with a warning (prevents resource exhaustion)
_MAX_CHILD_PROFILERS = 100

# Completed profilers are pruned once every this many polling cycles
_CLEANUP_INTERVAL_CYCLES = 10


def get_child_pids(pid, recursive=True):
    """
    Look up the child processes of *pid*.

    This is a thin wrapper that forwards to
    ``_remote_debugging.get_child_pids``.

    Args:
        pid: Process ID whose children should be listed
        recursive: When True, grandchildren and deeper descendants are
            included as well as direct children

    Returns:
        The child PIDs as reported by ``_remote_debugging``
    """
    child_pids = _remote_debugging.get_child_pids(pid, recursive=recursive)
    return child_pids


def is_python_process(pid):
    """
    Tell whether *pid* refers to a Python process.

    This is a thin wrapper that forwards to
    ``_remote_debugging.is_python_process``.

    Args:
        pid: Process ID to inspect

    Returns:
        bool: True when the process looks like a Python process,
        False otherwise
    """
    looks_like_python = _remote_debugging.is_python_process(pid)
    return looks_like_python


class ChildProcessMonitor:
    """
    Monitors a target process for child processes and spawns profilers for them.

    While the context is active, a daemon thread repeatedly lists the
    descendants of the target process and starts one
    ``python -m profiling.sampling attach <pid>`` subprocess for every newly
    seen descendant that is a Python process.

    Use as a context manager:
        with ChildProcessMonitor(pid, cli_args, output_pattern) as monitor:
            # monitoring runs here
            monitor.wait_for_profilers()  # optional: wait before cleanup
        # cleanup happens automatically
    """

    def __init__(self, pid, cli_args, output_pattern):
        """
        Initialize the child process monitor.

        Args:
            pid: Parent process ID to monitor
            cli_args: CLI arguments to pass to child profilers
            output_pattern: Pattern for output files; every literal "{pid}"
                in it is replaced with the child's PID. A falsy value leaves
                the CLI arguments untouched.
        """
        self.parent_pid = pid
        self.cli_args = cli_args
        self.output_pattern = output_pattern

        # PIDs already seen; each one is inspected at most once.
        self._known_children = set()
        # Popen objects of the profilers started so far (guarded by _lock).
        self._spawned_profilers = []
        self._lock = threading.Lock()
        # Set by __exit__ to ask the monitor thread to stop.
        self._stop_event = threading.Event()
        self._monitor_thread = None
        self._poll_count = 0

    def __enter__(self):
        """Start the background monitor thread and return the monitor."""
        self._monitor_thread = threading.Thread(
            target=self._monitor_loop,
            daemon=True,
            name=f"child-monitor-{self.parent_pid}",
        )
        self._monitor_thread.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Stop monitoring, wait for the profilers, then terminate stragglers.

        Returns:
            False, so an exception raised inside the ``with`` body propagates.
        """
        self._stop_event.set()
        if self._monitor_thread is not None:
            self._monitor_thread.join(timeout=2.0)
            if self._monitor_thread.is_alive():
                print(
                    "Warning: Monitor thread did not stop cleanly",
                    file=sys.stderr,
                )

        # Wait for child profilers to complete naturally
        self.wait_for_profilers()

        # Terminate any remaining profilers. The list is copied and cleared
        # under the lock; the (possibly slow) cleanup runs outside of it.
        with self._lock:
            profilers_to_cleanup = list(self._spawned_profilers)
            self._spawned_profilers.clear()

        for proc in profilers_to_cleanup:
            self._cleanup_process(proc)
        return False

    def _cleanup_process(self, proc, terminate_timeout=2.0, kill_timeout=1.0):
        """
        Stop a profiler subprocess and reap it.

        Escalates from terminate() to kill() when the process does not exit
        in time.

        Args:
            proc: The subprocess.Popen object of the profiler
            terminate_timeout: Seconds to wait after terminate()
            kill_timeout: Seconds to wait after kill()
        """
        if proc.poll() is not None:
            return  # Already terminated

        proc.terminate()
        try:
            proc.wait(timeout=terminate_timeout)
        except subprocess.TimeoutExpired:
            proc.kill()
            try:
                proc.wait(timeout=kill_timeout)
            except subprocess.TimeoutExpired:
                # Last resort: wait indefinitely to avoid zombie
                # SIGKILL should always work, but we must reap the process
                try:
                    proc.wait()
                except Exception:
                    # Deliberate best effort: there is nothing more that can
                    # be done for this process during cleanup.
                    pass

    @property
    def spawned_profilers(self):
        """Return a snapshot (copy) of the currently tracked profilers."""
        with self._lock:
            return list(self._spawned_profilers)

    def wait_for_profilers(self, timeout=_DEFAULT_WAIT_TIMEOUT):
        """
        Wait for all spawned child profilers to complete.

        Call this before exiting the context if you want profilers to finish
        their work naturally rather than being terminated.

        Args:
            timeout: Maximum time to wait in seconds. This is an overall
                budget shared by all profilers, not a per-profiler limit.
        """
        profilers = self.spawned_profilers
        if not profilers:
            return

        print(
            f"Waiting for {len(profilers)} child profiler(s) to complete...",
            file=sys.stderr,
        )

        deadline = time.monotonic() + timeout
        for proc in profilers:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            try:
                # Give each process at least a short slice of the budget.
                proc.wait(timeout=max(0.1, remaining))
            except subprocess.TimeoutExpired:
                pass

    def _monitor_loop(self):
        """
        Poll for new descendants of the parent until asked to stop.

        Runs in the monitor thread. The loop ends when the stop event is set
        or when scanning the parent raises ProcessLookupError.
        """
        # Note: There is an inherent TOCTOU race between discovering a child
        # process and checking if it's Python. This is expected for process monitoring.
        while not self._stop_event.is_set():
            try:
                self._poll_count += 1

                # Periodically clean up completed profilers to avoid memory buildup
                if self._poll_count % _CLEANUP_INTERVAL_CYCLES == 0:
                    self._cleanup_completed_profilers()

                children = set(get_child_pids(self.parent_pid, recursive=True))

                with self._lock:
                    new_children = children - self._known_children
                    self._known_children.update(new_children)

            except ProcessLookupError:
                # Parent process exited, stop monitoring
                break
            except Exception as e:
                # Log error but continue monitoring
                print(
                    f"Warning: Error in child monitor loop: {e}",
                    file=sys.stderr,
                )
            else:
                # Children are handled outside the scan's try block: they are
                # already recorded as known, so a failure on one of them must
                # neither skip the others nor be mistaken for the parent
                # having exited.
                for child_pid in new_children:
                    self._profile_child_if_python(child_pid)

            self._stop_event.wait(timeout=_CHILD_POLL_INTERVAL_SEC)

    def _profile_child_if_python(self, child_pid):
        """Spawn a profiler for *child_pid* if it is a Python process.

        Errors are contained here so one child cannot affect the others.
        """
        try:
            # Only spawn profiler if this is actually a Python process
            if is_python_process(child_pid):
                self._spawn_profiler_for_child(child_pid)
        except ProcessLookupError:
            # The child exited between discovery and inspection; nothing to
            # profile.
            pass
        except Exception as e:
            print(
                f"Warning: Error inspecting child process {child_pid}: {e}",
                file=sys.stderr,
            )

    def _cleanup_completed_profilers(self):
        """Drop profilers whose process has already exited from tracking."""
        with self._lock:
            # Keep only profilers that are still running
            self._spawned_profilers = [
                p for p in self._spawned_profilers if p.poll() is None
            ]

    def _spawn_profiler_for_child(self, child_pid):
        """
        Start a profiler subprocess attached to *child_pid* and track it.

        Does nothing when the monitor is stopping or the profiler limit has
        been reached. Failures to start are reported on stderr, not raised.

        Args:
            child_pid: PID of the child process to profile
        """
        if self._stop_event.is_set():
            return

        # Check if we've reached the maximum number of child profilers
        with self._lock:
            if len(self._spawned_profilers) >= _MAX_CHILD_PROFILERS:
                print(
                    f"Warning: Max child profilers ({_MAX_CHILD_PROFILERS}) reached, "
                    f"skipping PID {child_pid}",
                    file=sys.stderr,
                )
                return

        cmd = [
            sys.executable,
            "-m",
            "profiling.sampling",
            "attach",
            str(child_pid),
        ]
        cmd.extend(self._build_child_cli_args(child_pid))

        proc = None
        try:
            proc = subprocess.Popen(
                cmd,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            with self._lock:
                # The stop event may have been set while Popen was running;
                # in that case the profiler is not tracked, so it has to be
                # shut down here.
                if self._stop_event.is_set():
                    self._cleanup_process(
                        proc, terminate_timeout=1.0, kill_timeout=1.0
                    )
                    return
                self._spawned_profilers.append(proc)

            print(
                f"Started profiler for child process {child_pid}",
                file=sys.stderr,
            )
        except Exception as e:
            if proc is not None:
                self._cleanup_process(
                    proc, terminate_timeout=1.0, kill_timeout=1.0
                )
            print(
                f"Warning: Failed to start profiler for child {child_pid}: {e}",
                file=sys.stderr,
            )

    def _build_child_cli_args(self, child_pid):
        """
        Build the CLI arguments for the profiler of *child_pid*.

        Starts from a copy of ``self.cli_args``. When an output pattern is
        set, every output option already present ("-o FILE", "--output FILE"
        or "--output=FILE") is pointed at the per-child file; if none is
        present, "-o FILE" is appended.

        Args:
            child_pid: PID substituted for "{pid}" in the output pattern

        Returns:
            A new list of arguments; ``self.cli_args`` is not modified.
        """
        args = list(self.cli_args)

        if not self.output_pattern:
            return args

        # Use replace() instead of format() to handle user filenames with braces
        output_file = self.output_pattern.replace("{pid}", str(child_pid))
        long_prefix = "--output="
        found_output = False

        i = 0
        while i < len(args):
            arg = args[i]
            if arg in ("-o", "--output") and i + 1 < len(args):
                args[i + 1] = output_file
                found_output = True
                i += 2  # skip the value that was just rewritten
                continue
            if isinstance(arg, str) and arg.startswith(long_prefix):
                args[i] = long_prefix + output_file
                found_output = True
            i += 1

        if not found_output:
            args.extend(["-o", output_file])

        return args