From 3932b0f7b1566374427daa8bc47203032015e350 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Langa?= <lukasz@langa.pl>
Date: Fri, 10 Nov 2023 18:17:45 +0100
Subject: gh-110722: Make `-m test -T -j` use sys.monitoring (GH-111710)

Now all results from worker processes are aggregated and
displayed together as a summary at the end of a regrtest run.

The traditional trace is left in place for use with sequential
in-process test runs but now raises a warning that those
numbers are not precise.

`-T -j` requires `--with-pydebug` as it relies on `-Xpresite=`.
---
 Doc/library/trace.rst                              | 10 ++++-
 Lib/test/cov.py                                    | 48 ++++++++++++++++++++++
 Lib/test/libregrtest/cmdline.py                    | 14 +++++--
 Lib/test/libregrtest/main.py                       | 30 +++++++-------
 Lib/test/libregrtest/result.py                     | 12 ++++++
 Lib/test/libregrtest/results.py                    | 15 +++++--
 Lib/test/libregrtest/run_workers.py                |  2 +-
 Lib/test/libregrtest/runtests.py                   |  1 +
 Lib/test/libregrtest/worker.py                     | 16 +++++++-
 Lib/test/support/__init__.py                       | 22 +++++++---
 Lib/test/test_regrtest.py                          | 18 ++++++--
 Lib/trace.py                                       | 10 ++++-
 .../2023-11-03-18-59-13.gh-issue-110722.jvT1pb.rst |  2 +
 13 files changed, 166 insertions(+), 34 deletions(-)
 create mode 100644 Lib/test/cov.py
 create mode 100644 Misc/NEWS.d/next/Tests/2023-11-03-18-59-13.gh-issue-110722.jvT1pb.rst

diff --git a/Doc/library/trace.rst b/Doc/library/trace.rst
index e9b59a6..8854905 100644
--- a/Doc/library/trace.rst
+++ b/Doc/library/trace.rst
@@ -187,7 +187,8 @@ Programmatic Interface
 
       Merge in data from another :class:`CoverageResults` object.
 
-   .. method:: write_results(show_missing=True, summary=False, coverdir=None)
+   .. method:: write_results(show_missing=True, summary=False, coverdir=None,\
+                             *, ignore_missing_files=False)
 
       Write coverage results.  Set *show_missing* to show lines that had no
       hits.  Set *summary* to include in the output the coverage summary per
@@ -195,6 +196,13 @@ Programmatic Interface
       result files will be output.  If ``None``, the results for each source
       file are placed in its directory.
 
+      If *ignore_missing_files* is ``True``, coverage counts for files that no
+      longer exist are silently ignored. Otherwise, a missing file will
+      raise a :exc:`FileNotFoundError`.
+
+      .. versionchanged:: 3.13
+         Added *ignore_missing_files* parameter.
+
 A simple example demonstrating the use of the programmatic interface::
 
    import sys
diff --git a/Lib/test/cov.py b/Lib/test/cov.py
new file mode 100644
index 0000000..e4699c7
--- /dev/null
+++ b/Lib/test/cov.py
@@ -0,0 +1,48 @@
+"""A minimal hook for gathering line coverage of the standard library.
+
+Designed to be used with -Xpresite= which means:
+* it installs itself on import
+* it's not imported as `__main__` so can't use the ifmain idiom
+* it can't import anything besides `sys` to avoid tainting gathered coverage
+* filenames are not normalized
+
+To get gathered coverage back, look for 'test.cov' in `sys.modules`
+instead of importing directly. That way you can determine if the module
+was already in use.
+
+If you need to disable the hook, call the `disable()` function.
+"""
+
+import sys
+
+mon = sys.monitoring
+
+FileName = str
+LineNo = int
+Location = tuple[FileName, LineNo]
+
+coverage: set[Location] = set()
+
+
+# `types` and `typing` aren't imported to avoid invalid coverage
+def add_line(
+    code: "types.CodeType",
+    lineno: int,
+) -> "typing.Literal[sys.monitoring.DISABLE]":
+    coverage.add((code.co_filename, lineno))
+    return mon.DISABLE
+
+
+def enable():
+    mon.use_tool_id(mon.COVERAGE_ID, "regrtest coverage")
+    mon.register_callback(mon.COVERAGE_ID, mon.events.LINE, add_line)
+    mon.set_events(mon.COVERAGE_ID, mon.events.LINE)
+
+
+def disable():
+    mon.set_events(mon.COVERAGE_ID, 0)
+    mon.register_callback(mon.COVERAGE_ID, mon.events.LINE, None)
+    mon.free_tool_id(mon.COVERAGE_ID)
+
+
+enable()
diff --git a/Lib/test/libregrtest/cmdline.py b/Lib/test/libregrtest/cmdline.py
index 1747511..a5f02d6 100644
--- a/Lib/test/libregrtest/cmdline.py
+++ b/Lib/test/libregrtest/cmdline.py
@@ -2,7 +2,7 @@ import argparse
 import os.path
 import shlex
 import sys
-from test.support import os_helper
+from test.support import os_helper, Py_DEBUG
 from .utils import ALL_RESOURCES, RESOURCE_NAMES
 
 
@@ -448,8 +448,16 @@ def _parse_args(args, **kwargs):
 
     if ns.single and ns.fromfile:
         parser.error("-s and -f don't go together!")
-    if ns.use_mp is not None and ns.trace:
-        parser.error("-T and -j don't go together!")
+    if ns.trace:
+        if ns.use_mp is not None:
+            if not Py_DEBUG:
+                parser.error("need --with-pydebug to use -T and -j together")
+        else:
+            print(
+                "Warning: collecting coverage without -j is imprecise. Configure"
+                " --with-pydebug and run -m test -T -j for best results.",
+                file=sys.stderr
+            )
     if ns.python is not None:
         if ns.use_mp is None:
             parser.error("-p requires -j!")
diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py
index 9b86548..8642894 100644
--- a/Lib/test/libregrtest/main.py
+++ b/Lib/test/libregrtest/main.py
@@ -5,6 +5,7 @@ import shlex
 import sys
 import sysconfig
 import time
+import trace
 
 from test import support
 from test.support import os_helper, MS_WINDOWS
@@ -13,7 +14,7 @@ from .cmdline import _parse_args, Namespace
 from .findtests import findtests, split_test_packages, list_cases
 from .logger import Logger
 from .pgo import setup_pgo_tests
-from .result import State
+from .result import State, TestResult
 from .results import TestResults, EXITCODE_INTERRUPTED
 from .runtests import RunTests, HuntRefleak
 from .setup import setup_process, setup_test_dir
@@ -284,7 +285,9 @@ class Regrtest:
         self.results.display_result(runtests.tests,
                                     self.quiet, self.print_slowest)
 
-    def run_test(self, test_name: TestName, runtests: RunTests, tracer):
+    def run_test(
+        self, test_name: TestName, runtests: RunTests, tracer: trace.Trace | None
+    ) -> TestResult:
         if tracer is not None:
             # If we're tracing code coverage, then we don't exit with status
             # if on a false return value from main.
@@ -292,6 +295,7 @@ class Regrtest:
             namespace = dict(locals())
             tracer.runctx(cmd, globals=globals(), locals=namespace)
             result = namespace['result']
+            result.covered_lines = list(tracer.counts)
         else:
             result = run_single_test(test_name, runtests)
 
@@ -299,9 +303,8 @@ class Regrtest:
 
         return result
 
-    def run_tests_sequentially(self, runtests):
+    def run_tests_sequentially(self, runtests) -> None:
         if self.coverage:
-            import trace
             tracer = trace.Trace(trace=False, count=True)
         else:
             tracer = None
@@ -349,8 +352,6 @@ class Regrtest:
         if previous_test:
             print(previous_test)
 
-        return tracer
-
     def get_state(self):
         state = self.results.get_state(self.fail_env_changed)
         if self.first_state:
@@ -361,7 +362,7 @@ class Regrtest:
         from .run_workers import RunWorkers
         RunWorkers(num_workers, runtests, self.logger, self.results).run()
 
-    def finalize_tests(self, tracer):
+    def finalize_tests(self, coverage: trace.CoverageResults | None) -> None:
         if self.next_single_filename:
             if self.next_single_test:
                 with open(self.next_single_filename, 'w') as fp:
@@ -369,10 +370,10 @@ class Regrtest:
             else:
                 os.unlink(self.next_single_filename)
 
-        if tracer is not None:
-            results = tracer.results()
-            results.write_results(show_missing=True, summary=True,
-                                  coverdir=self.coverage_dir)
+        if coverage is not None:
+            coverage.write_results(show_missing=True, summary=True,
+                                   coverdir=self.coverage_dir,
+                                   ignore_missing_files=True)
 
         if self.want_run_leaks:
             os.system("leaks %d" % os.getpid())
@@ -412,6 +413,7 @@ class Regrtest:
             hunt_refleak=self.hunt_refleak,
             test_dir=self.test_dir,
             use_junit=(self.junit_filename is not None),
+            coverage=self.coverage,
             memory_limit=self.memory_limit,
             gc_threshold=self.gc_threshold,
             use_resources=self.use_resources,
@@ -458,10 +460,10 @@ class Regrtest:
         try:
             if self.num_workers:
                 self._run_tests_mp(runtests, self.num_workers)
-                tracer = None
             else:
-                tracer = self.run_tests_sequentially(runtests)
+                self.run_tests_sequentially(runtests)
 
+            coverage = self.results.get_coverage_results()
             self.display_result(runtests)
 
             if self.want_rerun and self.results.need_rerun():
@@ -471,7 +473,7 @@ class Regrtest:
                 self.logger.stop_load_tracker()
 
         self.display_summary()
-        self.finalize_tests(tracer)
+        self.finalize_tests(coverage)
 
         return self.results.get_exitcode(self.fail_env_changed,
                                          self.fail_rerun)
diff --git a/Lib/test/libregrtest/result.py b/Lib/test/libregrtest/result.py
index 8bfd366..74eae40 100644
--- a/Lib/test/libregrtest/result.py
+++ b/Lib/test/libregrtest/result.py
@@ -78,6 +78,11 @@ class State:
         }
 
 
+FileName = str
+LineNo = int
+Location = tuple[FileName, LineNo]
+
+
 @dataclasses.dataclass(slots=True)
 class TestResult:
     test_name: TestName
@@ -91,6 +96,9 @@ class TestResult:
     errors: list[tuple[str, str]] | None = None
     failures: list[tuple[str, str]] | None = None
 
+    # partial coverage in a worker run; not used by sequential in-process runs
+    covered_lines: list[Location] | None = None
+
     def is_failed(self, fail_env_changed: bool) -> bool:
         if self.state == State.ENV_CHANGED:
             return fail_env_changed
@@ -207,6 +215,10 @@ def _decode_test_result(data: dict[str, Any]) -> TestResult | dict[str, Any]:
         data.pop('__test_result__')
         if data['stats'] is not None:
             data['stats'] = TestStats(**data['stats'])
+        if data['covered_lines'] is not None:
+            data['covered_lines'] = [
+                tuple(loc) for loc in data['covered_lines']
+            ]
         return TestResult(**data)
     else:
         return data
diff --git a/Lib/test/libregrtest/results.py b/Lib/test/libregrtest/results.py
index 1feb43f..71aaef3 100644
--- a/Lib/test/libregrtest/results.py
+++ b/Lib/test/libregrtest/results.py
@@ -1,13 +1,14 @@
 import sys
+import trace
 
 from .runtests import RunTests
-from .result import State, TestResult, TestStats
+from .result import State, TestResult, TestStats, Location
 from .utils import (
     StrPath, TestName, TestTuple, TestList, FilterDict,
     printlist, count, format_duration)
 
 
-# Python uses exit code 1 when an exception is not catched
+# Python uses exit code 1 when an exception is not caught
 # argparse.ArgumentParser.error() uses exit code 2
 EXITCODE_BAD_TEST = 2
 EXITCODE_ENV_CHANGED = 3
@@ -34,6 +35,8 @@ class TestResults:
         self.stats = TestStats()
         # used by --junit-xml
         self.testsuite_xml: list[str] = []
+        # used by -T with -j
+        self.covered_lines: set[Location] = set()
 
     def is_all_good(self):
         return (not self.bad
@@ -119,11 +122,17 @@ class TestResults:
             self.stats.accumulate(result.stats)
         if rerun:
             self.rerun.append(test_name)
-
+        if result.covered_lines:
+            # we don't care about trace counts so we don't have to sum them up
+            self.covered_lines.update(result.covered_lines)
         xml_data = result.xml_data
         if xml_data:
             self.add_junit(xml_data)
 
+    def get_coverage_results(self) -> trace.CoverageResults:
+        counts = {loc: 1 for loc in self.covered_lines}
+        return trace.CoverageResults(counts=counts)
+
     def need_rerun(self):
         return bool(self.rerun_results)
 
diff --git a/Lib/test/libregrtest/run_workers.py b/Lib/test/libregrtest/run_workers.py
index ab03cb5..99c2cf3 100644
--- a/Lib/test/libregrtest/run_workers.py
+++ b/Lib/test/libregrtest/run_workers.py
@@ -277,7 +277,7 @@ class WorkerThread(threading.Thread):
         # Python finalization: too late for libregrtest.
         if not support.is_wasi:
             # Don't check for leaked temporary files and directories if Python is
-            # run on WASI. WASI don't pass environment variables like TMPDIR to
+            # run on WASI. WASI doesn't pass environment variables like TMPDIR to
             # worker processes.
             tmp_dir = tempfile.mkdtemp(prefix="test_python_")
             tmp_dir = os.path.abspath(tmp_dir)
diff --git a/Lib/test/libregrtest/runtests.py b/Lib/test/libregrtest/runtests.py
index bfed1b4..ac47c07 100644
--- a/Lib/test/libregrtest/runtests.py
+++ b/Lib/test/libregrtest/runtests.py
@@ -85,6 +85,7 @@ class RunTests:
     hunt_refleak: HuntRefleak | None
     test_dir: StrPath | None
     use_junit: bool
+    coverage: bool
     memory_limit: str | None
     gc_threshold: int | None
     use_resources: tuple[str, ...]
diff --git a/Lib/test/libregrtest/worker.py b/Lib/test/libregrtest/worker.py
index 2eccfab..b3bb0b7 100644
--- a/Lib/test/libregrtest/worker.py
+++ b/Lib/test/libregrtest/worker.py
@@ -4,7 +4,7 @@ import os
 from typing import Any, NoReturn
 
 from test import support
-from test.support import os_helper
+from test.support import os_helper, Py_DEBUG
 
 from .setup import setup_process, setup_test_dir
 from .runtests import RunTests, JsonFile, JsonFileType
@@ -30,6 +30,8 @@ def create_worker_process(runtests: RunTests, output_fd: int,
         python_opts = [opt for opt in python_opts if opt != "-E"]
     else:
         executable = (sys.executable,)
+    if runtests.coverage:
+        python_opts.append("-Xpresite=test.cov")
     cmd = [*executable, *python_opts,
            '-u',    # Unbuffered stdout and stderr
            '-m', 'test.libregrtest.worker',
@@ -87,6 +89,18 @@ def worker_process(worker_json: StrJSON) -> NoReturn:
             print(f"Re-running {test_name} in verbose mode", flush=True)
 
     result = run_single_test(test_name, runtests)
+    if runtests.coverage:
+        if "test.cov" in sys.modules:  # imported by -Xpresite=
+            result.covered_lines = list(sys.modules["test.cov"].coverage)
+        elif not Py_DEBUG:
+            print(
+                "Gathering coverage in worker processes requires --with-pydebug",
+                flush=True,
+            )
+        else:
+            raise LookupError(
+                "`test.cov` not found in sys.modules but coverage wanted"
+            )
 
     if json_file.file_type == JsonFileType.STDOUT:
         print()
diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py
index 1057aac..d476ba5 100644
--- a/Lib/test/support/__init__.py
+++ b/Lib/test/support/__init__.py
@@ -1082,18 +1082,30 @@ def check_impl_detail(**guards):
 
 def no_tracing(func):
     """Decorator to temporarily turn off tracing for the duration of a test."""
-    if not hasattr(sys, 'gettrace'):
-        return func
-    else:
+    trace_wrapper = func
+    if hasattr(sys, 'gettrace'):
         @functools.wraps(func)
-        def wrapper(*args, **kwargs):
+        def trace_wrapper(*args, **kwargs):
             original_trace = sys.gettrace()
             try:
                 sys.settrace(None)
                 return func(*args, **kwargs)
             finally:
                 sys.settrace(original_trace)
-        return wrapper
+
+    coverage_wrapper = trace_wrapper
+    if 'test.cov' in sys.modules:  # -Xpresite=test.cov used
+        cov = sys.monitoring.COVERAGE_ID
+        @functools.wraps(func)
+        def coverage_wrapper(*args, **kwargs):
+            original_events = sys.monitoring.get_events(cov)
+            try:
+                sys.monitoring.set_events(cov, 0)
+                return trace_wrapper(*args, **kwargs)
+            finally:
+                sys.monitoring.set_events(cov, original_events)
+
+    return coverage_wrapper
 
 
 def refcount_test(test):
diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py
index 6b03ea0..d7b9f80 100644
--- a/Lib/test/test_regrtest.py
+++ b/Lib/test/test_regrtest.py
@@ -306,13 +306,23 @@ class ParseArgsTestCase(unittest.TestCase):
                 self.assertEqual(ns.use_mp, 2)
                 self.checkError([opt], 'expected one argument')
                 self.checkError([opt, 'foo'], 'invalid int value')
-                self.checkError([opt, '2', '-T'], "don't go together")
-                self.checkError([opt, '0', '-T'], "don't go together")
 
-    def test_coverage(self):
+    def test_coverage_sequential(self):
         for opt in '-T', '--coverage':
             with self.subTest(opt=opt):
-                ns = self.parse_args([opt])
+                with support.captured_stderr() as stderr:
+                    ns = self.parse_args([opt])
+                self.assertTrue(ns.trace)
+                self.assertIn(
+                    "collecting coverage without -j is imprecise",
+                    stderr.getvalue(),
+                )
+
+    @unittest.skipUnless(support.Py_DEBUG, 'need a debug build')
+    def test_coverage_mp(self):
+        for opt in '-T', '--coverage':
+            with self.subTest(opt=opt):
+                ns = self.parse_args([opt, '-j1'])
                 self.assertTrue(ns.trace)
 
     def test_coverdir(self):
diff --git a/Lib/trace.py b/Lib/trace.py
index fb9a423..7cb6f89 100755
--- a/Lib/trace.py
+++ b/Lib/trace.py
@@ -202,7 +202,8 @@ class CoverageResults:
         for key in other_callers:
             callers[key] = 1
 
-    def write_results(self, show_missing=True, summary=False, coverdir=None):
+    def write_results(self, show_missing=True, summary=False, coverdir=None, *,
+                      ignore_missing_files=False):
         """
         Write the coverage results.
 
@@ -211,6 +212,9 @@ class CoverageResults:
         :param coverdir: If None, the results of each module are placed in its
                          directory, otherwise it is included in the directory
                          specified.
+        :param ignore_missing_files: If True, counts for files that no longer
+                         exist are silently ignored. Otherwise, a missing file
+                         will raise a FileNotFoundError.
         """
         if self.calledfuncs:
             print()
@@ -253,6 +257,9 @@ class CoverageResults:
             if filename.endswith(".pyc"):
                 filename = filename[:-1]
 
+            if ignore_missing_files and not os.path.isfile(filename):
+                continue
+
             if coverdir is None:
                 dir = os.path.dirname(os.path.abspath(filename))
                 modulename = _modname(filename)
@@ -278,7 +285,6 @@ class CoverageResults:
                 percent = int(100 * n_hits / n_lines)
                 sums[modulename] = n_lines, percent, modulename, filename
 
-
         if summary and sums:
             print("lines   cov%   module   (path)")
             for m in sorted(sums):
diff --git a/Misc/NEWS.d/next/Tests/2023-11-03-18-59-13.gh-issue-110722.jvT1pb.rst b/Misc/NEWS.d/next/Tests/2023-11-03-18-59-13.gh-issue-110722.jvT1pb.rst
new file mode 100644
index 0000000..ad1ac53
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2023-11-03-18-59-13.gh-issue-110722.jvT1pb.rst
@@ -0,0 +1,2 @@
+Gathering line coverage of standard libraries within the regression test
+suite is now precise, as well as much faster. Patch by Łukasz Langa.
-- 
cgit v0.12