From e38d0afe3548b856ccf0b05c01ed3eefc69cb3e7 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Sat, 24 Aug 2024 13:42:41 -0700 Subject: GH-120754: Add a strace helper and test set of syscalls for open().read() (#121143) --- Lib/test/support/strace_helper.py | 166 ++++++++++++++++++++++++++++++++++++++ Lib/test/test_fileio.py | 93 ++++++++++++++++++++- Lib/test/test_subprocess.py | 53 +++++------- Misc/ACKS | 1 + 4 files changed, 280 insertions(+), 33 deletions(-) create mode 100644 Lib/test/support/strace_helper.py diff --git a/Lib/test/support/strace_helper.py b/Lib/test/support/strace_helper.py new file mode 100644 index 0000000..b881670 --- /dev/null +++ b/Lib/test/support/strace_helper.py @@ -0,0 +1,166 @@ +import re +import sys +import textwrap +import unittest +from dataclasses import dataclass +from functools import cache +from test import support +from test.support.script_helper import run_python_until_end + +_strace_binary = "/usr/bin/strace" +_syscall_regex = re.compile( + r"(?P[^(]*)\((?P[^)]*)\)\s*[=]\s*(?P.+)") +_returncode_regex = re.compile( + br"\+\+\+ exited with (?P\d+) \+\+\+") + + +@dataclass +class StraceEvent: + syscall: str + args: list[str] + returncode: str + + +@dataclass +class StraceResult: + strace_returncode: int + python_returncode: int + + """The event messages generated by strace. This is very similar to the + stderr strace produces with returncode marker section removed.""" + event_bytes: bytes + stdout: bytes + stderr: bytes + + def events(self): + """Parse event_bytes data into system calls for easier processing. + + This assumes the program under inspection doesn't print any non-utf8 + strings which would mix into the strace output.""" + decoded_events = self.event_bytes.decode('utf-8') + matches = [ + _syscall_regex.match(event) + for event in decoded_events.splitlines() + ] + return [ + StraceEvent(match["syscall"], + [arg.strip() for arg in (match["args"].split(","))], + match["returncode"]) for match in matches if match + ] + + def sections(self): + """Find all "MARK " writes and use them to make groups of events. + + This is useful to avoid variable / overhead events, like those at + interpreter startup or when opening a file so a test can verify just + the small case under study.""" + current_section = "__startup" + sections = {current_section: []} + for event in self.events(): + if event.syscall == 'write' and len( + event.args) > 2 and event.args[1].startswith("\"MARK "): + # Found a new section, don't include the write in the section + # but all events until next mark should be in that section + current_section = event.args[1].split( + " ", 1)[1].removesuffix('\\n"') + if current_section not in sections: + sections[current_section] = list() + else: + sections[current_section].append(event) + + return sections + + +@support.requires_subprocess() +def strace_python(code, strace_flags, check=True): + """Run strace and return the trace. + + Sets strace_returncode and python_returncode to `-1` on error.""" + res = None + + def _make_error(reason, details): + return StraceResult( + strace_returncode=-1, + python_returncode=-1, + event_bytes=f"error({reason},details={details}) = -1".encode('utf-8'), + stdout=res.out if res else "", + stderr=res.err if res else "") + + # Run strace, and get out the raw text + try: + res, cmd_line = run_python_until_end( + "-c", + textwrap.dedent(code), + __run_using_command=[_strace_binary] + strace_flags) + except OSError as err: + return _make_error("Caught OSError", err) + + if check and res.rc: + res.fail(cmd_line) + + # Get out program returncode + stripped = res.err.strip() + output = stripped.rsplit(b"\n", 1) + if len(output) != 2: + return _make_error("Expected strace events and exit code line", + stripped[-50:]) + + returncode_match = _returncode_regex.match(output[1]) + if not returncode_match: + return _make_error("Expected to find returncode in last line.", + output[1][:50]) + + python_returncode = int(returncode_match["returncode"]) + if check and python_returncode: + res.fail(cmd_line) + + return StraceResult(strace_returncode=res.rc, + python_returncode=python_returncode, + event_bytes=output[0], + stdout=res.out, + stderr=res.err) + + +def _get_events(code, strace_flags, prelude, cleanup): + # NOTE: The flush is currently required to prevent the prints from getting + # buffered and done all at once at exit + prelude = textwrap.dedent(prelude) + code = textwrap.dedent(code) + cleanup = textwrap.dedent(cleanup) + to_run = f""" +print("MARK prelude", flush=True) +{prelude} +print("MARK code", flush=True) +{code} +print("MARK cleanup", flush=True) +{cleanup} +print("MARK __shutdown", flush=True) + """ + trace = strace_python(to_run, strace_flags) + all_sections = trace.sections() + return all_sections['code'] + + +def get_syscalls(code, strace_flags, prelude="", cleanup=""): + """Get the syscalls which a given chunk of python code generates""" + events = _get_events(code, strace_flags, prelude=prelude, cleanup=cleanup) + return [ev.syscall for ev in events] + + +# Moderately expensive (spawns a subprocess), so share results when possible. +@cache +def _can_strace(): + res = strace_python("import sys; sys.exit(0)", [], check=False) + assert res.events(), "Should have parsed multiple calls" + + return res.strace_returncode == 0 and res.python_returncode == 0 + + +def requires_strace(): + if sys.platform != "linux": + return unittest.skip("Linux only, requires strace.") + + return unittest.skipUnless(_can_strace(), "Requires working strace") + + +__all__ = ["requires_strace", "strace_python", "StraceEvent", "StraceResult"] diff --git a/Lib/test/test_fileio.py b/Lib/test/test_fileio.py index 0611d17..bded9c8 100644 --- a/Lib/test/test_fileio.py +++ b/Lib/test/test_fileio.py @@ -11,7 +11,7 @@ from functools import wraps from test.support import ( cpython_only, swap_attr, gc_collect, is_emscripten, is_wasi, - infinite_recursion, + infinite_recursion, strace_helper ) from test.support.os_helper import ( TESTFN, TESTFN_ASCII, TESTFN_UNICODE, make_bad_fd, @@ -24,6 +24,9 @@ import _io # C implementation of io import _pyio # Python implementation of io +_strace_flags=["--trace=%file,%desc"] + + class AutoFileTests: # file tests for which a test file is automatically set up @@ -359,6 +362,94 @@ class AutoFileTests: a = array('b', b'x'*10) f.readinto(a) + @strace_helper.requires_strace() + def test_syscalls_read(self): + """Check that the set of system calls produced by the I/O stack is what + is expected for various read cases. + + It's expected as bits of the I/O implementation change, this will need + to change. The goal is to catch changes that unintentionally add + additional systemcalls (ex. additional calls have been looked at in + bpo-21679 and gh-120754). + """ + self.f.write(b"Hello, World!") + self.f.close() + + + def check_readall(name, code, prelude="", cleanup=""): + with self.subTest(name=name): + syscalls = strace_helper.get_syscalls(code, _strace_flags, + prelude=prelude, + cleanup=cleanup) + + # There are a number of related syscalls used to implement + # behaviors in a libc (ex. fstat, newfstatat, open, openat). + # Allow any that use the same substring. + def count_similarname(name): + return len([sc for sc in syscalls if name in sc]) + + # Should open and close the file exactly once + self.assertEqual(count_similarname('open'), 1) + self.assertEqual(count_similarname('close'), 1) + + # Should only have one fstat (bpo-21679, gh-120754) + self.assertEqual(count_similarname('fstat'), 1) + + + # "open, read, close" file using different common patterns. + check_readall( + "open builtin with default options", + f""" + f = open('{TESTFN}') + f.read() + f.close() + """ + ) + + check_readall( + "open in binary mode", + f""" + f = open('{TESTFN}', 'rb') + f.read() + f.close() + """ + ) + + check_readall( + "open in text mode", + f""" + f = open('{TESTFN}', 'rt') + f.read() + f.close() + """ + ) + + check_readall( + "pathlib read_bytes", + "p.read_bytes()", + prelude=f"""from pathlib import Path; p = Path("{TESTFN}")""" + + ) + + check_readall( + "pathlib read_text", + "p.read_text()", + prelude=f"""from pathlib import Path; p = Path("{TESTFN}")""" + ) + + # Focus on just `read()`. + calls = strace_helper.get_syscalls( + prelude=f"f = open('{TESTFN}')", + code="f.read()", + cleanup="f.close()", + strace_flags=_strace_flags + ) + # One to read all the bytes + # One to read the EOF and get a size 0 return. + self.assertEqual(calls.count("read"), 2) + + + class CAutoFileTests(AutoFileTests, unittest.TestCase): FileIO = _io.FileIO modulename = '_io' diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index f065b9c..e45701d 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -4,6 +4,7 @@ from test import support from test.support import check_sanitizer from test.support import import_helper from test.support import os_helper +from test.support import strace_helper from test.support import warnings_helper from test.support.script_helper import assert_python_ok import subprocess @@ -3415,7 +3416,7 @@ class POSIXProcessTestCase(BaseTestCase): @unittest.skipIf(not sysconfig.get_config_var("HAVE_VFORK"), "vfork() not enabled by configure.") - @unittest.skipIf(sys.platform != "linux", "Linux only, requires strace.") + @strace_helper.requires_strace() @mock.patch("subprocess._USE_POSIX_SPAWN", new=False) def test_vfork_used_when_expected(self): # This is a performance regression test to ensure we default to using @@ -3423,36 +3424,25 @@ class POSIXProcessTestCase(BaseTestCase): # Technically this test could pass when posix_spawn is used as well # because libc tends to implement that internally using vfork. But # that'd just be testing a libc+kernel implementation detail. - strace_binary = "/usr/bin/strace" - # The only system calls we are interested in. - strace_filter = "--trace=clone,clone2,clone3,fork,vfork,exit,exit_group" - true_binary = "/bin/true" - strace_command = [strace_binary, strace_filter] - try: - does_strace_work_process = subprocess.run( - strace_command + [true_binary], - stderr=subprocess.PIPE, - stdout=subprocess.DEVNULL, - ) - rc = does_strace_work_process.returncode - stderr = does_strace_work_process.stderr - except OSError: - rc = -1 - stderr = "" - if rc or (b"+++ exited with 0 +++" not in stderr): - self.skipTest("strace not found or not working as expected.") + # Are intersted in the system calls: + # clone,clone2,clone3,fork,vfork,exit,exit_group + # Unfortunately using `--trace` with that list to strace fails because + # not all are supported on all platforms (ex. clone2 is ia64 only...) + # So instead use `%process` which is recommended by strace, and contains + # the above. + true_binary = "/bin/true" + strace_args = ["--trace=%process"] with self.subTest(name="default_is_vfork"): - vfork_result = assert_python_ok( - "-c", - textwrap.dedent(f"""\ - import subprocess - subprocess.check_call([{true_binary!r}])"""), - __run_using_command=strace_command, + vfork_result = strace_helper.strace_python( + f"""\ + import subprocess + subprocess.check_call([{true_binary!r}])""", + strace_args ) # Match both vfork() and clone(..., flags=...|CLONE_VFORK|...) - self.assertRegex(vfork_result.err, br"(?i)vfork") + self.assertRegex(vfork_result.event_bytes, br"(?i)vfork") # Do NOT check that fork() or other clones did not happen. # If the OS denys the vfork it'll fallback to plain fork(). @@ -3465,9 +3455,8 @@ class POSIXProcessTestCase(BaseTestCase): ("setgroups", "", "extra_groups=[]", True), ): with self.subTest(name=sub_name): - non_vfork_result = assert_python_ok( - "-c", - textwrap.dedent(f"""\ + non_vfork_result = strace_helper.strace_python( + f"""\ import subprocess {preamble} try: @@ -3475,11 +3464,11 @@ class POSIXProcessTestCase(BaseTestCase): [{true_binary!r}], **dict({sp_kwarg})) except PermissionError: if not {expect_permission_error}: - raise"""), - __run_using_command=strace_command, + raise""", + strace_args ) # Ensure neither vfork() or clone(..., flags=...|CLONE_VFORK|...). - self.assertNotRegex(non_vfork_result.err, br"(?i)vfork") + self.assertNotRegex(non_vfork_result.event_bytes, br"(?i)vfork") @unittest.skipUnless(mswindows, "Windows specific tests") diff --git a/Misc/ACKS b/Misc/ACKS index b031eb7..aced869 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1164,6 +1164,7 @@ Grzegorz Makarewicz David Malcolm Greg Malcolm William Mallard +Cody Maloney Ken Manheimer Vladimir Marangozov Colin Marc -- cgit v0.12