summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCody Maloney <cmaloney@users.noreply.github.com>2024-11-03 03:37:21 (GMT)
committerGitHub <noreply@github.com>2024-11-03 03:37:21 (GMT)
commit9ad57cf58b76d1abe4786fb2e4c77a02e100d047 (patch)
treef57388eff9bec18044a4589e55efd2ef7d6feb97
parent7d7d56d8b1147a6b85e1c09d01b164df7c5c4942 (diff)
downloadcpython-9ad57cf58b76d1abe4786fb2e4c77a02e100d047.zip
cpython-9ad57cf58b76d1abe4786fb2e4c77a02e100d047.tar.gz
cpython-9ad57cf58b76d1abe4786fb2e4c77a02e100d047.tar.bz2
gh-120754: Add a strace helper and test set of syscalls for open().read(), Take 2 (#123413)
-rw-r--r--Lib/test/support/strace_helper.py170
-rw-r--r--Lib/test/test_fileio.py138
-rw-r--r--Lib/test/test_subprocess.py53
-rw-r--r--Misc/ACKS1
4 files changed, 329 insertions, 33 deletions
diff --git a/Lib/test/support/strace_helper.py b/Lib/test/support/strace_helper.py
new file mode 100644
index 0000000..90d4b5b
--- /dev/null
+++ b/Lib/test/support/strace_helper.py
@@ -0,0 +1,170 @@
+import re
+import sys
+import textwrap
+import unittest
+from dataclasses import dataclass
+from functools import cache
+from test import support
+from test.support.script_helper import run_python_until_end
+
+_strace_binary = "/usr/bin/strace"
+_syscall_regex = re.compile(
+ r"(?P<syscall>[^(]*)\((?P<args>[^)]*)\)\s*[=]\s*(?P<returncode>.+)")
+_returncode_regex = re.compile(
+ br"\+\+\+ exited with (?P<returncode>\d+) \+\+\+")
+
+
+@dataclass
+class StraceEvent:
+ syscall: str
+ args: list[str]
+ returncode: str
+
+
+@dataclass
+class StraceResult:
+ strace_returncode: int
+ python_returncode: int
+
+ """The event messages generated by strace. This is very similar to the
+ stderr strace produces with returncode marker section removed."""
+ event_bytes: bytes
+ stdout: bytes
+ stderr: bytes
+
+ def events(self):
+ """Parse event_bytes data into system calls for easier processing.
+
+ This assumes the program under inspection doesn't print any non-utf8
+ strings which would mix into the strace output."""
+ decoded_events = self.event_bytes.decode('utf-8')
+ matches = [
+ _syscall_regex.match(event)
+ for event in decoded_events.splitlines()
+ ]
+ return [
+ StraceEvent(match["syscall"],
+ [arg.strip() for arg in (match["args"].split(","))],
+ match["returncode"]) for match in matches if match
+ ]
+
+ def sections(self):
+ """Find all "MARK <X>" writes and use them to make groups of events.
+
+ This is useful to avoid variable / overhead events, like those at
+ interpreter startup or when opening a file so a test can verify just
+ the small case under study."""
+ current_section = "__startup"
+ sections = {current_section: []}
+ for event in self.events():
+ if event.syscall == 'write' and len(
+ event.args) > 2 and event.args[1].startswith("\"MARK "):
+ # Found a new section, don't include the write in the section
+ # but all events until next mark should be in that section
+ current_section = event.args[1].split(
+ " ", 1)[1].removesuffix('\\n"')
+ if current_section not in sections:
+ sections[current_section] = list()
+ else:
+ sections[current_section].append(event)
+
+ return sections
+
+
+@support.requires_subprocess()
+def strace_python(code, strace_flags, check=True):
+ """Run strace and return the trace.
+
+ Sets strace_returncode and python_returncode to `-1` on error."""
+ res = None
+
+ def _make_error(reason, details):
+ return StraceResult(
+ strace_returncode=-1,
+ python_returncode=-1,
+ event_bytes=f"error({reason},details={details}) = -1".encode('utf-8'),
+ stdout=res.out if res else b"",
+ stderr=res.err if res else b"")
+
+ # Run strace, and get out the raw text
+ try:
+ res, cmd_line = run_python_until_end(
+ "-c",
+ textwrap.dedent(code),
+ __run_using_command=[_strace_binary] + strace_flags)
+ except OSError as err:
+ return _make_error("Caught OSError", err)
+
+ if check and res.rc:
+ res.fail(cmd_line)
+
+ # Get out program returncode
+ stripped = res.err.strip()
+ output = stripped.rsplit(b"\n", 1)
+ if len(output) != 2:
+ return _make_error("Expected strace events and exit code line",
+ stripped[-50:])
+
+ returncode_match = _returncode_regex.match(output[1])
+ if not returncode_match:
+ return _make_error("Expected to find returncode in last line.",
+ output[1][:50])
+
+ python_returncode = int(returncode_match["returncode"])
+ if check and python_returncode:
+ res.fail(cmd_line)
+
+ return StraceResult(strace_returncode=res.rc,
+ python_returncode=python_returncode,
+ event_bytes=output[0],
+ stdout=res.out,
+ stderr=res.err)
+
+
+def get_events(code, strace_flags, prelude, cleanup):
+ # NOTE: The flush is currently required to prevent the prints from getting
+ # buffered and done all at once at exit
+ prelude = textwrap.dedent(prelude)
+ code = textwrap.dedent(code)
+ cleanup = textwrap.dedent(cleanup)
+ to_run = f"""
+print("MARK prelude", flush=True)
+{prelude}
+print("MARK code", flush=True)
+{code}
+print("MARK cleanup", flush=True)
+{cleanup}
+print("MARK __shutdown", flush=True)
+ """
+ trace = strace_python(to_run, strace_flags)
+ all_sections = trace.sections()
+ return all_sections['code']
+
+
+def get_syscalls(code, strace_flags, prelude="", cleanup=""):
+ """Get the syscalls which a given chunk of python code generates"""
+ events = get_events(code, strace_flags, prelude=prelude, cleanup=cleanup)
+ return [ev.syscall for ev in events]
+
+
+# Moderately expensive (spawns a subprocess), so share results when possible.
+@cache
+def _can_strace():
+ res = strace_python("import sys; sys.exit(0)", [], check=False)
+ assert res.events(), "Should have parsed multiple calls"
+
+ return res.strace_returncode == 0 and res.python_returncode == 0
+
+
+def requires_strace():
+ if sys.platform != "linux":
+ return unittest.skip("Linux only, requires strace.")
+
+ if support.check_sanitizer(address=True, memory=True):
+ return unittest.skip("LeakSanitizer does not work under ptrace (strace, gdb, etc)")
+
+ return unittest.skipUnless(_can_strace(), "Requires working strace")
+
+
+__all__ = ["get_events", "get_syscalls", "requires_strace", "strace_python",
+ "StraceEvent", "StraceResult"]
diff --git a/Lib/test/test_fileio.py b/Lib/test/test_fileio.py
index 0611d17..d60aabc 100644
--- a/Lib/test/test_fileio.py
+++ b/Lib/test/test_fileio.py
@@ -11,7 +11,7 @@ from functools import wraps
from test.support import (
cpython_only, swap_attr, gc_collect, is_emscripten, is_wasi,
- infinite_recursion,
+ infinite_recursion, strace_helper
)
from test.support.os_helper import (
TESTFN, TESTFN_ASCII, TESTFN_UNICODE, make_bad_fd,
@@ -24,6 +24,9 @@ import _io # C implementation of io
import _pyio # Python implementation of io
+_strace_flags=["--trace=%file,%desc"]
+
+
class AutoFileTests:
# file tests for which a test file is automatically set up
@@ -359,6 +362,139 @@ class AutoFileTests:
a = array('b', b'x'*10)
f.readinto(a)
+ @strace_helper.requires_strace()
+ def test_syscalls_read(self):
+ """Check that the set of system calls produced by the I/O stack is what
+ is expected for various read cases.
+
+ It's expected as bits of the I/O implementation change, this will need
+ to change. The goal is to catch changes that unintentionally add
+ additional systemcalls (ex. additional calls have been looked at in
+ bpo-21679 and gh-120754).
+ """
+ self.f.write(b"Hello, World!")
+ self.f.close()
+
+
+ def check_readall(name, code, prelude="", cleanup="",
+ extra_checks=None):
+ with self.subTest(name=name):
+ syscalls = strace_helper.get_events(code, _strace_flags,
+ prelude=prelude,
+ cleanup=cleanup)
+
+ # The first call should be an open that returns a
+ # file descriptor (fd). Afer that calls may vary. Once the file
+ # is opened, check calls refer to it by fd as the filename
+ # could be removed from the filesystem, renamed, etc. See:
+ # Time-of-check time-of-use (TOCTOU) software bug class.
+ #
+ # There are a number of related but distinct open system calls
+ # so not checking precise name here.
+ self.assertGreater(
+ len(syscalls),
+ 1,
+ f"Should have had at least an open call|calls={syscalls}")
+ fd_str = syscalls[0].returncode
+
+ # All other calls should contain the fd in their argument set.
+ for ev in syscalls[1:]:
+ self.assertIn(
+ fd_str,
+ ev.args,
+ f"Looking for file descriptor in arguments|ev={ev}"
+ )
+
+ # There are a number of related syscalls used to implement
+ # behaviors in a libc (ex. fstat, newfstatat, statx, open, openat).
+ # Allow any that use the same substring.
+ def count_similarname(name):
+ return len([ev for ev in syscalls if name in ev.syscall])
+
+ checks = [
+ # Should open and close the file exactly once
+ ("open", 1),
+ ("close", 1),
+ # There should no longer be an isatty call (All files being
+ # tested are block devices / not character devices).
+ ('ioctl', 0),
+ # Should only have one fstat (bpo-21679, gh-120754)
+ # note: It's important this uses a fd rather than filename,
+ # That is validated by the `fd` check above.
+ # note: fstat, newfstatat, and statx have all been observed
+ # here in the underlying C library implementations.
+ ("stat", 1)
+ ]
+
+ if extra_checks:
+ checks += extra_checks
+
+ for call, count in checks:
+ self.assertEqual(
+ count_similarname(call),
+ count,
+ msg=f"call={call}|count={count}|syscalls={syscalls}"
+ )
+
+ # "open, read, close" file using different common patterns.
+ check_readall(
+ "open builtin with default options",
+ f"""
+ f = open('{TESTFN}')
+ f.read()
+ f.close()
+ """
+ )
+
+ check_readall(
+ "open in binary mode",
+ f"""
+ f = open('{TESTFN}', 'rb')
+ f.read()
+ f.close()
+ """
+ )
+
+ check_readall(
+ "open in text mode",
+ f"""
+ f = open('{TESTFN}', 'rt')
+ f.read()
+ f.close()
+ """,
+ # GH-122111: read_text uses BufferedIO which requires looking up
+ # position in file. `read_bytes` disables that buffering and avoids
+ # these calls which is tested the `pathlib read_bytes` case.
+ extra_checks=[("seek", 1)]
+ )
+
+ check_readall(
+ "pathlib read_bytes",
+ "p.read_bytes()",
+ prelude=f"""from pathlib import Path; p = Path("{TESTFN}")""",
+ # GH-122111: Buffering is disabled so these calls are avoided.
+ extra_checks=[("seek", 0)]
+ )
+
+ check_readall(
+ "pathlib read_text",
+ "p.read_text()",
+ prelude=f"""from pathlib import Path; p = Path("{TESTFN}")"""
+ )
+
+ # Focus on just `read()`.
+ calls = strace_helper.get_syscalls(
+ prelude=f"f = open('{TESTFN}')",
+ code="f.read()",
+ cleanup="f.close()",
+ strace_flags=_strace_flags
+ )
+ # One to read all the bytes
+ # One to read the EOF and get a size 0 return.
+ self.assertEqual(calls.count("read"), 2)
+
+
+
class CAutoFileTests(AutoFileTests, unittest.TestCase):
FileIO = _io.FileIO
modulename = '_io'
diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py
index f065b9c..e45701d 100644
--- a/Lib/test/test_subprocess.py
+++ b/Lib/test/test_subprocess.py
@@ -4,6 +4,7 @@ from test import support
from test.support import check_sanitizer
from test.support import import_helper
from test.support import os_helper
+from test.support import strace_helper
from test.support import warnings_helper
from test.support.script_helper import assert_python_ok
import subprocess
@@ -3415,7 +3416,7 @@ class POSIXProcessTestCase(BaseTestCase):
@unittest.skipIf(not sysconfig.get_config_var("HAVE_VFORK"),
"vfork() not enabled by configure.")
- @unittest.skipIf(sys.platform != "linux", "Linux only, requires strace.")
+ @strace_helper.requires_strace()
@mock.patch("subprocess._USE_POSIX_SPAWN", new=False)
def test_vfork_used_when_expected(self):
# This is a performance regression test to ensure we default to using
@@ -3423,36 +3424,25 @@ class POSIXProcessTestCase(BaseTestCase):
# Technically this test could pass when posix_spawn is used as well
# because libc tends to implement that internally using vfork. But
# that'd just be testing a libc+kernel implementation detail.
- strace_binary = "/usr/bin/strace"
- # The only system calls we are interested in.
- strace_filter = "--trace=clone,clone2,clone3,fork,vfork,exit,exit_group"
- true_binary = "/bin/true"
- strace_command = [strace_binary, strace_filter]
- try:
- does_strace_work_process = subprocess.run(
- strace_command + [true_binary],
- stderr=subprocess.PIPE,
- stdout=subprocess.DEVNULL,
- )
- rc = does_strace_work_process.returncode
- stderr = does_strace_work_process.stderr
- except OSError:
- rc = -1
- stderr = ""
- if rc or (b"+++ exited with 0 +++" not in stderr):
- self.skipTest("strace not found or not working as expected.")
+ # Are intersted in the system calls:
+ # clone,clone2,clone3,fork,vfork,exit,exit_group
+ # Unfortunately using `--trace` with that list to strace fails because
+ # not all are supported on all platforms (ex. clone2 is ia64 only...)
+ # So instead use `%process` which is recommended by strace, and contains
+ # the above.
+ true_binary = "/bin/true"
+ strace_args = ["--trace=%process"]
with self.subTest(name="default_is_vfork"):
- vfork_result = assert_python_ok(
- "-c",
- textwrap.dedent(f"""\
- import subprocess
- subprocess.check_call([{true_binary!r}])"""),
- __run_using_command=strace_command,
+ vfork_result = strace_helper.strace_python(
+ f"""\
+ import subprocess
+ subprocess.check_call([{true_binary!r}])""",
+ strace_args
)
# Match both vfork() and clone(..., flags=...|CLONE_VFORK|...)
- self.assertRegex(vfork_result.err, br"(?i)vfork")
+ self.assertRegex(vfork_result.event_bytes, br"(?i)vfork")
# Do NOT check that fork() or other clones did not happen.
# If the OS denys the vfork it'll fallback to plain fork().
@@ -3465,9 +3455,8 @@ class POSIXProcessTestCase(BaseTestCase):
("setgroups", "", "extra_groups=[]", True),
):
with self.subTest(name=sub_name):
- non_vfork_result = assert_python_ok(
- "-c",
- textwrap.dedent(f"""\
+ non_vfork_result = strace_helper.strace_python(
+ f"""\
import subprocess
{preamble}
try:
@@ -3475,11 +3464,11 @@ class POSIXProcessTestCase(BaseTestCase):
[{true_binary!r}], **dict({sp_kwarg}))
except PermissionError:
if not {expect_permission_error}:
- raise"""),
- __run_using_command=strace_command,
+ raise""",
+ strace_args
)
# Ensure neither vfork() or clone(..., flags=...|CLONE_VFORK|...).
- self.assertNotRegex(non_vfork_result.err, br"(?i)vfork")
+ self.assertNotRegex(non_vfork_result.event_bytes, br"(?i)vfork")
@unittest.skipUnless(mswindows, "Windows specific tests")
diff --git a/Misc/ACKS b/Misc/ACKS
index a1769d9..5e36eda 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1164,6 +1164,7 @@ Grzegorz Makarewicz
David Malcolm
Greg Malcolm
William Mallard
+Cody Maloney
Ken Manheimer
Vladimir Marangozov
Colin Marc