diff options
author | Pablo Galindo <Pablogsal@gmail.com> | 2021-07-02 14:10:11 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-07-02 14:10:11 (GMT) |
commit | 98eee94421dcb42c15f2d7fc4cd21357722fbe2a (patch) | |
tree | 45a158d1e97f0b29d24ded80122559b50cc858f5 /Lib | |
parent | 943e77d42d3f84b581f32c05f1fc8c05366b8ed3 (diff) | |
download | cpython-98eee94421dcb42c15f2d7fc4cd21357722fbe2a.zip cpython-98eee94421dcb42c15f2d7fc4cd21357722fbe2a.tar.gz cpython-98eee94421dcb42c15f2d7fc4cd21357722fbe2a.tar.bz2 |
bpo-43950: Add code.co_positions (PEP 657) (GH-26955)
This PR is part of PEP 657 and augments the compiler to emit ending
line numbers as well as starting and ending columns from the AST
into compiled code objects. This allows bytecodes to be correlated
to the exact source code ranges that generated them.
This information is made available through the following public APIs:
* The `co_positions` method on code objects.
* The C API function `PyCode_Addr2Location`.
Co-authored-by: Batuhan Taskaya <isidentical@gmail.com>
Co-authored-by: Ammar Askar <ammar@ammaraskar.com>
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/ctypes/test/test_values.py | 6 | ||||
-rw-r--r-- | Lib/importlib/_bootstrap_external.py | 3 | ||||
-rw-r--r-- | Lib/test/test_code.py | 83 | ||||
-rw-r--r-- | Lib/test/test_compile.py | 114 |
4 files changed, 202 insertions, 4 deletions
diff --git a/Lib/ctypes/test/test_values.py b/Lib/ctypes/test/test_values.py index ca41ef1..ade0c99 100644 --- a/Lib/ctypes/test/test_values.py +++ b/Lib/ctypes/test/test_values.py @@ -80,9 +80,9 @@ class PythonValuesTestCase(unittest.TestCase): continue items.append((entry.name.decode("ascii"), entry.size)) - expected = [("__hello__", 133), - ("__phello__", -133), - ("__phello__.spam", 133), + expected = [("__hello__", 159), + ("__phello__", -159), + ("__phello__.spam", 159), ] self.assertEqual(items, expected, "PyImport_FrozenModules example " "in Doc/library/ctypes.rst may be out of date") diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index c3ce470..21449cb 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -361,6 +361,7 @@ _code_type = type(_write_atomic.__code__) # Python 3.11a1 3456 (interleave cell args bpo-43693) # Python 3.11a1 3457 (Change localsplus to a bytes object bpo-43693) # Python 3.11a1 3458 (imported objects now don't use LOAD_METHOD/CALL_METHOD) +# Python 3.11a1 3459 (PEP 657: add end line numbers and column offsets for instructions) # # MAGIC must change whenever the bytecode emitted by the compiler may no @@ -370,7 +371,7 @@ _code_type = type(_write_atomic.__code__) # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array # in PC/launcher.c must also be updated. -MAGIC_NUMBER = (3458).to_bytes(2, 'little') + b'\r\n' +MAGIC_NUMBER = (3459).to_bytes(2, 'little') + b'\r\n' _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c _PYCACHE = '__pycache__' diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index 27342ad..55ba30a 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -129,7 +129,9 @@ import inspect import sys import threading import unittest +import textwrap import weakref + try: import ctypes except ImportError: @@ -223,6 +225,8 @@ class CodeTest(unittest.TestCase): co.co_name, co.co_firstlineno, co.co_lnotab, + co.co_endlinetable, + co.co_columntable, co.co_exceptiontable, co.co_freevars, co.co_cellvars) @@ -257,6 +261,8 @@ class CodeTest(unittest.TestCase): ("co_filename", "newfilename"), ("co_name", "newname"), ("co_linetable", code2.co_linetable), + ("co_endlinetable", code2.co_endlinetable), + ("co_columntable", code2.co_columntable), ): with self.subTest(attr=attr, value=value): new_code = code.replace(**{attr: value}) @@ -293,6 +299,8 @@ class CodeTest(unittest.TestCase): co.co_name, co.co_firstlineno, co.co_lnotab, + co.co_endlinetable, + co.co_columntable, co.co_exceptiontable, co.co_freevars, co.co_cellvars, @@ -309,6 +317,81 @@ class CodeTest(unittest.TestCase): new_code = code = func.__code__.replace(co_linetable=b'') self.assertEqual(list(new_code.co_lines()), []) + def test_co_positions_artificial_instructions(self): + import dis + + namespace = {} + exec(textwrap.dedent("""\ + try: + 1/0 + except Exception as e: + exc = e + """), namespace) + + exc = namespace['exc'] + traceback = exc.__traceback__ + code = traceback.tb_frame.f_code + + artificial_instructions = [] + for instr, positions in zip( + dis.get_instructions(code), + code.co_positions(), + strict=True + ): + # If any of the positions is None, then all have to + # be None as well for the case above. There are still + # some places in the compiler, where the artificial instructions + # get assigned the first_lineno but they don't have other positions. + # There is no easy way of inferring them at that stage, so for now + # we don't support it. + self.assertTrue(all(positions) or not any(positions)) + + if not any(positions): + artificial_instructions.append(instr) + + self.assertEqual( + [ + (instruction.opname, instruction.argval) + for instruction in artificial_instructions + ], + [ + ("PUSH_EXC_INFO", None), + ("LOAD_CONST", None), # artificial 'None' + ("STORE_NAME", "e"), # XX: we know the location for this + ("DELETE_NAME", "e"), + ("RERAISE", 1), + ("POP_EXCEPT_AND_RERAISE", None) + ] + ) + + # co_positions behavior when info is missing. + + def test_co_positions_empty_linetable(self): + def func(): + x = 1 + new_code = func.__code__.replace(co_linetable=b'') + for line, end_line, column, end_column in new_code.co_positions(): + self.assertIsNone(line) + self.assertEqual(end_line, new_code.co_firstlineno + 1) + + def test_co_positions_empty_endlinetable(self): + def func(): + x = 1 + new_code = func.__code__.replace(co_endlinetable=b'') + for line, end_line, column, end_column in new_code.co_positions(): + self.assertEqual(line, new_code.co_firstlineno + 1) + self.assertIsNone(end_line) + + def test_co_positions_empty_columntable(self): + def func(): + x = 1 + new_code = func.__code__.replace(co_columntable=b'') + for line, end_line, column, end_column in new_code.co_positions(): + self.assertEqual(line, new_code.co_firstlineno + 1) + self.assertEqual(end_line, new_code.co_firstlineno + 1) + self.assertIsNone(column) + self.assertIsNone(end_column) + def isinterned(s): return s is sys.intern(('_' + s + '_')[1:-1]) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 7de607c..47deda0 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -3,6 +3,7 @@ import math import os import unittest import sys +import ast import _ast import tempfile import types @@ -985,6 +986,119 @@ if 1: self.assertNotEqual(instr.arg, (line + 1)*INSTR_SIZE) +class TestSourcePositions(unittest.TestCase): + # Ensure that compiled code snippets have correct line and column numbers + # in `co_positions()`. + + def check_positions_against_ast(self, snippet): + # Basic check that makes sure each line and column is at least present + # in one of the AST nodes of the source code. + code = compile(snippet, 'test_compile.py', 'exec') + ast_tree = compile(snippet, 'test_compile.py', 'exec', _ast.PyCF_ONLY_AST) + self.assertTrue(type(ast_tree) == _ast.Module) + + # Use an AST visitor that notes all the offsets. + lines, end_lines, columns, end_columns = set(), set(), set(), set() + class SourceOffsetVisitor(ast.NodeVisitor): + def generic_visit(self, node): + super().generic_visit(node) + if not isinstance(node, ast.expr) and not isinstance(node, ast.stmt): + return + lines.add(node.lineno) + end_lines.add(node.end_lineno) + columns.add(node.col_offset + 1) + end_columns.add(node.end_col_offset + 1) + + SourceOffsetVisitor().visit(ast_tree) + + # Check against the positions in the code object. + for (line, end_line, col, end_col) in code.co_positions(): + # If the offset is not None (indicating missing data), ensure that + # it was part of one of the AST nodes. + if line is not None: + self.assertIn(line, lines) + if end_line is not None: + self.assertIn(end_line, end_lines) + if col is not None: + self.assertIn(col, columns) + if end_col is not None: + self.assertIn(end_col, end_columns) + + return code, ast_tree + + def assertOpcodeSourcePositionIs(self, code, opcode, + line, end_line, column, end_column): + + for instr, position in zip(dis.Bytecode(code), code.co_positions()): + if instr.opname == opcode: + self.assertEqual(position[0], line) + self.assertEqual(position[1], end_line) + self.assertEqual(position[2], column) + self.assertEqual(position[3], end_column) + return + + self.fail(f"Opcode {opcode} not found in code") + + def test_simple_assignment(self): + snippet = "x = 1" + self.check_positions_against_ast(snippet) + + def test_compiles_to_extended_op_arg(self): + # Make sure we still have valid positions when the code compiles to an + # EXTENDED_ARG by performing a loop which needs a JUMP_ABSOLUTE after + # a bunch of opcodes. + snippet = "x = x\n" * 10_000 + snippet += ("while x != 0:\n" + " x -= 1\n" + "while x != 0:\n" + " x += 1\n" + ) + + compiled_code, _ = self.check_positions_against_ast(snippet) + + self.assertOpcodeSourcePositionIs(compiled_code, 'INPLACE_SUBTRACT', + line=10_000 + 2, end_line=10_000 + 2, + column=3, end_column=9) + self.assertOpcodeSourcePositionIs(compiled_code, 'INPLACE_ADD', + line=10_000 + 4, end_line=10_000 + 4, + column=3, end_column=10) + + def test_multiline_expression(self): + snippet = """\ +f( + 1, 2, 3, 4 +) +""" + compiled_code, _ = self.check_positions_against_ast(snippet) + self.assertOpcodeSourcePositionIs(compiled_code, 'CALL_FUNCTION', + line=1, end_line=3, column=1, end_column=2) + + def test_very_long_line_end_offset(self): + # Make sure we get None for when the column offset is too large to + # store in a byte. + long_string = "a" * 1000 + snippet = f"g('{long_string}')" + + compiled_code, _ = self.check_positions_against_ast(snippet) + self.assertOpcodeSourcePositionIs(compiled_code, 'CALL_FUNCTION', + line=1, end_line=1, column=None, end_column=None) + + def test_complex_single_line_expression(self): + snippet = "a - b @ (c * x['key'] + 23)" + + compiled_code, _ = self.check_positions_against_ast(snippet) + self.assertOpcodeSourcePositionIs(compiled_code, 'BINARY_SUBSCR', + line=1, end_line=1, column=14, end_column=22) + self.assertOpcodeSourcePositionIs(compiled_code, 'BINARY_MULTIPLY', + line=1, end_line=1, column=10, end_column=22) + self.assertOpcodeSourcePositionIs(compiled_code, 'BINARY_ADD', + line=1, end_line=1, column=10, end_column=27) + self.assertOpcodeSourcePositionIs(compiled_code, 'BINARY_MATRIX_MULTIPLY', + line=1, end_line=1, column=5, end_column=28) + self.assertOpcodeSourcePositionIs(compiled_code, 'BINARY_SUBTRACT', + line=1, end_line=1, column=1, end_column=28) + + class TestExpressionStackSize(unittest.TestCase): # These tests check that the computed stack size for a code object # stays within reasonable bounds (see issue #21523 for an example |