From b1d3bd2e09d8b9d9f49cb8db9d47880ce2ec8f70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 21 Aug 2024 15:46:24 +0200 Subject: gh-123165: make `dis` functions render positions on demand (#123168) --- Doc/library/dis.rst | 48 ++++++++-- Doc/whatsnew/3.14.rst | 16 ++++ Lib/dis.py | 101 +++++++++++++++------ Lib/test/test_dis.py | 80 ++++++++++++++++ .../2024-08-20-14-22-49.gh-issue-123165.vOZZOA.rst | 1 + 5 files changed, 209 insertions(+), 37 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-08-20-14-22-49.gh-issue-123165.vOZZOA.rst diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index a770ad8..cc8f636 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -56,6 +56,10 @@ interpreter. for jump targets and exception handlers. The ``-O`` command line option and the ``show_offsets`` argument were added. + .. versionchanged:: 3.14 + The :option:`-P <dis --show-positions>` command-line option + and the ``show_positions`` argument were added. + Example: Given the function :func:`!myfunc`:: def myfunc(alist): @@ -85,7 +89,7 @@ The :mod:`dis` module can be invoked as a script from the command line: .. code-block:: sh - python -m dis [-h] [-C] [-O] [infile] + python -m dis [-h] [-C] [-O] [-P] [infile] The following options are accepted: @@ -103,6 +107,10 @@ The following options are accepted: Show offsets of instructions. +.. cmdoption:: -P, --show-positions + + Show positions of instructions in the source code. + If :file:`infile` is specified, its disassembled code will be written to stdout. Otherwise, disassembly is performed on compiled source code received from stdin. @@ -116,7 +124,8 @@ The bytecode analysis API allows pieces of Python code to be wrapped in a code. ..
class:: Bytecode(x, *, first_line=None, current_offset=None,\ - show_caches=False, adaptive=False, show_offsets=False) + show_caches=False, adaptive=False, show_offsets=False,\ + show_positions=False) Analyse the bytecode corresponding to a function, generator, asynchronous generator, coroutine, method, string of source code, or a code object (as @@ -144,6 +153,9 @@ code. If *show_offsets* is ``True``, :meth:`.dis` will include instruction offsets in the output. + If *show_positions* is ``True``, :meth:`.dis` will include instruction + source code positions in the output. + .. classmethod:: from_traceback(tb, *, show_caches=False) Construct a :class:`Bytecode` instance from the given traceback, setting @@ -173,6 +185,12 @@ code. .. versionchanged:: 3.11 Added the *show_caches* and *adaptive* parameters. + .. versionchanged:: 3.13 + Added the *show_offsets* parameter + + .. versionchanged:: 3.14 + Added the *show_positions* parameter. + Example: .. doctest:: @@ -226,7 +244,8 @@ operation is being performed, so the intermediate analysis object isn't useful: Added *file* parameter. -.. function:: dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False) +.. function:: dis(x=None, *, file=None, depth=None, show_caches=False,\ + adaptive=False, show_offsets=False, show_positions=False) Disassemble the *x* object. *x* can denote either a module, a class, a method, a function, a generator, an asynchronous generator, a coroutine, @@ -265,9 +284,14 @@ operation is being performed, so the intermediate analysis object isn't useful: .. versionchanged:: 3.11 Added the *show_caches* and *adaptive* parameters. + .. versionchanged:: 3.13 + Added the *show_offsets* parameter. + + .. versionchanged:: 3.14 + Added the *show_positions* parameter. -.. function:: distb(tb=None, *, file=None, show_caches=False, adaptive=False, - show_offset=False) +.. 
function:: distb(tb=None, *, file=None, show_caches=False, adaptive=False,\ + show_offset=False, show_positions=False) Disassemble the top-of-stack function of a traceback, using the last traceback if none was passed. The instruction causing the exception is @@ -285,14 +309,19 @@ operation is being performed, so the intermediate analysis object isn't useful: .. versionchanged:: 3.13 Added the *show_offsets* parameter. + .. versionchanged:: 3.14 + Added the *show_positions* parameter. + .. function:: disassemble(code, lasti=-1, *, file=None, show_caches=False, adaptive=False) - disco(code, lasti=-1, *, file=None, show_caches=False, adaptive=False, - show_offsets=False) + disco(code, lasti=-1, *, file=None, show_caches=False, adaptive=False,\ + show_offsets=False, show_positions=False) Disassemble a code object, indicating the last instruction if *lasti* was provided. The output is divided in the following columns: - #. the line number, for the first instruction of each line + #. the source code location of the instruction. Complete location information + is shown if *show_positions* is true. Otherwise (the default) only the + line number is displayed. #. the current instruction, indicated as ``-->``, #. a labelled instruction, indicated with ``>>``, #. the address of the instruction, @@ -315,6 +344,9 @@ operation is being performed, so the intermediate analysis object isn't useful: .. versionchanged:: 3.13 Added the *show_offsets* parameter. + .. versionchanged:: 3.14 + Added the *show_positions* parameter. + .. function:: get_instructions(x, *, first_line=None, show_caches=False, adaptive=False) Return an iterator over the instructions in the supplied function, method, diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 088f70d..a34dc63 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -110,6 +110,22 @@ ast (Contributed by Bénédikt Tran in :gh:`121141`.) 
+dis +--- + +* Added support for rendering full source location information of + :class:`instructions <dis.Instruction>`, rather than only the line number. + This feature is added to the following interfaces via the ``show_positions`` + keyword argument: + + - :class:`dis.Bytecode`, + - :func:`dis.dis`, :func:`dis.distb`, and + - :func:`dis.disassemble`. + + This feature is also exposed via :option:`dis --show-positions`. + + (Contributed by Bénédikt Tran in :gh:`123165`.) + fractions --------- diff --git a/Lib/dis.py b/Lib/dis.py index bb922b7..077c403 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -80,7 +80,7 @@ def _try_compile(source, name): return compile(source, name, 'exec') def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False, - show_offsets=False): + show_offsets=False, show_positions=False): """Disassemble classes, methods, functions, and other compiled objects. With no argument, disassemble the last traceback. @@ -91,7 +91,7 @@ def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False, """ if x is None: distb(file=file, show_caches=show_caches, adaptive=adaptive, - show_offsets=show_offsets) + show_offsets=show_offsets, show_positions=show_positions) return # Extract functions from methods.
if hasattr(x, '__func__'): @@ -112,12 +112,12 @@ def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False, if isinstance(x1, _have_code): print("Disassembly of %s:" % name, file=file) try: - dis(x1, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets) + dis(x1, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets, show_positions=show_positions) except TypeError as msg: print("Sorry:", msg, file=file) print(file=file) elif hasattr(x, 'co_code'): # Code object - _disassemble_recursive(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets) + _disassemble_recursive(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets, show_positions=show_positions) elif isinstance(x, (bytes, bytearray)): # Raw bytecode labels_map = _make_labels_map(x) label_width = 4 + len(str(len(labels_map))) @@ -128,12 +128,12 @@ def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False, arg_resolver = ArgResolver(labels_map=labels_map) _disassemble_bytes(x, arg_resolver=arg_resolver, formatter=formatter) elif isinstance(x, str): # Source code - _disassemble_str(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets) + _disassemble_str(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets, show_positions=show_positions) else: raise TypeError("don't know how to disassemble %s objects" % type(x).__name__) -def distb(tb=None, *, file=None, show_caches=False, adaptive=False, show_offsets=False): +def distb(tb=None, *, file=None, show_caches=False, adaptive=False, show_offsets=False, show_positions=False): """Disassemble a traceback (default: last traceback).""" if tb is None: try: @@ -144,7 +144,7 @@ def distb(tb=None, *, file=None, show_caches=False, adaptive=False, show_offsets except AttributeError: raise 
RuntimeError("no last traceback to disassemble") from None while tb.tb_next: tb = tb.tb_next - disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets) + disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets, show_positions=show_positions) # The inspect module interrogates this dictionary to build its # list of CO_* constants. It is also used by pretty_flags to @@ -427,21 +427,25 @@ class Instruction(_Instruction): class Formatter: def __init__(self, file=None, lineno_width=0, offset_width=0, label_width=0, - line_offset=0, show_caches=False): + line_offset=0, show_caches=False, *, show_positions=False): """Create a Formatter *file* where to write the output - *lineno_width* sets the width of the line number field (0 omits it) + *lineno_width* sets the width of the source location field (0 omits it). + Should be large enough for a line number or full positions (depending + on the value of *show_positions*). *offset_width* sets the width of the instruction offset field *label_width* sets the width of the label field *show_caches* is a boolean indicating whether to display cache lines - + *show_positions* is a boolean indicating whether full positions should + be reported instead of only the line numbers. 
""" self.file = file self.lineno_width = lineno_width self.offset_width = offset_width self.label_width = label_width self.show_caches = show_caches + self.show_positions = show_positions def print_instruction(self, instr, mark_as_current=False): self.print_instruction_line(instr, mark_as_current) @@ -474,15 +478,27 @@ class Formatter: print(file=self.file) fields = [] - # Column: Source code line number + # Column: Source code locations information if lineno_width: - if instr.starts_line: - lineno_fmt = "%%%dd" if instr.line_number is not None else "%%%ds" - lineno_fmt = lineno_fmt % lineno_width - lineno = _NO_LINENO if instr.line_number is None else instr.line_number - fields.append(lineno_fmt % lineno) + if self.show_positions: + # reporting positions instead of just line numbers + if instr_positions := instr.positions: + if all(p is None for p in instr_positions): + positions_str = _NO_LINENO + else: + ps = tuple('?' if p is None else p for p in instr_positions) + positions_str = f"{ps[0]}:{ps[2]}-{ps[1]}:{ps[3]}" + fields.append(f'{positions_str:{lineno_width}}') + else: + fields.append(' ' * lineno_width) else: - fields.append(' ' * lineno_width) + if instr.starts_line: + lineno_fmt = "%%%dd" if instr.line_number is not None else "%%%ds" + lineno_fmt = lineno_fmt % lineno_width + lineno = _NO_LINENO if instr.line_number is None else instr.line_number + fields.append(lineno_fmt % lineno) + else: + fields.append(' ' * lineno_width) # Column: Label if instr.label is not None: lbl = f"L{instr.label}:" @@ -769,17 +785,22 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False, - show_offsets=False): + show_offsets=False, show_positions=False): """Disassemble a code object.""" linestarts = dict(findlinestarts(co)) exception_entries = _parse_exception_table(co) + if show_positions: + lineno_width = _get_positions_width(co) + else: + lineno_width = 
_get_lineno_width(linestarts) labels_map = _make_labels_map(co.co_code, exception_entries=exception_entries) label_width = 4 + len(str(len(labels_map))) formatter = Formatter(file=file, - lineno_width=_get_lineno_width(linestarts), + lineno_width=lineno_width, offset_width=len(str(max(len(co.co_code) - 2, 9999))) if show_offsets else 0, label_width=label_width, - show_caches=show_caches) + show_caches=show_caches, + show_positions=show_positions) arg_resolver = ArgResolver(co_consts=co.co_consts, names=co.co_names, varname_from_oparg=co._varname_from_oparg, @@ -788,8 +809,8 @@ def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False, exception_entries=exception_entries, co_positions=co.co_positions(), original_code=co.co_code, arg_resolver=arg_resolver, formatter=formatter) -def _disassemble_recursive(co, *, file=None, depth=None, show_caches=False, adaptive=False, show_offsets=False): - disassemble(co, file=file, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets) +def _disassemble_recursive(co, *, file=None, depth=None, show_caches=False, adaptive=False, show_offsets=False, show_positions=False): + disassemble(co, file=file, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets, show_positions=show_positions) if depth is None or depth > 0: if depth is not None: depth = depth - 1 @@ -799,7 +820,7 @@ def _disassemble_recursive(co, *, file=None, depth=None, show_caches=False, adap print("Disassembly of %r:" % (x,), file=file) _disassemble_recursive( x, file=file, depth=depth, show_caches=show_caches, - adaptive=adaptive, show_offsets=show_offsets + adaptive=adaptive, show_offsets=show_offsets, show_positions=show_positions ) @@ -832,6 +853,22 @@ def _get_lineno_width(linestarts): lineno_width = len(_NO_LINENO) return lineno_width +def _get_positions_width(code): + # Positions are formatted as 'LINE:COL-ENDLINE:ENDCOL ' (note trailing space). 
+ # A missing component appears as '?', and when all components are None, we + # render '_NO_LINENO'. thus the minimum width is 1 + len(_NO_LINENO). + # + # If all values are missing, positions are not printed (i.e. positions_width = 0). + has_value = False + values_width = 0 + for positions in code.co_positions(): + has_value |= any(isinstance(p, int) for p in positions) + width = sum(1 if p is None else len(str(p)) for p in positions) + values_width = max(width, values_width) + if has_value: + # 3 = number of separators in a normal format + return 1 + max(len(_NO_LINENO), 3 + values_width) + return 0 def _disassemble_bytes(code, lasti=-1, linestarts=None, *, line_offset=0, exception_entries=(), @@ -978,7 +1015,7 @@ class Bytecode: Iterating over this yields the bytecode operations as Instruction instances. """ - def __init__(self, x, *, first_line=None, current_offset=None, show_caches=False, adaptive=False, show_offsets=False): + def __init__(self, x, *, first_line=None, current_offset=None, show_caches=False, adaptive=False, show_offsets=False, show_positions=False): self.codeobj = co = _get_code_object(x) if first_line is None: self.first_line = co.co_firstlineno @@ -993,6 +1030,7 @@ class Bytecode: self.show_caches = show_caches self.adaptive = adaptive self.show_offsets = show_offsets + self.show_positions = show_positions def __iter__(self): co = self.codeobj @@ -1036,16 +1074,19 @@ class Bytecode: with io.StringIO() as output: code = _get_code_array(co, self.adaptive) offset_width = len(str(max(len(code) - 2, 9999))) if self.show_offsets else 0 - - + if self.show_positions: + lineno_width = _get_positions_width(co) + else: + lineno_width = _get_lineno_width(self._linestarts) labels_map = _make_labels_map(co.co_code, self.exception_entries) label_width = 4 + len(str(len(labels_map))) formatter = Formatter(file=output, - lineno_width=_get_lineno_width(self._linestarts), + lineno_width=lineno_width, offset_width=offset_width, label_width=label_width, 
line_offset=self._line_offset, - show_caches=self.show_caches) + show_caches=self.show_caches, + show_positions=self.show_positions) arg_resolver = ArgResolver(co_consts=co.co_consts, names=co.co_names, @@ -1071,6 +1112,8 @@ def main(): help='show inline caches') parser.add_argument('-O', '--show-offsets', action='store_true', help='show instruction offsets') + parser.add_argument('-P', '--show-positions', action='store_true', + help='show instruction positions') parser.add_argument('infile', nargs='?', default='-') args = parser.parse_args() if args.infile == '-': @@ -1081,7 +1124,7 @@ def main(): with open(args.infile, 'rb') as infile: source = infile.read() code = compile(source, name, "exec") - dis(code, show_caches=args.show_caches, show_offsets=args.show_offsets) + dis(code, show_caches=args.show_caches, show_offsets=args.show_offsets, show_positions=args.show_positions) if __name__ == "__main__": main() diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 80f66c1..5ec06d1 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -127,6 +127,16 @@ dis_f_with_offsets = """\ _f.__code__.co_firstlineno + 1, _f.__code__.co_firstlineno + 2) +dis_f_with_positions_format = f"""\ +%-14s RESUME 0 + +%-14s LOAD_GLOBAL 1 (print + NULL) +%-14s LOAD_FAST 0 (a) +%-14s CALL 1 +%-14s POP_TOP + +%-14s RETURN_CONST 1 (1) +""" dis_f_co_code = """\ RESUME 0 @@ -950,6 +960,76 @@ class DisTests(DisTestBase): def test_dis_with_offsets(self): self.do_disassembly_test(_f, dis_f_with_offsets, show_offsets=True) + @requires_debug_ranges() + def test_dis_with_all_positions(self): + def format_instr_positions(instr): + values = tuple('?' 
if p is None else p for p in instr.positions) + return '%s:%s-%s:%s' % (values[0], values[2], values[1], values[3]) + + instrs = list(dis.get_instructions(_f)) + for instr in instrs: + with self.subTest(instr=instr): + self.assertTrue(all(p is not None for p in instr.positions)) + positions = tuple(map(format_instr_positions, instrs)) + expected = dis_f_with_positions_format % positions + self.do_disassembly_test(_f, expected, show_positions=True) + + @requires_debug_ranges() + def test_dis_with_some_positions(self): + def f(): + pass + + PY_CODE_LOCATION_INFO_NO_COLUMNS = 13 + PY_CODE_LOCATION_INFO_WITH_COLUMNS = 14 + PY_CODE_LOCATION_INFO_NO_LOCATION = 15 + + f.__code__ = f.__code__.replace( + co_stacksize=1, + co_firstlineno=42, + co_code=bytes([ + dis.opmap["RESUME"], 0, + dis.opmap["NOP"], 0, + dis.opmap["RETURN_CONST"], 0, + ]), + co_linetable=bytes([ + (1 << 7) + | (PY_CODE_LOCATION_INFO_NO_COLUMNS << 3) + | (1 - 1), # 1 code unit (RESUME) + (1 << 1), # start line offset is 0 (encoded as an svarint) + (1 << 7) + | (PY_CODE_LOCATION_INFO_NO_LOCATION << 3) + | (1 - 1), # 1 code unit (NOP) + (1 << 7) + | (PY_CODE_LOCATION_INFO_WITH_COLUMNS << 3) + | (1 - 1), # 1 code unit (RETURN CONST) + (2 << 1), # start line offset is 0 (encoded as an svarint) + 3, # end line offset is 0 (varint encoded) + 1, # 1-based start column (reported as COL - 1) + 5, # 1-based end column (reported as ENDCOL - 1) + ] + )) + expect = '\n'.join([ + '43:?-43:? 
RESUME 0', + '', + ' -- NOP', + '', + '45:0-48:4 RETURN_CONST 0 (None)', + '', + ]) + self.do_disassembly_test(f, expect, show_positions=True) + + def test_dis_with_no_positions(self): + def f(): + pass + + f.__code__ = f.__code__.replace(co_linetable=b'') + expect = '\n'.join([ + ' RESUME 0', + ' RETURN_CONST 0 (None)', + '', + ]) + self.do_disassembly_test(f, expect, show_positions=True) + def test_bug_708901(self): self.do_disassembly_test(bug708901, dis_bug708901) diff --git a/Misc/NEWS.d/next/Library/2024-08-20-14-22-49.gh-issue-123165.vOZZOA.rst b/Misc/NEWS.d/next/Library/2024-08-20-14-22-49.gh-issue-123165.vOZZOA.rst new file mode 100644 index 0000000..05728ad --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-08-20-14-22-49.gh-issue-123165.vOZZOA.rst @@ -0,0 +1 @@ +Add support for rendering :class:`~dis.Positions` in :mod:`dis`. -- cgit v0.12