summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> 2023-12-09 10:03:02 (GMT)
committer: GitHub <noreply@github.com> 2023-12-09 10:03:02 (GMT)
commit: c98c40227e8cd976a08ff0f6dc386b5d33f62f84 (patch)
tree: 4971139832b108e158f5306fd02231db1c41be11
parent: 10e9bb13b8dcaa414645b9bd10718d8f7179e82b (diff)
download: cpython-c98c40227e8cd976a08ff0f6dc386b5d33f62f84.zip
cpython-c98c40227e8cd976a08ff0f6dc386b5d33f62f84.tar.gz
cpython-c98c40227e8cd976a08ff0f6dc386b5d33f62f84.tar.bz2
gh-112720: Move instruction formatting from the dis.Instruction class to a new class dis.InstructionFormatter. Add the ArgResolver class. (#112722)
-rw-r--r-- Lib/dis.py 422
-rw-r--r-- Lib/test/test_dis.py 15
2 files changed, 232 insertions, 205 deletions
diff --git a/Lib/dis.py b/Lib/dis.py
index 8d3885d..efa935c 100644
--- a/Lib/dis.py
+++ b/Lib/dis.py
@@ -336,93 +336,6 @@ class Instruction(_Instruction):
covered by this instruction
"""
- @staticmethod
- def _get_argval_argrepr(op, arg, offset, co_consts, names, varname_from_oparg,
- labels_map):
- get_name = None if names is None else names.__getitem__
- argval = None
- argrepr = ''
- deop = _deoptop(op)
- if arg is not None:
- # Set argval to the dereferenced value of the argument when
- # available, and argrepr to the string representation of argval.
- # _disassemble_bytes needs the string repr of the
- # raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
- argval = arg
- if deop in hasconst:
- argval, argrepr = _get_const_info(deop, arg, co_consts)
- elif deop in hasname:
- if deop == LOAD_GLOBAL:
- argval, argrepr = _get_name_info(arg//2, get_name)
- if (arg & 1) and argrepr:
- argrepr = f"{argrepr} + NULL"
- elif deop == LOAD_ATTR:
- argval, argrepr = _get_name_info(arg//2, get_name)
- if (arg & 1) and argrepr:
- argrepr = f"{argrepr} + NULL|self"
- elif deop == LOAD_SUPER_ATTR:
- argval, argrepr = _get_name_info(arg//4, get_name)
- if (arg & 1) and argrepr:
- argrepr = f"{argrepr} + NULL|self"
- else:
- argval, argrepr = _get_name_info(arg, get_name)
- elif deop in hasjabs:
- argval = arg*2
- argrepr = f"to L{labels_map[argval]}"
- elif deop in hasjrel:
- signed_arg = -arg if _is_backward_jump(deop) else arg
- argval = offset + 2 + signed_arg*2
- caches = _get_cache_size(_all_opname[deop])
- argval += 2 * caches
- if deop == ENTER_EXECUTOR:
- argval += 2
- argrepr = f"to L{labels_map[argval]}"
- elif deop in (LOAD_FAST_LOAD_FAST, STORE_FAST_LOAD_FAST, STORE_FAST_STORE_FAST):
- arg1 = arg >> 4
- arg2 = arg & 15
- val1, argrepr1 = _get_name_info(arg1, varname_from_oparg)
- val2, argrepr2 = _get_name_info(arg2, varname_from_oparg)
- argrepr = argrepr1 + ", " + argrepr2
- argval = val1, val2
- elif deop in haslocal or deop in hasfree:
- argval, argrepr = _get_name_info(arg, varname_from_oparg)
- elif deop in hascompare:
- argval = cmp_op[arg >> 5]
- argrepr = argval
- if arg & 16:
- argrepr = f"bool({argrepr})"
- elif deop == CONVERT_VALUE:
- argval = (None, str, repr, ascii)[arg]
- argrepr = ('', 'str', 'repr', 'ascii')[arg]
- elif deop == SET_FUNCTION_ATTRIBUTE:
- argrepr = ', '.join(s for i, s in enumerate(FUNCTION_ATTR_FLAGS)
- if arg & (1<<i))
- elif deop == BINARY_OP:
- _, argrepr = _nb_ops[arg]
- elif deop == CALL_INTRINSIC_1:
- argrepr = _intrinsic_1_descs[arg]
- elif deop == CALL_INTRINSIC_2:
- argrepr = _intrinsic_2_descs[arg]
- return argval, argrepr
-
-
- @classmethod
- def _create(cls, op, arg, offset, start_offset, starts_line, line_number,
- positions,
- co_consts=None, varname_from_oparg=None, names=None,
- labels_map=None, exceptions_map=None):
-
- argval, argrepr = cls._get_argval_argrepr(
- op, arg, offset,
- co_consts, names, varname_from_oparg, labels_map)
- label = labels_map.get(offset, None)
- instr = Instruction(_all_opname[op], op, arg, argval, argrepr,
- offset, start_offset, starts_line, line_number,
- label, positions)
- instr.label_width = 4 + len(str(len(labels_map)))
- instr.exc_handler = exceptions_map.get(offset, None)
- return instr
-
@property
def oparg(self):
"""Alias for Instruction.arg."""
@@ -467,56 +380,169 @@ class Instruction(_Instruction):
"""True if other code jumps to here, otherwise False"""
return self.label is not None
- def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=0,
- label_width=0):
- """Format instruction details for inclusion in disassembly output.
+ def __str__(self):
+ output = io.StringIO()
+ formatter = Formatter(file=output)
+ formatter.print_instruction(self, False)
+ return output.getvalue()
+
+class Formatter:
+
+ def __init__(self, file=None, lineno_width=0, offset_width=0, label_width=0,
+ line_offset=0):
+ """Create a Formatter
+
+ *file* where to write the output
*lineno_width* sets the width of the line number field (0 omits it)
- *mark_as_current* inserts a '-->' marker arrow as part of the line
*offset_width* sets the width of the instruction offset field
*label_width* sets the width of the label field
+
+ *line_offset* the line number (within the code unit)
"""
+ self.file = file
+ self.lineno_width = lineno_width
+ self.offset_width = offset_width
+ self.label_width = label_width
+
+
+ def print_instruction(self, instr, mark_as_current=False):
+ """Format instruction details for inclusion in disassembly output."""
+ lineno_width = self.lineno_width
+ offset_width = self.offset_width
+ label_width = self.label_width
+
+ new_source_line = (lineno_width > 0 and
+ instr.starts_line and
+ instr.offset > 0)
+ if new_source_line:
+ print(file=self.file)
+
fields = []
# Column: Source code line number
if lineno_width:
- if self.starts_line:
- lineno_fmt = "%%%dd" if self.line_number is not None else "%%%ds"
+ if instr.starts_line:
+ lineno_fmt = "%%%dd" if instr.line_number is not None else "%%%ds"
lineno_fmt = lineno_fmt % lineno_width
- lineno = self.line_number if self.line_number is not None else '--'
+ lineno = _NO_LINENO if instr.line_number is None else instr.line_number
fields.append(lineno_fmt % lineno)
else:
fields.append(' ' * lineno_width)
# Column: Label
- if self.label is not None:
- lbl = f"L{self.label}:"
+ if instr.label is not None:
+ lbl = f"L{instr.label}:"
fields.append(f"{lbl:>{label_width}}")
else:
fields.append(' ' * label_width)
# Column: Instruction offset from start of code sequence
if offset_width > 0:
- fields.append(f"{repr(self.offset):>{offset_width}} ")
+ fields.append(f"{repr(instr.offset):>{offset_width}} ")
# Column: Current instruction indicator
if mark_as_current:
fields.append('-->')
else:
fields.append(' ')
# Column: Opcode name
- fields.append(self.opname.ljust(_OPNAME_WIDTH))
+ fields.append(instr.opname.ljust(_OPNAME_WIDTH))
# Column: Opcode argument
- if self.arg is not None:
- arg = repr(self.arg)
+ if instr.arg is not None:
+ arg = repr(instr.arg)
# If opname is longer than _OPNAME_WIDTH, we allow it to overflow into
# the space reserved for oparg. This results in fewer misaligned opargs
# in the disassembly output.
- opname_excess = max(0, len(self.opname) - _OPNAME_WIDTH)
- fields.append(repr(self.arg).rjust(_OPARG_WIDTH - opname_excess))
+ opname_excess = max(0, len(instr.opname) - _OPNAME_WIDTH)
+ fields.append(repr(instr.arg).rjust(_OPARG_WIDTH - opname_excess))
# Column: Opcode argument details
- if self.argrepr:
- fields.append('(' + self.argrepr + ')')
- return ' '.join(fields).rstrip()
+ if instr.argrepr:
+ fields.append('(' + instr.argrepr + ')')
+ print(' '.join(fields).rstrip(), file=self.file)
+
+ def print_exception_table(self, exception_entries):
+ file = self.file
+ if exception_entries:
+ print("ExceptionTable:", file=file)
+ for entry in exception_entries:
+ lasti = " lasti" if entry.lasti else ""
+ start = entry.start_label
+ end = entry.end_label
+ target = entry.target_label
+ print(f" L{start} to L{end} -> L{target} [{entry.depth}]{lasti}", file=file)
+
+
+class ArgResolver:
+ def __init__(self, co_consts, names, varname_from_oparg, labels_map):
+ self.co_consts = co_consts
+ self.names = names
+ self.varname_from_oparg = varname_from_oparg
+ self.labels_map = labels_map
+
+ def get_argval_argrepr(self, op, arg, offset):
+ get_name = None if self.names is None else self.names.__getitem__
+ argval = None
+ argrepr = ''
+ deop = _deoptop(op)
+ if arg is not None:
+ # Set argval to the dereferenced value of the argument when
+ # available, and argrepr to the string representation of argval.
+ # _disassemble_bytes needs the string repr of the
+ # raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
+ argval = arg
+ if deop in hasconst:
+ argval, argrepr = _get_const_info(deop, arg, self.co_consts)
+ elif deop in hasname:
+ if deop == LOAD_GLOBAL:
+ argval, argrepr = _get_name_info(arg//2, get_name)
+ if (arg & 1) and argrepr:
+ argrepr = f"{argrepr} + NULL"
+ elif deop == LOAD_ATTR:
+ argval, argrepr = _get_name_info(arg//2, get_name)
+ if (arg & 1) and argrepr:
+ argrepr = f"{argrepr} + NULL|self"
+ elif deop == LOAD_SUPER_ATTR:
+ argval, argrepr = _get_name_info(arg//4, get_name)
+ if (arg & 1) and argrepr:
+ argrepr = f"{argrepr} + NULL|self"
+ else:
+ argval, argrepr = _get_name_info(arg, get_name)
+ elif deop in hasjabs:
+ argval = arg*2
+ argrepr = f"to L{self.labels_map[argval]}"
+ elif deop in hasjrel:
+ signed_arg = -arg if _is_backward_jump(deop) else arg
+ argval = offset + 2 + signed_arg*2
+ caches = _get_cache_size(_all_opname[deop])
+ argval += 2 * caches
+ if deop == ENTER_EXECUTOR:
+ argval += 2
+ argrepr = f"to L{self.labels_map[argval]}"
+ elif deop in (LOAD_FAST_LOAD_FAST, STORE_FAST_LOAD_FAST, STORE_FAST_STORE_FAST):
+ arg1 = arg >> 4
+ arg2 = arg & 15
+ val1, argrepr1 = _get_name_info(arg1, self.varname_from_oparg)
+ val2, argrepr2 = _get_name_info(arg2, self.varname_from_oparg)
+ argrepr = argrepr1 + ", " + argrepr2
+ argval = val1, val2
+ elif deop in haslocal or deop in hasfree:
+ argval, argrepr = _get_name_info(arg, self.varname_from_oparg)
+ elif deop in hascompare:
+ argval = cmp_op[arg >> 5]
+ argrepr = argval
+ if arg & 16:
+ argrepr = f"bool({argrepr})"
+ elif deop == CONVERT_VALUE:
+ argval = (None, str, repr, ascii)[arg]
+ argrepr = ('', 'str', 'repr', 'ascii')[arg]
+ elif deop == SET_FUNCTION_ATTRIBUTE:
+ argrepr = ', '.join(s for i, s in enumerate(FUNCTION_ATTR_FLAGS)
+ if arg & (1<<i))
+ elif deop == BINARY_OP:
+ _, argrepr = _nb_ops[arg]
+ elif deop == CALL_INTRINSIC_1:
+ argrepr = _intrinsic_1_descs[arg]
+ elif deop == CALL_INTRINSIC_2:
+ argrepr = _intrinsic_2_descs[arg]
+ return argval, argrepr
- def __str__(self):
- return self._disassemble()
def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False):
"""Iterator for the opcodes in methods, functions or code
@@ -535,13 +561,18 @@ def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False):
line_offset = first_line - co.co_firstlineno
else:
line_offset = 0
+
+ original_code = co.co_code
+ labels_map = _make_labels_map(original_code)
+ arg_resolver = ArgResolver(co.co_consts, co.co_names, co._varname_from_oparg,
+ labels_map)
return _get_instructions_bytes(_get_code_array(co, adaptive),
- co._varname_from_oparg,
- co.co_names, co.co_consts,
- linestarts, line_offset,
+ linestarts=linestarts,
+ line_offset=line_offset,
co_positions=co.co_positions(),
show_caches=show_caches,
- original_code=co.co_code)
+ original_code=original_code,
+ arg_resolver=arg_resolver)
def _get_const_value(op, arg, co_consts):
"""Helper to get the value of the const in a hasconst op.
@@ -613,17 +644,13 @@ def _is_backward_jump(op):
'JUMP_BACKWARD_NO_INTERRUPT',
'ENTER_EXECUTOR')
-def _get_instructions_bytes(code, varname_from_oparg=None,
- names=None, co_consts=None,
- linestarts=None, line_offset=0,
- exception_entries=(), co_positions=None,
- show_caches=False, original_code=None):
+def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=None,
+ show_caches=False, original_code=None, labels_map=None,
+ arg_resolver=None):
"""Iterate over the instructions in a bytecode string.
Generates a sequence of Instruction namedtuples giving the details of each
- opcode. Additional information about the code's runtime environment
- (e.g. variable names, co_consts) can be specified using optional
- arguments.
+ opcode.
"""
# Use the basic, unadaptive code for finding labels and actually walking the
@@ -631,30 +658,8 @@ def _get_instructions_bytes(code, varname_from_oparg=None,
# mess that logic up pretty badly:
original_code = original_code or code
co_positions = co_positions or iter(())
- get_name = None if names is None else names.__getitem__
-
- def make_labels_map(original_code, exception_entries):
- jump_targets = set(findlabels(original_code))
- labels = set(jump_targets)
- for start, end, target, _, _ in exception_entries:
- labels.add(start)
- labels.add(end)
- labels.add(target)
- labels = sorted(labels)
- labels_map = {offset: i+1 for (i, offset) in enumerate(sorted(labels))}
- for e in exception_entries:
- e.start_label = labels_map[e.start]
- e.end_label = labels_map[e.end]
- e.target_label = labels_map[e.target]
- return labels_map
-
- labels_map = make_labels_map(original_code, exception_entries)
- label_width = 4 + len(str(len(labels_map)))
- exceptions_map = {}
- for start, end, target, _, _ in exception_entries:
- exceptions_map[start] = labels_map[target]
- exceptions_map[end] = -1
+ labels_map = labels_map or _make_labels_map(original_code)
starts_line = False
local_line_number = None
@@ -672,10 +677,14 @@ def _get_instructions_bytes(code, varname_from_oparg=None,
deop = _deoptop(op)
op = code[offset]
- yield Instruction._create(op, arg, offset, start_offset, starts_line, line_number,
- positions, co_consts=co_consts,
- varname_from_oparg=varname_from_oparg, names=names,
- labels_map=labels_map, exceptions_map=exceptions_map)
+ if arg_resolver:
+ argval, argrepr = arg_resolver.get_argval_argrepr(op, arg, offset)
+ else:
+ argval, argrepr = arg, repr(arg)
+
+ yield Instruction(_all_opname[op], op, arg, argval, argrepr,
+ offset, start_offset, starts_line, line_number,
+ labels_map.get(offset, None), positions)
caches = _get_cache_size(_all_opname[deop])
if not caches:
@@ -726,69 +735,77 @@ def _disassemble_recursive(co, *, file=None, depth=None, show_caches=False, adap
adaptive=adaptive, show_offsets=show_offsets
)
+
+def _make_labels_map(original_code, exception_entries=()):
+ jump_targets = set(findlabels(original_code))
+ labels = set(jump_targets)
+ for start, end, target, _, _ in exception_entries:
+ labels.add(start)
+ labels.add(end)
+ labels.add(target)
+ labels = sorted(labels)
+ labels_map = {offset: i+1 for (i, offset) in enumerate(sorted(labels))}
+ for e in exception_entries:
+ e.start_label = labels_map[e.start]
+ e.end_label = labels_map[e.end]
+ e.target_label = labels_map[e.target]
+ return labels_map
+
+_NO_LINENO = ' --'
+
+def _get_lineno_width(linestarts):
+ if linestarts is None:
+ return 0
+ maxlineno = max(filter(None, linestarts.values()), default=-1)
+ if maxlineno == -1:
+ # Omit the line number column entirely if we have no line number info
+ return 0
+ lineno_width = max(3, len(str(maxlineno)))
+ if lineno_width < len(_NO_LINENO) and None in linestarts.values():
+ lineno_width = len(_NO_LINENO)
+ return lineno_width
+
+
def _disassemble_bytes(code, lasti=-1, varname_from_oparg=None,
names=None, co_consts=None, linestarts=None,
*, file=None, line_offset=0, exception_entries=(),
co_positions=None, show_caches=False, original_code=None,
show_offsets=False):
- # Omit the line number column entirely if we have no line number info
- if bool(linestarts):
- linestarts_ints = [line for line in linestarts.values() if line is not None]
- show_lineno = len(linestarts_ints) > 0
- else:
- show_lineno = False
- if show_lineno:
- maxlineno = max(linestarts_ints) + line_offset
- if maxlineno >= 1000:
- lineno_width = len(str(maxlineno))
- else:
- lineno_width = 3
+ offset_width = len(str(max(len(code) - 2, 9999))) if show_offsets else 0
- if lineno_width < len(str(None)) and None in linestarts.values():
- lineno_width = len(str(None))
- else:
- lineno_width = 0
- if show_offsets:
- maxoffset = len(code) - 2
- if maxoffset >= 10000:
- offset_width = len(str(maxoffset))
- else:
- offset_width = 4
- else:
- offset_width = 0
-
- label_width = -1
- for instr in _get_instructions_bytes(code, varname_from_oparg, names,
- co_consts, linestarts,
- line_offset=line_offset,
- exception_entries=exception_entries,
- co_positions=co_positions,
- show_caches=show_caches,
- original_code=original_code):
- new_source_line = (show_lineno and
- instr.starts_line and
- instr.offset > 0)
- if new_source_line:
- print(file=file)
+ labels_map = _make_labels_map(original_code or code, exception_entries)
+ label_width = 4 + len(str(len(labels_map)))
+
+ formatter = Formatter(file=file,
+ lineno_width=_get_lineno_width(linestarts),
+ offset_width=offset_width,
+ label_width=label_width,
+ line_offset=line_offset)
+
+ arg_resolver = ArgResolver(co_consts, names, varname_from_oparg, labels_map)
+ instrs = _get_instructions_bytes(code, linestarts=linestarts,
+ line_offset=line_offset,
+ co_positions=co_positions,
+ show_caches=show_caches,
+ original_code=original_code,
+ labels_map=labels_map,
+ arg_resolver=arg_resolver)
+
+ print_instructions(instrs, exception_entries, formatter,
+ show_caches=show_caches, lasti=lasti)
+
+
+def print_instructions(instrs, exception_entries, formatter, show_caches=False, lasti=-1):
+ for instr in instrs:
if show_caches:
is_current_instr = instr.offset == lasti
else:
# Each CACHE takes 2 bytes
is_current_instr = instr.offset <= lasti \
<= instr.offset + 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)])
- label_width = getattr(instr, 'label_width', label_width)
- assert label_width >= 0
- print(instr._disassemble(lineno_width, is_current_instr, offset_width, label_width),
- file=file)
- if exception_entries:
- print("ExceptionTable:", file=file)
- for entry in exception_entries:
- lasti = " lasti" if entry.lasti else ""
- start = entry.start_label
- end = entry.end_label
- target = entry.target_label
- print(f" L{start} to L{end} -> L{target} [{entry.depth}]{lasti}", file=file)
+ formatter.print_instruction(instr, is_current_instr)
+ formatter.print_exception_table(exception_entries)
def _disassemble_str(source, **kwargs):
"""Compile the source string, then disassemble the code object."""
@@ -927,15 +944,18 @@ class Bytecode:
def __iter__(self):
co = self.codeobj
+ original_code = co.co_code
+ labels_map = _make_labels_map(original_code, self.exception_entries)
+ arg_resolver = ArgResolver(co.co_consts, co.co_names, co._varname_from_oparg,
+ labels_map)
return _get_instructions_bytes(_get_code_array(co, self.adaptive),
- co._varname_from_oparg,
- co.co_names, co.co_consts,
- self._linestarts,
+ linestarts=self._linestarts,
line_offset=self._line_offset,
- exception_entries=self.exception_entries,
co_positions=co.co_positions(),
show_caches=self.show_caches,
- original_code=co.co_code)
+ original_code=original_code,
+ labels_map=labels_map,
+ arg_resolver=arg_resolver)
def __repr__(self):
return "{}({!r})".format(self.__class__.__name__,
diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py
index 349790e..0ea4dc4 100644
--- a/Lib/test/test_dis.py
+++ b/Lib/test/test_dis.py
@@ -1785,6 +1785,12 @@ class InstructionTests(InstructionTestCase):
super().__init__(*args)
self.maxDiff = None
+ def test_instruction_str(self):
+ # smoke test for __str__
+ instrs = dis.get_instructions(simple)
+ for instr in instrs:
+ str(instr)
+
def test_default_first_line(self):
actual = dis.get_instructions(simple)
self.assertInstructionsEqual(list(actual), expected_opinfo_simple)
@@ -1955,15 +1961,16 @@ class InstructionTests(InstructionTestCase):
self.assertEqual(10 + 2 + 1*2 + 100*2, instruction.jump_target)
def test_argval_argrepr(self):
- def f(*args):
- return dis.Instruction._get_argval_argrepr(
- *args, labels_map={24: 1})
+ def f(opcode, oparg, offset, *init_args):
+ arg_resolver = dis.ArgResolver(*init_args)
+ return arg_resolver.get_argval_argrepr(opcode, oparg, offset)
offset = 42
co_consts = (0, 1, 2, 3)
names = {1: 'a', 2: 'b'}
varname_from_oparg = lambda i : names[i]
- args = (offset, co_consts, names, varname_from_oparg)
+ labels_map = {24: 1}
+ args = (offset, co_consts, names, varname_from_oparg, labels_map)
self.assertEqual(f(opcode.opmap["POP_TOP"], None, *args), (None, ''))
self.assertEqual(f(opcode.opmap["LOAD_CONST"], 1, *args), (1, '1'))
self.assertEqual(f(opcode.opmap["LOAD_GLOBAL"], 2, *args), ('a', 'a'))