summaryrefslogtreecommitdiffstats
path: root/Lib/dis.py
diff options
context:
space:
mode:
authorNick Coghlan <ncoghlan@gmail.com>2013-05-06 13:59:20 (GMT)
committerNick Coghlan <ncoghlan@gmail.com>2013-05-06 13:59:20 (GMT)
commitb39fd0c9b8dc6683924205265ff43cc597d1dfb9 (patch)
treeb12d71e0b726b4b3e3c96ba229b0573a084df473 /Lib/dis.py
parent9d351332a7b65e8e03725bc469b7989ccd68143f (diff)
downloadcpython-b39fd0c9b8dc6683924205265ff43cc597d1dfb9.zip
cpython-b39fd0c9b8dc6683924205265ff43cc597d1dfb9.tar.gz
cpython-b39fd0c9b8dc6683924205265ff43cc597d1dfb9.tar.bz2
Issue #11816: multiple improvements to the dis module
* get_instructions generator * ability to redirect output to a file * Bytecode and Instruction abstractions Patch by Nick Coghlan, Ryan Kelly and Thomas Kluyver.
Diffstat (limited to 'Lib/dis.py')
-rw-r--r--Lib/dis.py335
1 files changed, 238 insertions, 97 deletions
diff --git a/Lib/dis.py b/Lib/dis.py
index 543fdc7..ca4094c 100644
--- a/Lib/dis.py
+++ b/Lib/dis.py
@@ -2,12 +2,14 @@
import sys
import types
+import collections
from opcode import *
from opcode import __all__ as _opcodes_all
__all__ = ["code_info", "dis", "disassemble", "distb", "disco",
- "findlinestarts", "findlabels", "show_code"] + _opcodes_all
+ "findlinestarts", "findlabels", "show_code",
+ "get_instructions", "Instruction", "Bytecode"] + _opcodes_all
del _opcodes_all
_have_code = (types.MethodType, types.FunctionType, types.CodeType, type)
@@ -25,7 +27,7 @@ def _try_compile(source, name):
c = compile(source, name, 'exec')
return c
-def dis(x=None):
+def dis(x=None, *, file=None):
"""Disassemble classes, methods, functions, or code.
With no argument, disassemble the last traceback.
@@ -42,23 +44,23 @@ def dis(x=None):
items = sorted(x.__dict__.items())
for name, x1 in items:
if isinstance(x1, _have_code):
- print("Disassembly of %s:" % name)
+ print("Disassembly of %s:" % name, file=file)
try:
dis(x1)
except TypeError as msg:
- print("Sorry:", msg)
- print()
+ print("Sorry:", msg, file=file)
+ print(file=file)
elif hasattr(x, 'co_code'): # Code object
- disassemble(x)
+ disassemble(x, file=file)
elif isinstance(x, (bytes, bytearray)): # Raw bytecode
- _disassemble_bytes(x)
+ _disassemble_bytes(x, file=file)
elif isinstance(x, str): # Source code
- _disassemble_str(x)
+ _disassemble_str(x, file=file)
else:
raise TypeError("don't know how to disassemble %s objects" %
type(x).__name__)
-def distb(tb=None):
+def distb(tb=None, *, file=None):
"""Disassemble a traceback (default: last traceback)."""
if tb is None:
try:
@@ -66,7 +68,7 @@ def distb(tb=None):
except AttributeError:
raise RuntimeError("no last traceback to disassemble")
while tb.tb_next: tb = tb.tb_next
- disassemble(tb.tb_frame.f_code, tb.tb_lasti)
+ disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file)
# The inspect module interrogates this dictionary to build its
# list of CO_* constants. It is also used by pretty_flags to
@@ -95,19 +97,22 @@ def pretty_flags(flags):
names.append(hex(flags))
return ", ".join(names)
-def code_info(x):
- """Formatted details of methods, functions, or code."""
+def _get_code_object(x):
+ """Helper to handle methods, functions, strings and raw code objects"""
if hasattr(x, '__func__'): # Method
x = x.__func__
if hasattr(x, '__code__'): # Function
x = x.__code__
if isinstance(x, str): # Source code
- x = _try_compile(x, "<code_info>")
+ x = _try_compile(x, "<disassembly>")
if hasattr(x, 'co_code'): # Code object
- return _format_code_info(x)
- else:
- raise TypeError("don't know how to disassemble %s objects" %
- type(x).__name__)
+ return x
+ raise TypeError("don't know how to disassemble %s objects" %
+ type(x).__name__)
+
+def code_info(x):
+ """Formatted details of methods, functions, or code."""
+ return _format_code_info(_get_code_object(x))
def _format_code_info(co):
lines = []
@@ -140,106 +145,196 @@ def _format_code_info(co):
lines.append("%4d: %s" % i_n)
return "\n".join(lines)
-def show_code(co):
+def show_code(co, *, file=None):
"""Print details of methods, functions, or code to stdout."""
- print(code_info(co))
+ print(code_info(co), file=file)
-def disassemble(co, lasti=-1):
- """Disassemble a code object."""
- code = co.co_code
- labels = findlabels(code)
+_Instruction = collections.namedtuple("_Instruction",
+ "opname opcode arg argval argrepr offset starts_line is_jump_target")
+
+class Instruction(_Instruction):
+ """Details for a bytecode operation
+
+ Defined fields:
+ opname - human readable name for operation
+ opcode - numeric code for operation
+ arg - numeric argument to operation (if any), otherwise None
+ argval - resolved arg value (if known), otherwise same as arg
+ argrepr - human readable description of operation argument
+ offset - start index of operation within bytecode sequence
+ starts_line - line started by this opcode (if any), otherwise None
+ is_jump_target - True if other code jumps to here, otherwise False
+ """
+
+ def _disassemble(self, lineno_width=3, mark_as_current=False):
+ """Format instruction details for inclusion in disassembly output
+
+ *lineno_width* sets the width of the line number field (0 omits it)
+ *mark_as_current* inserts a '-->' marker arrow as part of the line
+ """
+ fields = []
+ # Column: Source code line number
+ if lineno_width:
+ if self.starts_line is not None:
+ lineno_fmt = "%%%dd" % lineno_width
+ fields.append(lineno_fmt % self.starts_line)
+ else:
+ fields.append(' ' * lineno_width)
+ # Column: Current instruction indicator
+ if mark_as_current:
+ fields.append('-->')
+ else:
+ fields.append(' ')
+ # Column: Jump target marker
+ if self.is_jump_target:
+ fields.append('>>')
+ else:
+ fields.append(' ')
+ # Column: Instruction offset from start of code sequence
+ fields.append(repr(self.offset).rjust(4))
+ # Column: Opcode name
+ fields.append(self.opname.ljust(20))
+ # Column: Opcode argument
+ if self.arg is not None:
+ fields.append(repr(self.arg).rjust(5))
+ # Column: Opcode argument details
+ if self.argrepr:
+ fields.append('(' + self.argrepr + ')')
+ return ' '.join(fields)
+
+
+def get_instructions(x, *, line_offset=0):
+ """Iterator for the opcodes in methods, functions or code
+
+ Generates a series of Instruction named tuples giving the details of
+ each operations in the supplied code.
+
+ The given line offset is added to the 'starts_line' attribute of any
+ instructions that start a new line.
+ """
+ co = _get_code_object(x)
+ cell_names = co.co_cellvars + co.co_freevars
linestarts = dict(findlinestarts(co))
- n = len(code)
- i = 0
+ return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
+ co.co_consts, cell_names, linestarts,
+ line_offset)
+
+def _get_const_info(const_index, const_list):
+ """Helper to get optional details about const references
+
+ Returns the dereferenced constant and its repr if the constant
+ list is defined.
+ Otherwise returns the constant index and its repr().
+ """
+ argval = const_index
+ if const_list is not None:
+ argval = const_list[const_index]
+ return argval, repr(argval)
+
+def _get_name_info(name_index, name_list):
+ """Helper to get optional details about named references
+
+ Returns the dereferenced name as both value and repr if the name
+ list is defined.
+ Otherwise returns the name index and its repr().
+ """
+ argval = name_index
+ if name_list is not None:
+ argval = name_list[name_index]
+ argrepr = argval
+ else:
+ argrepr = repr(argval)
+ return argval, argrepr
+
+
+def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
+ cells=None, linestarts=None, line_offset=0):
+ """Iterate over the instructions in a bytecode string.
+
+ Generates a sequence of Instruction namedtuples giving the details of each
+ opcode. Additional information about the code's runtime environment
+ (e.g. variable names, constants) can be specified using optional
+ arguments.
+
+ """
+ labels = findlabels(code)
extended_arg = 0
+ starts_line = None
free = None
+ # enumerate() is not an option, since we sometimes process
+ # multiple elements on a single pass through the loop
+ n = len(code)
+ i = 0
while i < n:
op = code[i]
- if i in linestarts:
- if i > 0:
- print()
- print("%3d" % linestarts[i], end=' ')
- else:
- print(' ', end=' ')
-
- if i == lasti: print('-->', end=' ')
- else: print(' ', end=' ')
- if i in labels: print('>>', end=' ')
- else: print(' ', end=' ')
- print(repr(i).rjust(4), end=' ')
- print(opname[op].ljust(20), end=' ')
+ offset = i
+ if linestarts is not None:
+ starts_line = linestarts.get(i, None)
+ if starts_line is not None:
+ starts_line += line_offset
+ is_jump_target = i in labels
i = i+1
+ arg = None
+ argval = None
+ argrepr = ''
if op >= HAVE_ARGUMENT:
- oparg = code[i] + code[i+1]*256 + extended_arg
+ arg = code[i] + code[i+1]*256 + extended_arg
extended_arg = 0
i = i+2
if op == EXTENDED_ARG:
- extended_arg = oparg*65536
- print(repr(oparg).rjust(5), end=' ')
+ extended_arg = arg*65536
+ # Set argval to the dereferenced value of the argument when
+ # availabe, and argrepr to the string representation of argval.
+ # _disassemble_bytes needs the string repr of the
+ # raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
+ argval = arg
if op in hasconst:
- print('(' + repr(co.co_consts[oparg]) + ')', end=' ')
+ argval, argrepr = _get_const_info(arg, constants)
elif op in hasname:
- print('(' + co.co_names[oparg] + ')', end=' ')
+ argval, argrepr = _get_name_info(arg, names)
elif op in hasjrel:
- print('(to ' + repr(i + oparg) + ')', end=' ')
+ argval = i + arg
+ argrepr = "to " + repr(argval)
elif op in haslocal:
- print('(' + co.co_varnames[oparg] + ')', end=' ')
+ argval, argrepr = _get_name_info(arg, varnames)
elif op in hascompare:
- print('(' + cmp_op[oparg] + ')', end=' ')
+ argval = cmp_op[arg]
+ argrepr = argval
elif op in hasfree:
- if free is None:
- free = co.co_cellvars + co.co_freevars
- print('(' + free[oparg] + ')', end=' ')
+ argval, argrepr = _get_name_info(arg, cells)
elif op in hasnargs:
- print('(%d positional, %d keyword pair)'
- % (code[i-2], code[i-1]), end=' ')
- print()
+ argrepr = "%d positional, %d keyword pair" % (code[i-2], code[i-1])
+ yield Instruction(opname[op], op,
+ arg, argval, argrepr,
+ offset, starts_line, is_jump_target)
+
+def disassemble(co, lasti=-1, *, file=None):
+ """Disassemble a code object."""
+ cell_names = co.co_cellvars + co.co_freevars
+ linestarts = dict(findlinestarts(co))
+ _disassemble_bytes(co.co_code, lasti, co.co_varnames, co.co_names,
+ co.co_consts, cell_names, linestarts, file=file)
def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
- constants=None):
- labels = findlabels(code)
- n = len(code)
- i = 0
- while i < n:
- op = code[i]
- if i == lasti: print('-->', end=' ')
- else: print(' ', end=' ')
- if i in labels: print('>>', end=' ')
- else: print(' ', end=' ')
- print(repr(i).rjust(4), end=' ')
- print(opname[op].ljust(15), end=' ')
- i = i+1
- if op >= HAVE_ARGUMENT:
- oparg = code[i] + code[i+1]*256
- i = i+2
- print(repr(oparg).rjust(5), end=' ')
- if op in hasconst:
- if constants:
- print('(' + repr(constants[oparg]) + ')', end=' ')
- else:
- print('(%d)'%oparg, end=' ')
- elif op in hasname:
- if names is not None:
- print('(' + names[oparg] + ')', end=' ')
- else:
- print('(%d)'%oparg, end=' ')
- elif op in hasjrel:
- print('(to ' + repr(i + oparg) + ')', end=' ')
- elif op in haslocal:
- if varnames:
- print('(' + varnames[oparg] + ')', end=' ')
- else:
- print('(%d)' % oparg, end=' ')
- elif op in hascompare:
- print('(' + cmp_op[oparg] + ')', end=' ')
- elif op in hasnargs:
- print('(%d positional, %d keyword pair)'
- % (code[i-2], code[i-1]), end=' ')
- print()
+ constants=None, cells=None, linestarts=None,
+ *, file=None):
+ # Omit the line number column entirely if we have no line number info
+ show_lineno = linestarts is not None
+ # TODO?: Adjust width upwards if max(linestarts.values()) >= 1000?
+ lineno_width = 3 if show_lineno else 0
+ for instr in _get_instructions_bytes(code, varnames, names,
+ constants, cells, linestarts):
+ new_source_line = (show_lineno and
+ instr.starts_line is not None and
+ instr.offset > 0)
+ if new_source_line:
+ print(file=file)
+ is_current_instr = instr.offset == lasti
+ print(instr._disassemble(lineno_width, is_current_instr), file=file)
-def _disassemble_str(source):
+def _disassemble_str(source, *, file=None):
"""Compile the source string, then disassemble the code object."""
- disassemble(_try_compile(source, '<dis>'))
+ disassemble(_try_compile(source, '<dis>'), file=file)
disco = disassemble # XXX For backwards compatibility
@@ -250,19 +345,21 @@ def findlabels(code):
"""
labels = []
+ # enumerate() is not an option, since we sometimes process
+ # multiple elements on a single pass through the loop
n = len(code)
i = 0
while i < n:
op = code[i]
i = i+1
if op >= HAVE_ARGUMENT:
- oparg = code[i] + code[i+1]*256
+ arg = code[i] + code[i+1]*256
i = i+2
label = -1
if op in hasjrel:
- label = i+oparg
+ label = i+arg
elif op in hasjabs:
- label = oparg
+ label = arg
if label >= 0:
if label not in labels:
labels.append(label)
@@ -290,6 +387,50 @@ def findlinestarts(code):
if lineno != lastlineno:
yield (addr, lineno)
+class Bytecode:
+ """The bytecode operations of a piece of code
+
+ Instantiate this with a function, method, string of code, or a code object
+ (as returned by compile()).
+
+ Iterating over this yields the bytecode operations as Instruction instances.
+ """
+ def __init__(self, x):
+ self.codeobj = _get_code_object(x)
+ self.cell_names = self.codeobj.co_cellvars + self.codeobj.co_freevars
+ self.linestarts = dict(findlinestarts(self.codeobj))
+ self.line_offset = 0
+ self.original_object = x
+
+ def __iter__(self):
+ co = self.codeobj
+ return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
+ co.co_consts, self.cell_names,
+ self.linestarts, self.line_offset)
+
+ def __repr__(self):
+ return "{}({!r})".format(self.__class__.__name__, self.original_object)
+
+ def info(self):
+ """Return formatted information about the code object."""
+ return _format_code_info(self.codeobj)
+
+ def show_info(self, *, file=None):
+ """Print the information about the code object as returned by info()."""
+ print(self.info(), file=file)
+
+ def display_code(self, *, file=None):
+ """Print a formatted view of the bytecode operations.
+ """
+ co = self.codeobj
+ return _disassemble_bytes(co.co_code, varnames=co.co_varnames,
+ names=co.co_names, constants=co.co_consts,
+ cells=self.cell_names,
+ linestarts=self.linestarts,
+ file=file
+ )
+
+
def _test():
"""Simple test program to disassemble a file."""
if sys.argv[1:]: