summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2016-05-08 20:43:50 (GMT)
committer Serhiy Storchaka <storchaka@gmail.com> 2016-05-08 20:43:50 (GMT)
commit 02d9f5e5b2ee2662cb6776ebdafa2f3169452e41 (patch)
tree b5f50062b0813e8d9f6df7c63067bc21201b21e3
parent c7cc9850d49354e9e93601d649a3c2bf60f72df8 (diff)
download cpython-02d9f5e5b2ee2662cb6776ebdafa2f3169452e41.zip
cpython-02d9f5e5b2ee2662cb6776ebdafa2f3169452e41.tar.gz
cpython-02d9f5e5b2ee2662cb6776ebdafa2f3169452e41.tar.bz2
Issue #26881: The modulefinder module now supports extended opcode arguments.
-rw-r--r-- Lib/dis.py 55
-rw-r--r-- Lib/modulefinder.py 45
-rw-r--r-- Lib/test/test_modulefinder.py 13
-rw-r--r-- Misc/NEWS 4
4 files changed, 61 insertions, 56 deletions
diff --git a/Lib/dis.py b/Lib/dis.py
index 841208f..09776fe 100644
--- a/Lib/dis.py
+++ b/Lib/dis.py
@@ -275,31 +275,17 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
"""
labels = findlabels(code)
- extended_arg = 0
starts_line = None
free = None
- # enumerate() is not an option, since we sometimes process
- # multiple elements on a single pass through the loop
- n = len(code)
- i = 0
- while i < n:
- op = code[i]
- offset = i
+ for offset, op, arg in _unpack_opargs(code):
if linestarts is not None:
- starts_line = linestarts.get(i, None)
+ starts_line = linestarts.get(offset, None)
if starts_line is not None:
starts_line += line_offset
- is_jump_target = i in labels
- i = i+1
- arg = None
+ is_jump_target = offset in labels
argval = None
argrepr = ''
- if op >= HAVE_ARGUMENT:
- arg = code[i] + code[i+1]*256 + extended_arg
- extended_arg = 0
- i = i+2
- if op == EXTENDED_ARG:
- extended_arg = arg*65536
+ if arg is not None:
# Set argval to the dereferenced value of the argument when
# available, and argrepr to the string representation of argval.
# _disassemble_bytes needs the string repr of the
@@ -310,7 +296,7 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
elif op in hasname:
argval, argrepr = _get_name_info(arg, names)
elif op in hasjrel:
- argval = i + arg
+ argval = offset + 3 + arg
argrepr = "to " + repr(argval)
elif op in haslocal:
argval, argrepr = _get_name_info(arg, varnames)
@@ -320,7 +306,7 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
elif op in hasfree:
argval, argrepr = _get_name_info(arg, cells)
elif op in hasnargs:
- argrepr = "%d positional, %d keyword pair" % (code[i-2], code[i-1])
+ argrepr = "%d positional, %d keyword pair" % (arg%256, arg//256)
yield Instruction(opname[op], op,
arg, argval, argrepr,
offset, starts_line, is_jump_target)
@@ -356,26 +342,37 @@ def _disassemble_str(source, *, file=None):
disco = disassemble # XXX For backwards compatibility
-def findlabels(code):
- """Detect all offsets in a byte code which are jump targets.
-
- Return the list of offsets.
-
- """
- labels = []
+def _unpack_opargs(code):
# enumerate() is not an option, since we sometimes process
# multiple elements on a single pass through the loop
+ extended_arg = 0
n = len(code)
i = 0
while i < n:
op = code[i]
+ offset = i
i = i+1
+ arg = None
if op >= HAVE_ARGUMENT:
- arg = code[i] + code[i+1]*256
+ arg = code[i] + code[i+1]*256 + extended_arg
+ extended_arg = 0
i = i+2
+ if op == EXTENDED_ARG:
+ extended_arg = arg*65536
+ yield (offset, op, arg)
+
+def findlabels(code):
+ """Detect all offsets in a byte code which are jump targets.
+
+ Return the list of offsets.
+
+ """
+ labels = []
+ for offset, op, arg in _unpack_opargs(code):
+ if arg is not None:
label = -1
if op in hasjrel:
- label = i+arg
+ label = offset + 3 + arg
elif op in hasjabs:
label = arg
if label >= 0:
diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py
index 50f2462..b8cce1f 100644
--- a/Lib/modulefinder.py
+++ b/Lib/modulefinder.py
@@ -13,13 +13,12 @@ with warnings.catch_warnings():
warnings.simplefilter('ignore', PendingDeprecationWarning)
import imp
-# XXX Clean up once str8's cstor matches bytes.
-LOAD_CONST = bytes([dis.opname.index('LOAD_CONST')])
-IMPORT_NAME = bytes([dis.opname.index('IMPORT_NAME')])
-STORE_NAME = bytes([dis.opname.index('STORE_NAME')])
-STORE_GLOBAL = bytes([dis.opname.index('STORE_GLOBAL')])
-STORE_OPS = [STORE_NAME, STORE_GLOBAL]
-HAVE_ARGUMENT = bytes([dis.HAVE_ARGUMENT])
+LOAD_CONST = dis.opmap['LOAD_CONST']
+IMPORT_NAME = dis.opmap['IMPORT_NAME']
+STORE_NAME = dis.opmap['STORE_NAME']
+STORE_GLOBAL = dis.opmap['STORE_GLOBAL']
+STORE_OPS = STORE_NAME, STORE_GLOBAL
+EXTENDED_ARG = dis.EXTENDED_ARG
# Modulefinder does a good job at simulating Python's, but it can not
# handle __path__ modifications packages make at runtime. Therefore there
@@ -337,38 +336,30 @@ class ModuleFinder:
fullname = name + "." + sub
self._add_badmodule(fullname, caller)
- def scan_opcodes_25(self, co,
- unpack = struct.unpack):
+ def scan_opcodes(self, co):
# Scan the code, and yield 'interesting' opcode combinations
- # Python 2.5 version (has absolute and relative imports)
code = co.co_code
names = co.co_names
consts = co.co_consts
- LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME
- while code:
- c = bytes([code[0]])
- if c in STORE_OPS:
- oparg, = unpack('<H', code[1:3])
+ opargs = [(op, arg) for _, op, arg in dis._unpack_opargs(code)
+ if op != EXTENDED_ARG]
+ for i, (op, oparg) in enumerate(opargs):
+ if op in STORE_OPS:
yield "store", (names[oparg],)
- code = code[3:]
continue
- if code[:9:3] == LOAD_LOAD_AND_IMPORT:
- oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9])
- level = consts[oparg_1]
+ if (op == IMPORT_NAME and i >= 2
+ and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST):
+ level = consts[opargs[i-2][1]]
+ fromlist = consts[opargs[i-1][1]]
if level == 0: # absolute import
- yield "absolute_import", (consts[oparg_2], names[oparg_3])
+ yield "absolute_import", (fromlist, names[oparg])
else: # relative import
- yield "relative_import", (level, consts[oparg_2], names[oparg_3])
- code = code[9:]
+ yield "relative_import", (level, fromlist, names[oparg])
continue
- if c >= HAVE_ARGUMENT:
- code = code[3:]
- else:
- code = code[1:]
def scan_code(self, co, m):
code = co.co_code
- scanner = self.scan_opcodes_25
+ scanner = self.scan_opcodes
for what, args in scanner(co):
if what == "store":
name, = args
diff --git a/Lib/test/test_modulefinder.py b/Lib/test/test_modulefinder.py
index 4c49e9a..e4df2a9 100644
--- a/Lib/test/test_modulefinder.py
+++ b/Lib/test/test_modulefinder.py
@@ -319,6 +319,19 @@ class ModuleFinderTest(unittest.TestCase):
expected = "co_filename %r changed to %r" % (old_path, new_path)
self.assertIn(expected, output)
+ def test_extended_opargs(self):
+ extended_opargs_test = [
+ "a",
+ ["a", "b"],
+ [], [],
+ """\
+a.py
+ %r
+ import b
+b.py
+""" % list(range(2**16))] # 2**16 constants
+ self._do_test(extended_opargs_test)
+
if __name__ == "__main__":
unittest.main()
diff --git a/Misc/NEWS b/Misc/NEWS
index 870f5a3..f9bb8ef 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -116,6 +116,8 @@ Core and Builtins
Library
-------
+- Issue #26881: The modulefinder module now supports extended opcode arguments.
+
- Issue #23815: Fixed crashes related to directly created instances of types in
_tkinter and curses.panel modules.
@@ -125,6 +127,8 @@ Library
- Issue #26873: xmlrpc now raises ResponseError on unsupported type tags
instead of silently returning an incorrect result.
+- Issue #26881: modulefinder now works with bytecode with extended args.
+
- Issue #26711: Fixed the comparison of plistlib.Data with other types.
- Issue #24114: Fix an uninitialized variable in `ctypes.util`.