bpo-43693: Add new internal code object fields: co_fastlocalnames and co_fastlocalkinds. (gh-26388)

A number of places in the code base (notably ceval.c and frameobject.c) rely on mapping variable names to indices in the frame "locals plus" array (AKA fast locals), and thus opargs. Currently the compiler indirectly encodes that information on the code object as the tuples co_varnames, co_cellvars, and co_freevars. At runtime the dependent code must calculate the proper mapping from those, which isn't ideal and impacts performance-sensitive sections. This is something we can easily address in the compiler instead.

This change addresses the situation by replacing internal use of co_varnames, etc. with a single combined tuple of names in locals-plus order, along with a minimal array mapping each to its kind (local vs. cell vs. free). These two new PyCodeObject fields, co_fastlocalnames and co_fastlocalkinds, are not exposed to Python code for now, but co_varnames, etc. are still available with the same values as before (though computed lazily).

Aside from the (mild) performance impact, there are a number of other benefits:

* there's now a clear, direct relationship between locals-plus and variables
* code that relies on the locals-plus-to-name mapping is simpler
* marshaled code objects are smaller and serialize/de-serialize faster

Also note that we can take this approach further by expanding the possible values in co_fastlocalkinds to include specific argument types (e.g. positional-only, kwargs). Doing so would allow further speed-ups in _PyEval_MakeFrameVector(), which is where args get unpacked into the locals-plus array. It would also allow us to shrink marshaled code objects even further.

https://bugs.python.org/issue43693
author: Eric Snow <ericsnowcurrently@gmail.com> 2021-06-03 16:28:27 (GMT)
committer: GitHub <noreply@github.com> 2021-06-03 16:28:27 (GMT)
commit: 2c1e2583fdc4db6b43d163239ea42b0e8394171f (patch)
tree: b4becea668a3bccc7ffbfcee3ca6b712d14f2131 /Lib/dis.py
parent: ea0210fa8ccca769896847f25fc6fadfe9a717bc (diff)
download: cpython-2c1e2583fdc4db6b43d163239ea42b0e8394171f.zip
cpython-2c1e2583fdc4db6b43d163239ea42b0e8394171f.tar.gz
cpython-2c1e2583fdc4db6b43d163239ea42b0e8394171f.tar.bz2
1 files changed, 28 insertions, 24 deletions
diff --git a/Lib/dis.py b/Lib/dis.py
index bc7c4d4..dfadad7 100644
--- a/Lib/dis.py
+++ b/Lib/dis.py
@@ -273,15 +273,15 @@ def get_instructions(x, *, first_line=None):
     the disassembled code object.
     """
     co = _get_code_object(x)
-    cell_names = co.co_cellvars + co.co_freevars
     linestarts = dict(findlinestarts(co))
     if first_line is not None:
         line_offset = first_line - co.co_firstlineno
     else:
         line_offset = 0
-    return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
-                                   co.co_consts, cell_names, linestarts,
-                                   line_offset)
+    return _get_instructions_bytes(co.co_code,
+                                   co._varname_from_oparg,
+                                   co.co_names, co.co_consts,
+                                   linestarts, line_offset)
 
 def _get_const_info(const_index, const_list):
     """Helper to get optional details about const references
@@ -295,7 +295,7 @@ def _get_const_info(const_index, const_list):
         argval = const_list[const_index]
     return argval, repr(argval)
 
-def _get_name_info(name_index, name_list):
+def _get_name_info(name_index, get_name, **extrainfo):
     """Helper to get optional details about named references
 
        Returns the dereferenced name as both value and repr if the name
@@ -303,8 +303,8 @@ def _get_name_info(name_index, name_list):
        Otherwise returns the name index and its repr().
     """
     argval = name_index
-    if name_list is not None:
-        argval = name_list[name_index]
+    if get_name is not None:
+        argval = get_name(name_index, **extrainfo)
         argrepr = argval
     else:
         argrepr = repr(argval)
@@ -336,8 +336,10 @@ def parse_exception_table(code):
     except StopIteration:
         return entries
 
-def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
-                      cells=None, linestarts=None, line_offset=0, exception_entries=()):
+def _get_instructions_bytes(code, varname_from_oparg=None,
+                            names=None, constants=None,
+                            linestarts=None, line_offset=0,
+                            exception_entries=()):
     """Iterate over the instructions in a bytecode string.
 
     Generates a sequence of Instruction namedtuples giving the details of each
@@ -346,6 +348,7 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
     arguments.
 
     """
+    get_name = None if names is None else names.__getitem__
     labels = set(findlabels(code))
     for start, end, target, _, _ in exception_entries:
         for i in range(start, end):
@@ -368,7 +371,7 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
             if op in hasconst:
                 argval, argrepr = _get_const_info(arg, constants)
             elif op in hasname:
-                argval, argrepr = _get_name_info(arg, names)
+                argval, argrepr = _get_name_info(arg, get_name)
             elif op in hasjabs:
                 argval = arg*2
                 argrepr = "to " + repr(argval)
@@ -376,12 +379,13 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
                 argval = offset + 2 + arg*2
                 argrepr = "to " + repr(argval)
             elif op in haslocal:
-                argval, argrepr = _get_name_info(arg, varnames)
+                argval, argrepr = _get_name_info(arg, varname_from_oparg)
             elif op in hascompare:
                 argval = cmp_op[arg]
                 argrepr = argval
             elif op in hasfree:
-                argval, argrepr = _get_name_info(arg, cells)
+                argval, argrepr = _get_name_info(arg, varname_from_oparg,
+                                                 cell=True)
             elif op == FORMAT_VALUE:
                 argval, argrepr = FORMAT_VALUE_CONVERTERS[arg & 0x3]
                 argval = (argval, bool(arg & 0x4))
@@ -398,11 +402,11 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
 
 def disassemble(co, lasti=-1, *, file=None):
     """Disassemble a code object."""
-    cell_names = co.co_cellvars + co.co_freevars
     linestarts = dict(findlinestarts(co))
     exception_entries = parse_exception_table(co)
-    _disassemble_bytes(co.co_code, lasti, co.co_varnames, co.co_names,
-                       co.co_consts, cell_names, linestarts, file=file,
+    _disassemble_bytes(co.co_code, lasti,
+                       co._varname_from_oparg,
+                       co.co_names, co.co_consts, linestarts, file=file,
                        exception_entries=exception_entries)
 
 def _disassemble_recursive(co, *, file=None, depth=None):
@@ -416,8 +420,8 @@ def _disassemble_recursive(co, *, file=None, depth=None):
                 print("Disassembly of %r:" % (x,), file=file)
                 _disassemble_recursive(x, file=file, depth=depth)
 
-def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
-                       constants=None, cells=None, linestarts=None,
+def _disassemble_bytes(code, lasti=-1, varname_from_oparg=None,
+                       names=None, constants=None, linestarts=None,
                        *, file=None, line_offset=0, exception_entries=()):
     # Omit the line number column entirely if we have no line number info
     show_lineno = bool(linestarts)
@@ -434,8 +438,8 @@ def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
         offset_width = len(str(maxoffset))
     else:
         offset_width = 4
-    for instr in _get_instructions_bytes(code, varnames, names,
-                                         constants, cells, linestarts,
+    for instr in _get_instructions_bytes(code, varname_from_oparg, names,
+                                         constants, linestarts,
                                          line_offset=line_offset, exception_entries=exception_entries):
         new_source_line = (show_lineno and
                            instr.starts_line is not None and
@@ -517,7 +521,6 @@ class Bytecode:
         else:
             self.first_line = first_line
             self._line_offset = first_line - co.co_firstlineno
-        self._cell_names = co.co_cellvars + co.co_freevars
         self._linestarts = dict(findlinestarts(co))
         self._original_object = x
         self.current_offset = current_offset
@@ -525,8 +528,9 @@ class Bytecode:
 
     def __iter__(self):
         co = self.codeobj
-        return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
-                                       co.co_consts, self._cell_names,
+        return _get_instructions_bytes(co.co_code,
+                                       co._varname_from_oparg,
+                                       co.co_names, co.co_consts,
                                        self._linestarts,
                                        line_offset=self._line_offset,
                                        exception_entries=self.exception_entries)
@@ -554,9 +558,9 @@ class Bytecode:
         else:
             offset = -1
         with io.StringIO() as output:
-            _disassemble_bytes(co.co_code, varnames=co.co_varnames,
+            _disassemble_bytes(co.co_code,
+                               varname_from_oparg=co._varname_from_oparg,
                                names=co.co_names, constants=co.co_consts,
-                               cells=self._cell_names,
                                linestarts=self._linestarts,
                                line_offset=self._line_offset,
                                file=output,
author	Eric Snow <ericsnowcurrently@gmail.com>	2021-06-03 16:28:27 (GMT)
committer	GitHub <noreply@github.com>	2021-06-03 16:28:27 (GMT)
commit	2c1e2583fdc4db6b43d163239ea42b0e8394171f (patch)
tree	b4becea668a3bccc7ffbfcee3ca6b712d14f2131 /Lib/dis.py
parent	ea0210fa8ccca769896847f25fc6fadfe9a717bc (diff)
download	cpython-2c1e2583fdc4db6b43d163239ea42b0e8394171f.zip cpython-2c1e2583fdc4db6b43d163239ea42b0e8394171f.tar.gz cpython-2c1e2583fdc4db6b43d163239ea42b0e8394171f.tar.bz2