summary refs log tree commit diff stats
path: root/Tools
diff options
context:
space:
mode:
author Irit Katriel <1055913+iritkatriel@users.noreply.github.com> 2023-08-16 22:25:18 (GMT)
committer GitHub <noreply@github.com> 2023-08-16 22:25:18 (GMT)
commit 665a4391e10167dad1c854fb604c86f336fcd331 (patch)
tree e1d55069486dd67e6857a13fafc12b6e865cacf7 /Tools
parent e88eb3775ecdcb3af6c6d694a935b7fa5f41e5ce (diff)
download cpython-665a4391e10167dad1c854fb604c86f336fcd331.zip
cpython-665a4391e10167dad1c854fb604c86f336fcd331.tar.gz
cpython-665a4391e10167dad1c854fb604c86f336fcd331.tar.bz2
gh-105481: generate op IDs from bytecode.c instead of hard coding them in opcode.py (#107971)
Diffstat (limited to 'Tools')
-rw-r--r-- Tools/build/deepfreeze.py 8
-rw-r--r-- Tools/build/generate_opcode_h.py 106
-rw-r--r-- Tools/cases_generator/generate_cases.py 206
-rw-r--r-- Tools/scripts/summarize_stats.py 4
4 files changed, 208 insertions, 116 deletions
diff --git a/Tools/build/deepfreeze.py b/Tools/build/deepfreeze.py
index ce609bd..8dbb7bf 100644
--- a/Tools/build/deepfreeze.py
+++ b/Tools/build/deepfreeze.py
@@ -22,7 +22,7 @@ ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
verbose = False
# This must be kept in sync with opcode.py
-RESUME = 151
+RESUME = 166
def isprintable(b: bytes) -> bool:
return all(0x20 <= c < 0x7f for c in b)
@@ -297,10 +297,12 @@ class Printer:
self.write(f".co_linetable = {co_linetable},")
self.write(f"._co_cached = NULL,")
self.write(f".co_code_adaptive = {co_code_adaptive},")
- for i, op in enumerate(code.co_code[::2]):
+ first_traceable = 0
+ for op in code.co_code[::2]:
if op == RESUME:
- self.write(f"._co_firsttraceable = {i},")
break
+ first_traceable += 1
+ self.write(f"._co_firsttraceable = {first_traceable},")
name_as_code = f"(PyCodeObject *)&{name}"
self.finis.append(f"_PyStaticCode_Fini({name_as_code});")
self.inits.append(f"_PyStaticCode_Init({name_as_code})")
diff --git a/Tools/build/generate_opcode_h.py b/Tools/build/generate_opcode_h.py
index 67f4a2c..344709a 100644
--- a/Tools/build/generate_opcode_h.py
+++ b/Tools/build/generate_opcode_h.py
@@ -27,27 +27,6 @@ opcode_h_footer = """
#endif /* !Py_OPCODE_H */
"""
-opcode_ids_h_header = f"""
-// Auto-generated by {SCRIPT_NAME} from {PYTHON_OPCODE}
-
-#ifndef Py_OPCODE_IDS_H
-#define Py_OPCODE_IDS_H
-#ifdef __cplusplus
-extern "C" {{
-#endif
-
-
-/* Instruction opcodes for compiled code */
-""".lstrip()
-
-opcode_ids_h_footer = """
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* !Py_OPCODE_IDS_H */
-"""
-
internal_header = f"""
// Auto-generated by {SCRIPT_NAME} from {PYTHON_OPCODE}
@@ -83,52 +62,10 @@ def get_python_module_dict(filename):
return mod
def main(opcode_py,
- _opcode_metadata_py='Lib/_opcode_metadata.py',
- opcode_ids_h='Include/opcode_ids.h',
opcode_h='Include/opcode.h',
- opcode_targets_h='Python/opcode_targets.h',
internal_opcode_h='Include/internal/pycore_opcode.h'):
- _opcode_metadata = get_python_module_dict(_opcode_metadata_py)
-
opcode = get_python_module_dict(opcode_py)
- opmap = opcode['opmap']
- opname = opcode['opname']
-
- MIN_INSTRUMENTED_OPCODE = opcode["MIN_INSTRUMENTED_OPCODE"]
-
- NUM_OPCODES = len(opname)
- used = [ False ] * len(opname)
- next_op = 1
-
- for name, op in opmap.items():
- used[op] = True
-
- specialized_opmap = {}
- opname_including_specialized = opname.copy()
- for name in _opcode_metadata['_specialized_instructions']:
- while used[next_op]:
- next_op += 1
- specialized_opmap[name] = next_op
- opname_including_specialized[next_op] = name
- used[next_op] = True
-
- with open(opcode_ids_h, 'w') as fobj:
- fobj.write(opcode_ids_h_header)
-
- for name in opname:
- if name in opmap:
- op = opmap[name]
- if op == MIN_INSTRUMENTED_OPCODE:
- fobj.write(DEFINE.format("MIN_INSTRUMENTED_OPCODE", MIN_INSTRUMENTED_OPCODE))
-
- fobj.write(DEFINE.format(name, op))
-
-
- for name, op in specialized_opmap.items():
- fobj.write(DEFINE.format(name, op))
-
- fobj.write(opcode_ids_h_footer)
with open(opcode_h, 'w') as fobj:
fobj.write(opcode_h_header)
@@ -143,7 +80,6 @@ def main(opcode_py,
iobj.write(internal_header)
iobj.write("\nextern const uint8_t _PyOpcode_Caches[256];\n")
- iobj.write("\nextern const uint8_t _PyOpcode_Deopt[256];\n")
iobj.write("\n#ifdef NEED_OPCODE_TABLES\n")
iobj.write("\nconst uint8_t _PyOpcode_Caches[256] = {\n")
@@ -151,52 +87,12 @@ def main(opcode_py,
iobj.write(f" [{name}] = {entries},\n")
iobj.write("};\n")
- deoptcodes = {}
- for basic, op in opmap.items():
- if op < 256:
- deoptcodes[basic] = basic
- for basic, family in _opcode_metadata["_specializations"].items():
- for specialized in family:
- deoptcodes[specialized] = basic
- iobj.write("\nconst uint8_t _PyOpcode_Deopt[256] = {\n")
- for opt, deopt in sorted(deoptcodes.items()):
- iobj.write(f" [{opt}] = {deopt},\n")
- iobj.write("};\n")
- iobj.write("#endif // NEED_OPCODE_TABLES\n")
-
- iobj.write("\n")
- iobj.write(f"\nextern const char *const _PyOpcode_OpName[{NUM_OPCODES}];\n")
- iobj.write("\n#ifdef NEED_OPCODE_TABLES\n")
- iobj.write(f"const char *const _PyOpcode_OpName[{NUM_OPCODES}] = {{\n")
- for op, name in enumerate(opname_including_specialized):
- if name[0] != "<":
- op = name
- iobj.write(f''' [{op}] = "{name}",\n''')
- iobj.write("};\n")
iobj.write("#endif // NEED_OPCODE_TABLES\n")
- iobj.write("\n")
- iobj.write("#define EXTRA_CASES \\\n")
- for i, flag in enumerate(used):
- if not flag:
- iobj.write(f" case {i}: \\\n")
- iobj.write(" ;\n")
-
iobj.write(internal_footer)
- with open(opcode_targets_h, "w") as f:
- targets = ["_unknown_opcode"] * 256
- for op, name in enumerate(opname_including_specialized):
- if op < 256 and not name.startswith("<"):
- targets[op] = f"TARGET_{name}"
-
- f.write("static void *opcode_targets[256] = {\n")
- f.write(",\n".join([f" &&{s}" for s in targets]))
- f.write("\n};\n")
-
print(f"{opcode_h} regenerated from {opcode_py}")
if __name__ == '__main__':
- main(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4],
- sys.argv[5], sys.argv[6])
+ main(sys.argv[1], sys.argv[2], sys.argv[3])
diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py
index e170e11..d991cb4 100644
--- a/Tools/cases_generator/generate_cases.py
+++ b/Tools/cases_generator/generate_cases.py
@@ -5,6 +5,7 @@ Writes the cases to generated_cases.c.h, which is #included in ceval.c.
import argparse
import contextlib
+import itertools
import os
import posixpath
import sys
@@ -36,6 +37,12 @@ THIS = os.path.relpath(__file__, ROOT).replace(os.path.sep, posixpath.sep)
DEFAULT_INPUT = os.path.relpath(os.path.join(ROOT, "Python/bytecodes.c"))
DEFAULT_OUTPUT = os.path.relpath(os.path.join(ROOT, "Python/generated_cases.c.h"))
+DEFAULT_OPCODE_IDS_H_OUTPUT = os.path.relpath(
+ os.path.join(ROOT, "Include/opcode_ids.h")
+)
+DEFAULT_OPCODE_TARGETS_H_OUTPUT = os.path.relpath(
+ os.path.join(ROOT, "Python/opcode_targets.h")
+)
DEFAULT_METADATA_OUTPUT = os.path.relpath(
os.path.join(ROOT, "Include/internal/pycore_opcode_metadata.h")
)
@@ -87,6 +94,20 @@ arg_parser.add_argument(
"-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
)
arg_parser.add_argument(
+ "-n",
+ "--opcode_ids_h",
+ type=str,
+ help="Header file with opcode number definitions",
+ default=DEFAULT_OPCODE_IDS_H_OUTPUT,
+)
+arg_parser.add_argument(
+ "-t",
+ "--opcode_targets_h",
+ type=str,
+ help="File with opcode targets for computed gotos",
+ default=DEFAULT_OPCODE_TARGETS_H_OUTPUT,
+)
+arg_parser.add_argument(
"-m",
"--metadata",
type=str,
@@ -225,6 +246,129 @@ class Generator(Analyzer):
self.out.write_raw(self.from_source_files())
self.out.write_raw(f"{self.out.comment} Do not edit!\n")
+ def assign_opcode_ids(self):
+ """Assign IDs to opcodes"""
+
+ ops: list[(bool, str)] = [] # (has_arg, name) for each opcode
+ instrumented_ops: list[str] = []
+
+ for instr in itertools.chain(
+ [instr for instr in self.instrs.values() if instr.kind != "op"],
+ self.macro_instrs.values()):
+
+ name = instr.name
+ if name.startswith('INSTRUMENTED_'):
+ instrumented_ops.append(name)
+ else:
+ ops.append((instr.instr_flags.HAS_ARG_FLAG, name))
+
+ # Special case: this instruction is implemented in ceval.c
+ # rather than bytecodes.c, so we need to add it explicitly
+ # here (at least until we add something to bytecodes.c to
+ # declare external instructions).
+ instrumented_ops.append('INSTRUMENTED_LINE')
+
+ # assert lists are unique
+ assert len(set(ops)) == len(ops)
+ assert len(set(instrumented_ops)) == len(instrumented_ops)
+
+ opname: list[str or None] = [None] * 512
+ opmap: dict = {}
+ markers: dict = {}
+
+ def map_op(op, name):
+ assert op < len(opname)
+ assert opname[op] is None
+ assert name not in opmap
+ opname[op] = name
+ opmap[name] = op
+
+
+ # 0 is reserved for cache entries. This helps debugging.
+ map_op(0, 'CACHE')
+
+ # 17 is reserved as it is the initial value for the specializing counter.
+ # This helps catch cases where we attempt to execute a cache.
+ map_op(17, 'RESERVED')
+
+ # 166 is RESUME - it is hard coded as such in Tools/build/deepfreeze.py
+ map_op(166, 'RESUME')
+
+ next_opcode = 1
+
+ for has_arg, name in sorted(ops):
+ if name in opmap:
+ continue # an anchored name, like CACHE
+ while opname[next_opcode] is not None:
+ next_opcode += 1
+ assert next_opcode < 255
+ map_op(next_opcode, name)
+
+ if has_arg and 'HAVE_ARGUMENT' not in markers:
+ markers['HAVE_ARGUMENT'] = next_opcode
+
+ # Instrumented opcodes are at the end of the valid range
+ min_instrumented = 254 - (len(instrumented_ops) - 1)
+ assert next_opcode <= min_instrumented
+ markers['MIN_INSTRUMENTED_OPCODE'] = min_instrumented
+ for i, op in enumerate(instrumented_ops):
+ map_op(min_instrumented + i, op)
+
+ # Pseudo opcodes are after the valid range
+ for i, op in enumerate(sorted(self.pseudos)):
+ map_op(256 + i, op)
+
+ assert 255 not in opmap # 255 is reserved
+ self.opmap = opmap
+ self.markers = markers
+
+ def write_opcode_ids(self, opcode_ids_h_filename, opcode_targets_filename):
+ """Write header file that defined the opcode IDs"""
+
+ with open(opcode_ids_h_filename, "w") as f:
+ # Create formatter
+ self.out = Formatter(f, 0)
+
+ self.write_provenance_header()
+
+ self.out.emit("")
+ self.out.emit("#ifndef Py_OPCODE_IDS_H")
+ self.out.emit("#define Py_OPCODE_IDS_H")
+ self.out.emit("#ifdef __cplusplus")
+ self.out.emit("extern \"C\" {")
+ self.out.emit("#endif")
+ self.out.emit("")
+ self.out.emit("/* Instruction opcodes for compiled code */")
+
+ def define(name, opcode):
+ self.out.emit(f"#define {name:<38} {opcode:>3}")
+
+ all_pairs = []
+ # the second item in the tuple sorts the markers before the ops
+ all_pairs.extend((i, 1, name) for (name, i) in self.markers.items())
+ all_pairs.extend((i, 2, name) for (name, i) in self.opmap.items())
+ for i, _, name in sorted(all_pairs):
+ assert name is not None
+ define(name, i)
+
+ self.out.emit("")
+ self.out.emit("#ifdef __cplusplus")
+ self.out.emit("}")
+ self.out.emit("#endif")
+ self.out.emit("#endif /* !Py_OPCODE_IDS_H */")
+
+ with open(opcode_targets_filename, "w") as f:
+ # Create formatter
+ self.out = Formatter(f, 0)
+
+ with self.out.block("static void *opcode_targets[256] =", ";"):
+ targets = ["_unknown_opcode"] * 256
+ for name, op in self.opmap.items():
+ if op < 256:
+ targets[op] = f"TARGET_{name}"
+ f.write(",\n".join([f" &&{s}" for s in targets]))
+
+
def write_metadata(self, metadata_filename: str, pymetadata_filename: str) -> None:
"""Write instruction metadata to output file."""
@@ -378,12 +522,46 @@ class Generator(Analyzer):
):
self.write_uop_items(lambda name, counter: f'[{name}] = "{name}",')
+ with self.metadata_item(
+ f"const char *const _PyOpcode_OpName[{1 + max(self.opmap.values())}]", "=", ";"
+ ):
+ for name in self.opmap:
+ self.out.emit(f'[{name}] = "{name}",')
+
+ deoptcodes = {}
+ for name, op in self.opmap.items():
+ if op < 256:
+ deoptcodes[name] = name
+ for name, family in self.families.items():
+ for m in family.members:
+ deoptcodes[m] = name
+ # special case:
+ deoptcodes['BINARY_OP_INPLACE_ADD_UNICODE'] = 'BINARY_OP'
+
+ with self.metadata_item(
+ f"const uint8_t _PyOpcode_Deopt[256]", "=", ";"
+ ):
+ for opt, deopt in sorted(deoptcodes.items()):
+ self.out.emit(f"[{opt}] = {deopt},")
+
+ self.out.emit("")
+ self.out.emit("#define EXTRA_CASES \\")
+ valid_opcodes = set(self.opmap.values())
+ with self.out.indent():
+ for op in range(256):
+ if op not in valid_opcodes:
+ self.out.emit(f"case {op}: \\")
+ self.out.emit(" ;\n")
+
with open(pymetadata_filename, "w") as f:
# Create formatter
self.out = Formatter(f, 0, comment="#")
self.write_provenance_header()
+ # emit specializations
+ specialized_ops = set()
+
self.out.emit("")
self.out.emit("_specializations = {")
for name, family in self.families.items():
@@ -392,6 +570,7 @@ class Generator(Analyzer):
with self.out.indent():
for m in family.members:
self.out.emit(f'"{m}",')
+ specialized_ops.update(family.members)
self.out.emit(f"],")
self.out.emit("}")
@@ -402,14 +581,26 @@ class Generator(Analyzer):
'_specializations["BINARY_OP"].append('
'"BINARY_OP_INPLACE_ADD_UNICODE")'
)
+ specialized_ops.add("BINARY_OP_INPLACE_ADD_UNICODE")
- # Make list of specialized instructions
+ ops = sorted((id, name) for (name, id) in self.opmap.items())
+ # emit specialized opmap
self.out.emit("")
- self.out.emit(
- "_specialized_instructions = ["
- "opcode for family in _specializations.values() for opcode in family"
- "]"
- )
+ with self.out.block("_specialized_opmap ="):
+ for op, name in ops:
+ if name in specialized_ops:
+ self.out.emit(f"'{name}': {op},")
+
+ # emit opmap
+ self.out.emit("")
+ with self.out.block("opmap ="):
+ for op, name in ops:
+ if name not in specialized_ops:
+ self.out.emit(f"'{name}': {op},")
+
+ for name in ['MIN_INSTRUMENTED_OPCODE', 'HAVE_ARGUMENT']:
+ self.out.emit(f"{name} = {self.markers[name]}")
+
def write_pseudo_instrs(self) -> None:
"""Write the IS_PSEUDO_INSTR macro"""
@@ -683,6 +874,9 @@ def main():
# These raise OSError if output can't be written
a.write_instructions(args.output, args.emit_line_directives)
+
+ a.assign_opcode_ids()
+ a.write_opcode_ids(args.opcode_ids_h, args.opcode_targets_h)
a.write_metadata(args.metadata, args.pymetadata)
a.write_executor_instructions(args.executor_cases, args.emit_line_directives)
a.write_abstract_interpreter_instructions(args.abstract_interpreter_cases,
diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py
index f798b2f..2d19850 100644
--- a/Tools/scripts/summarize_stats.py
+++ b/Tools/scripts/summarize_stats.py
@@ -17,7 +17,7 @@ else:
DEFAULT_DIR = "/tmp/py_stats/"
#Create list of all instruction names
-specialized = iter(opcode._specialized_instructions)
+specialized = iter(opcode._specialized_opmap.keys())
opname = ["<0>"]
for name in opcode.opname[1:]:
if name.startswith("<"):
@@ -244,7 +244,7 @@ def categorized_counts(opcode_stats):
specialized = 0
not_specialized = 0
specialized_instructions = {
- op for op in opcode._specialized_instructions
+ op for op in opcode._specialized_opmap.keys()
if "__" not in op}
for i, opcode_stat in enumerate(opcode_stats):
if "execution_count" not in opcode_stat: