diff options
author | Irit Katriel <1055913+iritkatriel@users.noreply.github.com> | 2023-08-16 22:25:18 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-16 22:25:18 (GMT) |
commit | 665a4391e10167dad1c854fb604c86f336fcd331 (patch) | |
tree | e1d55069486dd67e6857a13fafc12b6e865cacf7 /Tools/cases_generator | |
parent | e88eb3775ecdcb3af6c6d694a935b7fa5f41e5ce (diff) | |
download | cpython-665a4391e10167dad1c854fb604c86f336fcd331.zip cpython-665a4391e10167dad1c854fb604c86f336fcd331.tar.gz cpython-665a4391e10167dad1c854fb604c86f336fcd331.tar.bz2 |
gh-105481: generate op IDs from bytecode.c instead of hard coding them in opcode.py (#107971)
Diffstat (limited to 'Tools/cases_generator')
-rw-r--r-- | Tools/cases_generator/generate_cases.py | 206 |
1 files changed, 200 insertions, 6 deletions
diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py
index e170e11..d991cb4 100644
--- a/Tools/cases_generator/generate_cases.py
+++ b/Tools/cases_generator/generate_cases.py
@@ -5,6 +5,7 @@ Writes the cases to generated_cases.c.h, which is #included in ceval.c.
 
 import argparse
 import contextlib
+import itertools
 import os
 import posixpath
 import sys
@@ -36,6 +37,12 @@ THIS = os.path.relpath(__file__, ROOT).replace(os.path.sep, posixpath.sep)
 
 DEFAULT_INPUT = os.path.relpath(os.path.join(ROOT, "Python/bytecodes.c"))
 DEFAULT_OUTPUT = os.path.relpath(os.path.join(ROOT, "Python/generated_cases.c.h"))
+DEFAULT_OPCODE_IDS_H_OUTPUT = os.path.relpath(
+    os.path.join(ROOT, "Include/opcode_ids.h")
+)
+DEFAULT_OPCODE_TARGETS_H_OUTPUT = os.path.relpath(
+    os.path.join(ROOT, "Python/opcode_targets.h")
+)
 DEFAULT_METADATA_OUTPUT = os.path.relpath(
     os.path.join(ROOT, "Include/internal/pycore_opcode_metadata.h")
 )
@@ -87,6 +94,20 @@ arg_parser.add_argument(
     "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
 )
 arg_parser.add_argument(
+    "-n",
+    "--opcode_ids_h",
+    type=str,
+    help="Header file with opcode number definitions",
+    default=DEFAULT_OPCODE_IDS_H_OUTPUT,
+)
+arg_parser.add_argument(
+    "-t",
+    "--opcode_targets_h",
+    type=str,
+    help="File with opcode targets for computed gotos",
+    default=DEFAULT_OPCODE_TARGETS_H_OUTPUT,
+)
+arg_parser.add_argument(
     "-m",
     "--metadata",
     type=str,
@@ -225,6 +246,129 @@ class Generator(Analyzer):
         self.out.write_raw(self.from_source_files())
         self.out.write_raw(f"{self.out.comment} Do not edit!\n")
 
+    def assign_opcode_ids(self):
+        """Assign IDs to opcodes"""
+
+        ops: list[(bool, str)] = []  # (has_arg, name) for each opcode
+        instrumented_ops: list[str] = []
+
+        for instr in itertools.chain(
+            [instr for instr in self.instrs.values() if instr.kind != "op"],
+            self.macro_instrs.values()):
+
+            name = instr.name
+            if name.startswith('INSTRUMENTED_'):
+                instrumented_ops.append(name)
+            else:
+                ops.append((instr.instr_flags.HAS_ARG_FLAG, name))
+
+        # Special case: this instruction is implemented in ceval.c
+        # rather than bytecodes.c, so we need to add it explicitly
+        # here (at least until we add something to bytecodes.c to
+        # declare external instructions).
+        instrumented_ops.append('INSTRUMENTED_LINE')
+
+        # assert lists are unique
+        assert len(set(ops)) == len(ops)
+        assert len(set(instrumented_ops)) == len(instrumented_ops)
+
+        opname: list[str or None] = [None] * 512
+        opmap: dict = {}
+        markers: dict = {}
+
+        def map_op(op, name):
+            assert op < len(opname)
+            assert opname[op] is None
+            assert name not in opmap
+            opname[op] = name
+            opmap[name] = op
+
+
+        # 0 is reserved for cache entries. This helps debugging.
+        map_op(0, 'CACHE')
+
+        # 17 is reserved as it is the initial value for the specializing counter.
+        # This helps catch cases where we attempt to execute a cache.
+        map_op(17, 'RESERVED')
+
+        # 166 is RESUME - it is hard coded as such in Tools/build/deepfreeze.py
+        map_op(166, 'RESUME')
+
+        next_opcode = 1
+
+        for has_arg, name in sorted(ops):
+            if name in opmap:
+                continue  # an anchored name, like CACHE
+            while opname[next_opcode] is not None:
+                next_opcode += 1
+            assert next_opcode < 255
+            map_op(next_opcode, name)
+
+            if has_arg and 'HAVE_ARGUMENT' not in markers:
+                markers['HAVE_ARGUMENT'] = next_opcode
+
+        # Instrumented opcodes are at the end of the valid range
+        min_instrumented = 254 - (len(instrumented_ops) - 1)
+        assert next_opcode <= min_instrumented
+        markers['MIN_INSTRUMENTED_OPCODE'] = min_instrumented
+        for i, op in enumerate(instrumented_ops):
+            map_op(min_instrumented + i, op)
+
+        # Pseudo opcodes are after the valid range
+        for i, op in enumerate(sorted(self.pseudos)):
+            map_op(256 + i, op)
+
+        assert 255 not in opmap  # 255 is reserved
+        self.opmap = opmap
+        self.markers = markers
+
+    def write_opcode_ids(self, opcode_ids_h_filename, opcode_targets_filename):
+        """Write header file that defined the opcode IDs"""
+
+        with open(opcode_ids_h_filename, "w") as f:
+            # Create formatter
+            self.out = Formatter(f, 0)
+
+            self.write_provenance_header()
+
+            self.out.emit("")
+            self.out.emit("#ifndef Py_OPCODE_IDS_H")
+            self.out.emit("#define Py_OPCODE_IDS_H")
+            self.out.emit("#ifdef __cplusplus")
+            self.out.emit("extern \"C\" {")
+            self.out.emit("#endif")
+            self.out.emit("")
+            self.out.emit("/* Instruction opcodes for compiled code */")
+
+            def define(name, opcode):
+                self.out.emit(f"#define {name:<38} {opcode:>3}")
+
+            all_pairs = []
+            # the second item in the tuple sorts the markers before the ops
+            all_pairs.extend((i, 1, name) for (name, i) in self.markers.items())
+            all_pairs.extend((i, 2, name) for (name, i) in self.opmap.items())
+            for i, _, name in sorted(all_pairs):
+                assert name is not None
+                define(name, i)
+
+            self.out.emit("")
+            self.out.emit("#ifdef __cplusplus")
+            self.out.emit("}")
+            self.out.emit("#endif")
+            self.out.emit("#endif /* !Py_OPCODE_IDS_H */")
+
+        with open(opcode_targets_filename, "w") as f:
+            # Create formatter
+            self.out = Formatter(f, 0)
+
+            with self.out.block("static void *opcode_targets[256] =", ";"):
+                targets = ["_unknown_opcode"] * 256
+                for name, op in self.opmap.items():
+                    if op < 256:
+                        targets[op] = f"TARGET_{name}"
+                f.write(",\n".join([f" &&{s}" for s in targets]))
+
+
     def write_metadata(self, metadata_filename: str, pymetadata_filename: str) -> None:
         """Write instruction metadata to output file."""
 
@@ -378,12 +522,46 @@ class Generator(Analyzer):
             ):
                 self.write_uop_items(lambda name, counter: f'[{name}] = "{name}",')
 
+            with self.metadata_item(
+                f"const char *const _PyOpcode_OpName[{1 + max(self.opmap.values())}]", "=", ";"
+            ):
+                for name in self.opmap:
+                    self.out.emit(f'[{name}] = "{name}",')
+
+            deoptcodes = {}
+            for name, op in self.opmap.items():
+                if op < 256:
+                    deoptcodes[name] = name
+            for name, family in self.families.items():
+                for m in family.members:
+                    deoptcodes[m] = name
+            # special case:
+            deoptcodes['BINARY_OP_INPLACE_ADD_UNICODE'] = 'BINARY_OP'
+
+            with self.metadata_item(
+                f"const uint8_t _PyOpcode_Deopt[256]", "=", ";"
+            ):
+                for opt, deopt in sorted(deoptcodes.items()):
+                    self.out.emit(f"[{opt}] = {deopt},")
+
+            self.out.emit("")
+            self.out.emit("#define EXTRA_CASES \\")
+            valid_opcodes = set(self.opmap.values())
+            with self.out.indent():
+                for op in range(256):
+                    if op not in valid_opcodes:
+                        self.out.emit(f"case {op}: \\")
+                self.out.emit(" ;\n")
+
         with open(pymetadata_filename, "w") as f:
             # Create formatter
             self.out = Formatter(f, 0, comment="#")
 
             self.write_provenance_header()
 
+            # emit specializations
+            specialized_ops = set()
+
             self.out.emit("")
             self.out.emit("_specializations = {")
             for name, family in self.families.items():
@@ -392,6 +570,7 @@ class Generator(Analyzer):
                     with self.out.indent():
                         for m in family.members:
                             self.out.emit(f'"{m}",')
+                    specialized_ops.update(family.members)
                     self.out.emit(f"],")
             self.out.emit("}")
 
@@ -402,14 +581,26 @@ class Generator(Analyzer):
                 '_specializations["BINARY_OP"].append('
                 '"BINARY_OP_INPLACE_ADD_UNICODE")'
             )
+            specialized_ops.add("BINARY_OP_INPLACE_ADD_UNICODE")
 
-            # Make list of specialized instructions
+            ops = sorted((id, name) for (name, id) in self.opmap.items())
+            # emit specialized opmap
             self.out.emit("")
-            self.out.emit(
-                "_specialized_instructions = ["
-                "opcode for family in _specializations.values() for opcode in family"
-                "]"
-            )
+            with self.out.block("_specialized_opmap ="):
+                for op, name in ops:
+                    if name in specialized_ops:
+                        self.out.emit(f"'{name}': {op},")
+
+            # emit opmap
+            self.out.emit("")
+            with self.out.block("opmap ="):
+                for op, name in ops:
+                    if name not in specialized_ops:
+                        self.out.emit(f"'{name}': {op},")
+
+            for name in ['MIN_INSTRUMENTED_OPCODE', 'HAVE_ARGUMENT']:
+                self.out.emit(f"{name} = {self.markers[name]}")
+
 
     def write_pseudo_instrs(self) -> None:
         """Write the IS_PSEUDO_INSTR macro"""
@@ -683,6 +874,9 @@ def main():
 
     # These raise OSError if output can't be written
     a.write_instructions(args.output, args.emit_line_directives)
+
+    a.assign_opcode_ids()
+    a.write_opcode_ids(args.opcode_ids_h, args.opcode_targets_h)
     a.write_metadata(args.metadata, args.pymetadata)
     a.write_executor_instructions(args.executor_cases, args.emit_line_directives)
     a.write_abstract_interpreter_instructions(args.abstract_interpreter_cases,