diff options
author | Irit Katriel <1055913+iritkatriel@users.noreply.github.com> | 2023-08-16 22:25:18 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-16 22:25:18 (GMT) |
commit | 665a4391e10167dad1c854fb604c86f336fcd331 (patch) | |
tree | e1d55069486dd67e6857a13fafc12b6e865cacf7 /Tools | |
parent | e88eb3775ecdcb3af6c6d694a935b7fa5f41e5ce (diff) | |
download | cpython-665a4391e10167dad1c854fb604c86f336fcd331.zip cpython-665a4391e10167dad1c854fb604c86f336fcd331.tar.gz cpython-665a4391e10167dad1c854fb604c86f336fcd331.tar.bz2 |
gh-105481: generate op IDs from bytecode.c instead of hard coding them in opcode.py (#107971)
Diffstat (limited to 'Tools')
-rw-r--r-- | Tools/build/deepfreeze.py | 8 | ||||
-rw-r--r-- | Tools/build/generate_opcode_h.py | 106 | ||||
-rw-r--r-- | Tools/cases_generator/generate_cases.py | 206 | ||||
-rw-r--r-- | Tools/scripts/summarize_stats.py | 4 |
4 files changed, 208 insertions, 116 deletions
diff --git a/Tools/build/deepfreeze.py b/Tools/build/deepfreeze.py index ce609bd..8dbb7bf 100644 --- a/Tools/build/deepfreeze.py +++ b/Tools/build/deepfreeze.py @@ -22,7 +22,7 @@ ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) verbose = False # This must be kept in sync with opcode.py -RESUME = 151 +RESUME = 166 def isprintable(b: bytes) -> bool: return all(0x20 <= c < 0x7f for c in b) @@ -297,10 +297,12 @@ class Printer: self.write(f".co_linetable = {co_linetable},") self.write(f"._co_cached = NULL,") self.write(f".co_code_adaptive = {co_code_adaptive},") - for i, op in enumerate(code.co_code[::2]): + first_traceable = 0 + for op in code.co_code[::2]: if op == RESUME: - self.write(f"._co_firsttraceable = {i},") break + first_traceable += 1 + self.write(f"._co_firsttraceable = {first_traceable},") name_as_code = f"(PyCodeObject *)&{name}" self.finis.append(f"_PyStaticCode_Fini({name_as_code});") self.inits.append(f"_PyStaticCode_Init({name_as_code})") diff --git a/Tools/build/generate_opcode_h.py b/Tools/build/generate_opcode_h.py index 67f4a2c..344709a 100644 --- a/Tools/build/generate_opcode_h.py +++ b/Tools/build/generate_opcode_h.py @@ -27,27 +27,6 @@ opcode_h_footer = """ #endif /* !Py_OPCODE_H */ """ -opcode_ids_h_header = f""" -// Auto-generated by {SCRIPT_NAME} from {PYTHON_OPCODE} - -#ifndef Py_OPCODE_IDS_H -#define Py_OPCODE_IDS_H -#ifdef __cplusplus -extern "C" {{ -#endif - - -/* Instruction opcodes for compiled code */ -""".lstrip() - -opcode_ids_h_footer = """ - -#ifdef __cplusplus -} -#endif -#endif /* !Py_OPCODE_IDS_H */ -""" - internal_header = f""" // Auto-generated by {SCRIPT_NAME} from {PYTHON_OPCODE} @@ -83,52 +62,10 @@ def get_python_module_dict(filename): return mod def main(opcode_py, - _opcode_metadata_py='Lib/_opcode_metadata.py', - opcode_ids_h='Include/opcode_ids.h', opcode_h='Include/opcode.h', - opcode_targets_h='Python/opcode_targets.h', internal_opcode_h='Include/internal/pycore_opcode.h'): - _opcode_metadata = get_python_module_dict(_opcode_metadata_py) - opcode = get_python_module_dict(opcode_py) - opmap = opcode['opmap'] - opname = opcode['opname'] - - MIN_INSTRUMENTED_OPCODE = opcode["MIN_INSTRUMENTED_OPCODE"] - - NUM_OPCODES = len(opname) - used = [ False ] * len(opname) - next_op = 1 - - for name, op in opmap.items(): - used[op] = True - - specialized_opmap = {} - opname_including_specialized = opname.copy() - for name in _opcode_metadata['_specialized_instructions']: - while used[next_op]: - next_op += 1 - specialized_opmap[name] = next_op - opname_including_specialized[next_op] = name - used[next_op] = True - - with open(opcode_ids_h, 'w') as fobj: - fobj.write(opcode_ids_h_header) - - for name in opname: - if name in opmap: - op = opmap[name] - if op == MIN_INSTRUMENTED_OPCODE: - fobj.write(DEFINE.format("MIN_INSTRUMENTED_OPCODE", MIN_INSTRUMENTED_OPCODE)) - - fobj.write(DEFINE.format(name, op)) - - - for name, op in specialized_opmap.items(): - fobj.write(DEFINE.format(name, op)) - - fobj.write(opcode_ids_h_footer) with open(opcode_h, 'w') as fobj: fobj.write(opcode_h_header) @@ -143,7 +80,6 @@ def main(opcode_py, iobj.write(internal_header) iobj.write("\nextern const uint8_t _PyOpcode_Caches[256];\n") - iobj.write("\nextern const uint8_t _PyOpcode_Deopt[256];\n") iobj.write("\n#ifdef NEED_OPCODE_TABLES\n") iobj.write("\nconst uint8_t _PyOpcode_Caches[256] = {\n") @@ -151,52 +87,12 @@ def main(opcode_py, iobj.write(f" [{name}] = {entries},\n") iobj.write("};\n") - deoptcodes = {} - for basic, op in opmap.items(): - if op < 256: - deoptcodes[basic] = basic - for basic, family in _opcode_metadata["_specializations"].items(): - for specialized in family: - deoptcodes[specialized] = basic - iobj.write("\nconst uint8_t _PyOpcode_Deopt[256] = {\n") - for opt, deopt in sorted(deoptcodes.items()): - iobj.write(f" [{opt}] = {deopt},\n") - iobj.write("};\n") - iobj.write("#endif // NEED_OPCODE_TABLES\n") - - iobj.write("\n") - iobj.write(f"\nextern const char *const _PyOpcode_OpName[{NUM_OPCODES}];\n") - iobj.write("\n#ifdef NEED_OPCODE_TABLES\n") - iobj.write(f"const char *const _PyOpcode_OpName[{NUM_OPCODES}] = {{\n") - for op, name in enumerate(opname_including_specialized): - if name[0] != "<": - op = name - iobj.write(f''' [{op}] = "{name}",\n''') - iobj.write("};\n") iobj.write("#endif // NEED_OPCODE_TABLES\n") - iobj.write("\n") - iobj.write("#define EXTRA_CASES \\\n") - for i, flag in enumerate(used): - if not flag: - iobj.write(f" case {i}: \\\n") - iobj.write(" ;\n") - iobj.write(internal_footer) - with open(opcode_targets_h, "w") as f: - targets = ["_unknown_opcode"] * 256 - for op, name in enumerate(opname_including_specialized): - if op < 256 and not name.startswith("<"): - targets[op] = f"TARGET_{name}" - - f.write("static void *opcode_targets[256] = {\n") - f.write(",\n".join([f" &&{s}" for s in targets])) - f.write("\n};\n") - print(f"{opcode_h} regenerated from {opcode_py}") if __name__ == '__main__': - main(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], - sys.argv[5], sys.argv[6]) + main(sys.argv[1], sys.argv[2], sys.argv[3]) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index e170e11..d991cb4 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -5,6 +5,7 @@ Writes the cases to generated_cases.c.h, which is #included in ceval.c. import argparse import contextlib +import itertools import os import posixpath import sys @@ -36,6 +37,12 @@ THIS = os.path.relpath(__file__, ROOT).replace(os.path.sep, posixpath.sep) DEFAULT_INPUT = os.path.relpath(os.path.join(ROOT, "Python/bytecodes.c")) DEFAULT_OUTPUT = os.path.relpath(os.path.join(ROOT, "Python/generated_cases.c.h")) +DEFAULT_OPCODE_IDS_H_OUTPUT = os.path.relpath( + os.path.join(ROOT, "Include/opcode_ids.h") +) +DEFAULT_OPCODE_TARGETS_H_OUTPUT = os.path.relpath( + os.path.join(ROOT, "Python/opcode_targets.h") +) DEFAULT_METADATA_OUTPUT = os.path.relpath( os.path.join(ROOT, "Include/internal/pycore_opcode_metadata.h") ) @@ -87,6 +94,20 @@ arg_parser.add_argument( "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT ) arg_parser.add_argument( + "-n", + "--opcode_ids_h", + type=str, + help="Header file with opcode number definitions", + default=DEFAULT_OPCODE_IDS_H_OUTPUT, +) +arg_parser.add_argument( + "-t", + "--opcode_targets_h", + type=str, + help="File with opcode targets for computed gotos", + default=DEFAULT_OPCODE_TARGETS_H_OUTPUT, +) +arg_parser.add_argument( "-m", "--metadata", type=str, @@ -225,6 +246,129 @@ class Generator(Analyzer): self.out.write_raw(self.from_source_files()) self.out.write_raw(f"{self.out.comment} Do not edit!\n") + def assign_opcode_ids(self): + """Assign IDs to opcodes""" + + ops: list[(bool, str)] = [] # (has_arg, name) for each opcode + instrumented_ops: list[str] = [] + + for instr in itertools.chain( + [instr for instr in self.instrs.values() if instr.kind != "op"], + self.macro_instrs.values()): + + name = instr.name + if name.startswith('INSTRUMENTED_'): + instrumented_ops.append(name) + else: + ops.append((instr.instr_flags.HAS_ARG_FLAG, name)) + + # Special case: this instruction is implemented in ceval.c + # rather than bytecodes.c, so we need to add it explicitly + # here (at least until we add something to bytecodes.c to + # declare external instructions). + instrumented_ops.append('INSTRUMENTED_LINE') + + # assert lists are unique + assert len(set(ops)) == len(ops) + assert len(set(instrumented_ops)) == len(instrumented_ops) + + opname: list[str or None] = [None] * 512 + opmap: dict = {} + markers: dict = {} + + def map_op(op, name): + assert op < len(opname) + assert opname[op] is None + assert name not in opmap + opname[op] = name + opmap[name] = op + + + # 0 is reserved for cache entries. This helps debugging. + map_op(0, 'CACHE') + + # 17 is reserved as it is the initial value for the specializing counter. + # This helps catch cases where we attempt to execute a cache. + map_op(17, 'RESERVED') + + # 166 is RESUME - it is hard coded as such in Tools/build/deepfreeze.py + map_op(166, 'RESUME') + + next_opcode = 1 + + for has_arg, name in sorted(ops): + if name in opmap: + continue # an anchored name, like CACHE + while opname[next_opcode] is not None: + next_opcode += 1 + assert next_opcode < 255 + map_op(next_opcode, name) + + if has_arg and 'HAVE_ARGUMENT' not in markers: + markers['HAVE_ARGUMENT'] = next_opcode + + # Instrumented opcodes are at the end of the valid range + min_instrumented = 254 - (len(instrumented_ops) - 1) + assert next_opcode <= min_instrumented + markers['MIN_INSTRUMENTED_OPCODE'] = min_instrumented + for i, op in enumerate(instrumented_ops): + map_op(min_instrumented + i, op) + + # Pseudo opcodes are after the valid range + for i, op in enumerate(sorted(self.pseudos)): + map_op(256 + i, op) + + assert 255 not in opmap # 255 is reserved + self.opmap = opmap + self.markers = markers + + def write_opcode_ids(self, opcode_ids_h_filename, opcode_targets_filename): + """Write header file that defined the opcode IDs""" + + with open(opcode_ids_h_filename, "w") as f: + # Create formatter + self.out = Formatter(f, 0) + + self.write_provenance_header() + + self.out.emit("") + self.out.emit("#ifndef Py_OPCODE_IDS_H") + self.out.emit("#define Py_OPCODE_IDS_H") + self.out.emit("#ifdef __cplusplus") + self.out.emit("extern \"C\" {") + self.out.emit("#endif") + self.out.emit("") + self.out.emit("/* Instruction opcodes for compiled code */") + + def define(name, opcode): + self.out.emit(f"#define {name:<38} {opcode:>3}") + + all_pairs = [] + # the second item in the tuple sorts the markers before the ops + all_pairs.extend((i, 1, name) for (name, i) in self.markers.items()) + all_pairs.extend((i, 2, name) for (name, i) in self.opmap.items()) + for i, _, name in sorted(all_pairs): + assert name is not None + define(name, i) + + self.out.emit("") + self.out.emit("#ifdef __cplusplus") + self.out.emit("}") + self.out.emit("#endif") + self.out.emit("#endif /* !Py_OPCODE_IDS_H */") + + with open(opcode_targets_filename, "w") as f: + # Create formatter + self.out = Formatter(f, 0) + + with self.out.block("static void *opcode_targets[256] =", ";"): + targets = ["_unknown_opcode"] * 256 + for name, op in self.opmap.items(): + if op < 256: + targets[op] = f"TARGET_{name}" + f.write(",\n".join([f" &&{s}" for s in targets])) + + def write_metadata(self, metadata_filename: str, pymetadata_filename: str) -> None: """Write instruction metadata to output file.""" @@ -378,12 +522,46 @@ class Generator(Analyzer): ): self.write_uop_items(lambda name, counter: f'[{name}] = "{name}",') + with self.metadata_item( + f"const char *const _PyOpcode_OpName[{1 + max(self.opmap.values())}]", "=", ";" + ): + for name in self.opmap: + self.out.emit(f'[{name}] = "{name}",') + + deoptcodes = {} + for name, op in self.opmap.items(): + if op < 256: + deoptcodes[name] = name + for name, family in self.families.items(): + for m in family.members: + deoptcodes[m] = name + # special case: + deoptcodes['BINARY_OP_INPLACE_ADD_UNICODE'] = 'BINARY_OP' + + with self.metadata_item( + f"const uint8_t _PyOpcode_Deopt[256]", "=", ";" + ): + for opt, deopt in sorted(deoptcodes.items()): + self.out.emit(f"[{opt}] = {deopt},") + + self.out.emit("") + self.out.emit("#define EXTRA_CASES \\") + valid_opcodes = set(self.opmap.values()) + with self.out.indent(): + for op in range(256): + if op not in valid_opcodes: + self.out.emit(f"case {op}: \\") + self.out.emit(" ;\n") + with open(pymetadata_filename, "w") as f: # Create formatter self.out = Formatter(f, 0, comment="#") self.write_provenance_header() + # emit specializations + specialized_ops = set() + self.out.emit("") self.out.emit("_specializations = {") for name, family in self.families.items(): @@ -392,6 +570,7 @@ class Generator(Analyzer): with self.out.indent(): for m in family.members: self.out.emit(f'"{m}",') + specialized_ops.update(family.members) self.out.emit(f"],") self.out.emit("}") @@ -402,14 +581,26 @@ class Generator(Analyzer): '_specializations["BINARY_OP"].append(' '"BINARY_OP_INPLACE_ADD_UNICODE")' ) + specialized_ops.add("BINARY_OP_INPLACE_ADD_UNICODE") - # Make list of specialized instructions + ops = sorted((id, name) for (name, id) in self.opmap.items()) + # emit specialized opmap self.out.emit("") - self.out.emit( - "_specialized_instructions = [" - "opcode for family in _specializations.values() for opcode in family" - "]" - ) + with self.out.block("_specialized_opmap ="): + for op, name in ops: + if name in specialized_ops: + self.out.emit(f"'{name}': {op},") + + # emit opmap + self.out.emit("") + with self.out.block("opmap ="): + for op, name in ops: + if name not in specialized_ops: + self.out.emit(f"'{name}': {op},") + + for name in ['MIN_INSTRUMENTED_OPCODE', 'HAVE_ARGUMENT']: + self.out.emit(f"{name} = {self.markers[name]}") + def write_pseudo_instrs(self) -> None: """Write the IS_PSEUDO_INSTR macro""" @@ -683,6 +874,9 @@ def main(): # These raise OSError if output can't be written a.write_instructions(args.output, args.emit_line_directives) + + a.assign_opcode_ids() + a.write_opcode_ids(args.opcode_ids_h, args.opcode_targets_h) a.write_metadata(args.metadata, args.pymetadata) a.write_executor_instructions(args.executor_cases, args.emit_line_directives) a.write_abstract_interpreter_instructions(args.abstract_interpreter_cases, diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index f798b2f..2d19850 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -17,7 +17,7 @@ else: DEFAULT_DIR = "/tmp/py_stats/" #Create list of all instruction names -specialized = iter(opcode._specialized_instructions) +specialized = iter(opcode._specialized_opmap.keys()) opname = ["<0>"] for name in opcode.opname[1:]: if name.startswith("<"): @@ -244,7 +244,7 @@ def categorized_counts(opcode_stats): specialized = 0 not_specialized = 0 specialized_instructions = { - op for op in opcode._specialized_instructions + op for op in opcode._specialized_opmap.keys() if "__" not in op} for i, opcode_stat in enumerate(opcode_stats): if "execution_count" not in opcode_stat: |