author    Mark Shannon <mark@hotpy.org>    2023-12-21 12:46:28 (GMT)
committer GitHub <noreply@github.com>      2023-12-21 12:46:28 (GMT)
commit    723f4d66982e4d2c54f8e874d6084ab7b2ff5833 (patch)
tree      ad23ec3de46304048a9ea1aa757ccb2196007425 /Tools/cases_generator
parent    fae096cd4b7025f91473546ca6a243c86374dd6a (diff)
GH-111485: Delete the old generator code. (GH-113321)
Diffstat (limited to 'Tools/cases_generator')
-rw-r--r--  Tools/cases_generator/analysis.py        | 487
-rw-r--r--  Tools/cases_generator/analyzer.py        |  76
-rw-r--r--  Tools/cases_generator/flags.py           | 191
-rw-r--r--  Tools/cases_generator/formatting.py      | 206
-rw-r--r--  Tools/cases_generator/generate_cases.py  | 848
-rw-r--r--  Tools/cases_generator/instructions.py    | 355
-rw-r--r--  Tools/cases_generator/parser.py          |  12
-rw-r--r--  Tools/cases_generator/stack.py           |  18
-rw-r--r--  Tools/cases_generator/stacking.py        | 534
9 files changed, 102 insertions, 2625 deletions
diff --git a/Tools/cases_generator/analysis.py b/Tools/cases_generator/analysis.py
deleted file mode 100644
index 26d92c1..0000000
--- a/Tools/cases_generator/analysis.py
+++ /dev/null
@@ -1,487 +0,0 @@
-import re
-import sys
-import typing
-
-from _typing_backports import assert_never
-from flags import InstructionFlags, variable_used
-from formatting import prettify_filename, UNUSED
-from instructions import (
- ActiveCacheEffect,
- Component,
- Instruction,
- InstructionOrCacheEffect,
- MacroInstruction,
- MacroParts,
- PseudoInstruction,
-)
-import parsing
-from parsing import StackEffect
-
-BEGIN_MARKER = "// BEGIN BYTECODES //"
-END_MARKER = "// END BYTECODES //"
-
-RESERVED_WORDS = {
- "co_consts": "Use FRAME_CO_CONSTS.",
- "co_names": "Use FRAME_CO_NAMES.",
-}
-
-RE_GO_TO_INSTR = r"^\s*GO_TO_INSTRUCTION\((\w+)\);\s*(?://.*)?$"
-
-
-class Analyzer:
- """Parse input, analyze it, and write to output."""
-
- input_filenames: list[str]
- errors: int = 0
- warnings: int = 0
-
- def __init__(self, input_filenames: list[str]):
- self.input_filenames = input_filenames
-
- def message(self, msg: str, node: parsing.Node) -> None:
- lineno = 0
- filename = "<unknown file>"
- if context := node.context:
- filename = context.owner.filename
- # Use line number of first non-comment in the node
- for token in context.owner.tokens[context.begin : context.end]:
- lineno = token.line
- if token.kind != "COMMENT":
- break
- print(f"{filename}:{lineno}: {msg}", file=sys.stderr)
-
- def error(self, msg: str, node: parsing.Node) -> None:
- self.message("error: " + msg, node)
- self.errors += 1
-
- def warning(self, msg: str, node: parsing.Node) -> None:
- self.message("warning: " + msg, node)
- self.warnings += 1
-
- def note(self, msg: str, node: parsing.Node) -> None:
- self.message("note: " + msg, node)
-
- everything: list[
- parsing.InstDef
- | parsing.Macro
- | parsing.Pseudo
- ]
- instrs: dict[str, Instruction] # Includes ops
- macros: dict[str, parsing.Macro]
- macro_instrs: dict[str, MacroInstruction]
- families: dict[str, parsing.Family]
- pseudos: dict[str, parsing.Pseudo]
- pseudo_instrs: dict[str, PseudoInstruction]
-
- def parse(self) -> None:
- """Parse the source text.
-
- We only want the parser to see the stuff between the
- begin and end markers.
- """
-
- self.everything = []
- self.instrs = {}
- self.macros = {}
- self.families = {}
- self.pseudos = {}
-
- instrs_idx: dict[str, int] = dict()
-
- for filename in self.input_filenames:
- self.parse_file(filename, instrs_idx)
-
- files = " + ".join(self.input_filenames)
- n_instrs = len(set(self.instrs) & set(self.macros))
- n_ops = len(self.instrs) - n_instrs
- print(
- f"Read {n_instrs} instructions, {n_ops} ops, "
- f"{len(self.macros)} macros, {len(self.pseudos)} pseudos, "
- f"and {len(self.families)} families from {files}",
- file=sys.stderr,
- )
-
- def parse_file(self, filename: str, instrs_idx: dict[str, int]) -> None:
- with open(filename) as file:
- src = file.read()
-
- psr = parsing.Parser(src, filename=prettify_filename(filename))
-
- # Skip until begin marker
- while tkn := psr.next(raw=True):
- if tkn.text == BEGIN_MARKER:
- break
- else:
- raise psr.make_syntax_error(
- f"Couldn't find {BEGIN_MARKER!r} in {psr.filename}"
- )
- start = psr.getpos()
-
- # Find end marker, then delete everything after it
- while tkn := psr.next(raw=True):
- if tkn.text == END_MARKER:
- break
- del psr.tokens[psr.getpos() - 1 :]
-
- # Parse from start
- psr.setpos(start)
- thing: parsing.Node | None
- thing_first_token = psr.peek()
- while thing := psr.definition():
- thing = typing.cast(
- parsing.InstDef | parsing.Macro | parsing.Pseudo | parsing.Family, thing
- )
- if ws := [w for w in RESERVED_WORDS if variable_used(thing, w)]:
- self.error(
- f"'{ws[0]}' is a reserved word. {RESERVED_WORDS[ws[0]]}", thing
- )
-
- match thing:
- case parsing.InstDef(name=name):
- macro: parsing.Macro | None = None
- if thing.kind == "inst" and "override" not in thing.annotations:
- macro = parsing.Macro(name, [parsing.OpName(name)])
- if name in self.instrs:
- if "override" not in thing.annotations:
- raise psr.make_syntax_error(
- f"Duplicate definition of '{name}' @ {thing.context} "
- f"previous definition @ {self.instrs[name].inst.context}",
- thing_first_token,
- )
- self.everything[instrs_idx[name]] = thing
- if name not in self.instrs and "override" in thing.annotations:
- raise psr.make_syntax_error(
- f"Definition of '{name}' @ {thing.context} is supposed to be "
- "an override but no previous definition exists.",
- thing_first_token,
- )
- self.instrs[name] = Instruction(thing)
- instrs_idx[name] = len(self.everything)
- self.everything.append(thing)
- if macro is not None:
- self.macros[macro.name] = macro
- self.everything.append(macro)
- case parsing.Macro(name):
- self.macros[name] = thing
- self.everything.append(thing)
- case parsing.Family(name):
- self.families[name] = thing
- case parsing.Pseudo(name):
- self.pseudos[name] = thing
- self.everything.append(thing)
- case _:
- assert_never(thing)
- if not psr.eof():
- raise psr.make_syntax_error(f"Extra stuff at the end of {filename}")
-
- def analyze(self) -> None:
- """Analyze the inputs.
-
- Raises SystemExit if there is an error.
- """
- self.analyze_macros_and_pseudos()
- self.map_families()
- self.mark_predictions()
- self.check_families()
-
- def mark_predictions(self) -> None:
- """Mark the instructions that need PREDICTED() labels."""
- # Start with family heads
- for family in self.families.values():
- if family.name in self.instrs:
- self.instrs[family.name].predicted = True
- if family.name in self.macro_instrs:
- self.macro_instrs[family.name].predicted = True
- # Also look for GO_TO_INSTRUCTION() calls
- for instr in self.instrs.values():
- targets: set[str] = set()
- for line in instr.block_text:
- if m := re.match(RE_GO_TO_INSTR, line):
- targets.add(m.group(1))
- for target in targets:
- if target_instr := self.instrs.get(target):
- target_instr.predicted = True
- if target_macro := self.macro_instrs.get(target):
- target_macro.predicted = True
- if not target_instr and not target_macro:
- self.error(
- f"Unknown instruction {target!r} predicted in {instr.name!r}",
- instr.inst, # TODO: Use better location
- )
-
- def map_families(self) -> None:
- """Link instruction names back to their family, if they have one."""
- for family in self.families.values():
- for member in [family.name] + family.members:
- if member_instr := self.instrs.get(member):
- if (
- member_instr.family is not family
- and member_instr.family is not None
- ):
- self.error(
- f"Instruction {member} is a member of multiple families "
- f"({member_instr.family.name}, {family.name}).",
- family,
- )
- else:
- member_instr.family = family
- if member_mac := self.macro_instrs.get(member):
- assert member_mac.family is None, (member, member_mac.family.name)
- member_mac.family = family
- if not member_instr and not member_mac:
- self.error(
- f"Unknown instruction {member!r} referenced in family {family.name!r}",
- family,
- )
- # A sanctioned exception:
- # This opcode is a member of the family but it doesn't pass the checks.
- if mac := self.macro_instrs.get("BINARY_OP_INPLACE_ADD_UNICODE"):
- mac.family = self.families.get("BINARY_OP")
-
- def check_families(self) -> None:
- """Check each family:
-
- - Must have at least 2 members (including head)
- - Head and all members must be known instructions
- - Head and all members must have the same cache, input and output effects
- """
- for family in self.families.values():
- if family.name not in self.macro_instrs and family.name not in self.instrs:
- self.error(
- f"Family {family.name!r} has unknown instruction {family.name!r}",
- family,
- )
- members = [
- member
- for member in family.members
- if member in self.instrs or member in self.macro_instrs
- ]
- if members != family.members:
- unknown = set(family.members) - set(members)
- self.error(
- f"Family {family.name!r} has unknown members: {unknown}", family
- )
- expected_effects = self.effect_counts(family.name)
- for member in members:
- member_effects = self.effect_counts(member)
- if member_effects != expected_effects:
- self.error(
- f"Family {family.name!r} has inconsistent "
- f"(cache, input, output) effects:\n"
- f" {family.name} = {expected_effects}; "
- f"{member} = {member_effects}",
- family,
- )
-
- def effect_counts(self, name: str) -> tuple[int, int, int]:
- if mac := self.macro_instrs.get(name):
- cache = mac.cache_offset
- input, output = 0, 0
- for part in mac.parts:
- if isinstance(part, Component):
- # A component may pop what the previous component pushed,
- # so we offset the input/output counts by that.
- delta_i = len(part.instr.input_effects)
- delta_o = len(part.instr.output_effects)
- offset = min(delta_i, output)
- input += delta_i - offset
- output += delta_o - offset
- else:
- assert False, f"Unknown instruction {name!r}"
- return cache, input, output
-
- def analyze_macros_and_pseudos(self) -> None:
- """Analyze each macro and pseudo instruction."""
- self.macro_instrs = {}
- self.pseudo_instrs = {}
- for name, macro in self.macros.items():
- self.macro_instrs[name] = mac = self.analyze_macro(macro)
- self.check_macro_consistency(mac)
- for name, pseudo in self.pseudos.items():
- self.pseudo_instrs[name] = self.analyze_pseudo(pseudo)
-
- # TODO: Merge with similar code in stacking.py, write_components()
- def check_macro_consistency(self, mac: MacroInstruction) -> None:
- def get_var_names(instr: Instruction) -> dict[str, StackEffect]:
- vars: dict[str, StackEffect] = {}
- for eff in instr.input_effects + instr.output_effects:
- if eff.name == UNUSED:
- continue
- if eff.name in vars:
- if vars[eff.name] != eff:
- self.error(
- f"Instruction {instr.name!r} has "
- f"inconsistent type/cond/size for variable "
- f"{eff.name!r}: {vars[eff.name]} vs {eff}",
- instr.inst,
- )
- else:
- vars[eff.name] = eff
- return vars
-
- all_vars: dict[str, StackEffect] = {}
- # print("Checking", mac.name)
- prevop: Instruction | None = None
- for part in mac.parts:
- if not isinstance(part, Component):
- continue
- vars = get_var_names(part.instr)
- # print(" //", part.instr.name, "//", vars)
- for name, eff in vars.items():
- if name in all_vars:
- if all_vars[name] != eff:
- self.error(
- f"Macro {mac.name!r} has "
- f"inconsistent type/cond/size for variable "
- f"{name!r}: "
- f"{all_vars[name]} vs {eff} in {part.instr.name!r}",
- mac.macro,
- )
- else:
- all_vars[name] = eff
- if prevop is not None:
- pushes = list(prevop.output_effects)
- pops = list(reversed(part.instr.input_effects))
- copies: list[tuple[StackEffect, StackEffect]] = []
- while pushes and pops and pushes[-1] == pops[0]:
- src, dst = pushes.pop(), pops.pop(0)
- if src.name == dst.name or dst.name == UNUSED:
- continue
- copies.append((src, dst))
- reads = set(copy[0].name for copy in copies)
- writes = set(copy[1].name for copy in copies)
- if reads & writes:
- self.error(
- f"Macro {mac.name!r} has conflicting copies "
- f"(source of one copy is destination of another): "
- f"{reads & writes}",
- mac.macro,
- )
- prevop = part.instr
-
- def analyze_macro(self, macro: parsing.Macro) -> MacroInstruction:
- components = self.check_macro_components(macro)
- parts: MacroParts = []
- flags = InstructionFlags.newEmpty()
- offset = 0
- for component in components:
- match component:
- case parsing.CacheEffect() as ceffect:
- parts.append(ceffect)
- offset += ceffect.size
- case Instruction() as instr:
- part, offset = self.analyze_instruction(instr, offset)
- parts.append(part)
- if instr.name != "_SAVE_RETURN_OFFSET":
- # _SAVE_RETURN_OFFSET's oparg does not transfer
- flags.add(instr.instr_flags)
- case _:
- assert_never(component)
- format = "IB" if flags.HAS_ARG_FLAG else "IX"
- if offset:
- format += "C" + "0" * (offset - 1)
- return MacroInstruction(macro.name, format, flags, macro, parts, offset)
-
- def analyze_pseudo(self, pseudo: parsing.Pseudo) -> PseudoInstruction:
- targets: list[Instruction | MacroInstruction] = []
- for target_name in pseudo.targets:
- if target_name in self.instrs:
- targets.append(self.instrs[target_name])
- else:
- targets.append(self.macro_instrs[target_name])
- assert targets
- ignored_flags = {"HAS_EVAL_BREAK_FLAG", "HAS_DEOPT_FLAG", "HAS_ERROR_FLAG",
- "HAS_ESCAPES_FLAG"}
- assert len({t.instr_flags.bitmap(ignore=ignored_flags) for t in targets}) == 1
-
- flags = InstructionFlags(**{f"{f}_FLAG" : True for f in pseudo.flags})
- for t in targets:
- flags.add(t.instr_flags)
- return PseudoInstruction(pseudo.name, targets, flags)
-
- def analyze_instruction(
- self, instr: Instruction, offset: int
- ) -> tuple[Component, int]:
- active_effects: list[ActiveCacheEffect] = []
- for ceffect in instr.cache_effects:
- if ceffect.name != UNUSED:
- active_effects.append(ActiveCacheEffect(ceffect, offset))
- offset += ceffect.size
- return (
- Component(instr, active_effects),
- offset,
- )
-
- def check_macro_components(
- self, macro: parsing.Macro
- ) -> list[InstructionOrCacheEffect]:
- components: list[InstructionOrCacheEffect] = []
- for uop in macro.uops:
- match uop:
- case parsing.OpName(name):
- if name not in self.instrs:
- self.error(f"Unknown instruction {name!r}", macro)
- else:
- components.append(self.instrs[name])
- case parsing.CacheEffect():
- components.append(uop)
- case _:
- assert_never(uop)
- return components
-
- def report_non_viable_uops(self, jsonfile: str) -> None:
- print("The following ops are not viable uops:")
- skips = {
- "CACHE",
- "RESERVED",
- "INTERPRETER_EXIT",
- "JUMP_BACKWARD",
- "LOAD_FAST_LOAD_FAST",
- "LOAD_CONST_LOAD_FAST",
- "STORE_FAST_STORE_FAST",
- "POP_JUMP_IF_TRUE",
- "POP_JUMP_IF_FALSE",
- "_ITER_JUMP_LIST",
- "_ITER_JUMP_TUPLE",
- "_ITER_JUMP_RANGE",
- }
- try:
- # Secret feature: if bmraw.json exists, print and sort by execution count
- counts = load_execution_counts(jsonfile)
- except FileNotFoundError as err:
- counts = {}
- non_viable = [
- instr
- for instr in self.instrs.values()
- if instr.name not in skips
- and not instr.name.startswith("INSTRUMENTED_")
- and not instr.is_viable_uop()
- ]
- non_viable.sort(key=lambda instr: (-counts.get(instr.name, 0), instr.name))
- for instr in non_viable:
- if instr.name in counts:
- scount = f"{counts[instr.name]:,}"
- else:
- scount = ""
- print(f" {scount:>15} {instr.name:<35}", end="")
- if instr.name in self.families:
- print(" (unspecialized)", end="")
- elif instr.family is not None:
- print(f" (specialization of {instr.family.name})", end="")
- print()
-
-
-def load_execution_counts(jsonfile: str) -> dict[str, int]:
- import json
-
- with open(jsonfile) as f:
- jsondata = json.load(f)
-
- # Look for keys like "opcode[LOAD_FAST].execution_count"
- prefix = "opcode["
- suffix = "].execution_count"
- res: dict[str, int] = {}
- for key, value in jsondata.items():
- if key.startswith(prefix) and key.endswith(suffix):
- res[key[len(prefix) : -len(suffix)]] = value
- return res
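
The pop/push offsetting in effect_counts() above is subtle: a later component
may consume values that an earlier component pushed, so those values count
toward neither the macro's net inputs nor its net outputs. A minimal
standalone sketch of just that arithmetic, using hypothetical
(n_inputs, n_outputs) pairs in place of the real Component objects:

    def net_effects(components: list[tuple[int, int]]) -> tuple[int, int]:
        """Net (input, output) stack effect of a macro's components."""
        input_count = output_count = 0
        for delta_i, delta_o in components:
            # Values left on the stack by earlier components satisfy
            # this component's inputs first.
            offset = min(delta_i, output_count)
            input_count += delta_i - offset
            output_count += delta_o - offset
        return input_count, output_count

    # A two-part macro: part 1 pops 0/pushes 1, part 2 pops 2/pushes 1.
    # Part 2 reuses part 1's push, so the macro nets 1 input, 1 output.
    assert net_effects([(0, 1), (2, 1)]) == (1, 1)
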
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index d7aca50..82ef888 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -302,7 +302,81 @@ def is_infallible(op: parser.InstDef) -> bool:
)
-from flags import makes_escaping_api_call
+NON_ESCAPING_FUNCTIONS = (
+ "Py_INCREF",
+ "_PyDictOrValues_IsValues",
+ "_PyObject_DictOrValuesPointer",
+ "_PyDictOrValues_GetValues",
+ "_PyObject_MakeInstanceAttributesFromDict",
+ "Py_DECREF",
+ "_Py_DECREF_SPECIALIZED",
+ "DECREF_INPUTS_AND_REUSE_FLOAT",
+ "PyUnicode_Append",
+ "_PyLong_IsZero",
+ "Py_SIZE",
+ "Py_TYPE",
+ "PyList_GET_ITEM",
+ "PyTuple_GET_ITEM",
+ "PyList_GET_SIZE",
+ "PyTuple_GET_SIZE",
+ "Py_ARRAY_LENGTH",
+ "Py_Unicode_GET_LENGTH",
+ "PyUnicode_READ_CHAR",
+ "_Py_SINGLETON",
+ "PyUnicode_GET_LENGTH",
+ "_PyLong_IsCompact",
+ "_PyLong_IsNonNegativeCompact",
+ "_PyLong_CompactValue",
+ "_Py_NewRef",
+ "_Py_IsImmortal",
+ "_Py_STR",
+ "_PyLong_Add",
+ "_PyLong_Multiply",
+ "_PyLong_Subtract",
+ "Py_NewRef",
+ "_PyList_ITEMS",
+ "_PyTuple_ITEMS",
+ "_PyList_AppendTakeRef",
+ "_Py_atomic_load_uintptr_relaxed",
+ "_PyFrame_GetCode",
+ "_PyThreadState_HasStackSpace",
+)
+
+ESCAPING_FUNCTIONS = (
+ "import_name",
+ "import_from",
+)
+
+
+def makes_escaping_api_call(instr: parser.InstDef) -> bool:
+ if "CALL_INTRINSIC" in instr.name:
+ return True
+ tkns = iter(instr.tokens)
+ for tkn in tkns:
+ if tkn.kind != lexer.IDENTIFIER:
+ continue
+ try:
+ next_tkn = next(tkns)
+ except StopIteration:
+ return False
+ if next_tkn.kind != lexer.LPAREN:
+ continue
+ if tkn.text in ESCAPING_FUNCTIONS:
+ return True
+ if not tkn.text.startswith("Py") and not tkn.text.startswith("_Py"):
+ continue
+ if tkn.text.endswith("Check"):
+ continue
+ if tkn.text.startswith("Py_Is"):
+ continue
+ if tkn.text.endswith("CheckExact"):
+ continue
+ if tkn.text in NON_ESCAPING_FUNCTIONS:
+ continue
+ return True
+ return False
+
+
EXITS = {
"DISPATCH",
diff --git a/Tools/cases_generator/flags.py b/Tools/cases_generator/flags.py
deleted file mode 100644
index bf76112..0000000
--- a/Tools/cases_generator/flags.py
+++ /dev/null
@@ -1,191 +0,0 @@
-import dataclasses
-
-from formatting import Formatter
-import lexer as lx
-import parsing
-from typing import AbstractSet
-
-NON_ESCAPING_FUNCTIONS = (
- "Py_INCREF",
- "_PyDictOrValues_IsValues",
- "_PyObject_DictOrValuesPointer",
- "_PyDictOrValues_GetValues",
- "_PyObject_MakeInstanceAttributesFromDict",
- "Py_DECREF",
- "_Py_DECREF_SPECIALIZED",
- "DECREF_INPUTS_AND_REUSE_FLOAT",
- "PyUnicode_Append",
- "_PyLong_IsZero",
- "Py_SIZE",
- "Py_TYPE",
- "PyList_GET_ITEM",
- "PyTuple_GET_ITEM",
- "PyList_GET_SIZE",
- "PyTuple_GET_SIZE",
- "Py_ARRAY_LENGTH",
- "Py_Unicode_GET_LENGTH",
- "PyUnicode_READ_CHAR",
- "_Py_SINGLETON",
- "PyUnicode_GET_LENGTH",
- "_PyLong_IsCompact",
- "_PyLong_IsNonNegativeCompact",
- "_PyLong_CompactValue",
- "_Py_NewRef",
- "_Py_IsImmortal",
- "_Py_STR",
- "_PyLong_Add",
- "_PyLong_Multiply",
- "_PyLong_Subtract",
- "Py_NewRef",
- "_PyList_ITEMS",
- "_PyTuple_ITEMS",
- "_PyList_AppendTakeRef",
- "_Py_atomic_load_uintptr_relaxed",
- "_PyFrame_GetCode",
- "_PyThreadState_HasStackSpace",
-)
-
-ESCAPING_FUNCTIONS = (
- "import_name",
- "import_from",
-)
-
-
-def makes_escaping_api_call(instr: parsing.InstDef) -> bool:
- if "CALL_INTRINSIC" in instr.name:
- return True
- tkns = iter(instr.tokens)
- for tkn in tkns:
- if tkn.kind != lx.IDENTIFIER:
- continue
- try:
- next_tkn = next(tkns)
- except StopIteration:
- return False
- if next_tkn.kind != lx.LPAREN:
- continue
- if tkn.text in ESCAPING_FUNCTIONS:
- return True
- if not tkn.text.startswith("Py") and not tkn.text.startswith("_Py"):
- continue
- if tkn.text.endswith("Check"):
- continue
- if tkn.text.startswith("Py_Is"):
- continue
- if tkn.text.endswith("CheckExact"):
- continue
- if tkn.text in NON_ESCAPING_FUNCTIONS:
- continue
- return True
- return False
-
-
-@dataclasses.dataclass
-class InstructionFlags:
- """Construct and manipulate instruction flags"""
-
- HAS_ARG_FLAG: bool = False
- HAS_CONST_FLAG: bool = False
- HAS_NAME_FLAG: bool = False
- HAS_JUMP_FLAG: bool = False
- HAS_FREE_FLAG: bool = False
- HAS_LOCAL_FLAG: bool = False
- HAS_EVAL_BREAK_FLAG: bool = False
- HAS_DEOPT_FLAG: bool = False
- HAS_ERROR_FLAG: bool = False
- HAS_ESCAPES_FLAG: bool = False
-
- def __post_init__(self) -> None:
- self.bitmask = {name: (1 << i) for i, name in enumerate(self.names())}
-
- @staticmethod
- def fromInstruction(instr: parsing.InstDef) -> "InstructionFlags":
- has_free = (
- variable_used(instr, "PyCell_New")
- or variable_used(instr, "PyCell_GET")
- or variable_used(instr, "PyCell_SET")
- )
-
- return InstructionFlags(
- HAS_ARG_FLAG=variable_used(instr, "oparg"),
- HAS_CONST_FLAG=variable_used(instr, "FRAME_CO_CONSTS"),
- HAS_NAME_FLAG=variable_used(instr, "FRAME_CO_NAMES"),
- HAS_JUMP_FLAG=variable_used(instr, "JUMPBY"),
- HAS_FREE_FLAG=has_free,
- HAS_LOCAL_FLAG=(
- variable_used(instr, "GETLOCAL") or variable_used(instr, "SETLOCAL")
- )
- and not has_free,
- HAS_EVAL_BREAK_FLAG=variable_used(instr, "CHECK_EVAL_BREAKER"),
- HAS_DEOPT_FLAG=variable_used(instr, "DEOPT_IF"),
- HAS_ERROR_FLAG=(
- variable_used(instr, "ERROR_IF")
- or variable_used(instr, "error")
- or variable_used(instr, "pop_1_error")
- or variable_used(instr, "exception_unwind")
- or variable_used(instr, "resume_with_error")
- ),
- HAS_ESCAPES_FLAG=makes_escaping_api_call(instr),
- )
-
- @staticmethod
- def newEmpty() -> "InstructionFlags":
- return InstructionFlags()
-
- def add(self, other: "InstructionFlags") -> None:
- for name, value in dataclasses.asdict(other).items():
- if value:
- setattr(self, name, value)
-
- def names(self, value: bool | None = None) -> list[str]:
- if value is None:
- return list(dataclasses.asdict(self).keys())
- return [n for n, v in dataclasses.asdict(self).items() if v == value]
-
- def bitmap(self, ignore: AbstractSet[str] = frozenset()) -> int:
- flags = 0
- assert all(hasattr(self, name) for name in ignore)
- for name in self.names():
- if getattr(self, name) and name not in ignore:
- flags |= self.bitmask[name]
- return flags
-
- @classmethod
- def emit_macros(cls, out: Formatter) -> None:
- flags = cls.newEmpty()
- for name, value in flags.bitmask.items():
- out.emit(f"#define {name} ({value})")
-
- for name, value in flags.bitmask.items():
- out.emit(
- f"#define OPCODE_{name[:-len('_FLAG')]}(OP) "
- f"(_PyOpcode_opcode_metadata[OP].flags & ({name}))"
- )
-
-
-def variable_used(node: parsing.Node, name: str) -> bool:
- """Determine whether a variable with a given name is used in a node."""
- return any(
- token.kind == "IDENTIFIER" and token.text == name for token in node.tokens
- )
-
-
-def variable_used_unspecialized(node: parsing.Node, name: str) -> bool:
- """Like variable_used(), but skips #if ENABLE_SPECIALIZATION blocks."""
- tokens: list[lx.Token] = []
- skipping = False
- for i, token in enumerate(node.tokens):
- if token.kind == "CMACRO":
- text = "".join(token.text.split())
- # TODO: Handle nested #if
- if text == "#if":
- if i + 1 < len(node.tokens) and node.tokens[i + 1].text in (
- "ENABLE_SPECIALIZATION",
- "TIER_ONE",
- ):
- skipping = True
- elif text in ("#else", "#endif"):
- skipping = False
- if not skipping:
- tokens.append(token)
- return any(token.kind == "IDENTIFIER" and token.text == name for token in tokens)
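
InstructionFlags maps each boolean field to a bit position in declaration
order (see __post_init__ above), so bitmap() reduces to OR-ing the bits of
the set flags. The same idea in miniature, with a hypothetical two-flag
subset standing in for the full dataclass:

    FLAG_NAMES = ["HAS_ARG_FLAG", "HAS_JUMP_FLAG"]  # abridged subset
    BITMASK = {name: 1 << i for i, name in enumerate(FLAG_NAMES)}

    def bitmap(flags: dict[str, bool], ignore: frozenset[str] = frozenset()) -> int:
        result = 0
        for name, is_set in flags.items():
            if is_set and name not in ignore:
                result |= BITMASK[name]
        return result

    # HAS_ARG_FLAG -> bit 0, HAS_JUMP_FLAG -> bit 1
    assert bitmap({"HAS_ARG_FLAG": True, "HAS_JUMP_FLAG": True}) == 0b11
    assert bitmap({"HAS_ARG_FLAG": True, "HAS_JUMP_FLAG": True},
                  ignore=frozenset({"HAS_JUMP_FLAG"})) == 0b01
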
diff --git a/Tools/cases_generator/formatting.py b/Tools/cases_generator/formatting.py
deleted file mode 100644
index 4fd9172..0000000
--- a/Tools/cases_generator/formatting.py
+++ /dev/null
@@ -1,206 +0,0 @@
-import contextlib
-import re
-import typing
-from collections.abc import Iterator
-
-from parsing import StackEffect, Family
-
-UNUSED = "unused"
-
-
-class Formatter:
- """Wraps an output stream with the ability to indent etc."""
-
- stream: typing.TextIO
- prefix: str
- emit_line_directives: bool = False
- lineno: int # Next line number, 1-based
- filename: str # Slightly improved stream.filename
- nominal_lineno: int
- nominal_filename: str
-
- def __init__(
- self,
- stream: typing.TextIO,
- indent: int,
- emit_line_directives: bool = False,
- comment: str = "//",
- ) -> None:
- self.stream = stream
- self.prefix = " " * indent
- self.emit_line_directives = emit_line_directives
- self.comment = comment
- self.lineno = 1
- self.filename = prettify_filename(self.stream.name)
- self.nominal_lineno = 1
- self.nominal_filename = self.filename
-
- def write_raw(self, s: str) -> None:
- self.stream.write(s)
- newlines = s.count("\n")
- self.lineno += newlines
- self.nominal_lineno += newlines
-
- def emit(self, arg: str) -> None:
- if arg:
- self.write_raw(f"{self.prefix}{arg}\n")
- else:
- self.write_raw("\n")
-
- def set_lineno(self, lineno: int, filename: str) -> None:
- if self.emit_line_directives:
- if lineno != self.nominal_lineno or filename != self.nominal_filename:
- self.emit(f'#line {lineno} "{filename}"')
- self.nominal_lineno = lineno
- self.nominal_filename = filename
-
- def reset_lineno(self) -> None:
- if self.lineno != self.nominal_lineno or self.filename != self.nominal_filename:
- self.set_lineno(self.lineno + 1, self.filename)
-
- @contextlib.contextmanager
- def indent(self) -> Iterator[None]:
- self.prefix += " "
- yield
- self.prefix = self.prefix[:-4]
-
- @contextlib.contextmanager
- def block(self, head: str, tail: str = "") -> Iterator[None]:
- if head:
- self.emit(head + " {")
- else:
- self.emit("{")
- with self.indent():
- yield
- self.emit("}" + tail)
-
- def stack_adjust(
- self,
- input_effects: list[StackEffect],
- output_effects: list[StackEffect],
- ) -> None:
- shrink, isym = list_effect_size(input_effects)
- grow, osym = list_effect_size(output_effects)
- diff = grow - shrink
- if isym and isym != osym:
- self.emit(f"STACK_SHRINK({isym});")
- if diff < 0:
- self.emit(f"STACK_SHRINK({-diff});")
- if diff > 0:
- self.emit(f"STACK_GROW({diff});")
- if osym and osym != isym:
- self.emit(f"STACK_GROW({osym});")
-
- def declare(self, dst: StackEffect, src: StackEffect | None) -> None:
- if dst.name == UNUSED or dst.cond == "0":
- return
- typ = f"{dst.type}" if dst.type else "PyObject *"
- if src:
- cast = self.cast(dst, src)
- initexpr = f"{cast}{src.name}"
- if src.cond and src.cond != "1":
- initexpr = f"{parenthesize_cond(src.cond)} ? {initexpr} : NULL"
- init = f" = {initexpr}"
- elif dst.cond and dst.cond != "1":
- init = " = NULL"
- else:
- init = ""
- sepa = "" if typ.endswith("*") else " "
- self.emit(f"{typ}{sepa}{dst.name}{init};")
-
- def assign(self, dst: StackEffect, src: StackEffect) -> None:
- if src.name == UNUSED or dst.name == UNUSED:
- return
- cast = self.cast(dst, src)
- if re.match(r"^REG\(oparg(\d+)\)$", dst.name):
- self.emit(f"Py_XSETREF({dst.name}, {cast}{src.name});")
- else:
- stmt = f"{dst.name} = {cast}{src.name};"
- if src.cond and src.cond != "1":
- if src.cond == "0":
- # It will not be executed
- return
- stmt = f"if ({src.cond}) {{ {stmt} }}"
- self.emit(stmt)
-
- def cast(self, dst: StackEffect, src: StackEffect) -> str:
- return f"({dst.type or 'PyObject *'})" if src.type != dst.type else ""
-
- def static_assert_family_size(
- self, name: str, family: Family | None, cache_offset: int
- ) -> None:
- """Emit a static_assert for the size of a family, if known.
-
- This will fail at compile time if the cache size computed from
- the instruction definition does not match the size of the struct
- used by specialize.c.
- """
- if family and name == family.name:
- cache_size = family.size
- if cache_size:
- self.emit(
- f"static_assert({cache_size} == {cache_offset}, "
- f'"incorrect cache size");'
- )
-
-
-def prettify_filename(filename: str) -> str:
-    # Make the filename more user-friendly and less platform-specific;
-    # it is only used for error reporting at this point.
- filename = filename.replace("\\", "/")
- if filename.startswith("./"):
- filename = filename[2:]
- if filename.endswith(".new"):
- filename = filename[:-4]
- return filename
-
-
-def list_effect_size(effects: list[StackEffect]) -> tuple[int, str]:
- numeric = 0
- symbolic: list[str] = []
- for effect in effects:
- diff, sym = effect_size(effect)
- numeric += diff
- if sym:
- symbolic.append(maybe_parenthesize(sym))
- return numeric, " + ".join(symbolic)
-
-
-def effect_size(effect: StackEffect) -> tuple[int, str]:
- """Return the 'size' impact of a stack effect.
-
- Returns a tuple (numeric, symbolic) where:
-
- - numeric is an int giving the statically analyzable size of the effect
- - symbolic is a string representing a variable effect (e.g. 'oparg*2')
-
- At most one of these will be non-zero / non-empty.
- """
- if effect.size:
- assert not effect.cond, "Array effects cannot have a condition"
- return 0, effect.size
- elif effect.cond:
- if effect.cond in ("0", "1"):
- return int(effect.cond), ""
- return 0, f"{maybe_parenthesize(effect.cond)} ? 1 : 0"
- else:
- return 1, ""
-
-
-def maybe_parenthesize(sym: str) -> str:
- """Add parentheses around a string if it contains an operator.
-
- An exception is made for '*' which is common and harmless
- in the context where the symbolic size is used.
- """
- if re.match(r"^[\s\w*]+$", sym):
- return sym
- else:
- return f"({sym})"
-
-
-def parenthesize_cond(cond: str) -> str:
- """Parenthesize a condition, but only if it contains ?: itself."""
- if "?" in cond:
- cond = f"({cond})"
- return cond
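
Formatter's indent() and block() context managers compose: block() emits the
braces and delegates the body to indent(), which is what gives the generated
C its nesting. A self-contained sketch of that pattern against an in-memory
stream (a trimmed-down stand-in for the deleted class, not its full API):

    import contextlib
    import io
    from collections.abc import Iterator

    class MiniFormatter:
        def __init__(self, stream: io.TextIOBase) -> None:
            self.stream = stream
            self.prefix = ""

        def emit(self, line: str) -> None:
            self.stream.write(f"{self.prefix}{line}\n" if line else "\n")

        @contextlib.contextmanager
        def indent(self) -> Iterator[None]:
            self.prefix += "    "
            yield
            self.prefix = self.prefix[:-4]

        @contextlib.contextmanager
        def block(self, head: str) -> Iterator[None]:
            self.emit(f"{head} {{" if head else "{")
            with self.indent():
                yield
            self.emit("}")

    buf = io.StringIO()
    out = MiniFormatter(buf)
    with out.block("switch (opcode)"):
        out.emit("case NOP:")
        with out.indent():
            out.emit("break;")
    print(buf.getvalue())
    # switch (opcode) {
    #     case NOP:
    #         break;
    # }
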
diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py
deleted file mode 100644
index 73b5fc2..0000000
--- a/Tools/cases_generator/generate_cases.py
+++ /dev/null
@@ -1,848 +0,0 @@
-"""Generate the main interpreter switch.
-Reads the instruction definitions from bytecodes.c.
-Writes the cases to generated_cases.c.h, which is #included in ceval.c.
-"""
-
-import argparse
-import contextlib
-import itertools
-import os
-import posixpath
-import sys
-import textwrap
-import typing
-from collections.abc import Iterator
-
-import stacking # Early import to avoid circular import
-from _typing_backports import assert_never
-from analysis import Analyzer
-from formatting import Formatter, list_effect_size
-from flags import InstructionFlags, variable_used
-from instructions import (
- AnyInstruction,
- AbstractInstruction,
- Component,
- Instruction,
- MacroInstruction,
- MacroParts,
- PseudoInstruction,
- TIER_ONE,
- TIER_TWO,
-)
-import parsing
-from parsing import StackEffect
-
-
-HERE = os.path.dirname(__file__)
-ROOT = os.path.join(HERE, "../..")
-THIS = os.path.relpath(__file__, ROOT).replace(os.path.sep, posixpath.sep)
-
-DEFAULT_INPUT = os.path.relpath(os.path.join(ROOT, "Python/bytecodes.c"))
-DEFAULT_OUTPUT = os.path.relpath(os.path.join(ROOT, "Python/generated_cases.c.h"))
-DEFAULT_OPCODE_IDS_H_OUTPUT = os.path.relpath(
- os.path.join(ROOT, "Include/opcode_ids.h")
-)
-DEFAULT_OPCODE_TARGETS_H_OUTPUT = os.path.relpath(
- os.path.join(ROOT, "Python/opcode_targets.h")
-)
-DEFAULT_METADATA_OUTPUT = os.path.relpath(
- os.path.join(ROOT, "Include/internal/pycore_opcode_metadata.h")
-)
-DEFAULT_PYMETADATA_OUTPUT = os.path.relpath(
- os.path.join(ROOT, "Lib/_opcode_metadata.py")
-)
-DEFAULT_EXECUTOR_OUTPUT = os.path.relpath(
- os.path.join(ROOT, "Python/executor_cases.c.h")
-)
-DEFAULT_ABSTRACT_INTERPRETER_OUTPUT = os.path.relpath(
- os.path.join(ROOT, "Python/abstract_interp_cases.c.h")
-)
-
-# Constants used instead of size for macro expansions.
-# Note: 1, 2, 4 must match actual cache entry sizes.
-OPARG_SIZES = {
- "OPARG_FULL": 0,
- "OPARG_CACHE_1": 1,
- "OPARG_CACHE_2": 2,
- "OPARG_CACHE_4": 4,
- "OPARG_TOP": 5,
- "OPARG_BOTTOM": 6,
- "OPARG_SAVE_RETURN_OFFSET": 7,
-}
-
-INSTR_FMT_PREFIX = "INSTR_FMT_"
-
-# TODO: generate all these after updating the DSL
-SPECIALLY_HANDLED_ABSTRACT_INSTR = {
- "LOAD_FAST",
- "LOAD_FAST_CHECK",
- "LOAD_FAST_AND_CLEAR",
- "LOAD_CONST",
- "STORE_FAST",
- "STORE_FAST_MAYBE_NULL",
- "COPY",
- # Arithmetic
- "_BINARY_OP_MULTIPLY_INT",
- "_BINARY_OP_ADD_INT",
- "_BINARY_OP_SUBTRACT_INT",
-}
-
-arg_parser = argparse.ArgumentParser(
- description="Generate the code for the interpreter switch.",
- formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-)
-
-arg_parser.add_argument(
- "-v",
- "--viable",
- help="Print list of non-viable uops and exit",
- action="store_true",
-)
-arg_parser.add_argument(
- "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
-)
-arg_parser.add_argument(
- "-t",
- "--opcode_targets_h",
- type=str,
- help="File with opcode targets for computed gotos",
- default=DEFAULT_OPCODE_TARGETS_H_OUTPUT,
-)
-arg_parser.add_argument(
- "-m",
- "--metadata",
- type=str,
- help="Generated C metadata",
- default=DEFAULT_METADATA_OUTPUT,
-)
-arg_parser.add_argument(
- "-p",
- "--pymetadata",
- type=str,
- help="Generated Python metadata",
- default=DEFAULT_PYMETADATA_OUTPUT,
-)
-arg_parser.add_argument(
- "-l", "--emit-line-directives", help="Emit #line directives", action="store_true"
-)
-arg_parser.add_argument(
- "input", nargs=argparse.REMAINDER, help="Instruction definition file(s)"
-)
-arg_parser.add_argument(
- "-a",
- "--abstract-interpreter-cases",
- type=str,
- help="Write abstract interpreter cases to this file",
- default=DEFAULT_ABSTRACT_INTERPRETER_OUTPUT,
-)
-
-
-class Generator(Analyzer):
- def get_stack_effect_info(
- self, thing: parsing.InstDef | parsing.Macro | parsing.Pseudo
- ) -> tuple[AnyInstruction | None, str, str]:
- def effect_str(effects: list[StackEffect]) -> str:
- n_effect, sym_effect = list_effect_size(effects)
- if sym_effect:
- return f"{sym_effect} + {n_effect}" if n_effect else sym_effect
- return str(n_effect)
-
- instr: AnyInstruction | None
- popped: str | None = None
- pushed: str | None = None
- match thing:
- case parsing.InstDef():
- instr = self.instrs[thing.name]
- popped = effect_str(instr.input_effects)
- pushed = effect_str(instr.output_effects)
- case parsing.Macro():
- instr = self.macro_instrs[thing.name]
- popped, pushed = stacking.get_stack_effect_info_for_macro(instr)
- case parsing.Pseudo():
- instr = self.pseudo_instrs[thing.name]
- # Calculate stack effect, and check that it's the same
- # for all targets.
- for target in self.pseudos[thing.name].targets:
- target_instr = self.instrs.get(target)
- if target_instr is None:
- macro_instr = self.macro_instrs[target]
- popped, pushed = stacking.get_stack_effect_info_for_macro(macro_instr)
- else:
- target_popped = effect_str(target_instr.input_effects)
- target_pushed = effect_str(target_instr.output_effects)
- if popped is None:
- popped, pushed = target_popped, target_pushed
- else:
- assert popped == target_popped
- assert pushed == target_pushed
- case _:
- assert_never(thing)
- assert popped is not None and pushed is not None
- return instr, popped, pushed
-
- @contextlib.contextmanager
- def metadata_item(self, signature: str, open: str, close: str) -> Iterator[None]:
- self.out.emit("")
- self.out.emit(f"extern {signature};")
- self.out.emit("#ifdef NEED_OPCODE_METADATA")
- with self.out.block(f"{signature} {open}", close):
- yield
- self.out.emit("#endif // NEED_OPCODE_METADATA")
-
- def write_stack_effect_functions(self) -> None:
- popped_data: list[tuple[AnyInstruction, str]] = []
- pushed_data: list[tuple[AnyInstruction, str]] = []
- for thing in self.everything:
- if isinstance(thing, parsing.Macro) and thing.name in self.instrs:
- continue
- instr, popped, pushed = self.get_stack_effect_info(thing)
- if instr is not None:
- popped_data.append((instr, popped))
- pushed_data.append((instr, pushed))
-
- def write_function(
- direction: str, data: list[tuple[AnyInstruction, str]]
- ) -> None:
- with self.metadata_item(
- f"int _PyOpcode_num_{direction}(int opcode, int oparg, bool jump)",
- "",
- "",
- ):
- with self.out.block("switch(opcode)"):
- effects = [(instr.name, effect) for instr, effect in data]
- for name, effect in sorted(effects):
- self.out.emit(f"case {name}:")
- self.out.emit(f" return {effect};")
- self.out.emit("default:")
- self.out.emit(" return -1;")
-
- write_function("popped", popped_data)
- write_function("pushed", pushed_data)
- self.out.emit("")
-
- def from_source_files(self) -> str:
- filenames = []
- for filename in self.input_filenames:
- try:
- filename = os.path.relpath(filename, ROOT)
- except ValueError:
- # May happen on Windows if root and temp on different volumes
- pass
- filenames.append(filename.replace(os.path.sep, posixpath.sep))
- paths = f"\n{self.out.comment} ".join(filenames)
- return f"{self.out.comment} from:\n{self.out.comment} {paths}\n"
-
- def write_provenance_header(self) -> None:
- self.out.write_raw(f"{self.out.comment} This file is generated by {THIS}\n")
- self.out.write_raw(self.from_source_files())
- self.out.write_raw(f"{self.out.comment} Do not edit!\n")
-
- def assign_opcode_ids(self) -> None:
- """Assign IDs to opcodes"""
-
- ops: list[tuple[bool, str]] = [] # (has_arg, name) for each opcode
- instrumented_ops: list[str] = []
-
- specialized_ops: set[str] = set()
- for name, family in self.families.items():
- specialized_ops.update(family.members)
-
- for instr in self.macro_instrs.values():
- name = instr.name
- if name in specialized_ops:
- continue
- if name.startswith("INSTRUMENTED_"):
- instrumented_ops.append(name)
- else:
- ops.append((instr.instr_flags.HAS_ARG_FLAG, name))
-
- # Special case: this instruction is implemented in ceval.c
- # rather than bytecodes.c, so we need to add it explicitly
- # here (at least until we add something to bytecodes.c to
- # declare external instructions).
- instrumented_ops.append("INSTRUMENTED_LINE")
-
- # assert lists are unique
- assert len(set(ops)) == len(ops)
- assert len(set(instrumented_ops)) == len(instrumented_ops)
-
- opname: list[str | None] = [None] * 512
- opmap: dict[str, int] = {}
- markers: dict[str, int] = {}
-
- def map_op(op: int, name: str) -> None:
- assert op < len(opname)
- assert opname[op] is None, (op, name)
- assert name not in opmap
- opname[op] = name
- opmap[name] = op
-
- # 0 is reserved for cache entries. This helps debugging.
- map_op(0, "CACHE")
-
- # 17 is reserved as it is the initial value for the specializing counter.
- # This helps catch cases where we attempt to execute a cache.
- map_op(17, "RESERVED")
-
- # 149 is RESUME - it is hard coded as such in Tools/build/deepfreeze.py
- map_op(149, "RESUME")
-
- # Specialized ops appear in their own section
- # Instrumented opcodes are at the end of the valid range
- min_internal = 150
- min_instrumented = 254 - (len(instrumented_ops) - 1)
- assert min_internal + len(specialized_ops) < min_instrumented
-
- next_opcode = 1
- for has_arg, name in sorted(ops):
- if name in opmap:
- continue # an anchored name, like CACHE
- map_op(next_opcode, name)
- if has_arg and "HAVE_ARGUMENT" not in markers:
- markers["HAVE_ARGUMENT"] = next_opcode
-
- while opname[next_opcode] is not None:
- next_opcode += 1
-
- assert next_opcode < min_internal, next_opcode
-
- for i, op in enumerate(sorted(specialized_ops)):
- map_op(min_internal + i, op)
-
- markers["MIN_INSTRUMENTED_OPCODE"] = min_instrumented
- for i, op in enumerate(instrumented_ops):
- map_op(min_instrumented + i, op)
-
- # Pseudo opcodes are after the valid range
- for i, op in enumerate(sorted(self.pseudos)):
- map_op(256 + i, op)
-
- assert 255 not in opmap.values() # 255 is reserved
- self.opmap = opmap
- self.markers = markers
-
- def write_opcode_targets(self, opcode_targets_filename: str) -> None:
- """Write header file that defines the jump target table"""
-
- with open(opcode_targets_filename, "w") as f:
- # Create formatter
- self.out = Formatter(f, 0)
-
- with self.out.block("static void *opcode_targets[256] =", ";"):
- targets = ["_unknown_opcode"] * 256
- for name, op in self.opmap.items():
- if op < 256:
- targets[op] = f"TARGET_{name}"
- f.write(",\n".join([f" &&{s}" for s in targets]))
-
- def write_metadata(self, metadata_filename: str, pymetadata_filename: str) -> None:
- """Write instruction metadata to output file."""
-
- # Compute the set of all instruction formats.
- all_formats: set[str] = set()
- for thing in self.everything:
- format: str | None = None
- match thing:
- case parsing.InstDef():
- format = self.instrs[thing.name].instr_fmt
- case parsing.Macro():
- format = self.macro_instrs[thing.name].instr_fmt
- case parsing.Pseudo():
- # Pseudo instructions exist only in the compiler,
- # so do not have a format
- continue
- case _:
- assert_never(thing)
- assert format is not None
- all_formats.add(format)
-
- # Turn it into a sorted list of enum values.
- format_enums = [INSTR_FMT_PREFIX + format for format in sorted(all_formats)]
-
- with open(metadata_filename, "w") as f:
- # Create formatter
- self.out = Formatter(f, 0)
-
- self.write_provenance_header()
-
- self.out.emit("")
- self.out.emit("#ifndef Py_BUILD_CORE")
- self.out.emit('# error "this header requires Py_BUILD_CORE define"')
- self.out.emit("#endif")
- self.out.emit("")
- self.out.emit("#include <stdbool.h> // bool")
-
- self.write_pseudo_instrs()
-
- self.out.emit("")
- self.out.emit('#include "pycore_uop_ids.h"')
-
- self.write_stack_effect_functions()
-
- # Write the enum definition for instruction formats.
- with self.out.block("enum InstructionFormat", ";"):
- for enum in format_enums:
- self.out.emit(enum + ",")
-
- self.out.emit("")
- self.out.emit(
- "#define IS_VALID_OPCODE(OP) \\\n"
- " (((OP) >= 0) && ((OP) < OPCODE_METADATA_SIZE) && \\\n"
- " (_PyOpcode_opcode_metadata[(OP)].valid_entry))"
- )
-
- self.out.emit("")
- InstructionFlags.emit_macros(self.out)
-
- self.out.emit("")
- with self.out.block("struct opcode_metadata", ";"):
- self.out.emit("bool valid_entry;")
- self.out.emit("enum InstructionFormat instr_format;")
- self.out.emit("int flags;")
- self.out.emit("")
-
- with self.out.block("struct opcode_macro_expansion", ";"):
- self.out.emit("int nuops;")
- self.out.emit(
- "struct { int16_t uop; int8_t size; int8_t offset; } uops[12];"
- )
- self.out.emit("")
-
- for key, value in OPARG_SIZES.items():
- self.out.emit(f"#define {key} {value}")
- self.out.emit("")
-
- self.out.emit(
- "#define OPCODE_METADATA_FLAGS(OP) "
- "(_PyOpcode_opcode_metadata[(OP)].flags & (HAS_ARG_FLAG | HAS_JUMP_FLAG))"
- )
- self.out.emit("#define SAME_OPCODE_METADATA(OP1, OP2) \\")
- self.out.emit(
- " (OPCODE_METADATA_FLAGS(OP1) == OPCODE_METADATA_FLAGS(OP2))"
- )
- self.out.emit("")
-
- # Write metadata array declaration
- self.out.emit("#define OPCODE_METADATA_SIZE 512")
- self.out.emit("#define OPCODE_UOP_NAME_SIZE 512")
- self.out.emit("#define OPCODE_MACRO_EXPANSION_SIZE 256")
-
- with self.metadata_item(
- "const struct opcode_metadata "
- "_PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE]",
- "=",
- ";",
- ):
- # Write metadata for each instruction
-                sorted_things = sorted(self.everything, key=lambda t: t.name)
- for thing in sorted_things:
- match thing:
- case parsing.InstDef():
- self.write_metadata_for_inst(self.instrs[thing.name])
- case parsing.Macro():
- if thing.name not in self.instrs:
- self.write_metadata_for_macro(
- self.macro_instrs[thing.name]
- )
- case parsing.Pseudo():
- self.write_metadata_for_pseudo(
- self.pseudo_instrs[thing.name]
- )
- case _:
- assert_never(thing)
-
- with self.metadata_item(
- "const struct opcode_macro_expansion "
- "_PyOpcode_macro_expansion[OPCODE_MACRO_EXPANSION_SIZE]",
- "=",
- ";",
- ):
- # Write macro expansion for each non-pseudo instruction
- for mac in sorted(self.macro_instrs.values(), key=lambda t: t.name):
- if is_super_instruction(mac):
- # Special-case the heck out of super-instructions
- self.write_super_expansions(mac.name)
- else:
- self.write_macro_expansions(
- mac.name, mac.parts, mac.cache_offset
- )
-
- with self.metadata_item(
- "const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE]", "=", ";"
- ):
- self.write_uop_items(lambda name, counter: f'[{name}] = "{name}",')
-
- with self.metadata_item(
- f"const char *const _PyOpcode_OpName[{1 + max(self.opmap.values())}]",
- "=",
- ";",
- ):
- for name in sorted(self.opmap):
- self.out.emit(f'[{name}] = "{name}",')
-
- with self.metadata_item(
- f"const uint8_t _PyOpcode_Caches[256]",
- "=",
- ";",
- ):
- family_member_names: set[str] = set()
- for family in self.families.values():
- family_member_names.update(family.members)
- for mac in self.macro_instrs.values():
- if (
- mac.cache_offset > 0
- and mac.name not in family_member_names
- and not mac.name.startswith("INSTRUMENTED_")
- ):
- self.out.emit(f"[{mac.name}] = {mac.cache_offset},")
-
- deoptcodes = {}
- for name, op in self.opmap.items():
- if op < 256:
- deoptcodes[name] = name
- for name, family in self.families.items():
- for m in family.members:
- deoptcodes[m] = name
- # special case:
- deoptcodes["BINARY_OP_INPLACE_ADD_UNICODE"] = "BINARY_OP"
-
- with self.metadata_item(f"const uint8_t _PyOpcode_Deopt[256]", "=", ";"):
- for opt, deopt in sorted(deoptcodes.items()):
- self.out.emit(f"[{opt}] = {deopt},")
-
- self.out.emit("")
- self.out.emit("#define EXTRA_CASES \\")
- valid_opcodes = set(self.opmap.values())
- with self.out.indent():
- for op in range(256):
- if op not in valid_opcodes:
- self.out.emit(f"case {op}: \\")
- self.out.emit(" ;\n")
-
- with open(pymetadata_filename, "w") as f:
- # Create formatter
- self.out = Formatter(f, 0, comment="#")
-
- self.write_provenance_header()
-
- # emit specializations
- specialized_ops = set()
-
- self.out.emit("")
- self.out.emit("_specializations = {")
- for name, family in self.families.items():
- with self.out.indent():
- self.out.emit(f'"{family.name}": [')
- with self.out.indent():
- for m in family.members:
- self.out.emit(f'"{m}",')
- specialized_ops.update(family.members)
- self.out.emit(f"],")
- self.out.emit("}")
-
- # Handle special case
- self.out.emit("")
- self.out.emit("# An irregular case:")
- self.out.emit(
- '_specializations["BINARY_OP"].append('
- '"BINARY_OP_INPLACE_ADD_UNICODE")'
- )
- specialized_ops.add("BINARY_OP_INPLACE_ADD_UNICODE")
-
- ops = sorted((id, name) for (name, id) in self.opmap.items())
- # emit specialized opmap
- self.out.emit("")
- with self.out.block("_specialized_opmap ="):
- for op, name in ops:
- if name in specialized_ops:
- self.out.emit(f"'{name}': {op},")
-
- # emit opmap
- self.out.emit("")
- with self.out.block("opmap ="):
- for op, name in ops:
- if name not in specialized_ops:
- self.out.emit(f"'{name}': {op},")
-
- for name in ["MIN_INSTRUMENTED_OPCODE", "HAVE_ARGUMENT"]:
- self.out.emit(f"{name} = {self.markers[name]}")
-
- def write_pseudo_instrs(self) -> None:
- """Write the IS_PSEUDO_INSTR macro"""
- self.out.emit("\n\n#define IS_PSEUDO_INSTR(OP) ( \\")
- for op in self.pseudos:
- self.out.emit(f" ((OP) == {op}) || \\")
- self.out.emit(f" 0)")
-
- def write_uop_items(self, make_text: typing.Callable[[str, int], str]) -> None:
- """Write '#define XXX NNN' for each uop"""
- counter = 300 # TODO: Avoid collision with pseudo instructions
- seen = set()
-
- def add(name: str) -> None:
- if name in seen:
- return
- nonlocal counter
- self.out.emit(make_text(name, counter))
- counter += 1
- seen.add(name)
-
- # These two are first by convention
- add("_EXIT_TRACE")
- add("_SET_IP")
-
-        for instr in sorted(self.instrs.values(), key=lambda t: t.name):
- # Skip ops that are also macros -- those are desugared inst()s
- if instr.name not in self.macros:
- add(instr.name)
-
- def write_macro_expansions(
- self, name: str, parts: MacroParts, cache_offset: int
- ) -> None:
- """Write the macro expansions for a macro-instruction."""
-        # TODO: Refactor to share code with write_body(), is_viable_uop(), etc.
- offset = 0 # Cache effect offset
- expansions: list[tuple[str, int, int]] = [] # [(name, size, offset), ...]
- for part in parts:
- if isinstance(part, Component):
- # Skip specializations
- if "specializing" in part.instr.annotations:
- continue
- # All other component instructions must be viable uops
- if not part.instr.is_viable_uop() and "replaced" not in part.instr.annotations:
- # This note just reminds us about macros that cannot
- # be expanded to Tier 2 uops. It is not an error.
- # Suppress it using 'replaced op(...)' for macros having
- # manual translation in translate_bytecode_to_trace()
- # in Python/optimizer.c.
- if len(parts) > 1 or part.instr.name != name:
- self.note(
- f"Part {part.instr.name} of {name} is not a viable uop",
- part.instr.inst,
- )
- return
- if not part.active_caches:
- if part.instr.name == "_SAVE_RETURN_OFFSET":
- size, offset = OPARG_SIZES["OPARG_SAVE_RETURN_OFFSET"], cache_offset
- else:
- size, offset = OPARG_SIZES["OPARG_FULL"], 0
- else:
-                # If this assert triggers, is_viable_uop() lied
- assert len(part.active_caches) == 1, (name, part.instr.name)
- cache = part.active_caches[0]
- size, offset = cache.effect.size, cache.offset
- expansions.append((part.instr.name, size, offset))
- assert len(expansions) > 0, f"Macro {name} has empty expansion?!"
- self.write_expansions(name, expansions)
-
- def write_super_expansions(self, name: str) -> None:
- """Write special macro expansions for super-instructions.
-
- If you get an assertion failure here, you probably have accidentally
- violated one of the assumptions here.
-
- - A super-instruction's name is of the form FIRST_SECOND where
- FIRST and SECOND are regular instructions whose name has the
- form FOO_BAR. Thus, there must be exactly 3 underscores.
- Example: LOAD_CONST_STORE_FAST.
-
-        - A super-instruction's body uses `oparg1` and `oparg2`, and no
- other instruction's body uses those variable names.
-
- - A super-instruction has no active (used) cache entries.
-
- In the expansion, the first instruction's operand is all but the
- bottom 4 bits of the super-instruction's oparg, and the second
- instruction's operand is the bottom 4 bits. We use the special
- size codes OPARG_TOP and OPARG_BOTTOM for these.
- """
- pieces = name.split("_")
- assert len(pieces) == 4, f"{name} doesn't look like a super-instr"
- name1 = "_".join(pieces[:2])
- name2 = "_".join(pieces[2:])
- assert name1 in self.instrs, f"{name1} doesn't match any instr"
- assert name2 in self.instrs, f"{name2} doesn't match any instr"
- instr1 = self.instrs[name1]
- instr2 = self.instrs[name2]
- assert not instr1.active_caches, f"{name1} has active caches"
- assert not instr2.active_caches, f"{name2} has active caches"
- expansions: list[tuple[str, int, int]] = [
- (name1, OPARG_SIZES["OPARG_TOP"], 0),
- (name2, OPARG_SIZES["OPARG_BOTTOM"], 0),
- ]
- self.write_expansions(name, expansions)
-
- def write_expansions(
- self, name: str, expansions: list[tuple[str, int, int]]
- ) -> None:
- pieces = [
- f"{{ {name}, {size}, {offset} }}" for name, size, offset in expansions
- ]
- self.out.emit(
- f"[{name}] = "
- f"{{ .nuops = {len(pieces)}, .uops = {{ {', '.join(pieces)} }} }},"
- )
-
- def emit_metadata_entry(self, name: str, fmt: str | None, flags: InstructionFlags) -> None:
- flag_names = flags.names(value=True)
- if not flag_names:
- flag_names.append("0")
- fmt_macro = "0" if fmt is None else INSTR_FMT_PREFIX + fmt
- self.out.emit(
- f"[{name}] = {{ true, {fmt_macro},"
- f" {' | '.join(flag_names)} }},"
- )
-
- def write_metadata_for_inst(self, instr: Instruction) -> None:
- """Write metadata for a single instruction."""
- self.emit_metadata_entry(instr.name, instr.instr_fmt, instr.instr_flags)
-
- def write_metadata_for_macro(self, mac: MacroInstruction) -> None:
- """Write metadata for a macro-instruction."""
- self.emit_metadata_entry(mac.name, mac.instr_fmt, mac.instr_flags)
-
- def write_metadata_for_pseudo(self, ps: PseudoInstruction) -> None:
- """Write metadata for a macro-instruction."""
- self.emit_metadata_entry(ps.name, None, ps.instr_flags)
-
- def write_instructions(
- self, output_filename: str, emit_line_directives: bool
- ) -> None:
- """Write instructions to output file."""
- with open(output_filename, "w") as f:
- # Create formatter
- self.out = Formatter(f, 8, emit_line_directives)
-
- self.write_provenance_header()
-
- self.out.write_raw("\n")
- self.out.write_raw("#ifdef TIER_TWO\n")
- self.out.write_raw(" #error \"This file is for Tier 1 only\"\n")
- self.out.write_raw("#endif\n")
- self.out.write_raw("#define TIER_ONE 1\n")
-
- # Write and count instructions of all kinds
- n_macros = 0
- cases = []
- for thing in self.everything:
- match thing:
- case parsing.InstDef():
- pass
- case parsing.Macro():
- n_macros += 1
- mac = self.macro_instrs[thing.name]
- cases.append((mac.name, mac))
- case parsing.Pseudo():
- pass
- case _:
- assert_never(thing)
- cases.sort()
- for _, mac in cases:
- stacking.write_macro_instr(mac, self.out)
-
- self.out.write_raw("\n")
- self.out.write_raw("#undef TIER_ONE\n")
-
- print(
- f"Wrote {n_macros} cases to {output_filename}",
- file=sys.stderr,
- )
-
- def write_executor_instructions(
- self, executor_filename: str, emit_line_directives: bool
- ) -> None:
- """Generate cases for the Tier 2 interpreter."""
- n_uops = 0
- with open(executor_filename, "w") as f:
- self.out = Formatter(f, 8, emit_line_directives)
- self.write_provenance_header()
-
- self.out.write_raw("\n")
- self.out.write_raw("#ifdef TIER_ONE\n")
- self.out.write_raw(" #error \"This file is for Tier 2 only\"\n")
- self.out.write_raw("#endif\n")
- self.out.write_raw("#define TIER_TWO 2\n")
-
- for instr in self.instrs.values():
- if instr.is_viable_uop():
- n_uops += 1
- self.out.emit("")
- with self.out.block(f"case {instr.name}:"):
- if instr.instr_flags.HAS_ARG_FLAG:
- self.out.emit("oparg = CURRENT_OPARG();")
- stacking.write_single_instr(instr, self.out, tier=TIER_TWO)
- if instr.check_eval_breaker:
- self.out.emit("CHECK_EVAL_BREAKER();")
- self.out.emit("break;")
-
- self.out.write_raw("\n")
- self.out.write_raw("#undef TIER_TWO\n")
-
- print(
- f"Wrote {n_uops} cases to {executor_filename}",
- file=sys.stderr,
- )
-
- def write_abstract_interpreter_instructions(
- self, abstract_interpreter_filename: str, emit_line_directives: bool
- ) -> None:
- """Generate cases for the Tier 2 abstract interpreter/analzyer."""
- with open(abstract_interpreter_filename, "w") as f:
- self.out = Formatter(f, 8, emit_line_directives)
- self.write_provenance_header()
- for instr in self.instrs.values():
- instr = AbstractInstruction(instr.inst)
- if (
- instr.is_viable_uop()
- and instr.name not in SPECIALLY_HANDLED_ABSTRACT_INSTR
- ):
- self.out.emit("")
- with self.out.block(f"case {instr.name}:"):
- instr.write(self.out, tier=TIER_TWO)
- self.out.emit("break;")
- print(
- f"Wrote some stuff to {abstract_interpreter_filename}",
- file=sys.stderr,
- )
-
-
-def is_super_instruction(mac: MacroInstruction) -> bool:
- if (
- len(mac.parts) == 1
- and isinstance(mac.parts[0], Component)
- and variable_used(mac.parts[0].instr.inst, "oparg1")
- ):
- assert variable_used(mac.parts[0].instr.inst, "oparg2")
- return True
- else:
- return False
-
-
-def main() -> None:
- """Parse command line, parse input, analyze, write output."""
- args = arg_parser.parse_args() # Prints message and sys.exit(2) on error
- if len(args.input) == 0:
- args.input.append(DEFAULT_INPUT)
-
- # Raises OSError if input unreadable
- a = Generator(args.input)
-
- a.parse() # Raises SyntaxError on failure
- a.analyze() # Prints messages and sets a.errors on failure
- if a.errors:
- sys.exit(f"Found {a.errors} errors")
- if args.viable:
- # Load execution counts from bmraw.json, if it exists
- a.report_non_viable_uops("bmraw.json")
- return
-
- # These raise OSError if output can't be written
-
- a.assign_opcode_ids()
- a.write_abstract_interpreter_instructions(
- args.abstract_interpreter_cases, args.emit_line_directives
- )
-
-
-if __name__ == "__main__":
- main()
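
The OPARG_TOP/OPARG_BOTTOM size codes used by write_super_expansions() encode
the 4-bit oparg split described in its docstring: the first half of a
super-instruction receives everything above the bottom 4 bits, the second
half the bottom 4 bits. In plain arithmetic (a sketch of the decoding side,
which lives in the interpreter rather than in this generator):

    def split_super_oparg(oparg: int) -> tuple[int, int]:
        # OPARG_TOP: all but the bottom 4 bits; OPARG_BOTTOM: the rest.
        return oparg >> 4, oparg & 0x0F

    # E.g. LOAD_FAST_LOAD_FAST with oparg 0x23 loads locals 2 and 3.
    assert split_super_oparg(0x23) == (2, 3)
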
diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py
deleted file mode 100644
index 149a088..0000000
--- a/Tools/cases_generator/instructions.py
+++ /dev/null
@@ -1,355 +0,0 @@
-import dataclasses
-import re
-import typing
-
-from flags import InstructionFlags, variable_used, variable_used_unspecialized
-from formatting import (
- Formatter,
- UNUSED,
- list_effect_size,
-)
-import lexer as lx
-import parsing
-from parsing import StackEffect
-import stacking
-
-BITS_PER_CODE_UNIT = 16
-
-
-@dataclasses.dataclass
-class ActiveCacheEffect:
- """Wraps a CacheEffect that is actually used, in context."""
-
- effect: parsing.CacheEffect
- offset: int
-
-
-FORBIDDEN_NAMES_IN_UOPS = (
- "next_instr",
- "oparg1", # Proxy for super-instructions like LOAD_FAST_LOAD_FAST
- "JUMPBY",
- "DISPATCH",
- "TIER_ONE_ONLY",
-)
-
-
-# Interpreter tiers
-TIER_ONE: typing.Final = 1 # Specializing adaptive interpreter (PEP 659)
-TIER_TWO: typing.Final = 2 # Experimental tracing interpreter
-Tiers: typing.TypeAlias = typing.Literal[1, 2]
-
-
-@dataclasses.dataclass
-class Instruction:
- """An instruction with additional data and code."""
-
- # Parts of the underlying instruction definition
- inst: parsing.InstDef
- name: str
- annotations: list[str]
- block: parsing.Block
- block_text: list[str] # Block.text, less curlies, less PREDICT() calls
- block_line: int # First line of block in original code
-
- # Computed by constructor
- always_exits: str # If the block always exits, its last line; else ""
- has_deopt: bool
- needs_this_instr: bool
- cache_offset: int
- cache_effects: list[parsing.CacheEffect]
- input_effects: list[StackEffect]
- output_effects: list[StackEffect]
- unmoved_names: frozenset[str]
- instr_fmt: str
- instr_flags: InstructionFlags
- active_caches: list[ActiveCacheEffect]
-
- # Set later
- family: parsing.Family | None = None
- predicted: bool = False
-
- def __init__(self, inst: parsing.InstDef):
- self.inst = inst
- self.name = inst.name
- self.annotations = inst.annotations
- self.block = inst.block
- self.block_text, self.check_eval_breaker, self.block_line = extract_block_text(
- self.block
- )
- self.always_exits = always_exits(self.block_text)
- self.has_deopt = variable_used(self.inst, "DEOPT_IF")
- self.cache_effects = [
- effect for effect in inst.inputs if isinstance(effect, parsing.CacheEffect)
- ]
- self.cache_offset = sum(c.size for c in self.cache_effects)
- self.needs_this_instr = variable_used(self.inst, "this_instr") or any(c.name != UNUSED for c in self.cache_effects)
- self.input_effects = [
- effect for effect in inst.inputs if isinstance(effect, StackEffect)
- ]
- self.output_effects = inst.outputs # For consistency/completeness
- unmoved_names: set[str] = set()
- for ieffect, oeffect in zip(self.input_effects, self.output_effects):
- if ieffect == oeffect and ieffect.name == oeffect.name:
- unmoved_names.add(ieffect.name)
- else:
- break
- self.unmoved_names = frozenset(unmoved_names)
-
- self.instr_flags = InstructionFlags.fromInstruction(inst)
-
- self.active_caches = []
- offset = 0
- for effect in self.cache_effects:
- if effect.name != UNUSED:
- self.active_caches.append(ActiveCacheEffect(effect, offset))
- offset += effect.size
-
- if self.instr_flags.HAS_ARG_FLAG:
- fmt = "IB"
- else:
- fmt = "IX"
- if offset:
- fmt += "C" + "0" * (offset - 1)
- self.instr_fmt = fmt
-
- def is_viable_uop(self) -> bool:
- """Whether this instruction is viable as a uop."""
- dprint: typing.Callable[..., None] = lambda *args, **kwargs: None
- if "FRAME" in self.name:
- dprint = print
-
- if self.name == "_EXIT_TRACE":
- return True # This has 'return frame' but it's okay
- if self.name == "_SAVE_RETURN_OFFSET":
- return True # Adjusts next_instr, but only in tier 1 code
- if self.always_exits:
- dprint(f"Skipping {self.name} because it always exits: {self.always_exits}")
- return False
- if len(self.active_caches) > 1:
- # print(f"Skipping {self.name} because it has >1 cache entries")
- return False
- res = True
- for forbidden in FORBIDDEN_NAMES_IN_UOPS:
- # NOTE: To disallow unspecialized uops, use
- # if variable_used(self.inst, forbidden):
- if variable_used_unspecialized(self.inst, forbidden):
- dprint(f"Skipping {self.name} because it uses {forbidden}")
- res = False
- return res
-
- def write_body(
- self,
- out: Formatter,
- dedent: int,
- active_caches: list[ActiveCacheEffect],
- tier: Tiers,
- family: parsing.Family | None,
- ) -> None:
- """Write the instruction body."""
- # Write cache effect variable declarations and initializations
- for active in active_caches:
- ceffect = active.effect
- bits = ceffect.size * BITS_PER_CODE_UNIT
- if bits == 64:
- # NOTE: We assume that 64-bit data in the cache
- # is always an object pointer.
- # If this becomes false, we need a way to specify
- # syntactically what type the cache data is.
- typ = "PyObject *"
- func = "read_obj"
- else:
- typ = f"uint{bits}_t "
- func = f"read_u{bits}"
- if tier == TIER_ONE:
- out.emit(
- f"{typ}{ceffect.name} = "
- f"{func}(&this_instr[{active.offset + 1}].cache);"
- )
- else:
- out.emit(f"{typ}{ceffect.name} = ({typ.strip()})CURRENT_OPERAND();")
-
- # Write the body, substituting a goto for ERROR_IF() and other stuff
- assert dedent <= 0
- extra = " " * -dedent
- names_to_skip = self.unmoved_names | frozenset({UNUSED, "null"})
- offset = 0
- context = self.block.context
- assert context is not None and context.owner is not None
- filename = context.owner.filename
- for line in self.block_text:
- out.set_lineno(self.block_line + offset, filename)
- offset += 1
- if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*(?://.*)?$", line):
- space, cond, label = m.groups()
- space = extra + space
- # ERROR_IF() must pop the inputs from the stack.
- # The code block is responsible for DECREF()ing them.
- # NOTE: If the label doesn't exist, just add it to ceval.c.
-
- # Don't pop common input/output effects at the bottom!
- # These aren't DECREF'ed so they can stay.
- ieffs = list(self.input_effects)
- oeffs = list(self.output_effects)
- while (
- ieffs
- and oeffs
- and ieffs[0] == oeffs[0]
- and ieffs[0].name == oeffs[0].name
- ):
- ieffs.pop(0)
- oeffs.pop(0)
- ninputs, symbolic = list_effect_size(ieffs)
- if ninputs:
- label = f"pop_{ninputs}_{label}"
- if tier == TIER_TWO:
- label = label + "_tier_two"
- if symbolic:
- out.write_raw(
- f"{space}if ({cond}) {{ STACK_SHRINK({symbolic}); goto {label}; }}\n"
- )
- else:
- out.write_raw(f"{space}if ({cond}) goto {label};\n")
- elif m := re.match(r"(\s*)DEOPT_IF\((.+)\);\s*(?://.*)?$", line):
- space, cond = m.groups()
- space = extra + space
- target = family.name if family else self.name
- out.write_raw(f"{space}DEOPT_IF({cond}, {target});\n")
- elif "DEOPT" in line:
- filename = context.owner.filename
- lineno = context.owner.tokens[context.begin].line
- print(f"{filename}:{lineno}: ERROR: DEOPT_IF() must be all on one line")
- out.write_raw(extra + line)
- elif m := re.match(r"(\s*)DECREF_INPUTS\(\);\s*(?://.*)?$", line):
- out.reset_lineno()
- space = extra + m.group(1)
- for ieff in self.input_effects:
- if ieff.name in names_to_skip:
- continue
- if ieff.size:
- out.write_raw(
- f"{space}for (int _i = {ieff.size}; --_i >= 0;) {{\n"
- )
- out.write_raw(f"{space} Py_DECREF({ieff.name}[_i]);\n")
- out.write_raw(f"{space}}}\n")
- else:
- decref = "XDECREF" if ieff.cond else "DECREF"
- out.write_raw(f"{space}Py_{decref}({ieff.name});\n")
- else:
- out.write_raw(extra + line)
- out.reset_lineno()
-
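For reference, the ERROR_IF rewriting in write_body() above hinges entirely on the regex shown; a small self-contained sketch of what it captures, with the emitted form under the pop_{n}_{label} scheme noted in comments:

    import re

    m = re.match(
        r"(\s*)ERROR_IF\((.+), (\w+)\);\s*(?://.*)?$",
        "    ERROR_IF(res == NULL, error);",
    )
    assert m is not None
    assert m.groups() == ("    ", "res == NULL", "error")
    # With two plain (non-common) inputs to pop, tier 1 emits
    #     if (res == NULL) goto pop_2_error;
    # and tier 2 appends "_tier_two" to the label.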
-
-InstructionOrCacheEffect = Instruction | parsing.CacheEffect
-
-
-# Instruction used for abstract interpretation.
-class AbstractInstruction(Instruction):
- def __init__(self, inst: parsing.InstDef):
- super().__init__(inst)
-
- def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None:
- """Write one abstract instruction, sans prologue and epilogue."""
- stacking.write_single_instr_for_abstract_interp(self, out)
-
- def write_body(
- self,
- out: Formatter,
- dedent: int,
- active_caches: list[ActiveCacheEffect],
- tier: Tiers,
- family: parsing.Family | None,
- ) -> None:
- pass
-
-
-@dataclasses.dataclass
-class Component:
- instr: Instruction
- active_caches: list[ActiveCacheEffect]
-
-
-MacroParts = list[Component | parsing.CacheEffect]
-
-
-@dataclasses.dataclass
-class MacroInstruction:
- """A macro instruction."""
-
- name: str
- instr_fmt: str
- instr_flags: InstructionFlags
- macro: parsing.Macro
- parts: MacroParts
- cache_offset: int
- # Set later
- predicted: bool = False
- family: parsing.Family | None = None
-
-
-@dataclasses.dataclass
-class PseudoInstruction:
- """A pseudo instruction."""
-
- name: str
- targets: list[Instruction | MacroInstruction]
- instr_flags: InstructionFlags
-
-
-AnyInstruction = Instruction | MacroInstruction | PseudoInstruction
-
-
-def extract_block_text(block: parsing.Block) -> tuple[list[str], bool, int]:
- # Get lines of text with proper dedent
- blocklines = block.text.splitlines(True)
- first_token: lx.Token = block.tokens[0] # IndexError means the context is broken
- block_line = first_token.begin[0]
-
- # Remove blank lines from both ends
- while blocklines and not blocklines[0].strip():
- blocklines.pop(0)
- block_line += 1
- while blocklines and not blocklines[-1].strip():
- blocklines.pop()
-
- # Remove leading and trailing braces
- assert blocklines and blocklines[0].strip() == "{"
- assert blocklines and blocklines[-1].strip() == "}"
- blocklines.pop()
- blocklines.pop(0)
- block_line += 1
-
- # Remove trailing blank lines
- while blocklines and not blocklines[-1].strip():
- blocklines.pop()
-
- # Separate CHECK_EVAL_BREAKER() macro from end
- check_eval_breaker = (
- blocklines != [] and blocklines[-1].strip() == "CHECK_EVAL_BREAKER();"
- )
- if check_eval_breaker:
- del blocklines[-1]
-
- return blocklines, check_eval_breaker, block_line
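A minimal sketch of the contract above, using a stand-in for parsing.Block that provides only the .text and .tokens[0].begin attributes the function actually touches:

    from types import SimpleNamespace

    block = SimpleNamespace(
        text="{\n    res = NULL;\n    CHECK_EVAL_BREAKER();\n}\n",
        tokens=[SimpleNamespace(begin=(10, 0))],  # (line, column)
    )
    lines, check_eval_breaker, first_line = extract_block_text(block)
    assert lines == ["    res = NULL;\n"]
    assert check_eval_breaker is True
    assert first_line == 11  # the line right after the opening brace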
-
-
-def always_exits(lines: list[str]) -> str:
- """Determine whether a block always ends in a return/goto/etc."""
- if not lines:
- return ""
- line = lines[-1].rstrip()
- # Indent must match exactly (TODO: Do something better)
- if line[:12] != " " * 12:
- return ""
- line = line[12:]
- if line.startswith(
- (
- "goto ",
- "return ",
- "DISPATCH",
- "GO_TO_",
- "Py_UNREACHABLE()",
- "ERROR_IF(true, ",
- )
- ):
- return line
- return ""
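And a quick sketch of the exact-indent heuristic in always_exits(); anything not indented by exactly 12 spaces is deliberately ignored:

    body = [
        "            res = NULL;\n",
        "            goto error;\n",
    ]
    assert always_exits(body) == "goto error;"
    assert always_exits(["    goto error;\n"]) == ""  # wrong indent: ignored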
diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py
index fe4e8e4..2b77d14 100644
--- a/Tools/cases_generator/parser.py
+++ b/Tools/cases_generator/parser.py
@@ -11,7 +11,17 @@ from parsing import (
OpName,
AstNode,
)
-from formatting import prettify_filename
+
+
+def prettify_filename(filename: str) -> str:
+ # Make filename more user-friendly and less platform-specific;
+ # it is only used for error reporting at this point.
+ filename = filename.replace("\\", "/")
+ if filename.startswith("./"):
+ filename = filename[2:]
+ if filename.endswith(".new"):
+ filename = filename[:-4]
+ return filename


BEGIN_MARKER = "// BEGIN BYTECODES //"
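The inlined helper behaves like the one previously imported from the now-deleted formatting.py; a short sketch with a made-up path:

    assert prettify_filename("./Tools\\cases_generator\\bytecodes.c.new") == (
        "Tools/cases_generator/bytecodes.c"
    )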
diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py
index 94fb82d..d351037 100644
--- a/Tools/cases_generator/stack.py
+++ b/Tools/cases_generator/stack.py
@@ -1,10 +1,24 @@
-import sys
+import re
from analyzer import StackItem, Instruction, Uop
from dataclasses import dataclass
-from formatting import maybe_parenthesize
from cwriter import CWriter


+def maybe_parenthesize(sym: str) -> str:
+ """Add parentheses around a string if it contains an operator
+ and is not already parenthesized.
+
+ An exception is made for '*' which is common and harmless
+ in the context where the symbolic size is used.
+ """
+ if sym.startswith("(") and sym.endswith(")"):
+ return sym
+ if re.match(r"^[\s\w*]+$", sym):
+ return sym
+ else:
+ return f"({sym})"
+
+
def var_size(var: StackItem) -> str:
if var.condition:
# Special case simplification
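Likewise inlined from the deleted formatting.py; a minimal sketch of the rule, including the deliberate '*' exemption:

    assert maybe_parenthesize("oparg") == "oparg"
    assert maybe_parenthesize("oparg * 2") == "oparg * 2"  # '*' stays bare
    assert maybe_parenthesize("oparg + 1") == "(oparg + 1)"
    assert maybe_parenthesize("(a + b)") == "(a + b)"      # already wrapped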
diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py
deleted file mode 100644
index 123e38c..0000000
--- a/Tools/cases_generator/stacking.py
+++ /dev/null
@@ -1,534 +0,0 @@
-import dataclasses
-import typing
-
-from flags import variable_used_unspecialized
-from formatting import (
- Formatter,
- UNUSED,
- maybe_parenthesize,
- parenthesize_cond,
-)
-from instructions import (
- ActiveCacheEffect,
- Instruction,
- MacroInstruction,
- Component,
- Tiers,
- TIER_ONE,
-)
-from parsing import StackEffect, CacheEffect, Family
-
-
-@dataclasses.dataclass
-class StackOffset:
- """Represent the stack offset for a PEEK or POKE.
-
- - At stack_pointer[0], deep and high are both empty.
- (Note that that is an invalid stack reference.)
- - Below stack top, only deep is non-empty.
- - Above stack top, only high is non-empty.
- - In complex cases, both deep and high may be non-empty.
-
- All this would be much simpler if all stack entries were the same
- size, but with conditional and array effects, they aren't.
- The offsets are each represented by a list of StackEffect objects.
- The name in the StackEffects is unused.
- """
-
- deep: list[StackEffect] = dataclasses.field(default_factory=list)
- high: list[StackEffect] = dataclasses.field(default_factory=list)
-
- def clone(self) -> "StackOffset":
- return StackOffset(list(self.deep), list(self.high))
-
- def negate(self) -> "StackOffset":
- return StackOffset(list(self.high), list(self.deep))
-
- def deeper(self, eff: StackEffect) -> None:
- if eff in self.high:
- self.high.remove(eff)
- else:
- self.deep.append(eff)
-
- def higher(self, eff: StackEffect) -> None:
- if eff in self.deep:
- self.deep.remove(eff)
- else:
- self.high.append(eff)
-
- def as_terms(self) -> list[tuple[str, str]]:
- num = 0
- terms: list[tuple[str, str]] = []
- for eff in self.deep:
- if eff.size:
- terms.append(("-", maybe_parenthesize(eff.size)))
- elif eff.cond and eff.cond not in ("0", "1"):
- terms.append(("-", f"({parenthesize_cond(eff.cond)} ? 1 : 0)"))
- elif eff.cond != "0":
- num -= 1
- for eff in self.high:
- if eff.size:
- terms.append(("+", maybe_parenthesize(eff.size)))
- elif eff.cond and eff.cond not in ("0", "1"):
- terms.append(("+", f"({parenthesize_cond(eff.cond)} ? 1 : 0)"))
- elif eff.cond != "0":
- num += 1
- if num < 0:
- terms.insert(0, ("-", str(-num)))
- elif num > 0:
- terms.append(("+", str(num)))
- return terms
-
- def as_index(self) -> str:
- terms = self.as_terms()
- return make_index(terms)
-
- def equivalent_to(self, other: "StackOffset") -> bool:
- if self.deep == other.deep and self.high == other.high:
- return True
- deep = list(self.deep)
- for x in other.deep:
- try:
- deep.remove(x)
- except ValueError:
- return False
- if deep:
- return False
- high = list(self.high)
- for x in other.high:
- try:
- high.remove(x)
- except ValueError:
- return False
- if high:
- return False
- return True
-
-
-def make_index(terms: list[tuple[str, str]]) -> str:
- # Produce an index expression from the terms honoring PEP 8,
- # surrounding binary ops with spaces but not unary minus
- index = ""
- for sign, term in terms:
- if index:
- index += f" {sign} {term}"
- elif sign == "+":
- index = term
- else:
- index = sign + term
- return index or "0"
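A minimal sketch of the bookkeeping above for a typical binary op (pop two, push one); StackEffect's (name, type, cond, size) field order is inferred from its uses later in this file:

    off = StackOffset()
    off.deeper(StackEffect("right", "", "", ""))  # pop right
    off.deeper(StackEffect("left", "", "", ""))   # pop left
    off.higher(StackEffect("res", "", "", ""))    # push res
    assert off.as_index() == "-1"  # net effect: one slot shallower
    assert make_index([("-", "2"), ("+", "oparg")]) == "-2 + oparg"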
-
-
-@dataclasses.dataclass
-class StackItem:
- offset: StackOffset
- effect: StackEffect
-
- def as_variable(self, lax: bool = False) -> str:
- """Return e.g. stack_pointer[-1]."""
- terms = self.offset.as_terms()
- if self.effect.size:
- terms.insert(0, ("+", "stack_pointer"))
- index = make_index(terms)
- if self.effect.size:
- res = index
- else:
- res = f"stack_pointer[{index}]"
- if not lax:
- # Check that we're not reading or writing above stack top.
- # Skip this for output variable initialization (lax=True).
- assert (
- self.effect in self.offset.deep and not self.offset.high
- ), f"Push or pop above current stack level: {res}"
- return res
-
- def as_stack_effect(self, lax: bool = False) -> StackEffect:
- return StackEffect(
- self.as_variable(lax=lax),
- self.effect.type if self.effect.size else "",
- self.effect.cond,
- self.effect.size,
- )
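Continuing that sketch, a single peeked input resolves to a concrete stack reference like so (same inferred StackEffect field order):

    eff = StackEffect("value", "", "", "")
    item = StackItem(offset=StackOffset(deep=[eff]), effect=eff)
    assert item.as_variable() == "stack_pointer[-1]"
    # lax=True skips the "no access above stack top" assertion; the
    # generator relies on that when initializing array output variables.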
-
-
-@dataclasses.dataclass
-class CopyItem:
- src: StackItem
- dst: StackItem
-
-
-class EffectManager:
- """Manage stack effects and offsets for an instruction."""
-
- instr: Instruction
- active_caches: list[ActiveCacheEffect]
- peeks: list[StackItem]
- pokes: list[StackItem]
- copies: list[CopyItem] # See merge()
- # Track offsets from stack pointer
- min_offset: StackOffset
- final_offset: StackOffset
- # Link to previous manager
- pred: "EffectManager | None" = None
-
- def __init__(
- self,
- instr: Instruction,
- active_caches: list[ActiveCacheEffect],
- pred: "EffectManager | None" = None,
- ):
- self.instr = instr
- self.active_caches = active_caches
- self.peeks = []
- self.pokes = []
- self.copies = []
- self.final_offset = pred.final_offset.clone() if pred else StackOffset()
- for eff in reversed(instr.input_effects):
- self.final_offset.deeper(eff)
- self.peeks.append(StackItem(offset=self.final_offset.clone(), effect=eff))
- self.min_offset = self.final_offset.clone()
- for eff in instr.output_effects:
- self.pokes.append(StackItem(offset=self.final_offset.clone(), effect=eff))
- self.final_offset.higher(eff)
-
- self.pred = pred
- while pred:
- # Replace push(x) + pop(y) with copy(x, y).
- # Check that the sources and destinations are disjoint.
- sources: set[str] = set()
- destinations: set[str] = set()
- while (
- pred.pokes
- and self.peeks
- and pred.pokes[-1].effect == self.peeks[0].effect
- ):
- src = pred.pokes.pop(-1)
- dst = self.peeks.pop(0)
- assert src.offset.equivalent_to(dst.offset), (src, dst)
- pred.final_offset.deeper(src.effect)
- if dst.effect.name != src.effect.name:
- if dst.effect.name != UNUSED:
- destinations.add(dst.effect.name)
- if src.effect.name != UNUSED:
- sources.add(src.effect.name)
- self.copies.append(CopyItem(src, dst))
- # TODO: Turn this into an error (pass an Analyzer instance?)
- assert sources & destinations == set(), (
- pred.instr.name,
- self.instr.name,
- sources,
- destinations,
- )
- # See if we can get more copies of an earlier predecessor.
- if self.peeks and not pred.pokes and not pred.peeks:
- pred = pred.pred
- else:
- pred = None # Break
-
- # Fix up patterns of copies through UNUSED,
- # e.g. cp(a, UNUSED) + cp(UNUSED, b) -> cp(a, b).
- if any(copy.src.effect.name == UNUSED for copy in self.copies):
- pred = self.pred
- while pred is not None:
- for copy in self.copies:
- if copy.src.effect.name == UNUSED:
- for pred_copy in pred.copies:
- if pred_copy.dst == copy.src:
- copy.src = pred_copy.src
- break
- pred = pred.pred
-
- def adjust_deeper(self, eff: StackEffect) -> None:
- for peek in self.peeks:
- peek.offset.deeper(eff)
- for poke in self.pokes:
- poke.offset.deeper(eff)
- for copy in self.copies:
- copy.src.offset.deeper(eff)
- copy.dst.offset.deeper(eff)
- self.min_offset.deeper(eff)
- self.final_offset.deeper(eff)
-
- def adjust_higher(self, eff: StackEffect) -> None:
- for peek in self.peeks:
- peek.offset.higher(eff)
- for poke in self.pokes:
- poke.offset.higher(eff)
- for copy in self.copies:
- copy.src.offset.higher(eff)
- copy.dst.offset.higher(eff)
- self.min_offset.higher(eff)
- self.final_offset.higher(eff)
-
- def adjust(self, offset: StackOffset) -> None:
- deep = list(offset.deep)
- high = list(offset.high)
- for down in deep:
- self.adjust_deeper(down)
- for up in high:
- self.adjust_higher(up)
-
- def adjust_inverse(self, offset: StackOffset) -> None:
- deep = list(offset.deep)
- high = list(offset.high)
- for down in deep:
- self.adjust_higher(down)
- for up in high:
- self.adjust_deeper(up)
-
- def collect_vars(self) -> dict[str, StackEffect]:
- """Collect all variables, skipping unused ones."""
- vars: dict[str, StackEffect] = {}
-
- def add(eff: StackEffect) -> None:
- if eff.name != UNUSED:
- if eff.name in vars:
- # TODO: Make this an error
- assert vars[eff.name] == eff, (
- self.instr.name,
- eff.name,
- vars[eff.name],
- eff,
- )
- else:
- vars[eff.name] = eff
-
- for copy in self.copies:
- add(copy.src.effect)
- add(copy.dst.effect)
- for peek in self.peeks:
- add(peek.effect)
- for poke in self.pokes:
- add(poke.effect)
-
- return vars
-
-
-def less_than(a: StackOffset, b: StackOffset) -> bool:
- # TODO: Handle more cases
- if a.high != b.high:
- return False
- return a.deep[: len(b.deep)] == b.deep
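The prefix test above effectively means "a reaches at least as deep as b"; a quick sketch:

    x = StackEffect("x", "", "", "")
    y = StackEffect("y", "", "", "")
    two_down = StackOffset(deep=[x, y])
    one_down = StackOffset(deep=[x])
    assert less_than(two_down, one_down)      # deeper compares as "less"
    assert not less_than(one_down, two_down)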
-
-
-def get_managers(parts: list[Component]) -> list[EffectManager]:
- managers: list[EffectManager] = []
- pred: EffectManager | None = None
- for part in parts:
- mgr = EffectManager(part.instr, part.active_caches, pred)
- managers.append(mgr)
- pred = mgr
- return managers
-
-
-def get_stack_effect_info_for_macro(mac: MacroInstruction) -> tuple[str, str]:
- """Get the stack effect info for a macro instruction.
-
- Returns a tuple (popped, pushed) where each is a string giving a
- symbolic expression for the number of values popped/pushed.
- """
- parts = [part for part in mac.parts if isinstance(part, Component)]
- managers = get_managers(parts)
- popped = StackOffset()
- for mgr in managers:
- if less_than(mgr.min_offset, popped):
- popped = mgr.min_offset.clone()
- # Compute pushed = final - popped
- pushed = managers[-1].final_offset.clone()
- for effect in popped.deep:
- pushed.higher(effect)
- for effect in popped.high:
- pushed.deeper(effect)
- return popped.negate().as_index(), pushed.as_index()
-
-
-def write_single_instr(
- instr: Instruction, out: Formatter, tier: Tiers = TIER_ONE
-) -> None:
- try:
- write_components(
- [Component(instr, instr.active_caches)],
- out,
- tier,
- 0,
- instr.family,
- )
- except AssertionError as err:
- raise AssertionError(f"Error writing instruction {instr.name}") from err
-
-
-def write_macro_instr(mac: MacroInstruction, out: Formatter) -> None:
- parts = [
- part
- for part in mac.parts
- if isinstance(part, Component) and part.instr.name != "_SET_IP"
- ]
- out.emit("")
- with out.block(f"TARGET({mac.name})"):
- needs_this = any(part.instr.needs_this_instr for part in parts)
- if needs_this and not mac.predicted:
- out.emit(f"_Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;")
- else:
- out.emit(f"frame->instr_ptr = next_instr;")
- out.emit(f"next_instr += {mac.cache_offset+1};")
- out.emit(f"INSTRUCTION_STATS({mac.name});")
- if mac.predicted:
- out.emit(f"PREDICTED({mac.name});")
- if needs_this:
- out.emit(f"_Py_CODEUNIT *this_instr = next_instr - {mac.cache_offset+1};")
- out.static_assert_family_size(mac.name, mac.family, mac.cache_offset)
- try:
- next_instr_is_set = write_components(
- parts, out, TIER_ONE, mac.cache_offset, mac.family
- )
- except AssertionError as err:
- raise AssertionError(f"Error writing macro {mac.name}") from err
- if not parts[-1].instr.always_exits:
- if parts[-1].instr.check_eval_breaker:
- out.emit("CHECK_EVAL_BREAKER();")
- out.emit("DISPATCH();")
-
-
-def write_components(
- parts: list[Component],
- out: Formatter,
- tier: Tiers,
- cache_offset: int,
- family: Family | None,
-) -> bool:
- managers = get_managers(parts)
-
- all_vars: dict[str, StackEffect] = {}
- for mgr in managers:
- for name, eff in mgr.collect_vars().items():
- if name in all_vars:
- # TODO: Turn this into an error -- variable conflict
- assert all_vars[name] == eff, (
- name,
- mgr.instr.name,
- all_vars[name],
- eff,
- )
- else:
- all_vars[name] = eff
-
- # Declare all variables
- for name, eff in all_vars.items():
- out.declare(eff, None)
-
- next_instr_is_set = False
- for mgr in managers:
- if len(parts) > 1:
- out.emit(f"// {mgr.instr.name}")
-
- for copy in mgr.copies:
- copy_src_effect = copy.src.effect
- if copy_src_effect.name != copy.dst.effect.name:
- if copy_src_effect.name == UNUSED:
- copy_src_effect = copy.src.as_stack_effect()
- out.assign(copy.dst.effect, copy_src_effect)
- for peek in mgr.peeks:
- out.assign(
- peek.effect,
- peek.as_stack_effect(),
- )
- # Initialize array outputs
- for poke in mgr.pokes:
- if poke.effect.size and poke.effect.name not in mgr.instr.unmoved_names:
- out.assign(
- poke.effect,
- poke.as_stack_effect(lax=True),
- )
-
- if mgr.instr.name in ("_PUSH_FRAME", "_POP_FRAME"):
- # Adjust stack to min_offset.
- # This means that all input effects of this instruction
- # are materialized, but not its output effects.
- # That's as intended, since these two are so special.
- out.stack_adjust(mgr.min_offset.deep, mgr.min_offset.high)
- # However, for tier 2, pretend the stack is at final offset.
- mgr.adjust_inverse(mgr.final_offset)
- if tier == TIER_ONE:
- # TODO: Check in analyzer that _{PUSH,POP}_FRAME is last.
- assert (
- mgr is managers[-1]
- ), f"Expected {mgr.instr.name!r} to be the last uop"
- assert_no_pokes(managers)
-
- if mgr.instr.name == "_SAVE_RETURN_OFFSET":
- next_instr_is_set = True
- if tier == TIER_ONE:
- assert_no_pokes(managers)
-
- if len(parts) == 1:
- mgr.instr.write_body(out, 0, mgr.active_caches, tier, family)
- else:
- with out.block(""):
- mgr.instr.write_body(out, -4, mgr.active_caches, tier, family)
-
- if mgr is managers[-1] and not next_instr_is_set and not mgr.instr.always_exits:
- # Adjust the stack to its final depth, *then* write the
- # pokes for all preceding uops.
- # Note that for array output effects we may still write
- # past the stack top.
- out.stack_adjust(mgr.final_offset.deep, mgr.final_offset.high)
- write_all_pokes(mgr.final_offset, managers, out)
-
- return next_instr_is_set
-
-
-def assert_no_pokes(managers: list[EffectManager]) -> None:
- for mgr in managers:
- for poke in mgr.pokes:
- if not poke.effect.size and poke.effect.name not in mgr.instr.unmoved_names:
- assert (
- poke.effect.name == UNUSED
- ), f"Unexpected poke of {poke.effect.name} in {mgr.instr.name!r}"
-
-
-def write_all_pokes(
- offset: StackOffset, managers: list[EffectManager], out: Formatter
-) -> None:
- # Emit all remaining pushes (pokes)
- for m in managers:
- m.adjust_inverse(offset)
- write_pokes(m, out)
-
-
-def write_pokes(mgr: EffectManager, out: Formatter) -> None:
- for poke in mgr.pokes:
- if not poke.effect.size and poke.effect.name not in mgr.instr.unmoved_names:
- out.assign(
- poke.as_stack_effect(),
- poke.effect,
- )
-
-
-def write_single_instr_for_abstract_interp(instr: Instruction, out: Formatter) -> None:
- try:
- _write_components_for_abstract_interp(
- [Component(instr, instr.active_caches)],
- out,
- )
- except AssertionError as err:
- raise AssertionError(
- f"Error writing abstract instruction {instr.name}"
- ) from err
-
-
-def _write_components_for_abstract_interp(
- parts: list[Component],
- out: Formatter,
-) -> None:
- managers = get_managers(parts)
- for mgr in managers:
- if mgr is managers[-1]:
- out.stack_adjust(mgr.final_offset.deep, mgr.final_offset.high)
- mgr.adjust_inverse(mgr.final_offset)
- # NULL out the output stack effects
- for poke in mgr.pokes:
- if not poke.effect.size and poke.effect.name not in mgr.instr.unmoved_names:
- out.emit(
- f"PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)"
- f"PARTITIONNODE_NULLROOT, PEEK(-({poke.offset.as_index()})), true);"
- )