summary | refs | log | tree | commit | diff | stats
path: root/Tools/cases_generator/generate_cases.py
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org>, 2023-07-24 16:38:23 (GMT)
committer: GitHub <noreply@github.com>, 2023-07-24 16:38:23 (GMT)
commit: 032f4809094bf03d92c54e46b305c499ef7e3165 (patch)
tree: c48dfaa67e6201d3ee3c65682081701f0479ec99 /Tools/cases_generator/generate_cases.py
parent: ff5f94b72c8aad8e45c397c263dbe7f19221735f (diff)
download: cpython-032f4809094bf03d92c54e46b305c499ef7e3165.zip
download: cpython-032f4809094bf03d92c54e46b305c499ef7e3165.tar.gz
download: cpython-032f4809094bf03d92c54e46b305c499ef7e3165.tar.bz2
Thoroughly refactor the cases generator (#107151)
This mostly extracts a whole bunch of stuff out of generate_cases.py into separate files, but there are a few other things going on here.

- analysis.py: `Analyzer` etc.
- instructions.py: `Instruction` etc.
- flags.py: `InstructionFlags`, `variable_used`, `variable_used_unspecialized`
- formatting.py: `Formatter` etc.
- Rename parser.py to parsing.py, to avoid conflict with stdlib parser.py
- Blackify most things
- Fix most mypy errors
- Remove output filenames from Generator state, add them to `write_instructions()` etc.
- Fix unit tests
Diffstat (limited to 'Tools/cases_generator/generate_cases.py')
-rw-r--r-- Tools/cases_generator/generate_cases.py | 1287
1 file changed, 144 insertions, 1143 deletions
diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py
index 3a679b2..967e1e2 100644
--- a/Tools/cases_generator/generate_cases.py
+++ b/Tools/cases_generator/generate_cases.py
@@ -1,21 +1,31 @@
"""Generate the main interpreter switch.
-
Reads the instruction definitions from bytecodes.c.
Writes the cases to generated_cases.c.h, which is #included in ceval.c.
"""
import argparse
import contextlib
-import dataclasses
import os
import posixpath
-import re
import sys
import typing
-import lexer as lx
-import parser
-from parser import StackEffect
+from analysis import Analyzer
+from formatting import Formatter, list_effect_size, maybe_parenthesize
+from flags import InstructionFlags, variable_used
+from instructions import (
+ AnyInstruction,
+ Component,
+ Instruction,
+ MacroInstruction,
+ MacroParts,
+ PseudoInstruction,
+ StackEffect,
+ OverriddenInstructionPlaceHolder,
+ TIER_TWO,
+)
+import parsing
+from parsing import StackEffect
HERE = os.path.dirname(__file__)
@@ -33,13 +43,6 @@ DEFAULT_PYMETADATA_OUTPUT = os.path.relpath(
DEFAULT_EXECUTOR_OUTPUT = os.path.relpath(
os.path.join(ROOT, "Python/executor_cases.c.h")
)
-BEGIN_MARKER = "// BEGIN BYTECODES //"
-END_MARKER = "// END BYTECODES //"
-RE_PREDICTED = (
- r"^\s*(?:GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);\s*(?://.*)?$"
-)
-UNUSED = "unused"
-BITS_PER_CODE_UNIT = 16
# Constants used instead of size for macro expansions.
# Note: 1, 2, 4 must match actual cache entry sizes.
@@ -52,10 +55,7 @@ OPARG_SIZES = {
"OPARG_BOTTOM": 6,
}
-RESERVED_WORDS = {
- "co_consts" : "Use FRAME_CO_CONSTS.",
- "co_names": "Use FRAME_CO_NAMES.",
-}
+INSTR_FMT_PREFIX = "INSTR_FMT_"
arg_parser = argparse.ArgumentParser(
description="Generate the code for the interpreter switch.",
@@ -65,10 +65,18 @@ arg_parser.add_argument(
"-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
)
arg_parser.add_argument(
- "-m", "--metadata", type=str, help="Generated C metadata", default=DEFAULT_METADATA_OUTPUT
+ "-m",
+ "--metadata",
+ type=str,
+ help="Generated C metadata",
+ default=DEFAULT_METADATA_OUTPUT,
)
arg_parser.add_argument(
- "-p", "--pymetadata", type=str, help="Generated Python metadata", default=DEFAULT_PYMETADATA_OUTPUT
+ "-p",
+ "--pymetadata",
+ type=str,
+ help="Generated Python metadata",
+ default=DEFAULT_PYMETADATA_OUTPUT,
)
arg_parser.add_argument(
"-l", "--emit-line-directives", help="Emit #line directives", action="store_true"
@@ -85,966 +93,9 @@ arg_parser.add_argument(
)
-def effect_size(effect: StackEffect) -> tuple[int, str]:
- """Return the 'size' impact of a stack effect.
-
- Returns a tuple (numeric, symbolic) where:
-
- - numeric is an int giving the statically analyzable size of the effect
- - symbolic is a string representing a variable effect (e.g. 'oparg*2')
-
- At most one of these will be non-zero / non-empty.
- """
- if effect.size:
- assert not effect.cond, "Array effects cannot have a condition"
- return 0, effect.size
- elif effect.cond:
- if effect.cond in ("0", "1"):
- return int(effect.cond), ""
- return 0, f"{maybe_parenthesize(effect.cond)} ? 1 : 0"
- else:
- return 1, ""
-
-
-def maybe_parenthesize(sym: str) -> str:
- """Add parentheses around a string if it contains an operator.
-
- An exception is made for '*' which is common and harmless
- in the context where the symbolic size is used.
- """
- if re.match(r"^[\s\w*]+$", sym):
- return sym
- else:
- return f"({sym})"
-
-
-def list_effect_size(effects: list[StackEffect]) -> tuple[int, str]:
- numeric = 0
- symbolic: list[str] = []
- for effect in effects:
- diff, sym = effect_size(effect)
- numeric += diff
- if sym:
- symbolic.append(maybe_parenthesize(sym))
- return numeric, " + ".join(symbolic)
-
-
-def string_effect_size(arg: tuple[int, str]) -> str:
- numeric, symbolic = arg
- if numeric and symbolic:
- return f"{numeric} + {symbolic}"
- elif symbolic:
- return symbolic
- else:
- return str(numeric)
-
-
-class Formatter:
- """Wraps an output stream with the ability to indent etc."""
-
- stream: typing.TextIO
- prefix: str
- emit_line_directives: bool = False
- lineno: int # Next line number, 1-based
- filename: str # Slightly improved stream.filename
- nominal_lineno: int
- nominal_filename: str
-
- def __init__(
- self, stream: typing.TextIO, indent: int,
- emit_line_directives: bool = False, comment: str = "//",
- ) -> None:
- self.stream = stream
- self.prefix = " " * indent
- self.emit_line_directives = emit_line_directives
- self.comment = comment
- self.lineno = 1
- self.filename = prettify_filename(self.stream.name)
- self.nominal_lineno = 1
- self.nominal_filename = self.filename
-
- def write_raw(self, s: str) -> None:
- self.stream.write(s)
- newlines = s.count("\n")
- self.lineno += newlines
- self.nominal_lineno += newlines
-
- def emit(self, arg: str) -> None:
- if arg:
- self.write_raw(f"{self.prefix}{arg}\n")
- else:
- self.write_raw("\n")
-
- def set_lineno(self, lineno: int, filename: str) -> None:
- if self.emit_line_directives:
- if lineno != self.nominal_lineno or filename != self.nominal_filename:
- self.emit(f'#line {lineno} "{filename}"')
- self.nominal_lineno = lineno
- self.nominal_filename = filename
-
- def reset_lineno(self) -> None:
- if self.lineno != self.nominal_lineno or self.filename != self.nominal_filename:
- self.set_lineno(self.lineno + 1, self.filename)
-
- @contextlib.contextmanager
- def indent(self):
- self.prefix += " "
- yield
- self.prefix = self.prefix[:-4]
-
- @contextlib.contextmanager
- def block(self, head: str, tail: str = ""):
- if head:
- self.emit(head + " {")
- else:
- self.emit("{")
- with self.indent():
- yield
- self.emit("}" + tail)
-
- def stack_adjust(
- self,
- input_effects: list[StackEffect],
- output_effects: list[StackEffect],
- ):
- shrink, isym = list_effect_size(input_effects)
- grow, osym = list_effect_size(output_effects)
- diff = grow - shrink
- if isym and isym != osym:
- self.emit(f"STACK_SHRINK({isym});")
- if diff < 0:
- self.emit(f"STACK_SHRINK({-diff});")
- if diff > 0:
- self.emit(f"STACK_GROW({diff});")
- if osym and osym != isym:
- self.emit(f"STACK_GROW({osym});")
-
- def declare(self, dst: StackEffect, src: StackEffect | None):
- if dst.name == UNUSED or dst.cond == "0":
- return
- typ = f"{dst.type}" if dst.type else "PyObject *"
- if src:
- cast = self.cast(dst, src)
- init = f" = {cast}{src.name}"
- elif dst.cond:
- init = " = NULL"
- else:
- init = ""
- sepa = "" if typ.endswith("*") else " "
- self.emit(f"{typ}{sepa}{dst.name}{init};")
-
- def assign(self, dst: StackEffect, src: StackEffect):
- if src.name == UNUSED:
- return
- if src.size:
- # Don't write sized arrays -- it's up to the user code.
- return
- cast = self.cast(dst, src)
- if re.match(r"^REG\(oparg(\d+)\)$", dst.name):
- self.emit(f"Py_XSETREF({dst.name}, {cast}{src.name});")
- else:
- stmt = f"{dst.name} = {cast}{src.name};"
- if src.cond and src.cond != "1":
- if src.cond == "0":
- # It will not be executed
- return
- stmt = f"if ({src.cond}) {{ {stmt} }}"
- self.emit(stmt)
-
- def cast(self, dst: StackEffect, src: StackEffect) -> str:
- return f"({dst.type or 'PyObject *'})" if src.type != dst.type else ""
-
-@dataclasses.dataclass
-class InstructionFlags:
- """Construct and manipulate instruction flags"""
-
- HAS_ARG_FLAG: bool
- HAS_CONST_FLAG: bool
- HAS_NAME_FLAG: bool
- HAS_JUMP_FLAG: bool
- HAS_FREE_FLAG: bool
- HAS_LOCAL_FLAG: bool
-
- def __post_init__(self):
- self.bitmask = {
- name : (1 << i) for i, name in enumerate(self.names())
- }
-
- @staticmethod
- def fromInstruction(instr: "AnyInstruction"):
-
- has_free = (variable_used(instr, "PyCell_New") or
- variable_used(instr, "PyCell_GET") or
- variable_used(instr, "PyCell_SET"))
-
- return InstructionFlags(
- HAS_ARG_FLAG=variable_used(instr, "oparg"),
- HAS_CONST_FLAG=variable_used(instr, "FRAME_CO_CONSTS"),
- HAS_NAME_FLAG=variable_used(instr, "FRAME_CO_NAMES"),
- HAS_JUMP_FLAG=variable_used(instr, "JUMPBY"),
- HAS_FREE_FLAG=has_free,
- HAS_LOCAL_FLAG=(variable_used(instr, "GETLOCAL") or
- variable_used(instr, "SETLOCAL")) and
- not has_free,
- )
-
- @staticmethod
- def newEmpty():
- return InstructionFlags(False, False, False, False, False, False)
-
- def add(self, other: "InstructionFlags") -> None:
- for name, value in dataclasses.asdict(other).items():
- if value:
- setattr(self, name, value)
-
- def names(self, value=None):
- if value is None:
- return dataclasses.asdict(self).keys()
- return [n for n, v in dataclasses.asdict(self).items() if v == value]
-
- def bitmap(self) -> int:
- flags = 0
- for name in self.names():
- if getattr(self, name):
- flags |= self.bitmask[name]
- return flags
-
- @classmethod
- def emit_macros(cls, out: Formatter):
- flags = cls.newEmpty()
- for name, value in flags.bitmask.items():
- out.emit(f"#define {name} ({value})");
-
- for name, value in flags.bitmask.items():
- out.emit(
- f"#define OPCODE_{name[:-len('_FLAG')]}(OP) "
- f"(_PyOpcode_opcode_metadata[OP].flags & ({name}))")
-
-
-@dataclasses.dataclass
-class ActiveCacheEffect:
- """Wraps a CacheEffect that is actually used, in context."""
- effect: parser.CacheEffect
- offset: int
-
-
-FORBIDDEN_NAMES_IN_UOPS = (
- "resume_with_error",
- "kwnames",
- "next_instr",
- "oparg1", # Proxy for super-instructions like LOAD_FAST_LOAD_FAST
- "JUMPBY",
- "DISPATCH",
- "INSTRUMENTED_JUMP",
- "throwflag",
- "exception_unwind",
- "import_from",
- "import_name",
- "_PyObject_CallNoArgs", # Proxy for BEFORE_WITH
-)
-
-
-# Interpreter tiers
-TIER_ONE = 1 # Specializing adaptive interpreter (PEP 659)
-TIER_TWO = 2 # Experimental tracing interpreter
-Tiers: typing.TypeAlias = typing.Literal[1, 2]
-
-
-@dataclasses.dataclass
-class Instruction:
- """An instruction with additional data and code."""
-
- # Parts of the underlying instruction definition
- inst: parser.InstDef
- kind: typing.Literal["inst", "op"]
- name: str
- block: parser.Block
- block_text: list[str] # Block.text, less curlies, less PREDICT() calls
- block_line: int # First line of block in original code
-
- # Computed by constructor
- always_exits: bool
- cache_offset: int
- cache_effects: list[parser.CacheEffect]
- input_effects: list[StackEffect]
- output_effects: list[StackEffect]
- unmoved_names: frozenset[str]
- instr_fmt: str
- instr_flags: InstructionFlags
- active_caches: list[ActiveCacheEffect]
-
- # Set later
- family: parser.Family | None = None
- predicted: bool = False
-
- def __init__(self, inst: parser.InstDef):
- self.inst = inst
- self.kind = inst.kind
- self.name = inst.name
- self.block = inst.block
- self.block_text, self.check_eval_breaker, self.block_line = \
- extract_block_text(self.block)
- self.always_exits = always_exits(self.block_text)
- self.cache_effects = [
- effect for effect in inst.inputs if isinstance(effect, parser.CacheEffect)
- ]
- self.cache_offset = sum(c.size for c in self.cache_effects)
- self.input_effects = [
- effect for effect in inst.inputs if isinstance(effect, StackEffect)
- ]
- self.output_effects = inst.outputs # For consistency/completeness
- unmoved_names: set[str] = set()
- for ieffect, oeffect in zip(self.input_effects, self.output_effects):
- if ieffect.name == oeffect.name:
- unmoved_names.add(ieffect.name)
- else:
- break
- self.unmoved_names = frozenset(unmoved_names)
-
- self.instr_flags = InstructionFlags.fromInstruction(inst)
-
- self.active_caches = []
- offset = 0
- for effect in self.cache_effects:
- if effect.name != UNUSED:
- self.active_caches.append(ActiveCacheEffect(effect, offset))
- offset += effect.size
-
- if self.instr_flags.HAS_ARG_FLAG:
- fmt = "IB"
- else:
- fmt = "IX"
- if offset:
- fmt += "C" + "0"*(offset-1)
- self.instr_fmt = fmt
-
- def is_viable_uop(self) -> bool:
- """Whether this instruction is viable as a uop."""
- dprint: typing.Callable[..., None] = lambda *args, **kwargs: None
- # if self.name.startswith("CALL"):
- # dprint = print
-
- if self.name == "EXIT_TRACE":
- return True # This has 'return frame' but it's okay
- if self.always_exits:
- dprint(f"Skipping {self.name} because it always exits")
- return False
- if len(self.active_caches) > 1:
- # print(f"Skipping {self.name} because it has >1 cache entries")
- return False
- res = True
- for forbidden in FORBIDDEN_NAMES_IN_UOPS:
- # NOTE: To disallow unspecialized uops, use
- # if variable_used(self.inst, forbidden):
- if variable_used_unspecialized(self.inst, forbidden):
- dprint(f"Skipping {self.name} because it uses {forbidden}")
- res = False
- return res
-
- def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None:
- """Write one instruction, sans prologue and epilogue."""
- # Write a static assertion that a family's cache size is correct
- if family := self.family:
- if self.name == family.name:
- if cache_size := family.size:
- out.emit(
- f"static_assert({cache_size} == "
- f'{self.cache_offset}, "incorrect cache size");'
- )
-
- # Write input stack effect variable declarations and initializations
- ieffects = list(reversed(self.input_effects))
- for i, ieffect in enumerate(ieffects):
- isize = string_effect_size(
- list_effect_size([ieff for ieff in ieffects[: i + 1]])
- )
- if ieffect.size:
- src = StackEffect(f"(stack_pointer - {maybe_parenthesize(isize)})", "PyObject **")
- elif ieffect.cond:
- src = StackEffect(f"({ieffect.cond}) ? stack_pointer[-{maybe_parenthesize(isize)}] : NULL", "")
- else:
- src = StackEffect(f"stack_pointer[-{maybe_parenthesize(isize)}]", "")
- out.declare(ieffect, src)
-
- # Write output stack effect variable declarations
- isize = string_effect_size(list_effect_size(self.input_effects))
- input_names = {ieffect.name for ieffect in self.input_effects}
- for i, oeffect in enumerate(self.output_effects):
- if oeffect.name not in input_names:
- if oeffect.size:
- osize = string_effect_size(
- list_effect_size([oeff for oeff in self.output_effects[:i]])
- )
- offset = "stack_pointer"
- if isize != osize:
- if isize != "0":
- offset += f" - ({isize})"
- if osize != "0":
- offset += f" + {osize}"
- src = StackEffect(offset, "PyObject **")
- out.declare(oeffect, src)
- else:
- out.declare(oeffect, None)
-
- # out.emit(f"next_instr += OPSIZE({self.inst.name}) - 1;")
-
- self.write_body(out, 0, self.active_caches, tier=tier)
-
- # Skip the rest if the block always exits
- if self.always_exits:
- return
-
- # Write net stack growth/shrinkage
- out.stack_adjust(
- [ieff for ieff in self.input_effects],
- [oeff for oeff in self.output_effects],
- )
-
- # Write output stack effect assignments
- oeffects = list(reversed(self.output_effects))
- for i, oeffect in enumerate(oeffects):
- if oeffect.name in self.unmoved_names:
- continue
- osize = string_effect_size(
- list_effect_size([oeff for oeff in oeffects[: i + 1]])
- )
- if oeffect.size:
- dst = StackEffect(f"stack_pointer - {maybe_parenthesize(osize)}", "PyObject **")
- else:
- dst = StackEffect(f"stack_pointer[-{maybe_parenthesize(osize)}]", "")
- out.assign(dst, oeffect)
-
- # Write cache effect
- if tier == TIER_ONE and self.cache_offset:
- out.emit(f"next_instr += {self.cache_offset};")
-
- def write_body(
- self,
- out: Formatter,
- dedent: int,
- active_caches: list[ActiveCacheEffect],
- tier: Tiers = TIER_ONE,
- ) -> None:
- """Write the instruction body."""
- # Write cache effect variable declarations and initializations
- for active in active_caches:
- ceffect = active.effect
- bits = ceffect.size * BITS_PER_CODE_UNIT
- if bits == 64:
- # NOTE: We assume that 64-bit data in the cache
- # is always an object pointer.
- # If this becomes false, we need a way to specify
- # syntactically what type the cache data is.
- typ = "PyObject *"
- func = "read_obj"
- else:
- typ = f"uint{bits}_t "
- func = f"read_u{bits}"
- if tier == TIER_ONE:
- out.emit(
- f"{typ}{ceffect.name} = {func}(&next_instr[{active.offset}].cache);"
- )
- else:
- out.emit(f"{typ}{ceffect.name} = ({typ.strip()})operand;")
-
- # Write the body, substituting a goto for ERROR_IF() and other stuff
- assert dedent <= 0
- extra = " " * -dedent
- names_to_skip = self.unmoved_names | frozenset({UNUSED, "null"})
- offset = 0
- context = self.block.context
- assert context is not None and context.owner is not None
- filename = context.owner.filename
- for line in self.block_text:
- out.set_lineno(self.block_line + offset, filename)
- offset += 1
- if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*(?://.*)?$", line):
- space, cond, label = m.groups()
- space = extra + space
- # ERROR_IF() must pop the inputs from the stack.
- # The code block is responsible for DECREF()ing them.
- # NOTE: If the label doesn't exist, just add it to ceval.c.
-
- # Don't pop common input/output effects at the bottom!
- # These aren't DECREF'ed so they can stay.
- ieffs = list(self.input_effects)
- oeffs = list(self.output_effects)
- while ieffs and oeffs and ieffs[0] == oeffs[0]:
- ieffs.pop(0)
- oeffs.pop(0)
- ninputs, symbolic = list_effect_size(ieffs)
- if ninputs:
- label = f"pop_{ninputs}_{label}"
- if symbolic:
- out.write_raw(
- f"{space}if ({cond}) {{ STACK_SHRINK({symbolic}); goto {label}; }}\n"
- )
- else:
- out.write_raw(f"{space}if ({cond}) goto {label};\n")
- elif m := re.match(r"(\s*)DECREF_INPUTS\(\);\s*(?://.*)?$", line):
- out.reset_lineno()
- space = extra + m.group(1)
- for ieff in self.input_effects:
- if ieff.name in names_to_skip:
- continue
- if ieff.size:
- out.write_raw(
- f"{space}for (int _i = {ieff.size}; --_i >= 0;) {{\n"
- )
- out.write_raw(f"{space} Py_DECREF({ieff.name}[_i]);\n")
- out.write_raw(f"{space}}}\n")
- else:
- decref = "XDECREF" if ieff.cond else "DECREF"
- out.write_raw(f"{space}Py_{decref}({ieff.name});\n")
- else:
- out.write_raw(extra + line)
- out.reset_lineno()
-
-
-InstructionOrCacheEffect = Instruction | parser.CacheEffect
-StackEffectMapping = list[tuple[StackEffect, StackEffect]]
-
-
-@dataclasses.dataclass
-class Component:
- instr: Instruction
- input_mapping: StackEffectMapping
- output_mapping: StackEffectMapping
- active_caches: list[ActiveCacheEffect]
-
- def write_body(self, out: Formatter) -> None:
- with out.block(""):
- input_names = {ieffect.name for _, ieffect in self.input_mapping}
- for var, ieffect in self.input_mapping:
- out.declare(ieffect, var)
- for _, oeffect in self.output_mapping:
- if oeffect.name not in input_names:
- out.declare(oeffect, None)
-
- self.instr.write_body(out, -4, self.active_caches)
-
- for var, oeffect in self.output_mapping:
- out.assign(var, oeffect)
-
-
-MacroParts = list[Component | parser.CacheEffect]
-
-
-@dataclasses.dataclass
-class MacroInstruction:
- """A macro instruction."""
-
- name: str
- stack: list[StackEffect]
- initial_sp: int
- final_sp: int
- instr_fmt: str
- instr_flags: InstructionFlags
- macro: parser.Macro
- parts: MacroParts
- cache_offset: int
- predicted: bool = False
-
-
-@dataclasses.dataclass
-class PseudoInstruction:
- """A pseudo instruction."""
-
- name: str
- targets: list[Instruction]
- instr_fmt: str
- instr_flags: InstructionFlags
-
-
-@dataclasses.dataclass
-class OverriddenInstructionPlaceHolder:
- name: str
-
-
-AnyInstruction = Instruction | MacroInstruction | PseudoInstruction
-INSTR_FMT_PREFIX = "INSTR_FMT_"
-
-
-class Analyzer:
- """Parse input, analyze it, and write to output."""
-
- input_filenames: list[str]
- output_filename: str
- metadata_filename: str
- pymetadata_filename: str
- executor_filename: str
- errors: int = 0
- emit_line_directives: bool = False
-
- def __init__(
- self,
- input_filenames: list[str],
- output_filename: str,
- metadata_filename: str,
- pymetadata_filename: str,
- executor_filename: str,
- ):
- """Read the input file."""
- self.input_filenames = input_filenames
- self.output_filename = output_filename
- self.metadata_filename = metadata_filename
- self.pymetadata_filename = pymetadata_filename
- self.executor_filename = executor_filename
-
- def error(self, msg: str, node: parser.Node) -> None:
- lineno = 0
- filename = "<unknown file>"
- if context := node.context:
- filename = context.owner.filename
- # Use line number of first non-comment in the node
- for token in context.owner.tokens[context.begin : context.end]:
- lineno = token.line
- if token.kind != "COMMENT":
- break
- print(f"{filename}:{lineno}: {msg}", file=sys.stderr)
- self.errors += 1
-
- everything: list[
- parser.InstDef | parser.Macro | parser.Pseudo | OverriddenInstructionPlaceHolder
- ]
- instrs: dict[str, Instruction] # Includes ops
- macros: dict[str, parser.Macro]
- macro_instrs: dict[str, MacroInstruction]
- families: dict[str, parser.Family]
- pseudos: dict[str, parser.Pseudo]
- pseudo_instrs: dict[str, PseudoInstruction]
-
- def parse(self) -> None:
- """Parse the source text.
-
- We only want the parser to see the stuff between the
- begin and end markers.
- """
-
- self.everything = []
- self.instrs = {}
- self.macros = {}
- self.families = {}
- self.pseudos = {}
-
- instrs_idx: dict[str, int] = dict()
-
- for filename in self.input_filenames:
- self.parse_file(filename, instrs_idx)
-
- files = " + ".join(self.input_filenames)
- print(
- f"Read {len(self.instrs)} instructions/ops, "
- f"{len(self.macros)} macros, {len(self.pseudos)} pseudos, "
- f"and {len(self.families)} families from {files}",
- file=sys.stderr,
- )
-
- def parse_file(self, filename: str, instrs_idx: dict[str, int]) -> None:
- with open(filename) as file:
- src = file.read()
-
-
- psr = parser.Parser(src, filename=prettify_filename(filename))
-
- # Skip until begin marker
- while tkn := psr.next(raw=True):
- if tkn.text == BEGIN_MARKER:
- break
- else:
- raise psr.make_syntax_error(
- f"Couldn't find {BEGIN_MARKER!r} in {psr.filename}"
- )
- start = psr.getpos()
-
- # Find end marker, then delete everything after it
- while tkn := psr.next(raw=True):
- if tkn.text == END_MARKER:
- break
- del psr.tokens[psr.getpos() - 1 :]
-
- # Parse from start
- psr.setpos(start)
- thing: parser.InstDef | parser.Macro | parser.Pseudo | parser.Family | None
- thing_first_token = psr.peek()
- while thing := psr.definition():
- if ws := [w for w in RESERVED_WORDS if variable_used(thing, w)]:
- self.error(f"'{ws[0]}' is a reserved word. {RESERVED_WORDS[ws[0]]}", thing)
-
- match thing:
- case parser.InstDef(name=name):
- if name in self.instrs:
- if not thing.override:
- raise psr.make_syntax_error(
- f"Duplicate definition of '{name}' @ {thing.context} "
- f"previous definition @ {self.instrs[name].inst.context}",
- thing_first_token,
- )
- self.everything[instrs_idx[name]] = OverriddenInstructionPlaceHolder(name=name)
- if name not in self.instrs and thing.override:
- raise psr.make_syntax_error(
- f"Definition of '{name}' @ {thing.context} is supposed to be "
- "an override but no previous definition exists.",
- thing_first_token,
- )
- self.instrs[name] = Instruction(thing)
- instrs_idx[name] = len(self.everything)
- self.everything.append(thing)
- case parser.Macro(name):
- self.macros[name] = thing
- self.everything.append(thing)
- case parser.Family(name):
- self.families[name] = thing
- case parser.Pseudo(name):
- self.pseudos[name] = thing
- self.everything.append(thing)
- case _:
- typing.assert_never(thing)
- if not psr.eof():
- raise psr.make_syntax_error(f"Extra stuff at the end of {filename}")
-
- def analyze(self) -> None:
- """Analyze the inputs.
-
- Raises SystemExit if there is an error.
- """
- self.analyze_macros_and_pseudos()
- self.find_predictions()
- self.map_families()
- self.check_families()
-
- def find_predictions(self) -> None:
- """Find the instructions that need PREDICTED() labels."""
- for instr in self.instrs.values():
- targets: set[str] = set()
- for line in instr.block_text:
- if m := re.match(RE_PREDICTED, line):
- targets.add(m.group(1))
- for target in targets:
- if target_instr := self.instrs.get(target):
- target_instr.predicted = True
- elif target_macro := self.macro_instrs.get(target):
- target_macro.predicted = True
- else:
- self.error(
- f"Unknown instruction {target!r} predicted in {instr.name!r}",
- instr.inst, # TODO: Use better location
- )
-
- def map_families(self) -> None:
- """Link instruction names back to their family, if they have one."""
- for family in self.families.values():
- for member in [family.name] + family.members:
- if member_instr := self.instrs.get(member):
- if member_instr.family not in (family, None):
- self.error(
- f"Instruction {member} is a member of multiple families "
- f"({member_instr.family.name}, {family.name}).",
- family,
- )
- else:
- member_instr.family = family
- elif not self.macro_instrs.get(member):
- self.error(
- f"Unknown instruction {member!r} referenced in family {family.name!r}",
- family,
- )
-
- def check_families(self) -> None:
- """Check each family:
-
- - Must have at least 2 members (including head)
- - Head and all members must be known instructions
- - Head and all members must have the same cache, input and output effects
- """
- for family in self.families.values():
- if family.name not in self.macro_instrs and family.name not in self.instrs:
- self.error(
- f"Family {family.name!r} has unknown instruction {family.name!r}",
- family,
- )
- members = [
- member
- for member in family.members
- if member in self.instrs or member in self.macro_instrs
- ]
- if members != family.members:
- unknown = set(family.members) - set(members)
- self.error(
- f"Family {family.name!r} has unknown members: {unknown}", family
- )
- expected_effects = self.effect_counts(family.name)
- for member in members:
- member_effects = self.effect_counts(member)
- if member_effects != expected_effects:
- self.error(
- f"Family {family.name!r} has inconsistent "
- f"(cache, input, output) effects:\n"
- f" {family.name} = {expected_effects}; "
- f"{member} = {member_effects}",
- family,
- )
-
- def effect_counts(self, name: str) -> tuple[int, int, int]:
- if instr := self.instrs.get(name):
- cache = instr.cache_offset
- input = len(instr.input_effects)
- output = len(instr.output_effects)
- elif mac := self.macro_instrs.get(name):
- cache = mac.cache_offset
- input, output = 0, 0
- for part in mac.parts:
- if isinstance(part, Component):
- # A component may pop what the previous component pushed,
- # so we offset the input/output counts by that.
- delta_i = len(part.instr.input_effects)
- delta_o = len(part.instr.output_effects)
- offset = min(delta_i, output)
- input += delta_i - offset
- output += delta_o - offset
- else:
- assert False, f"Unknown instruction {name!r}"
- return cache, input, output
-
- def analyze_macros_and_pseudos(self) -> None:
- """Analyze each macro and pseudo instruction."""
- self.macro_instrs = {}
- self.pseudo_instrs = {}
- for name, macro in self.macros.items():
- self.macro_instrs[name] = self.analyze_macro(macro)
- for name, pseudo in self.pseudos.items():
- self.pseudo_instrs[name] = self.analyze_pseudo(pseudo)
-
- def analyze_macro(self, macro: parser.Macro) -> MacroInstruction:
- components = self.check_macro_components(macro)
- stack, initial_sp = self.stack_analysis(components)
- sp = initial_sp
- parts: MacroParts = []
- flags = InstructionFlags.newEmpty()
- offset = 0
- for component in components:
- match component:
- case parser.CacheEffect() as ceffect:
- parts.append(ceffect)
- offset += ceffect.size
- case Instruction() as instr:
- part, sp, offset = self.analyze_instruction(instr, stack, sp, offset)
- parts.append(part)
- flags.add(instr.instr_flags)
- case _:
- typing.assert_never(component)
- final_sp = sp
- format = "IB"
- if offset:
- format += "C" + "0"*(offset-1)
- return MacroInstruction(
- macro.name, stack, initial_sp, final_sp, format, flags, macro, parts, offset
- )
-
- def analyze_pseudo(self, pseudo: parser.Pseudo) -> PseudoInstruction:
- targets = [self.instrs[target] for target in pseudo.targets]
- assert targets
- # Make sure the targets have the same fmt
- fmts = list(set([t.instr_fmt for t in targets]))
- assert(len(fmts) == 1)
- assert(len(list(set([t.instr_flags.bitmap() for t in targets]))) == 1)
- return PseudoInstruction(pseudo.name, targets, fmts[0], targets[0].instr_flags)
-
- def analyze_instruction(
- self, instr: Instruction, stack: list[StackEffect], sp: int, offset: int
- ) -> tuple[Component, int, int]:
- input_mapping: StackEffectMapping = []
- for ieffect in reversed(instr.input_effects):
- sp -= 1
- input_mapping.append((stack[sp], ieffect))
- output_mapping: StackEffectMapping = []
- for oeffect in instr.output_effects:
- output_mapping.append((stack[sp], oeffect))
- sp += 1
- active_effects: list[ActiveCacheEffect] = []
- for ceffect in instr.cache_effects:
- if ceffect.name != UNUSED:
- active_effects.append(ActiveCacheEffect(ceffect, offset))
- offset += ceffect.size
- return Component(instr, input_mapping, output_mapping, active_effects), sp, offset
-
- def check_macro_components(
- self, macro: parser.Macro
- ) -> list[InstructionOrCacheEffect]:
- components: list[InstructionOrCacheEffect] = []
- for uop in macro.uops:
- match uop:
- case parser.OpName(name):
- if name not in self.instrs:
- self.error(f"Unknown instruction {name!r}", macro)
- components.append(self.instrs[name])
- case parser.CacheEffect():
- components.append(uop)
- case _:
- typing.assert_never(uop)
- return components
-
- def stack_analysis(
- self, components: typing.Iterable[InstructionOrCacheEffect]
- ) -> tuple[list[StackEffect], int]:
- """Analyze a macro.
-
- Ignore cache effects.
-
- Return the list of variables (as StackEffects) and the initial stack pointer.
- """
- lowest = current = highest = 0
- conditions: dict[int, str] = {} # Indexed by 'current'.
- last_instr: Instruction | None = None
- for thing in components:
- if isinstance(thing, Instruction):
- last_instr = thing
- for thing in components:
- match thing:
- case Instruction() as instr:
- if any(
- eff.size for eff in instr.input_effects + instr.output_effects
- ):
- # TODO: Eventually this will be needed, at least for macros.
- self.error(
- f"Instruction {instr.name!r} has variable-sized stack effect, "
- "which are not supported in macro instructions",
- instr.inst, # TODO: Pass name+location of macro
- )
- if any(eff.cond for eff in instr.input_effects):
- self.error(
- f"Instruction {instr.name!r} has conditional input stack effect, "
- "which are not supported in macro instructions",
- instr.inst, # TODO: Pass name+location of macro
- )
- if any(eff.cond for eff in instr.output_effects) and instr is not last_instr:
- self.error(
- f"Instruction {instr.name!r} has conditional output stack effect, "
- "but is not the last instruction in a macro",
- instr.inst, # TODO: Pass name+location of macro
- )
- current -= len(instr.input_effects)
- lowest = min(lowest, current)
- for eff in instr.output_effects:
- if eff.cond:
- conditions[current] = eff.cond
- current += 1
- highest = max(highest, current)
- case parser.CacheEffect():
- pass
- case _:
- typing.assert_never(thing)
- # At this point, 'current' is the net stack effect,
- # and 'lowest' and 'highest' are the extremes.
- # Note that 'lowest' may be negative.
- stack = [
- StackEffect(f"_tmp_{i}", "", conditions.get(highest - i, ""))
- for i in reversed(range(1, highest - lowest + 1))
- ]
- return stack, -lowest
-
+class Generator(Analyzer):
def get_stack_effect_info(
- self, thing: parser.InstDef | parser.Macro | parser.Pseudo
+ self, thing: parsing.InstDef | parsing.Macro | parsing.Pseudo
) -> tuple[AnyInstruction | None, str | None, str | None]:
def effect_str(effects: list[StackEffect]) -> str:
n_effect, sym_effect = list_effect_size(effects)
@@ -1053,8 +104,10 @@ class Analyzer:
return str(n_effect)
instr: AnyInstruction | None
+ popped: str | None
+ pushed: str | None
match thing:
- case parser.InstDef():
+ case parsing.InstDef():
if thing.kind != "op":
instr = self.instrs[thing.name]
popped = effect_str(instr.input_effects)
@@ -1063,7 +116,7 @@ class Analyzer:
instr = None
popped = ""
pushed = ""
- case parser.Macro():
+ case parsing.Macro():
instr = self.macro_instrs[thing.name]
parts = [comp for comp in instr.parts if isinstance(comp, Component)]
# Note: stack_analysis() already verifies that macro components
@@ -1084,7 +137,11 @@ class Analyzer:
if effect.cond in ("0", "1"):
pushed_symbolic.append(effect.cond)
else:
- pushed_symbolic.append(maybe_parenthesize(f"{maybe_parenthesize(effect.cond)} ? 1 : 0"))
+ pushed_symbolic.append(
+ maybe_parenthesize(
+ f"{maybe_parenthesize(effect.cond)} ? 1 : 0"
+ )
+ )
sp += 1
high = max(sp, high)
if high != max(0, sp):
@@ -1096,7 +153,7 @@ class Analyzer:
popped = str(-low)
pushed_symbolic.append(str(sp - low - len(pushed_symbolic)))
pushed = " + ".join(pushed_symbolic)
- case parser.Pseudo():
+ case parsing.Pseudo():
instr = self.pseudo_instrs[thing.name]
popped = pushed = None
# Calculate stack effect, and check that it's the same
@@ -1135,10 +192,14 @@ class Analyzer:
) -> None:
self.out.emit("")
self.out.emit("#ifndef NEED_OPCODE_METADATA")
- self.out.emit(f"extern int _PyOpcode_num_{direction}(int opcode, int oparg, bool jump);")
+ self.out.emit(
+ f"extern int _PyOpcode_num_{direction}(int opcode, int oparg, bool jump);"
+ )
self.out.emit("#else")
self.out.emit("int")
- self.out.emit(f"_PyOpcode_num_{direction}(int opcode, int oparg, bool jump) {{")
+ self.out.emit(
+ f"_PyOpcode_num_{direction}(int opcode, int oparg, bool jump) {{"
+ )
self.out.emit(" switch(opcode) {")
for instr, effect in data:
self.out.emit(f" case {instr.name}:")
@@ -1159,7 +220,7 @@ class Analyzer:
try:
filename = os.path.relpath(filename, ROOT)
except ValueError:
- # May happen on Windows if root and temp on different volumes
+ # May happen on Windows if root and temp on different volumes
pass
filenames.append(filename)
paths = f"\n{self.out.comment} ".join(filenames)
@@ -1170,20 +231,21 @@ class Analyzer:
self.out.write_raw(self.from_source_files())
self.out.write_raw(f"{self.out.comment} Do not edit!\n")
- def write_metadata(self) -> None:
+ def write_metadata(self, metadata_filename: str, pymetadata_filename: str) -> None:
"""Write instruction metadata to output file."""
# Compute the set of all instruction formats.
all_formats: set[str] = set()
for thing in self.everything:
+ format: str | None
match thing:
case OverriddenInstructionPlaceHolder():
continue
- case parser.InstDef():
+ case parsing.InstDef():
format = self.instrs[thing.name].instr_fmt
- case parser.Macro():
+ case parsing.Macro():
format = self.macro_instrs[thing.name].instr_fmt
- case parser.Pseudo():
+ case parsing.Pseudo():
format = None
for target in self.pseudos[thing.name].targets:
target_instr = self.instrs.get(target)
@@ -1192,13 +254,14 @@ class Analyzer:
format = target_instr.instr_fmt
else:
assert format == target_instr.instr_fmt
+ assert format is not None
case _:
typing.assert_never(thing)
all_formats.add(format)
# Turn it into a list of enum definitions.
format_enums = [INSTR_FMT_PREFIX + format for format in sorted(all_formats)]
- with open(self.metadata_filename, "w") as f:
+ with open(metadata_filename, "w") as f:
# Create formatter
self.out = Formatter(f, 0)
@@ -1220,7 +283,8 @@ class Analyzer:
self.out.emit(
"#define IS_VALID_OPCODE(OP) \\\n"
" (((OP) >= 0) && ((OP) < OPCODE_METADATA_SIZE) && \\\n"
- " (_PyOpcode_opcode_metadata[(OP)].valid_entry))")
+ " (_PyOpcode_opcode_metadata[(OP)].valid_entry))"
+ )
self.out.emit("")
InstructionFlags.emit_macros(self.out)
@@ -1234,17 +298,23 @@ class Analyzer:
with self.out.block("struct opcode_macro_expansion", ";"):
self.out.emit("int nuops;")
- self.out.emit("struct { int16_t uop; int8_t size; int8_t offset; } uops[8];")
+ self.out.emit(
+ "struct { int16_t uop; int8_t size; int8_t offset; } uops[8];"
+ )
self.out.emit("")
for key, value in OPARG_SIZES.items():
self.out.emit(f"#define {key} {value}")
self.out.emit("")
- self.out.emit("#define OPCODE_METADATA_FMT(OP) "
- "(_PyOpcode_opcode_metadata[(OP)].instr_format)")
+ self.out.emit(
+ "#define OPCODE_METADATA_FMT(OP) "
+ "(_PyOpcode_opcode_metadata[(OP)].instr_format)"
+ )
self.out.emit("#define SAME_OPCODE_METADATA(OP1, OP2) \\")
- self.out.emit(" (OPCODE_METADATA_FMT(OP1) == OPCODE_METADATA_FMT(OP2))")
+ self.out.emit(
+ " (OPCODE_METADATA_FMT(OP1) == OPCODE_METADATA_FMT(OP2))"
+ )
self.out.emit("")
# Write metadata array declaration
@@ -1253,27 +323,35 @@ class Analyzer:
self.out.emit("#define OPCODE_MACRO_EXPANSION_SIZE 256")
self.out.emit("")
self.out.emit("#ifndef NEED_OPCODE_METADATA")
- self.out.emit("extern const struct opcode_metadata "
- "_PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE];")
- self.out.emit("extern const struct opcode_macro_expansion "
- "_PyOpcode_macro_expansion[OPCODE_MACRO_EXPANSION_SIZE];")
- self.out.emit("extern const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE];")
+ self.out.emit(
+ "extern const struct opcode_metadata "
+ "_PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE];"
+ )
+ self.out.emit(
+ "extern const struct opcode_macro_expansion "
+ "_PyOpcode_macro_expansion[OPCODE_MACRO_EXPANSION_SIZE];"
+ )
+ self.out.emit(
+ "extern const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE];"
+ )
self.out.emit("#else // if NEED_OPCODE_METADATA")
- self.out.emit("const struct opcode_metadata "
- "_PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = {")
+ self.out.emit(
+ "const struct opcode_metadata "
+ "_PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = {"
+ )
# Write metadata for each instruction
for thing in self.everything:
match thing:
case OverriddenInstructionPlaceHolder():
continue
- case parser.InstDef():
+ case parsing.InstDef():
if thing.kind != "op":
self.write_metadata_for_inst(self.instrs[thing.name])
- case parser.Macro():
+ case parsing.Macro():
self.write_metadata_for_macro(self.macro_instrs[thing.name])
- case parser.Pseudo():
+ case parsing.Pseudo():
self.write_metadata_for_pseudo(self.pseudo_instrs[thing.name])
case _:
typing.assert_never(thing)
@@ -1291,32 +369,38 @@ class Analyzer:
match thing:
case OverriddenInstructionPlaceHolder():
pass
- case parser.InstDef(name=name):
+ case parsing.InstDef(name=name):
instr = self.instrs[name]
# Since an 'op' is not a bytecode, it has no expansion; but 'inst' is
if instr.kind == "inst" and instr.is_viable_uop():
# Construct a dummy Component -- input/output mappings are not used
part = Component(instr, [], [], instr.active_caches)
self.write_macro_expansions(instr.name, [part])
- elif instr.kind == "inst" and variable_used(instr.inst, "oparg1"):
- assert variable_used(instr.inst, "oparg2"), "Half super-instr?"
+ elif instr.kind == "inst" and variable_used(
+ instr.inst, "oparg1"
+ ):
+ assert variable_used(
+ instr.inst, "oparg2"
+ ), "Half super-instr?"
self.write_super_expansions(instr.name)
- case parser.Macro():
+ case parsing.Macro():
mac = self.macro_instrs[thing.name]
self.write_macro_expansions(mac.name, mac.parts)
- case parser.Pseudo():
+ case parsing.Pseudo():
pass
case _:
typing.assert_never(thing)
- with self.out.block("const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] =", ";"):
- self.write_uop_items(lambda name, counter: f"[{name}] = \"{name}\",")
+ with self.out.block(
+ "const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] =", ";"
+ ):
+ self.write_uop_items(lambda name, counter: f'[{name}] = "{name}",')
self.out.emit("#endif // NEED_OPCODE_METADATA")
- with open(self.pymetadata_filename, "w") as f:
+ with open(pymetadata_filename, "w") as f:
# Create formatter
- self.out = Formatter(f, 0, comment = "#")
+ self.out = Formatter(f, 0, comment="#")
self.write_provenance_header()
@@ -1324,10 +408,10 @@ class Analyzer:
self.out.emit("_specializations = {")
for name, family in self.families.items():
with self.out.indent():
- self.out.emit(f"\"{family.name}\": [")
+ self.out.emit(f'"{family.name}": [')
with self.out.indent():
for m in family.members:
- self.out.emit(f"\"{m}\",")
+ self.out.emit(f'"{m}",')
self.out.emit(f"],")
self.out.emit("}")
@@ -1335,15 +419,17 @@ class Analyzer:
self.out.emit("")
self.out.emit("# An irregular case:")
self.out.emit(
- "_specializations[\"BINARY_OP\"].append("
- "\"BINARY_OP_INPLACE_ADD_UNICODE\")")
+ '_specializations["BINARY_OP"].append('
+ '"BINARY_OP_INPLACE_ADD_UNICODE")'
+ )
# Make list of specialized instructions
self.out.emit("")
self.out.emit(
"_specialized_instructions = ["
- "opcode for family in _specializations.values() for opcode in family"
- "]")
+ "opcode for family in _specializations.values() for opcode in family"
+ "]"
+ )
def write_pseudo_instrs(self) -> None:
"""Write the IS_PSEUDO_INSTR macro"""
@@ -1432,16 +518,18 @@ class Analyzer:
]
self.write_expansions(name, expansions)
- def write_expansions(self, name: str, expansions: list[tuple[str, int, int]]) -> None:
- pieces = [f"{{ {name}, {size}, {offset} }}" for name, size, offset in expansions]
+ def write_expansions(
+ self, name: str, expansions: list[tuple[str, int, int]]
+ ) -> None:
+ pieces = [
+ f"{{ {name}, {size}, {offset} }}" for name, size, offset in expansions
+ ]
self.out.emit(
f"[{name}] = "
f"{{ .nuops = {len(pieces)}, .uops = {{ {', '.join(pieces)} }} }},"
)
- def emit_metadata_entry(
- self, name: str, fmt: str, flags: InstructionFlags
- ) -> None:
+ def emit_metadata_entry(self, name: str, fmt: str, flags: InstructionFlags) -> None:
flag_names = flags.names(value=True)
if not flag_names:
flag_names.append("0")
@@ -1462,11 +550,13 @@ class Analyzer:
"""Write metadata for a macro-instruction."""
self.emit_metadata_entry(ps.name, ps.instr_fmt, ps.instr_flags)
- def write_instructions(self) -> None:
+ def write_instructions(
+ self, output_filename: str, emit_line_directives: bool
+ ) -> None:
"""Write instructions to output file."""
- with open(self.output_filename, "w") as f:
+ with open(output_filename, "w") as f:
# Create formatter
- self.out = Formatter(f, 8, self.emit_line_directives)
+ self.out = Formatter(f, 8, emit_line_directives)
self.write_provenance_header()
@@ -1478,35 +568,37 @@ class Analyzer:
match thing:
case OverriddenInstructionPlaceHolder():
self.write_overridden_instr_place_holder(thing)
- case parser.InstDef():
+ case parsing.InstDef():
if thing.kind != "op":
n_instrs += 1
self.write_instr(self.instrs[thing.name])
- case parser.Macro():
+ case parsing.Macro():
n_macros += 1
self.write_macro(self.macro_instrs[thing.name])
- case parser.Pseudo():
+ case parsing.Pseudo():
n_pseudos += 1
case _:
typing.assert_never(thing)
print(
f"Wrote {n_instrs} instructions, {n_macros} macros, "
- f"and {n_pseudos} pseudos to {self.output_filename}",
+ f"and {n_pseudos} pseudos to {output_filename}",
file=sys.stderr,
)
- def write_executor_instructions(self) -> None:
+ def write_executor_instructions(
+ self, executor_filename: str, emit_line_directives: bool
+ ) -> None:
"""Generate cases for the Tier 2 interpreter."""
- with open(self.executor_filename, "w") as f:
- self.out = Formatter(f, 8, self.emit_line_directives)
+ with open(executor_filename, "w") as f:
+ self.out = Formatter(f, 8, emit_line_directives)
self.write_provenance_header()
for thing in self.everything:
match thing:
case OverriddenInstructionPlaceHolder():
# TODO: Is this helpful?
self.write_overridden_instr_place_holder(thing)
- case parser.InstDef():
+ case parsing.InstDef():
instr = self.instrs[thing.name]
if instr.is_viable_uop():
self.out.emit("")
@@ -1517,22 +609,24 @@ class Analyzer:
self.out.emit("break;")
# elif instr.kind != "op":
# print(f"NOTE: {thing.name} is not a viable uop")
- case parser.Macro():
+ case parsing.Macro():
pass
- case parser.Pseudo():
+ case parsing.Pseudo():
pass
case _:
typing.assert_never(thing)
print(
- f"Wrote some stuff to {self.executor_filename}",
+ f"Wrote some stuff to {executor_filename}",
file=sys.stderr,
)
- def write_overridden_instr_place_holder(self,
- place_holder: OverriddenInstructionPlaceHolder) -> None:
+ def write_overridden_instr_place_holder(
+ self, place_holder: OverriddenInstructionPlaceHolder
+ ) -> None:
self.out.emit("")
self.out.emit(
- f"{self.out.comment} TARGET({place_holder.name}) overridden by later definition")
+ f"{self.out.comment} TARGET({place_holder.name}) overridden by later definition"
+ )
def write_instr(self, instr: Instruction) -> None:
name = instr.name
@@ -1555,7 +649,7 @@ class Analyzer:
cache_adjust = 0
for part in mac.parts:
match part:
- case parser.CacheEffect(size=size):
+ case parsing.CacheEffect(size=size):
cache_adjust += size
case Component() as comp:
last_instr = comp.instr
@@ -1603,7 +697,7 @@ class Analyzer:
yield
- self.out.stack_adjust(ieffects[:mac.initial_sp], mac.stack[:mac.final_sp])
+ self.out.stack_adjust(ieffects[: mac.initial_sp], mac.stack[: mac.final_sp])
for i, var in enumerate(reversed(mac.stack[: mac.final_sp]), 1):
dst = StackEffect(f"stack_pointer[-{i}]", "")
@@ -1612,99 +706,6 @@ class Analyzer:
self.out.emit(f"DISPATCH();")
-def prettify_filename(filename: str) -> str:
- # Make filename more user-friendly and less platform-specific,
- # it is only used for error reporting at this point.
- filename = filename.replace("\\", "/")
- if filename.startswith("./"):
- filename = filename[2:]
- if filename.endswith(".new"):
- filename = filename[:-4]
- return filename
-
-
-def extract_block_text(block: parser.Block) -> tuple[list[str], bool, int]:
- # Get lines of text with proper dedent
- blocklines = block.text.splitlines(True)
- first_token: lx.Token = block.tokens[0] # IndexError means the context is broken
- block_line = first_token.begin[0]
-
- # Remove blank lines from both ends
- while blocklines and not blocklines[0].strip():
- blocklines.pop(0)
- block_line += 1
- while blocklines and not blocklines[-1].strip():
- blocklines.pop()
-
- # Remove leading and trailing braces
- assert blocklines and blocklines[0].strip() == "{"
- assert blocklines and blocklines[-1].strip() == "}"
- blocklines.pop()
- blocklines.pop(0)
- block_line += 1
-
- # Remove trailing blank lines
- while blocklines and not blocklines[-1].strip():
- blocklines.pop()
-
- # Separate CHECK_EVAL_BREAKER() macro from end
- check_eval_breaker = \
- blocklines != [] and blocklines[-1].strip() == "CHECK_EVAL_BREAKER();"
- if check_eval_breaker:
- del blocklines[-1]
-
- return blocklines, check_eval_breaker, block_line
-
-
-def always_exits(lines: list[str]) -> bool:
- """Determine whether a block always ends in a return/goto/etc."""
- if not lines:
- return False
- line = lines[-1].rstrip()
- # Indent must match exactly (TODO: Do something better)
- if line[:12] != " " * 12:
- return False
- line = line[12:]
- return line.startswith(
- (
- "goto ",
- "return ",
- "DISPATCH",
- "GO_TO_",
- "Py_UNREACHABLE()",
- "ERROR_IF(true, ",
- )
- )
-
-
-def variable_used(node: parser.Node, name: str) -> bool:
- """Determine whether a variable with a given name is used in a node."""
- return any(
- token.kind == "IDENTIFIER" and token.text == name for token in node.tokens
- )
-
-
-def variable_used_unspecialized(node: parser.Node, name: str) -> bool:
- """Like variable_used(), but skips #if ENABLE_SPECIALIZATION blocks."""
- tokens: list[lx.Token] = []
- skipping = False
- for i, token in enumerate(node.tokens):
- if token.kind == "MACRO":
- text = "".join(token.text.split())
- # TODO: Handle nested #if
- if text == "#if":
- if (
- i + 1 < len(node.tokens)
- and node.tokens[i + 1].text == "ENABLE_SPECIALIZATION"
- ):
- skipping = True
- elif text in ("#else", "#endif"):
- skipping = False
- if not skipping:
- tokens.append(token)
- return any(token.kind == "IDENTIFIER" and token.text == name for token in tokens)
-
-
def main():
"""Parse command line, parse input, analyze, write output."""
args = arg_parser.parse_args() # Prints message and sys.exit(2) on error
@@ -1712,17 +713,17 @@ def main():
args.input.append(DEFAULT_INPUT)
# Raises OSError if input unreadable
- a = Analyzer(args.input, args.output, args.metadata, args.pymetadata, args.executor_cases)
+ a = Generator(args.input)
- if args.emit_line_directives:
- a.emit_line_directives = True
a.parse() # Raises SyntaxError on failure
a.analyze() # Prints messages and sets a.errors on failure
if a.errors:
sys.exit(f"Found {a.errors} errors")
- a.write_instructions() # Raises OSError if output can't be written
- a.write_metadata()
- a.write_executor_instructions()
+
+ # These raise OSError if output can't be written
+ a.write_instructions(args.output, args.emit_line_directives)
+ a.write_metadata(args.metadata, args.pymetadata)
+ a.write_executor_instructions(args.executor_cases, args.emit_line_directives)
if __name__ == "__main__":