diff options
author | Guido van Rossum <guido@python.org> | 2023-07-24 16:38:23 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-24 16:38:23 (GMT) |
commit | 032f4809094bf03d92c54e46b305c499ef7e3165 (patch) | |
tree | c48dfaa67e6201d3ee3c65682081701f0479ec99 /Tools/cases_generator/generate_cases.py | |
parent | ff5f94b72c8aad8e45c397c263dbe7f19221735f (diff) | |
download | cpython-032f4809094bf03d92c54e46b305c499ef7e3165.zip cpython-032f4809094bf03d92c54e46b305c499ef7e3165.tar.gz cpython-032f4809094bf03d92c54e46b305c499ef7e3165.tar.bz2 |
Thoroughly refactor the cases generator (#107151)
This mostly extracts a whole bunch of stuff out of generate_cases.py into separate files, but there are a few other things going on here.
- analysis.py: `Analyzer` etc.
- instructions.py: `Instruction` etc.
- flags.py: `InstructionFlags`, `variable_used`, `variable_used_unspecialized`
- formatting.py: `Formatter` etc.
- Rename parser.py to parsing.py, to avoid conflict with stdlib parser.py
- Blackify most things
- Fix most mypy errors
- Remove output filenames from Generator state, add them to `write_instructions()` etc.
- Fix unit tests
Diffstat (limited to 'Tools/cases_generator/generate_cases.py')
-rw-r--r-- | Tools/cases_generator/generate_cases.py | 1287 |
1 files changed, 144 insertions, 1143 deletions
diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 3a679b2..967e1e2 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -1,21 +1,31 @@ """Generate the main interpreter switch. - Reads the instruction definitions from bytecodes.c. Writes the cases to generated_cases.c.h, which is #included in ceval.c. """ import argparse import contextlib -import dataclasses import os import posixpath -import re import sys import typing -import lexer as lx -import parser -from parser import StackEffect +from analysis import Analyzer +from formatting import Formatter, list_effect_size, maybe_parenthesize +from flags import InstructionFlags, variable_used +from instructions import ( + AnyInstruction, + Component, + Instruction, + MacroInstruction, + MacroParts, + PseudoInstruction, + StackEffect, + OverriddenInstructionPlaceHolder, + TIER_TWO, +) +import parsing +from parsing import StackEffect HERE = os.path.dirname(__file__) @@ -33,13 +43,6 @@ DEFAULT_PYMETADATA_OUTPUT = os.path.relpath( DEFAULT_EXECUTOR_OUTPUT = os.path.relpath( os.path.join(ROOT, "Python/executor_cases.c.h") ) -BEGIN_MARKER = "// BEGIN BYTECODES //" -END_MARKER = "// END BYTECODES //" -RE_PREDICTED = ( - r"^\s*(?:GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);\s*(?://.*)?$" -) -UNUSED = "unused" -BITS_PER_CODE_UNIT = 16 # Constants used instead of size for macro expansions. # Note: 1, 2, 4 must match actual cache entry sizes. @@ -52,10 +55,7 @@ OPARG_SIZES = { "OPARG_BOTTOM": 6, } -RESERVED_WORDS = { - "co_consts" : "Use FRAME_CO_CONSTS.", - "co_names": "Use FRAME_CO_NAMES.", -} +INSTR_FMT_PREFIX = "INSTR_FMT_" arg_parser = argparse.ArgumentParser( description="Generate the code for the interpreter switch.", @@ -65,10 +65,18 @@ arg_parser.add_argument( "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT ) arg_parser.add_argument( - "-m", "--metadata", type=str, help="Generated C metadata", default=DEFAULT_METADATA_OUTPUT + "-m", + "--metadata", + type=str, + help="Generated C metadata", + default=DEFAULT_METADATA_OUTPUT, ) arg_parser.add_argument( - "-p", "--pymetadata", type=str, help="Generated Python metadata", default=DEFAULT_PYMETADATA_OUTPUT + "-p", + "--pymetadata", + type=str, + help="Generated Python metadata", + default=DEFAULT_PYMETADATA_OUTPUT, ) arg_parser.add_argument( "-l", "--emit-line-directives", help="Emit #line directives", action="store_true" @@ -85,966 +93,9 @@ arg_parser.add_argument( ) -def effect_size(effect: StackEffect) -> tuple[int, str]: - """Return the 'size' impact of a stack effect. - - Returns a tuple (numeric, symbolic) where: - - - numeric is an int giving the statically analyzable size of the effect - - symbolic is a string representing a variable effect (e.g. 'oparg*2') - - At most one of these will be non-zero / non-empty. - """ - if effect.size: - assert not effect.cond, "Array effects cannot have a condition" - return 0, effect.size - elif effect.cond: - if effect.cond in ("0", "1"): - return int(effect.cond), "" - return 0, f"{maybe_parenthesize(effect.cond)} ? 1 : 0" - else: - return 1, "" - - -def maybe_parenthesize(sym: str) -> str: - """Add parentheses around a string if it contains an operator. - - An exception is made for '*' which is common and harmless - in the context where the symbolic size is used. - """ - if re.match(r"^[\s\w*]+$", sym): - return sym - else: - return f"({sym})" - - -def list_effect_size(effects: list[StackEffect]) -> tuple[int, str]: - numeric = 0 - symbolic: list[str] = [] - for effect in effects: - diff, sym = effect_size(effect) - numeric += diff - if sym: - symbolic.append(maybe_parenthesize(sym)) - return numeric, " + ".join(symbolic) - - -def string_effect_size(arg: tuple[int, str]) -> str: - numeric, symbolic = arg - if numeric and symbolic: - return f"{numeric} + {symbolic}" - elif symbolic: - return symbolic - else: - return str(numeric) - - -class Formatter: - """Wraps an output stream with the ability to indent etc.""" - - stream: typing.TextIO - prefix: str - emit_line_directives: bool = False - lineno: int # Next line number, 1-based - filename: str # Slightly improved stream.filename - nominal_lineno: int - nominal_filename: str - - def __init__( - self, stream: typing.TextIO, indent: int, - emit_line_directives: bool = False, comment: str = "//", - ) -> None: - self.stream = stream - self.prefix = " " * indent - self.emit_line_directives = emit_line_directives - self.comment = comment - self.lineno = 1 - self.filename = prettify_filename(self.stream.name) - self.nominal_lineno = 1 - self.nominal_filename = self.filename - - def write_raw(self, s: str) -> None: - self.stream.write(s) - newlines = s.count("\n") - self.lineno += newlines - self.nominal_lineno += newlines - - def emit(self, arg: str) -> None: - if arg: - self.write_raw(f"{self.prefix}{arg}\n") - else: - self.write_raw("\n") - - def set_lineno(self, lineno: int, filename: str) -> None: - if self.emit_line_directives: - if lineno != self.nominal_lineno or filename != self.nominal_filename: - self.emit(f'#line {lineno} "{filename}"') - self.nominal_lineno = lineno - self.nominal_filename = filename - - def reset_lineno(self) -> None: - if self.lineno != self.nominal_lineno or self.filename != self.nominal_filename: - self.set_lineno(self.lineno + 1, self.filename) - - @contextlib.contextmanager - def indent(self): - self.prefix += " " - yield - self.prefix = self.prefix[:-4] - - @contextlib.contextmanager - def block(self, head: str, tail: str = ""): - if head: - self.emit(head + " {") - else: - self.emit("{") - with self.indent(): - yield - self.emit("}" + tail) - - def stack_adjust( - self, - input_effects: list[StackEffect], - output_effects: list[StackEffect], - ): - shrink, isym = list_effect_size(input_effects) - grow, osym = list_effect_size(output_effects) - diff = grow - shrink - if isym and isym != osym: - self.emit(f"STACK_SHRINK({isym});") - if diff < 0: - self.emit(f"STACK_SHRINK({-diff});") - if diff > 0: - self.emit(f"STACK_GROW({diff});") - if osym and osym != isym: - self.emit(f"STACK_GROW({osym});") - - def declare(self, dst: StackEffect, src: StackEffect | None): - if dst.name == UNUSED or dst.cond == "0": - return - typ = f"{dst.type}" if dst.type else "PyObject *" - if src: - cast = self.cast(dst, src) - init = f" = {cast}{src.name}" - elif dst.cond: - init = " = NULL" - else: - init = "" - sepa = "" if typ.endswith("*") else " " - self.emit(f"{typ}{sepa}{dst.name}{init};") - - def assign(self, dst: StackEffect, src: StackEffect): - if src.name == UNUSED: - return - if src.size: - # Don't write sized arrays -- it's up to the user code. - return - cast = self.cast(dst, src) - if re.match(r"^REG\(oparg(\d+)\)$", dst.name): - self.emit(f"Py_XSETREF({dst.name}, {cast}{src.name});") - else: - stmt = f"{dst.name} = {cast}{src.name};" - if src.cond and src.cond != "1": - if src.cond == "0": - # It will not be executed - return - stmt = f"if ({src.cond}) {{ {stmt} }}" - self.emit(stmt) - - def cast(self, dst: StackEffect, src: StackEffect) -> str: - return f"({dst.type or 'PyObject *'})" if src.type != dst.type else "" - -@dataclasses.dataclass -class InstructionFlags: - """Construct and manipulate instruction flags""" - - HAS_ARG_FLAG: bool - HAS_CONST_FLAG: bool - HAS_NAME_FLAG: bool - HAS_JUMP_FLAG: bool - HAS_FREE_FLAG: bool - HAS_LOCAL_FLAG: bool - - def __post_init__(self): - self.bitmask = { - name : (1 << i) for i, name in enumerate(self.names()) - } - - @staticmethod - def fromInstruction(instr: "AnyInstruction"): - - has_free = (variable_used(instr, "PyCell_New") or - variable_used(instr, "PyCell_GET") or - variable_used(instr, "PyCell_SET")) - - return InstructionFlags( - HAS_ARG_FLAG=variable_used(instr, "oparg"), - HAS_CONST_FLAG=variable_used(instr, "FRAME_CO_CONSTS"), - HAS_NAME_FLAG=variable_used(instr, "FRAME_CO_NAMES"), - HAS_JUMP_FLAG=variable_used(instr, "JUMPBY"), - HAS_FREE_FLAG=has_free, - HAS_LOCAL_FLAG=(variable_used(instr, "GETLOCAL") or - variable_used(instr, "SETLOCAL")) and - not has_free, - ) - - @staticmethod - def newEmpty(): - return InstructionFlags(False, False, False, False, False, False) - - def add(self, other: "InstructionFlags") -> None: - for name, value in dataclasses.asdict(other).items(): - if value: - setattr(self, name, value) - - def names(self, value=None): - if value is None: - return dataclasses.asdict(self).keys() - return [n for n, v in dataclasses.asdict(self).items() if v == value] - - def bitmap(self) -> int: - flags = 0 - for name in self.names(): - if getattr(self, name): - flags |= self.bitmask[name] - return flags - - @classmethod - def emit_macros(cls, out: Formatter): - flags = cls.newEmpty() - for name, value in flags.bitmask.items(): - out.emit(f"#define {name} ({value})"); - - for name, value in flags.bitmask.items(): - out.emit( - f"#define OPCODE_{name[:-len('_FLAG')]}(OP) " - f"(_PyOpcode_opcode_metadata[OP].flags & ({name}))") - - -@dataclasses.dataclass -class ActiveCacheEffect: - """Wraps a CacheEffect that is actually used, in context.""" - effect: parser.CacheEffect - offset: int - - -FORBIDDEN_NAMES_IN_UOPS = ( - "resume_with_error", - "kwnames", - "next_instr", - "oparg1", # Proxy for super-instructions like LOAD_FAST_LOAD_FAST - "JUMPBY", - "DISPATCH", - "INSTRUMENTED_JUMP", - "throwflag", - "exception_unwind", - "import_from", - "import_name", - "_PyObject_CallNoArgs", # Proxy for BEFORE_WITH -) - - -# Interpreter tiers -TIER_ONE = 1 # Specializing adaptive interpreter (PEP 659) -TIER_TWO = 2 # Experimental tracing interpreter -Tiers: typing.TypeAlias = typing.Literal[1, 2] - - -@dataclasses.dataclass -class Instruction: - """An instruction with additional data and code.""" - - # Parts of the underlying instruction definition - inst: parser.InstDef - kind: typing.Literal["inst", "op"] - name: str - block: parser.Block - block_text: list[str] # Block.text, less curlies, less PREDICT() calls - block_line: int # First line of block in original code - - # Computed by constructor - always_exits: bool - cache_offset: int - cache_effects: list[parser.CacheEffect] - input_effects: list[StackEffect] - output_effects: list[StackEffect] - unmoved_names: frozenset[str] - instr_fmt: str - instr_flags: InstructionFlags - active_caches: list[ActiveCacheEffect] - - # Set later - family: parser.Family | None = None - predicted: bool = False - - def __init__(self, inst: parser.InstDef): - self.inst = inst - self.kind = inst.kind - self.name = inst.name - self.block = inst.block - self.block_text, self.check_eval_breaker, self.block_line = \ - extract_block_text(self.block) - self.always_exits = always_exits(self.block_text) - self.cache_effects = [ - effect for effect in inst.inputs if isinstance(effect, parser.CacheEffect) - ] - self.cache_offset = sum(c.size for c in self.cache_effects) - self.input_effects = [ - effect for effect in inst.inputs if isinstance(effect, StackEffect) - ] - self.output_effects = inst.outputs # For consistency/completeness - unmoved_names: set[str] = set() - for ieffect, oeffect in zip(self.input_effects, self.output_effects): - if ieffect.name == oeffect.name: - unmoved_names.add(ieffect.name) - else: - break - self.unmoved_names = frozenset(unmoved_names) - - self.instr_flags = InstructionFlags.fromInstruction(inst) - - self.active_caches = [] - offset = 0 - for effect in self.cache_effects: - if effect.name != UNUSED: - self.active_caches.append(ActiveCacheEffect(effect, offset)) - offset += effect.size - - if self.instr_flags.HAS_ARG_FLAG: - fmt = "IB" - else: - fmt = "IX" - if offset: - fmt += "C" + "0"*(offset-1) - self.instr_fmt = fmt - - def is_viable_uop(self) -> bool: - """Whether this instruction is viable as a uop.""" - dprint: typing.Callable[..., None] = lambda *args, **kwargs: None - # if self.name.startswith("CALL"): - # dprint = print - - if self.name == "EXIT_TRACE": - return True # This has 'return frame' but it's okay - if self.always_exits: - dprint(f"Skipping {self.name} because it always exits") - return False - if len(self.active_caches) > 1: - # print(f"Skipping {self.name} because it has >1 cache entries") - return False - res = True - for forbidden in FORBIDDEN_NAMES_IN_UOPS: - # NOTE: To disallow unspecialized uops, use - # if variable_used(self.inst, forbidden): - if variable_used_unspecialized(self.inst, forbidden): - dprint(f"Skipping {self.name} because it uses {forbidden}") - res = False - return res - - def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None: - """Write one instruction, sans prologue and epilogue.""" - # Write a static assertion that a family's cache size is correct - if family := self.family: - if self.name == family.name: - if cache_size := family.size: - out.emit( - f"static_assert({cache_size} == " - f'{self.cache_offset}, "incorrect cache size");' - ) - - # Write input stack effect variable declarations and initializations - ieffects = list(reversed(self.input_effects)) - for i, ieffect in enumerate(ieffects): - isize = string_effect_size( - list_effect_size([ieff for ieff in ieffects[: i + 1]]) - ) - if ieffect.size: - src = StackEffect(f"(stack_pointer - {maybe_parenthesize(isize)})", "PyObject **") - elif ieffect.cond: - src = StackEffect(f"({ieffect.cond}) ? stack_pointer[-{maybe_parenthesize(isize)}] : NULL", "") - else: - src = StackEffect(f"stack_pointer[-{maybe_parenthesize(isize)}]", "") - out.declare(ieffect, src) - - # Write output stack effect variable declarations - isize = string_effect_size(list_effect_size(self.input_effects)) - input_names = {ieffect.name for ieffect in self.input_effects} - for i, oeffect in enumerate(self.output_effects): - if oeffect.name not in input_names: - if oeffect.size: - osize = string_effect_size( - list_effect_size([oeff for oeff in self.output_effects[:i]]) - ) - offset = "stack_pointer" - if isize != osize: - if isize != "0": - offset += f" - ({isize})" - if osize != "0": - offset += f" + {osize}" - src = StackEffect(offset, "PyObject **") - out.declare(oeffect, src) - else: - out.declare(oeffect, None) - - # out.emit(f"next_instr += OPSIZE({self.inst.name}) - 1;") - - self.write_body(out, 0, self.active_caches, tier=tier) - - # Skip the rest if the block always exits - if self.always_exits: - return - - # Write net stack growth/shrinkage - out.stack_adjust( - [ieff for ieff in self.input_effects], - [oeff for oeff in self.output_effects], - ) - - # Write output stack effect assignments - oeffects = list(reversed(self.output_effects)) - for i, oeffect in enumerate(oeffects): - if oeffect.name in self.unmoved_names: - continue - osize = string_effect_size( - list_effect_size([oeff for oeff in oeffects[: i + 1]]) - ) - if oeffect.size: - dst = StackEffect(f"stack_pointer - {maybe_parenthesize(osize)}", "PyObject **") - else: - dst = StackEffect(f"stack_pointer[-{maybe_parenthesize(osize)}]", "") - out.assign(dst, oeffect) - - # Write cache effect - if tier == TIER_ONE and self.cache_offset: - out.emit(f"next_instr += {self.cache_offset};") - - def write_body( - self, - out: Formatter, - dedent: int, - active_caches: list[ActiveCacheEffect], - tier: Tiers = TIER_ONE, - ) -> None: - """Write the instruction body.""" - # Write cache effect variable declarations and initializations - for active in active_caches: - ceffect = active.effect - bits = ceffect.size * BITS_PER_CODE_UNIT - if bits == 64: - # NOTE: We assume that 64-bit data in the cache - # is always an object pointer. - # If this becomes false, we need a way to specify - # syntactically what type the cache data is. - typ = "PyObject *" - func = "read_obj" - else: - typ = f"uint{bits}_t " - func = f"read_u{bits}" - if tier == TIER_ONE: - out.emit( - f"{typ}{ceffect.name} = {func}(&next_instr[{active.offset}].cache);" - ) - else: - out.emit(f"{typ}{ceffect.name} = ({typ.strip()})operand;") - - # Write the body, substituting a goto for ERROR_IF() and other stuff - assert dedent <= 0 - extra = " " * -dedent - names_to_skip = self.unmoved_names | frozenset({UNUSED, "null"}) - offset = 0 - context = self.block.context - assert context is not None and context.owner is not None - filename = context.owner.filename - for line in self.block_text: - out.set_lineno(self.block_line + offset, filename) - offset += 1 - if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*(?://.*)?$", line): - space, cond, label = m.groups() - space = extra + space - # ERROR_IF() must pop the inputs from the stack. - # The code block is responsible for DECREF()ing them. - # NOTE: If the label doesn't exist, just add it to ceval.c. - - # Don't pop common input/output effects at the bottom! - # These aren't DECREF'ed so they can stay. - ieffs = list(self.input_effects) - oeffs = list(self.output_effects) - while ieffs and oeffs and ieffs[0] == oeffs[0]: - ieffs.pop(0) - oeffs.pop(0) - ninputs, symbolic = list_effect_size(ieffs) - if ninputs: - label = f"pop_{ninputs}_{label}" - if symbolic: - out.write_raw( - f"{space}if ({cond}) {{ STACK_SHRINK({symbolic}); goto {label}; }}\n" - ) - else: - out.write_raw(f"{space}if ({cond}) goto {label};\n") - elif m := re.match(r"(\s*)DECREF_INPUTS\(\);\s*(?://.*)?$", line): - out.reset_lineno() - space = extra + m.group(1) - for ieff in self.input_effects: - if ieff.name in names_to_skip: - continue - if ieff.size: - out.write_raw( - f"{space}for (int _i = {ieff.size}; --_i >= 0;) {{\n" - ) - out.write_raw(f"{space} Py_DECREF({ieff.name}[_i]);\n") - out.write_raw(f"{space}}}\n") - else: - decref = "XDECREF" if ieff.cond else "DECREF" - out.write_raw(f"{space}Py_{decref}({ieff.name});\n") - else: - out.write_raw(extra + line) - out.reset_lineno() - - -InstructionOrCacheEffect = Instruction | parser.CacheEffect -StackEffectMapping = list[tuple[StackEffect, StackEffect]] - - -@dataclasses.dataclass -class Component: - instr: Instruction - input_mapping: StackEffectMapping - output_mapping: StackEffectMapping - active_caches: list[ActiveCacheEffect] - - def write_body(self, out: Formatter) -> None: - with out.block(""): - input_names = {ieffect.name for _, ieffect in self.input_mapping} - for var, ieffect in self.input_mapping: - out.declare(ieffect, var) - for _, oeffect in self.output_mapping: - if oeffect.name not in input_names: - out.declare(oeffect, None) - - self.instr.write_body(out, -4, self.active_caches) - - for var, oeffect in self.output_mapping: - out.assign(var, oeffect) - - -MacroParts = list[Component | parser.CacheEffect] - - -@dataclasses.dataclass -class MacroInstruction: - """A macro instruction.""" - - name: str - stack: list[StackEffect] - initial_sp: int - final_sp: int - instr_fmt: str - instr_flags: InstructionFlags - macro: parser.Macro - parts: MacroParts - cache_offset: int - predicted: bool = False - - -@dataclasses.dataclass -class PseudoInstruction: - """A pseudo instruction.""" - - name: str - targets: list[Instruction] - instr_fmt: str - instr_flags: InstructionFlags - - -@dataclasses.dataclass -class OverriddenInstructionPlaceHolder: - name: str - - -AnyInstruction = Instruction | MacroInstruction | PseudoInstruction -INSTR_FMT_PREFIX = "INSTR_FMT_" - - -class Analyzer: - """Parse input, analyze it, and write to output.""" - - input_filenames: list[str] - output_filename: str - metadata_filename: str - pymetadata_filename: str - executor_filename: str - errors: int = 0 - emit_line_directives: bool = False - - def __init__( - self, - input_filenames: list[str], - output_filename: str, - metadata_filename: str, - pymetadata_filename: str, - executor_filename: str, - ): - """Read the input file.""" - self.input_filenames = input_filenames - self.output_filename = output_filename - self.metadata_filename = metadata_filename - self.pymetadata_filename = pymetadata_filename - self.executor_filename = executor_filename - - def error(self, msg: str, node: parser.Node) -> None: - lineno = 0 - filename = "<unknown file>" - if context := node.context: - filename = context.owner.filename - # Use line number of first non-comment in the node - for token in context.owner.tokens[context.begin : context.end]: - lineno = token.line - if token.kind != "COMMENT": - break - print(f"{filename}:{lineno}: {msg}", file=sys.stderr) - self.errors += 1 - - everything: list[ - parser.InstDef | parser.Macro | parser.Pseudo | OverriddenInstructionPlaceHolder - ] - instrs: dict[str, Instruction] # Includes ops - macros: dict[str, parser.Macro] - macro_instrs: dict[str, MacroInstruction] - families: dict[str, parser.Family] - pseudos: dict[str, parser.Pseudo] - pseudo_instrs: dict[str, PseudoInstruction] - - def parse(self) -> None: - """Parse the source text. - - We only want the parser to see the stuff between the - begin and end markers. - """ - - self.everything = [] - self.instrs = {} - self.macros = {} - self.families = {} - self.pseudos = {} - - instrs_idx: dict[str, int] = dict() - - for filename in self.input_filenames: - self.parse_file(filename, instrs_idx) - - files = " + ".join(self.input_filenames) - print( - f"Read {len(self.instrs)} instructions/ops, " - f"{len(self.macros)} macros, {len(self.pseudos)} pseudos, " - f"and {len(self.families)} families from {files}", - file=sys.stderr, - ) - - def parse_file(self, filename: str, instrs_idx: dict[str, int]) -> None: - with open(filename) as file: - src = file.read() - - - psr = parser.Parser(src, filename=prettify_filename(filename)) - - # Skip until begin marker - while tkn := psr.next(raw=True): - if tkn.text == BEGIN_MARKER: - break - else: - raise psr.make_syntax_error( - f"Couldn't find {BEGIN_MARKER!r} in {psr.filename}" - ) - start = psr.getpos() - - # Find end marker, then delete everything after it - while tkn := psr.next(raw=True): - if tkn.text == END_MARKER: - break - del psr.tokens[psr.getpos() - 1 :] - - # Parse from start - psr.setpos(start) - thing: parser.InstDef | parser.Macro | parser.Pseudo | parser.Family | None - thing_first_token = psr.peek() - while thing := psr.definition(): - if ws := [w for w in RESERVED_WORDS if variable_used(thing, w)]: - self.error(f"'{ws[0]}' is a reserved word. {RESERVED_WORDS[ws[0]]}", thing) - - match thing: - case parser.InstDef(name=name): - if name in self.instrs: - if not thing.override: - raise psr.make_syntax_error( - f"Duplicate definition of '{name}' @ {thing.context} " - f"previous definition @ {self.instrs[name].inst.context}", - thing_first_token, - ) - self.everything[instrs_idx[name]] = OverriddenInstructionPlaceHolder(name=name) - if name not in self.instrs and thing.override: - raise psr.make_syntax_error( - f"Definition of '{name}' @ {thing.context} is supposed to be " - "an override but no previous definition exists.", - thing_first_token, - ) - self.instrs[name] = Instruction(thing) - instrs_idx[name] = len(self.everything) - self.everything.append(thing) - case parser.Macro(name): - self.macros[name] = thing - self.everything.append(thing) - case parser.Family(name): - self.families[name] = thing - case parser.Pseudo(name): - self.pseudos[name] = thing - self.everything.append(thing) - case _: - typing.assert_never(thing) - if not psr.eof(): - raise psr.make_syntax_error(f"Extra stuff at the end of {filename}") - - def analyze(self) -> None: - """Analyze the inputs. - - Raises SystemExit if there is an error. - """ - self.analyze_macros_and_pseudos() - self.find_predictions() - self.map_families() - self.check_families() - - def find_predictions(self) -> None: - """Find the instructions that need PREDICTED() labels.""" - for instr in self.instrs.values(): - targets: set[str] = set() - for line in instr.block_text: - if m := re.match(RE_PREDICTED, line): - targets.add(m.group(1)) - for target in targets: - if target_instr := self.instrs.get(target): - target_instr.predicted = True - elif target_macro := self.macro_instrs.get(target): - target_macro.predicted = True - else: - self.error( - f"Unknown instruction {target!r} predicted in {instr.name!r}", - instr.inst, # TODO: Use better location - ) - - def map_families(self) -> None: - """Link instruction names back to their family, if they have one.""" - for family in self.families.values(): - for member in [family.name] + family.members: - if member_instr := self.instrs.get(member): - if member_instr.family not in (family, None): - self.error( - f"Instruction {member} is a member of multiple families " - f"({member_instr.family.name}, {family.name}).", - family, - ) - else: - member_instr.family = family - elif not self.macro_instrs.get(member): - self.error( - f"Unknown instruction {member!r} referenced in family {family.name!r}", - family, - ) - - def check_families(self) -> None: - """Check each family: - - - Must have at least 2 members (including head) - - Head and all members must be known instructions - - Head and all members must have the same cache, input and output effects - """ - for family in self.families.values(): - if family.name not in self.macro_instrs and family.name not in self.instrs: - self.error( - f"Family {family.name!r} has unknown instruction {family.name!r}", - family, - ) - members = [ - member - for member in family.members - if member in self.instrs or member in self.macro_instrs - ] - if members != family.members: - unknown = set(family.members) - set(members) - self.error( - f"Family {family.name!r} has unknown members: {unknown}", family - ) - expected_effects = self.effect_counts(family.name) - for member in members: - member_effects = self.effect_counts(member) - if member_effects != expected_effects: - self.error( - f"Family {family.name!r} has inconsistent " - f"(cache, input, output) effects:\n" - f" {family.name} = {expected_effects}; " - f"{member} = {member_effects}", - family, - ) - - def effect_counts(self, name: str) -> tuple[int, int, int]: - if instr := self.instrs.get(name): - cache = instr.cache_offset - input = len(instr.input_effects) - output = len(instr.output_effects) - elif mac := self.macro_instrs.get(name): - cache = mac.cache_offset - input, output = 0, 0 - for part in mac.parts: - if isinstance(part, Component): - # A component may pop what the previous component pushed, - # so we offset the input/output counts by that. - delta_i = len(part.instr.input_effects) - delta_o = len(part.instr.output_effects) - offset = min(delta_i, output) - input += delta_i - offset - output += delta_o - offset - else: - assert False, f"Unknown instruction {name!r}" - return cache, input, output - - def analyze_macros_and_pseudos(self) -> None: - """Analyze each macro and pseudo instruction.""" - self.macro_instrs = {} - self.pseudo_instrs = {} - for name, macro in self.macros.items(): - self.macro_instrs[name] = self.analyze_macro(macro) - for name, pseudo in self.pseudos.items(): - self.pseudo_instrs[name] = self.analyze_pseudo(pseudo) - - def analyze_macro(self, macro: parser.Macro) -> MacroInstruction: - components = self.check_macro_components(macro) - stack, initial_sp = self.stack_analysis(components) - sp = initial_sp - parts: MacroParts = [] - flags = InstructionFlags.newEmpty() - offset = 0 - for component in components: - match component: - case parser.CacheEffect() as ceffect: - parts.append(ceffect) - offset += ceffect.size - case Instruction() as instr: - part, sp, offset = self.analyze_instruction(instr, stack, sp, offset) - parts.append(part) - flags.add(instr.instr_flags) - case _: - typing.assert_never(component) - final_sp = sp - format = "IB" - if offset: - format += "C" + "0"*(offset-1) - return MacroInstruction( - macro.name, stack, initial_sp, final_sp, format, flags, macro, parts, offset - ) - - def analyze_pseudo(self, pseudo: parser.Pseudo) -> PseudoInstruction: - targets = [self.instrs[target] for target in pseudo.targets] - assert targets - # Make sure the targets have the same fmt - fmts = list(set([t.instr_fmt for t in targets])) - assert(len(fmts) == 1) - assert(len(list(set([t.instr_flags.bitmap() for t in targets]))) == 1) - return PseudoInstruction(pseudo.name, targets, fmts[0], targets[0].instr_flags) - - def analyze_instruction( - self, instr: Instruction, stack: list[StackEffect], sp: int, offset: int - ) -> tuple[Component, int, int]: - input_mapping: StackEffectMapping = [] - for ieffect in reversed(instr.input_effects): - sp -= 1 - input_mapping.append((stack[sp], ieffect)) - output_mapping: StackEffectMapping = [] - for oeffect in instr.output_effects: - output_mapping.append((stack[sp], oeffect)) - sp += 1 - active_effects: list[ActiveCacheEffect] = [] - for ceffect in instr.cache_effects: - if ceffect.name != UNUSED: - active_effects.append(ActiveCacheEffect(ceffect, offset)) - offset += ceffect.size - return Component(instr, input_mapping, output_mapping, active_effects), sp, offset - - def check_macro_components( - self, macro: parser.Macro - ) -> list[InstructionOrCacheEffect]: - components: list[InstructionOrCacheEffect] = [] - for uop in macro.uops: - match uop: - case parser.OpName(name): - if name not in self.instrs: - self.error(f"Unknown instruction {name!r}", macro) - components.append(self.instrs[name]) - case parser.CacheEffect(): - components.append(uop) - case _: - typing.assert_never(uop) - return components - - def stack_analysis( - self, components: typing.Iterable[InstructionOrCacheEffect] - ) -> tuple[list[StackEffect], int]: - """Analyze a macro. - - Ignore cache effects. - - Return the list of variables (as StackEffects) and the initial stack pointer. - """ - lowest = current = highest = 0 - conditions: dict[int, str] = {} # Indexed by 'current'. - last_instr: Instruction | None = None - for thing in components: - if isinstance(thing, Instruction): - last_instr = thing - for thing in components: - match thing: - case Instruction() as instr: - if any( - eff.size for eff in instr.input_effects + instr.output_effects - ): - # TODO: Eventually this will be needed, at least for macros. - self.error( - f"Instruction {instr.name!r} has variable-sized stack effect, " - "which are not supported in macro instructions", - instr.inst, # TODO: Pass name+location of macro - ) - if any(eff.cond for eff in instr.input_effects): - self.error( - f"Instruction {instr.name!r} has conditional input stack effect, " - "which are not supported in macro instructions", - instr.inst, # TODO: Pass name+location of macro - ) - if any(eff.cond for eff in instr.output_effects) and instr is not last_instr: - self.error( - f"Instruction {instr.name!r} has conditional output stack effect, " - "but is not the last instruction in a macro", - instr.inst, # TODO: Pass name+location of macro - ) - current -= len(instr.input_effects) - lowest = min(lowest, current) - for eff in instr.output_effects: - if eff.cond: - conditions[current] = eff.cond - current += 1 - highest = max(highest, current) - case parser.CacheEffect(): - pass - case _: - typing.assert_never(thing) - # At this point, 'current' is the net stack effect, - # and 'lowest' and 'highest' are the extremes. - # Note that 'lowest' may be negative. - stack = [ - StackEffect(f"_tmp_{i}", "", conditions.get(highest - i, "")) - for i in reversed(range(1, highest - lowest + 1)) - ] - return stack, -lowest - +class Generator(Analyzer): def get_stack_effect_info( - self, thing: parser.InstDef | parser.Macro | parser.Pseudo + self, thing: parsing.InstDef | parsing.Macro | parsing.Pseudo ) -> tuple[AnyInstruction | None, str | None, str | None]: def effect_str(effects: list[StackEffect]) -> str: n_effect, sym_effect = list_effect_size(effects) @@ -1053,8 +104,10 @@ class Analyzer: return str(n_effect) instr: AnyInstruction | None + popped: str | None + pushed: str | None match thing: - case parser.InstDef(): + case parsing.InstDef(): if thing.kind != "op": instr = self.instrs[thing.name] popped = effect_str(instr.input_effects) @@ -1063,7 +116,7 @@ class Analyzer: instr = None popped = "" pushed = "" - case parser.Macro(): + case parsing.Macro(): instr = self.macro_instrs[thing.name] parts = [comp for comp in instr.parts if isinstance(comp, Component)] # Note: stack_analysis() already verifies that macro components @@ -1084,7 +137,11 @@ class Analyzer: if effect.cond in ("0", "1"): pushed_symbolic.append(effect.cond) else: - pushed_symbolic.append(maybe_parenthesize(f"{maybe_parenthesize(effect.cond)} ? 1 : 0")) + pushed_symbolic.append( + maybe_parenthesize( + f"{maybe_parenthesize(effect.cond)} ? 1 : 0" + ) + ) sp += 1 high = max(sp, high) if high != max(0, sp): @@ -1096,7 +153,7 @@ class Analyzer: popped = str(-low) pushed_symbolic.append(str(sp - low - len(pushed_symbolic))) pushed = " + ".join(pushed_symbolic) - case parser.Pseudo(): + case parsing.Pseudo(): instr = self.pseudo_instrs[thing.name] popped = pushed = None # Calculate stack effect, and check that it's the the same @@ -1135,10 +192,14 @@ class Analyzer: ) -> None: self.out.emit("") self.out.emit("#ifndef NEED_OPCODE_METADATA") - self.out.emit(f"extern int _PyOpcode_num_{direction}(int opcode, int oparg, bool jump);") + self.out.emit( + f"extern int _PyOpcode_num_{direction}(int opcode, int oparg, bool jump);" + ) self.out.emit("#else") self.out.emit("int") - self.out.emit(f"_PyOpcode_num_{direction}(int opcode, int oparg, bool jump) {{") + self.out.emit( + f"_PyOpcode_num_{direction}(int opcode, int oparg, bool jump) {{" + ) self.out.emit(" switch(opcode) {") for instr, effect in data: self.out.emit(f" case {instr.name}:") @@ -1159,7 +220,7 @@ class Analyzer: try: filename = os.path.relpath(filename, ROOT) except ValueError: - # May happen on Windows if root and temp on different volumes + # May happen on Windows if root and temp on different volumes pass filenames.append(filename) paths = f"\n{self.out.comment} ".join(filenames) @@ -1170,20 +231,21 @@ class Analyzer: self.out.write_raw(self.from_source_files()) self.out.write_raw(f"{self.out.comment} Do not edit!\n") - def write_metadata(self) -> None: + def write_metadata(self, metadata_filename: str, pymetadata_filename: str) -> None: """Write instruction metadata to output file.""" # Compute the set of all instruction formats. all_formats: set[str] = set() for thing in self.everything: + format: str | None match thing: case OverriddenInstructionPlaceHolder(): continue - case parser.InstDef(): + case parsing.InstDef(): format = self.instrs[thing.name].instr_fmt - case parser.Macro(): + case parsing.Macro(): format = self.macro_instrs[thing.name].instr_fmt - case parser.Pseudo(): + case parsing.Pseudo(): format = None for target in self.pseudos[thing.name].targets: target_instr = self.instrs.get(target) @@ -1192,13 +254,14 @@ class Analyzer: format = target_instr.instr_fmt else: assert format == target_instr.instr_fmt + assert format is not None case _: typing.assert_never(thing) all_formats.add(format) # Turn it into a list of enum definitions. format_enums = [INSTR_FMT_PREFIX + format for format in sorted(all_formats)] - with open(self.metadata_filename, "w") as f: + with open(metadata_filename, "w") as f: # Create formatter self.out = Formatter(f, 0) @@ -1220,7 +283,8 @@ class Analyzer: self.out.emit( "#define IS_VALID_OPCODE(OP) \\\n" " (((OP) >= 0) && ((OP) < OPCODE_METADATA_SIZE) && \\\n" - " (_PyOpcode_opcode_metadata[(OP)].valid_entry))") + " (_PyOpcode_opcode_metadata[(OP)].valid_entry))" + ) self.out.emit("") InstructionFlags.emit_macros(self.out) @@ -1234,17 +298,23 @@ class Analyzer: with self.out.block("struct opcode_macro_expansion", ";"): self.out.emit("int nuops;") - self.out.emit("struct { int16_t uop; int8_t size; int8_t offset; } uops[8];") + self.out.emit( + "struct { int16_t uop; int8_t size; int8_t offset; } uops[8];" + ) self.out.emit("") for key, value in OPARG_SIZES.items(): self.out.emit(f"#define {key} {value}") self.out.emit("") - self.out.emit("#define OPCODE_METADATA_FMT(OP) " - "(_PyOpcode_opcode_metadata[(OP)].instr_format)") + self.out.emit( + "#define OPCODE_METADATA_FMT(OP) " + "(_PyOpcode_opcode_metadata[(OP)].instr_format)" + ) self.out.emit("#define SAME_OPCODE_METADATA(OP1, OP2) \\") - self.out.emit(" (OPCODE_METADATA_FMT(OP1) == OPCODE_METADATA_FMT(OP2))") + self.out.emit( + " (OPCODE_METADATA_FMT(OP1) == OPCODE_METADATA_FMT(OP2))" + ) self.out.emit("") # Write metadata array declaration @@ -1253,27 +323,35 @@ class Analyzer: self.out.emit("#define OPCODE_MACRO_EXPANSION_SIZE 256") self.out.emit("") self.out.emit("#ifndef NEED_OPCODE_METADATA") - self.out.emit("extern const struct opcode_metadata " - "_PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE];") - self.out.emit("extern const struct opcode_macro_expansion " - "_PyOpcode_macro_expansion[OPCODE_MACRO_EXPANSION_SIZE];") - self.out.emit("extern const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE];") + self.out.emit( + "extern const struct opcode_metadata " + "_PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE];" + ) + self.out.emit( + "extern const struct opcode_macro_expansion " + "_PyOpcode_macro_expansion[OPCODE_MACRO_EXPANSION_SIZE];" + ) + self.out.emit( + "extern const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE];" + ) self.out.emit("#else // if NEED_OPCODE_METADATA") - self.out.emit("const struct opcode_metadata " - "_PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = {") + self.out.emit( + "const struct opcode_metadata " + "_PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = {" + ) # Write metadata for each instruction for thing in self.everything: match thing: case OverriddenInstructionPlaceHolder(): continue - case parser.InstDef(): + case parsing.InstDef(): if thing.kind != "op": self.write_metadata_for_inst(self.instrs[thing.name]) - case parser.Macro(): + case parsing.Macro(): self.write_metadata_for_macro(self.macro_instrs[thing.name]) - case parser.Pseudo(): + case parsing.Pseudo(): self.write_metadata_for_pseudo(self.pseudo_instrs[thing.name]) case _: typing.assert_never(thing) @@ -1291,32 +369,38 @@ class Analyzer: match thing: case OverriddenInstructionPlaceHolder(): pass - case parser.InstDef(name=name): + case parsing.InstDef(name=name): instr = self.instrs[name] # Since an 'op' is not a bytecode, it has no expansion; but 'inst' is if instr.kind == "inst" and instr.is_viable_uop(): # Construct a dummy Component -- input/output mappings are not used part = Component(instr, [], [], instr.active_caches) self.write_macro_expansions(instr.name, [part]) - elif instr.kind == "inst" and variable_used(instr.inst, "oparg1"): - assert variable_used(instr.inst, "oparg2"), "Half super-instr?" + elif instr.kind == "inst" and variable_used( + instr.inst, "oparg1" + ): + assert variable_used( + instr.inst, "oparg2" + ), "Half super-instr?" self.write_super_expansions(instr.name) - case parser.Macro(): + case parsing.Macro(): mac = self.macro_instrs[thing.name] self.write_macro_expansions(mac.name, mac.parts) - case parser.Pseudo(): + case parsing.Pseudo(): pass case _: typing.assert_never(thing) - with self.out.block("const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] =", ";"): - self.write_uop_items(lambda name, counter: f"[{name}] = \"{name}\",") + with self.out.block( + "const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] =", ";" + ): + self.write_uop_items(lambda name, counter: f'[{name}] = "{name}",') self.out.emit("#endif // NEED_OPCODE_METADATA") - with open(self.pymetadata_filename, "w") as f: + with open(pymetadata_filename, "w") as f: # Create formatter - self.out = Formatter(f, 0, comment = "#") + self.out = Formatter(f, 0, comment="#") self.write_provenance_header() @@ -1324,10 +408,10 @@ class Analyzer: self.out.emit("_specializations = {") for name, family in self.families.items(): with self.out.indent(): - self.out.emit(f"\"{family.name}\": [") + self.out.emit(f'"{family.name}": [') with self.out.indent(): for m in family.members: - self.out.emit(f"\"{m}\",") + self.out.emit(f'"{m}",') self.out.emit(f"],") self.out.emit("}") @@ -1335,15 +419,17 @@ class Analyzer: self.out.emit("") self.out.emit("# An irregular case:") self.out.emit( - "_specializations[\"BINARY_OP\"].append(" - "\"BINARY_OP_INPLACE_ADD_UNICODE\")") + '_specializations["BINARY_OP"].append(' + '"BINARY_OP_INPLACE_ADD_UNICODE")' + ) # Make list of specialized instructions self.out.emit("") self.out.emit( "_specialized_instructions = [" - "opcode for family in _specializations.values() for opcode in family" - "]") + "opcode for family in _specializations.values() for opcode in family" + "]" + ) def write_pseudo_instrs(self) -> None: """Write the IS_PSEUDO_INSTR macro""" @@ -1432,16 +518,18 @@ class Analyzer: ] self.write_expansions(name, expansions) - def write_expansions(self, name: str, expansions: list[tuple[str, int, int]]) -> None: - pieces = [f"{{ {name}, {size}, {offset} }}" for name, size, offset in expansions] + def write_expansions( + self, name: str, expansions: list[tuple[str, int, int]] + ) -> None: + pieces = [ + f"{{ {name}, {size}, {offset} }}" for name, size, offset in expansions + ] self.out.emit( f"[{name}] = " f"{{ .nuops = {len(pieces)}, .uops = {{ {', '.join(pieces)} }} }}," ) - def emit_metadata_entry( - self, name: str, fmt: str, flags: InstructionFlags - ) -> None: + def emit_metadata_entry(self, name: str, fmt: str, flags: InstructionFlags) -> None: flag_names = flags.names(value=True) if not flag_names: flag_names.append("0") @@ -1462,11 +550,13 @@ class Analyzer: """Write metadata for a macro-instruction.""" self.emit_metadata_entry(ps.name, ps.instr_fmt, ps.instr_flags) - def write_instructions(self) -> None: + def write_instructions( + self, output_filename: str, emit_line_directives: bool + ) -> None: """Write instructions to output file.""" - with open(self.output_filename, "w") as f: + with open(output_filename, "w") as f: # Create formatter - self.out = Formatter(f, 8, self.emit_line_directives) + self.out = Formatter(f, 8, emit_line_directives) self.write_provenance_header() @@ -1478,35 +568,37 @@ class Analyzer: match thing: case OverriddenInstructionPlaceHolder(): self.write_overridden_instr_place_holder(thing) - case parser.InstDef(): + case parsing.InstDef(): if thing.kind != "op": n_instrs += 1 self.write_instr(self.instrs[thing.name]) - case parser.Macro(): + case parsing.Macro(): n_macros += 1 self.write_macro(self.macro_instrs[thing.name]) - case parser.Pseudo(): + case parsing.Pseudo(): n_pseudos += 1 case _: typing.assert_never(thing) print( f"Wrote {n_instrs} instructions, {n_macros} macros, " - f"and {n_pseudos} pseudos to {self.output_filename}", + f"and {n_pseudos} pseudos to {output_filename}", file=sys.stderr, ) - def write_executor_instructions(self) -> None: + def write_executor_instructions( + self, executor_filename: str, emit_line_directives: bool + ) -> None: """Generate cases for the Tier 2 interpreter.""" - with open(self.executor_filename, "w") as f: - self.out = Formatter(f, 8, self.emit_line_directives) + with open(executor_filename, "w") as f: + self.out = Formatter(f, 8, emit_line_directives) self.write_provenance_header() for thing in self.everything: match thing: case OverriddenInstructionPlaceHolder(): # TODO: Is this helpful? self.write_overridden_instr_place_holder(thing) - case parser.InstDef(): + case parsing.InstDef(): instr = self.instrs[thing.name] if instr.is_viable_uop(): self.out.emit("") @@ -1517,22 +609,24 @@ class Analyzer: self.out.emit("break;") # elif instr.kind != "op": # print(f"NOTE: {thing.name} is not a viable uop") - case parser.Macro(): + case parsing.Macro(): pass - case parser.Pseudo(): + case parsing.Pseudo(): pass case _: typing.assert_never(thing) print( - f"Wrote some stuff to {self.executor_filename}", + f"Wrote some stuff to {executor_filename}", file=sys.stderr, ) - def write_overridden_instr_place_holder(self, - place_holder: OverriddenInstructionPlaceHolder) -> None: + def write_overridden_instr_place_holder( + self, place_holder: OverriddenInstructionPlaceHolder + ) -> None: self.out.emit("") self.out.emit( - f"{self.out.comment} TARGET({place_holder.name}) overridden by later definition") + f"{self.out.comment} TARGET({place_holder.name}) overridden by later definition" + ) def write_instr(self, instr: Instruction) -> None: name = instr.name @@ -1555,7 +649,7 @@ class Analyzer: cache_adjust = 0 for part in mac.parts: match part: - case parser.CacheEffect(size=size): + case parsing.CacheEffect(size=size): cache_adjust += size case Component() as comp: last_instr = comp.instr @@ -1603,7 +697,7 @@ class Analyzer: yield - self.out.stack_adjust(ieffects[:mac.initial_sp], mac.stack[:mac.final_sp]) + self.out.stack_adjust(ieffects[: mac.initial_sp], mac.stack[: mac.final_sp]) for i, var in enumerate(reversed(mac.stack[: mac.final_sp]), 1): dst = StackEffect(f"stack_pointer[-{i}]", "") @@ -1612,99 +706,6 @@ class Analyzer: self.out.emit(f"DISPATCH();") -def prettify_filename(filename: str) -> str: - # Make filename more user-friendly and less platform-specific, - # it is only used for error reporting at this point. - filename = filename.replace("\\", "/") - if filename.startswith("./"): - filename = filename[2:] - if filename.endswith(".new"): - filename = filename[:-4] - return filename - - -def extract_block_text(block: parser.Block) -> tuple[list[str], bool, int]: - # Get lines of text with proper dedent - blocklines = block.text.splitlines(True) - first_token: lx.Token = block.tokens[0] # IndexError means the context is broken - block_line = first_token.begin[0] - - # Remove blank lines from both ends - while blocklines and not blocklines[0].strip(): - blocklines.pop(0) - block_line += 1 - while blocklines and not blocklines[-1].strip(): - blocklines.pop() - - # Remove leading and trailing braces - assert blocklines and blocklines[0].strip() == "{" - assert blocklines and blocklines[-1].strip() == "}" - blocklines.pop() - blocklines.pop(0) - block_line += 1 - - # Remove trailing blank lines - while blocklines and not blocklines[-1].strip(): - blocklines.pop() - - # Separate CHECK_EVAL_BREAKER() macro from end - check_eval_breaker = \ - blocklines != [] and blocklines[-1].strip() == "CHECK_EVAL_BREAKER();" - if check_eval_breaker: - del blocklines[-1] - - return blocklines, check_eval_breaker, block_line - - -def always_exits(lines: list[str]) -> bool: - """Determine whether a block always ends in a return/goto/etc.""" - if not lines: - return False - line = lines[-1].rstrip() - # Indent must match exactly (TODO: Do something better) - if line[:12] != " " * 12: - return False - line = line[12:] - return line.startswith( - ( - "goto ", - "return ", - "DISPATCH", - "GO_TO_", - "Py_UNREACHABLE()", - "ERROR_IF(true, ", - ) - ) - - -def variable_used(node: parser.Node, name: str) -> bool: - """Determine whether a variable with a given name is used in a node.""" - return any( - token.kind == "IDENTIFIER" and token.text == name for token in node.tokens - ) - - -def variable_used_unspecialized(node: parser.Node, name: str) -> bool: - """Like variable_used(), but skips #if ENABLE_SPECIALIZATION blocks.""" - tokens: list[lx.Token] = [] - skipping = False - for i, token in enumerate(node.tokens): - if token.kind == "MACRO": - text = "".join(token.text.split()) - # TODO: Handle nested #if - if text == "#if": - if ( - i + 1 < len(node.tokens) - and node.tokens[i + 1].text == "ENABLE_SPECIALIZATION" - ): - skipping = True - elif text in ("#else", "#endif"): - skipping = False - if not skipping: - tokens.append(token) - return any(token.kind == "IDENTIFIER" and token.text == name for token in tokens) - - def main(): """Parse command line, parse input, analyze, write output.""" args = arg_parser.parse_args() # Prints message and sys.exit(2) on error @@ -1712,17 +713,17 @@ def main(): args.input.append(DEFAULT_INPUT) # Raises OSError if input unreadable - a = Analyzer(args.input, args.output, args.metadata, args.pymetadata, args.executor_cases) + a = Generator(args.input) - if args.emit_line_directives: - a.emit_line_directives = True a.parse() # Raises SyntaxError on failure a.analyze() # Prints messages and sets a.errors on failure if a.errors: sys.exit(f"Found {a.errors} errors") - a.write_instructions() # Raises OSError if output can't be written - a.write_metadata() - a.write_executor_instructions() + + # These raise OSError if output can't be written + a.write_instructions(args.output, args.emit_line_directives) + a.write_metadata(args.metadata, args.pymetadata) + a.write_executor_instructions(args.executor_cases, args.emit_line_directives) if __name__ == "__main__": |