author    | Guido van Rossum <guido@python.org> | 2022-12-08 21:31:27 (GMT)
committer | GitHub <noreply@github.com>         | 2022-12-08 21:31:27 (GMT)
commit    | c85be734d1823f02a3600d7cd9195cecbf51afe8
tree      | 676c83c4c423c817c554228d1badf20110f47d32 /Tools
parent    | 35cc0ea736a323119157117d93e5d68d8247e89f
GH-98831: Typed stack effects, and more instructions converted (#99764)
Stack effects can now have a type, e.g. `inst(X, (left, right -- jump: uint64_t)) { ... }`.
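To make the annotation concrete, here is a tiny standalone sketch (not the real Tools/cases_generator code; `parse_stack_effect` is invented for this illustration) of what a typed effect carries after parsing. The new grammar in parser.py reads an optional `: type` after the effect name, and an empty type still means the default `PyObject *`:

```python
import dataclasses


@dataclasses.dataclass
class StackEffect:
    name: str
    type: str = ""  # "" means the default PyObject *


def parse_stack_effect(text: str) -> StackEffect:
    # Toy stand-in for Parser.stack_effect(): IDENTIFIER [':' IDENTIFIER].
    name, _, typ = (part.strip() for part in text.partition(":"))
    return StackEffect(name, typ)


print(parse_stack_effect("left"))            # StackEffect(name='left', type='')
print(parse_stack_effect("jump: uint64_t"))  # StackEffect(name='jump', type='uint64_t')
```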
Instructions converted to the non-legacy format:
* COMPARE_OP
* COMPARE_OP_FLOAT_JUMP
* COMPARE_OP_INT_JUMP
* COMPARE_OP_STR_JUMP
* STORE_ATTR
* DELETE_ATTR
* STORE_GLOBAL
* STORE_ATTR_INSTANCE_VALUE
* STORE_ATTR_WITH_HINT
* STORE_ATTR_SLOT, and complete the store_attr family
* Complete the store_subscr family: STORE_SUBSCR{,_DICT,_LIST_INT}
(STORE_SUBSCR was already half converted,
but wasn't using cache effects yet.)
* DELETE_SUBSCR
* PRINT_EXPR
* INTERPRETER_EXIT (a bit weird, ends in return)
* RETURN_VALUE
* GET_AITER (had to restructure it some)
The original had mysterious `SET_TOP(NULL)` before `goto error`.
I assume those just account for `obj` having been decref'ed,
so I got rid of them in favor of the cleanup implied by `ERROR_IF()`.
* LIST_APPEND (a bit unhappy with it)
* SET_ADD (also a bit unhappy with it)
Various other improvements/refactorings as well.
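Among those refactorings, `Formatter` grows small emission helpers (`stack_adjust`, `declare`, `assign`, `cast`) that the per-instruction writer and the super/macro writer now share. A trimmed-down sketch of how they combine (standalone; `MiniFormatter` and the demo instruction are invented here, and the real helpers also handle `unused` effects and indentation):

```python
import dataclasses
import re


@dataclasses.dataclass
class StackEffect:
    name: str
    type: str = ""


class MiniFormatter:
    # Trimmed-down versions of the helpers added to Formatter in the diff below.
    def emit(self, line: str) -> None:
        print(line)

    def cast(self, dst: StackEffect, src: StackEffect) -> str:
        # Cast only when the source and destination types differ.
        return f"({dst.type or 'PyObject *'})" if src.type != dst.type else ""

    def stack_adjust(self, diff: int) -> None:
        if diff > 0:
            self.emit(f"STACK_GROW({diff});")
        elif diff < 0:
            self.emit(f"STACK_SHRINK({-diff});")

    def declare(self, dst: StackEffect, src: StackEffect | None) -> None:
        typ = f"{dst.type} " if dst.type else "PyObject *"
        init = f" = {self.cast(dst, src)}{src.name}" if src else ""
        self.emit(f"{typ}{dst.name}{init};")

    def assign(self, dst: StackEffect, src: StackEffect) -> None:
        if m := re.match(r"^PEEK\((\d+)\)$", dst.name):
            self.emit(f"POKE({m.group(1)}, {self.cast(dst, src)}{src.name});")
        else:
            self.emit(f"{dst.name} = {self.cast(dst, src)}{src.name};")


# Emit the skeleton for a hypothetical `inst(X, (left, right -- res))`:
out = MiniFormatter()
out.declare(StackEffect("right"), StackEffect("PEEK(1)"))  # PyObject *right = PEEK(1);
out.declare(StackEffect("left"), StackEffect("PEEK(2)"))   # PyObject *left = PEEK(2);
out.declare(StackEffect("res"), None)                      # PyObject *res;
out.stack_adjust(1 - 2)                                    # STACK_SHRINK(1);
out.assign(StackEffect("PEEK(1)"), StackEffect("res"))     # POKE(1, res);
```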
Diffstat (limited to 'Tools')
-rw-r--r-- | Tools/cases_generator/generate_cases.py | 265
-rw-r--r-- | Tools/cases_generator/parser.py         |  49
2 files changed, 179 insertions, 135 deletions
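One behavioral change in the diff below: trailing `PREDICT(...)` macros are now stripped out of instruction bodies by `extract_block_text()` and re-emitted by the generator after the stack-effect epilogue, just before `DISPATCH()`. A toy version of that separation (standalone sketch reusing the same regex as the diff; `split_predictions` and the sample body are invented):

```python
import re


def split_predictions(lines: list[str]) -> tuple[list[str], list[str]]:
    # Same idea as extract_block_text(): peel trailing PREDICT(...) calls.
    predictions: list[str] = []
    while lines and (m := re.match(r"^\s*PREDICT\((\w+)\);\s*$", lines[-1])):
        predictions.insert(0, m.group(1))
        lines = lines[:-1]
    return lines, predictions


body, predictions = split_predictions(
    ["    res = something;\n", "    PREDICT(LOAD_CONST);\n"]
)
print(predictions)  # ['LOAD_CONST']
```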
diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py
index 2952634..8442722 100644
--- a/Tools/cases_generator/generate_cases.py
+++ b/Tools/cases_generator/generate_cases.py
@@ -13,6 +13,7 @@ import sys
 import typing
 
 import parser
+from parser import StackEffect
 
 DEFAULT_INPUT = os.path.relpath(
     os.path.join(os.path.dirname(__file__), "../../Python/bytecodes.c")
@@ -22,7 +23,7 @@ DEFAULT_OUTPUT = os.path.relpath(
 )
 BEGIN_MARKER = "// BEGIN BYTECODES //"
 END_MARKER = "// END BYTECODES //"
-RE_PREDICTED = r"(?s)(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);"
+RE_PREDICTED = r"^\s*(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);\s*$"
 UNUSED = "unused"
 BITS_PER_CODE_UNIT = 16
 
@@ -73,6 +74,34 @@ class Formatter:
             yield
         self.emit("}")
 
+    def stack_adjust(self, diff: int):
+        if diff > 0:
+            self.emit(f"STACK_GROW({diff});")
+        elif diff < 0:
+            self.emit(f"STACK_SHRINK({-diff});")
+
+    def declare(self, dst: StackEffect, src: StackEffect | None):
+        if dst.name == UNUSED:
+            return
+        typ = f"{dst.type} " if dst.type else "PyObject *"
+        init = ""
+        if src:
+            cast = self.cast(dst, src)
+            init = f" = {cast}{src.name}"
+        self.emit(f"{typ}{dst.name}{init};")
+
+    def assign(self, dst: StackEffect, src: StackEffect):
+        if src.name == UNUSED:
+            return
+        cast = self.cast(dst, src)
+        if m := re.match(r"^PEEK\((\d+)\)$", dst.name):
+            self.emit(f"POKE({m.group(1)}, {cast}{src.name});")
+        else:
+            self.emit(f"{dst.name} = {cast}{src.name};")
+
+    def cast(self, dst: StackEffect, src: StackEffect) -> str:
+        return f"({dst.type or 'PyObject *'})" if src.type != dst.type else ""
+
 
 @dataclasses.dataclass
 class Instruction:
@@ -83,13 +112,15 @@ class Instruction:
     kind: typing.Literal["inst", "op"]
     name: str
     block: parser.Block
+    block_text: list[str]  # Block.text, less curlies, less PREDICT() calls
+    predictions: list[str]  # Prediction targets (instruction names)
 
     # Computed by constructor
     always_exits: bool
     cache_offset: int
     cache_effects: list[parser.CacheEffect]
-    input_effects: list[parser.StackEffect]
-    output_effects: list[parser.StackEffect]
+    input_effects: list[StackEffect]
+    output_effects: list[StackEffect]
 
     # Set later
     family: parser.Family | None = None
@@ -100,13 +131,14 @@ class Instruction:
         self.kind = inst.kind
         self.name = inst.name
         self.block = inst.block
-        self.always_exits = always_exits(self.block)
+        self.block_text, self.predictions = extract_block_text(self.block)
+        self.always_exits = always_exits(self.block_text)
         self.cache_effects = [
             effect for effect in inst.inputs if isinstance(effect, parser.CacheEffect)
         ]
         self.cache_offset = sum(c.size for c in self.cache_effects)
         self.input_effects = [
-            effect for effect in inst.inputs if isinstance(effect, parser.StackEffect)
+            effect for effect in inst.inputs if isinstance(effect, StackEffect)
         ]
         self.output_effects = inst.outputs  # For consistency/completeness
 
@@ -122,42 +154,39 @@ class Instruction:
                     )
 
         # Write input stack effect variable declarations and initializations
-        for i, seffect in enumerate(reversed(self.input_effects), 1):
-            if seffect.name != UNUSED:
-                out.emit(f"PyObject *{seffect.name} = PEEK({i});")
+        for i, ieffect in enumerate(reversed(self.input_effects), 1):
+            src = StackEffect(f"PEEK({i})", "")
+            out.declare(ieffect, src)
 
         # Write output stack effect variable declarations
-        input_names = {seffect.name for seffect in self.input_effects}
-        input_names.add(UNUSED)
-        for seffect in self.output_effects:
-            if seffect.name not in input_names:
-                out.emit(f"PyObject *{seffect.name};")
+        input_names = {ieffect.name for ieffect in self.input_effects}
+        for oeffect in self.output_effects:
+            if oeffect.name not in input_names:
+                out.declare(oeffect, None)
 
         self.write_body(out, 0)
 
         # Skip the rest if the block always exits
-        if always_exits(self.block):
+        if self.always_exits:
             return
 
         # Write net stack growth/shrinkage
         diff = len(self.output_effects) - len(self.input_effects)
-        if diff > 0:
-            out.emit(f"STACK_GROW({diff});")
-        elif diff < 0:
-            out.emit(f"STACK_SHRINK({-diff});")
+        out.stack_adjust(diff)
 
         # Write output stack effect assignments
-        unmoved_names = {UNUSED}
+        unmoved_names: set[str] = set()
         for ieffect, oeffect in zip(self.input_effects, self.output_effects):
             if ieffect.name == oeffect.name:
                 unmoved_names.add(ieffect.name)
-        for i, seffect in enumerate(reversed(self.output_effects)):
-            if seffect.name not in unmoved_names:
-                out.emit(f"POKE({i+1}, {seffect.name});")
+        for i, oeffect in enumerate(reversed(self.output_effects), 1):
+            if oeffect.name not in unmoved_names:
+                dst = StackEffect(f"PEEK({i})", "")
+                out.assign(dst, oeffect)
 
         # Write cache effect
         if self.cache_offset:
-            out.emit(f"next_instr += {self.cache_offset};")
+            out.emit(f"JUMPBY({self.cache_offset});")
 
     def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None:
         """Write the instruction body."""
@@ -171,36 +200,19 @@ class Instruction:
                     # is always an object pointer.
                     # If this becomes false, we need a way to specify
                     # syntactically what type the cache data is.
-                    type = "PyObject *"
+                    typ = "PyObject *"
                     func = "read_obj"
                 else:
-                    type = f"uint{bits}_t "
+                    typ = f"uint{bits}_t "
                     func = f"read_u{bits}"
-                out.emit(f"{type}{ceffect.name} = {func}(next_instr + {cache_offset});")
+                out.emit(f"{typ}{ceffect.name} = {func}(next_instr + {cache_offset});")
                 cache_offset += ceffect.size
         assert cache_offset == self.cache_offset + cache_adjust
 
-        # Get lines of text with proper dedent
-        blocklines = self.block.to_text(dedent=dedent).splitlines(True)
-
-        # Remove blank lines from both ends
-        while blocklines and not blocklines[0].strip():
-            blocklines.pop(0)
-        while blocklines and not blocklines[-1].strip():
-            blocklines.pop()
-
-        # Remove leading and trailing braces
-        assert blocklines and blocklines[0].strip() == "{"
-        assert blocklines and blocklines[-1].strip() == "}"
-        blocklines.pop()
-        blocklines.pop(0)
-
-        # Remove trailing blank lines
-        while blocklines and not blocklines[-1].strip():
-            blocklines.pop()
-
         # Write the body, substituting a goto for ERROR_IF()
-        for line in blocklines:
+        assert dedent <= 0
+        extra = " " * -dedent
+        for line in self.block_text:
             if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*$", line):
                 space, cond, label = m.groups()
                 # ERROR_IF() must pop the inputs from the stack.
@@ -215,34 +227,36 @@ class Instruction:
                     else:
                         break
                 if ninputs:
-                    out.write_raw(f"{space}if ({cond}) goto pop_{ninputs}_{label};\n")
+                    out.write_raw(
+                        f"{extra}{space}if ({cond}) goto pop_{ninputs}_{label};\n"
+                    )
                 else:
-                    out.write_raw(f"{space}if ({cond}) goto {label};\n")
+                    out.write_raw(f"{extra}{space}if ({cond}) goto {label};\n")
             else:
-                out.write_raw(line)
+                out.write_raw(extra + line)
 
 
 InstructionOrCacheEffect = Instruction | parser.CacheEffect
+StackEffectMapping = list[tuple[StackEffect, StackEffect]]
 
 
 @dataclasses.dataclass
 class Component:
     instr: Instruction
-    input_mapping: dict[str, parser.StackEffect]
-    output_mapping: dict[str, parser.StackEffect]
+    input_mapping: StackEffectMapping
+    output_mapping: StackEffectMapping
 
     def write_body(self, out: Formatter, cache_adjust: int) -> None:
         with out.block(""):
-            for var, ieffect in self.input_mapping.items():
-                out.emit(f"PyObject *{ieffect.name} = {var};")
-            for oeffect in self.output_mapping.values():
-                out.emit(f"PyObject *{oeffect.name};")
-            self.instr.write_body(out, dedent=-4, cache_adjust=cache_adjust)
-            for var, oeffect in self.output_mapping.items():
-                out.emit(f"{var} = {oeffect.name};")
+            for var, ieffect in self.input_mapping:
+                out.declare(ieffect, var)
+            for _, oeffect in self.output_mapping:
+                out.declare(oeffect, None)
 
+            self.instr.write_body(out, dedent=-4, cache_adjust=cache_adjust)
 
-# TODO: Use a common base class for {Super,Macro}Instruction
+            for var, oeffect in self.output_mapping:
+                out.assign(var, oeffect)
 
 
 @dataclasses.dataclass
@@ -250,7 +264,7 @@ class SuperOrMacroInstruction:
     """Common fields for super- and macro instructions."""
 
     name: str
-    stack: list[str]
+    stack: list[StackEffect]
    initial_sp: int
     final_sp: int
 
@@ -369,7 +383,11 @@ class Analyzer:
     def find_predictions(self) -> None:
         """Find the instructions that need PREDICTED() labels."""
         for instr in self.instrs.values():
-            for target in re.findall(RE_PREDICTED, instr.block.text):
+            targets = set(instr.predictions)
+            for line in instr.block_text:
+                if m := re.match(RE_PREDICTED, line):
+                    targets.add(m.group(1))
+            for target in targets:
                 if target_instr := self.instrs.get(target):
                     target_instr.predicted = True
                 else:
@@ -440,24 +458,9 @@ class Analyzer:
         stack, initial_sp = self.stack_analysis(components)
         sp = initial_sp
         parts: list[Component] = []
-        for component in components:
-            match component:
-                case parser.CacheEffect() as ceffect:
-                    parts.append(ceffect)
-                case Instruction() as instr:
-                    input_mapping = {}
-                    for ieffect in reversed(instr.input_effects):
-                        sp -= 1
-                        if ieffect.name != UNUSED:
-                            input_mapping[stack[sp]] = ieffect
-                    output_mapping = {}
-                    for oeffect in instr.output_effects:
-                        if oeffect.name != UNUSED:
-                            output_mapping[stack[sp]] = oeffect
-                        sp += 1
-                    parts.append(Component(instr, input_mapping, output_mapping))
-                case _:
-                    typing.assert_never(component)
+        for instr in components:
+            part, sp = self.analyze_instruction(instr, stack, sp)
+            parts.append(part)
         final_sp = sp
         return SuperInstruction(super.name, stack, initial_sp, final_sp, super, parts)
 
@@ -471,22 +474,26 @@ class Analyzer:
                 case parser.CacheEffect() as ceffect:
                     parts.append(ceffect)
                 case Instruction() as instr:
-                    input_mapping = {}
-                    for ieffect in reversed(instr.input_effects):
-                        sp -= 1
-                        if ieffect.name != UNUSED:
-                            input_mapping[stack[sp]] = ieffect
-                    output_mapping = {}
-                    for oeffect in instr.output_effects:
-                        if oeffect.name != UNUSED:
-                            output_mapping[stack[sp]] = oeffect
-                        sp += 1
-                    parts.append(Component(instr, input_mapping, output_mapping))
+                    part, sp = self.analyze_instruction(instr, stack, sp)
+                    parts.append(part)
                 case _:
                     typing.assert_never(component)
         final_sp = sp
         return MacroInstruction(macro.name, stack, initial_sp, final_sp, macro, parts)
 
+    def analyze_instruction(
+        self, instr: Instruction, stack: list[StackEffect], sp: int
+    ) -> tuple[Component, int]:
+        input_mapping: StackEffectMapping = []
+        for ieffect in reversed(instr.input_effects):
+            sp -= 1
+            input_mapping.append((stack[sp], ieffect))
+        output_mapping: StackEffectMapping = []
+        for oeffect in instr.output_effects:
+            output_mapping.append((stack[sp], oeffect))
+            sp += 1
+        return Component(instr, input_mapping, output_mapping), sp
+
     def check_super_components(self, super: parser.Super) -> list[Instruction]:
         components: list[Instruction] = []
         for op in super.ops:
@@ -514,7 +521,7 @@ class Analyzer:
     def stack_analysis(
         self, components: typing.Iterable[InstructionOrCacheEffect]
-    ) -> tuple[list[str], int]:
+    ) -> tuple[list[StackEffect], int]:
         """Analyze a super-instruction or macro.
 
         Print an error if there's a cache effect (which we don't support yet).
@@ -536,7 +543,10 @@ class Analyzer:
         # At this point, 'current' is the net stack effect,
         # and 'lowest' and 'highest' are the extremes.
         # Note that 'lowest' may be negative.
-        stack = [f"_tmp_{i+1}" for i in range(highest - lowest)]
+        # TODO: Reverse the numbering.
+        stack = [
+            StackEffect(f"_tmp_{i+1}", "") for i in reversed(range(highest - lowest))
+        ]
         return stack, -lowest
 
     def write_instructions(self) -> None:
@@ -561,7 +571,9 @@ class Analyzer:
                 if instr.predicted:
                     self.out.emit(f"PREDICTED({name});")
                 instr.write(self.out)
-                if not always_exits(instr.block):
+                if not instr.always_exits:
+                    for prediction in instr.predictions:
+                        self.out.emit(f"PREDICT({prediction});")
                     self.out.emit(f"DISPATCH();")
 
         # Write and count super-instructions
@@ -589,11 +601,11 @@ class Analyzer:
             for comp in sup.parts:
                 if not first:
                     self.out.emit("NEXTOPARG();")
-                    self.out.emit("next_instr++;")
+                    self.out.emit("JUMPBY(1);")
                 first = False
                 comp.write_body(self.out, 0)
                 if comp.instr.cache_offset:
-                    self.out.emit(f"next_instr += {comp.instr.cache_offset};")
+                    self.out.emit(f"JUMPBY({comp.instr.cache_offset});")
 
     def write_macro(self, mac: MacroInstruction) -> None:
         """Write code for a macro instruction."""
@@ -608,43 +620,68 @@ class Analyzer:
                         cache_adjust += comp.instr.cache_offset
 
         if cache_adjust:
-            self.out.emit(f"next_instr += {cache_adjust};")
+            self.out.emit(f"JUMPBY({cache_adjust});")
 
     @contextlib.contextmanager
     def wrap_super_or_macro(self, up: SuperOrMacroInstruction):
         """Shared boilerplate for super- and macro instructions."""
+        # TODO: Somewhere (where?) make it so that if one instruction
+        # has an output that is input to another, and the variable names
+        # and types match and don't conflict with other instructions,
+        # that variable is declared with the right name and type in the
+        # outer block, rather than trusting the compiler to optimize it.
         self.out.emit("")
         with self.out.block(f"TARGET({up.name})"):
-            for i, var in enumerate(up.stack):
+            for i, var in reversed(list(enumerate(up.stack))):
+                src = None
                 if i < up.initial_sp:
-                    self.out.emit(f"PyObject *{var} = PEEK({up.initial_sp - i});")
-                else:
-                    self.out.emit(f"PyObject *{var};")
+                    src = StackEffect(f"PEEK({up.initial_sp - i})", "")
+                self.out.declare(var, src)
 
             yield
 
-            if up.final_sp > up.initial_sp:
-                self.out.emit(f"STACK_GROW({up.final_sp - up.initial_sp});")
-            elif up.final_sp < up.initial_sp:
-                self.out.emit(f"STACK_SHRINK({up.initial_sp - up.final_sp});")
+            self.out.stack_adjust(up.final_sp - up.initial_sp)
+
             for i, var in enumerate(reversed(up.stack[: up.final_sp]), 1):
-                self.out.emit(f"POKE({i}, {var});")
+                dst = StackEffect(f"PEEK({i})", "")
+                self.out.assign(dst, var)
+
             self.out.emit(f"DISPATCH();")
 
 
-def always_exits(block: parser.Block) -> bool:
+def extract_block_text(block: parser.Block) -> tuple[list[str], list[str]]:
+    # Get lines of text with proper dedent
+    blocklines = block.text.splitlines(True)
+
+    # Remove blank lines from both ends
+    while blocklines and not blocklines[0].strip():
+        blocklines.pop(0)
+    while blocklines and not blocklines[-1].strip():
+        blocklines.pop()
+
+    # Remove leading and trailing braces
+    assert blocklines and blocklines[0].strip() == "{"
+    assert blocklines and blocklines[-1].strip() == "}"
+    blocklines.pop()
+    blocklines.pop(0)
+
+    # Remove trailing blank lines
+    while blocklines and not blocklines[-1].strip():
+        blocklines.pop()
+
+    # Separate PREDICT(...) macros from end
+    predictions: list[str] = []
+    while blocklines and (m := re.match(r"^\s*PREDICT\((\w+)\);\s*$", blocklines[-1])):
+        predictions.insert(0, m.group(1))
+        blocklines.pop()
+
+    return blocklines, predictions
+
+
+def always_exits(lines: list[str]) -> bool:
     """Determine whether a block always ends in a return/goto/etc."""
-    text = block.text
-    lines = text.splitlines()
-    while lines and not lines[-1].strip():
-        lines.pop()
-    if not lines or lines[-1].strip() != "}":
-        return False
-    lines.pop()
     if not lines:
         return False
-    line = lines.pop().rstrip()
+    line = lines[-1].rstrip()
     # Indent must match exactly (TODO: Do something better)
     if line[:12] != " " * 12:
         return False
diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py
index 02a7834..d802c73 100644
--- a/Tools/cases_generator/parser.py
+++ b/Tools/cases_generator/parser.py
@@ -62,7 +62,8 @@ class Block(Node):
 @dataclass
 class StackEffect(Node):
     name: str
-    # TODO: type, condition
+    type: str = ""
+    # TODO: array, condition
 
 
 @dataclass
@@ -147,7 +148,7 @@ class Parser(PLexer):
         if self.expect(lx.LPAREN) and (tkn := self.expect(lx.IDENTIFIER)):
             name = tkn.text
             if self.expect(lx.COMMA):
-                inp, outp = self.stack_effect()
+                inp, outp = self.io_effect()
                 if self.expect(lx.RPAREN):
                     if (tkn := self.peek()) and tkn.kind == lx.LBRACE:
                         return InstHeader(kind, name, inp, outp)
@@ -156,7 +157,7 @@ class Parser(PLexer):
                 return InstHeader(kind, name, [], [])
         return None
 
-    def stack_effect(self) -> tuple[list[InputEffect], list[OutputEffect]]:
+    def io_effect(self) -> tuple[list[InputEffect], list[OutputEffect]]:
         # '(' [inputs] '--' [outputs] ')'
         if self.expect(lx.LPAREN):
             inputs = self.inputs() or []
@@ -181,23 +182,7 @@ class Parser(PLexer):
 
     @contextual
     def input(self) -> InputEffect | None:
-        # IDENTIFIER '/' INTEGER (CacheEffect)
-        # IDENTIFIER (StackEffect)
-        if tkn := self.expect(lx.IDENTIFIER):
-            if self.expect(lx.DIVIDE):
-                if num := self.expect(lx.NUMBER):
-                    try:
-                        size = int(num.text)
-                    except ValueError:
-                        raise self.make_syntax_error(
-                            f"Expected integer, got {num.text!r}"
-                        )
-                    else:
-                        return CacheEffect(tkn.text, size)
-                raise self.make_syntax_error("Expected integer")
-            else:
-                # TODO: Arrays, conditions
-                return StackEffect(tkn.text)
+        return self.cache_effect() or self.stack_effect()
 
     def outputs(self) -> list[OutputEffect] | None:
         # output (, output)*
@@ -214,8 +199,30 @@ class Parser(PLexer):
 
     @contextual
     def output(self) -> OutputEffect | None:
+        return self.stack_effect()
+
+    @contextual
+    def cache_effect(self) -> CacheEffect | None:
+        # IDENTIFIER '/' NUMBER
+        if tkn := self.expect(lx.IDENTIFIER):
+            if self.expect(lx.DIVIDE):
+                num = self.require(lx.NUMBER).text
+                try:
+                    size = int(num)
+                except ValueError:
+                    raise self.make_syntax_error(f"Expected integer, got {num!r}")
+                else:
+                    return CacheEffect(tkn.text, size)
+
+    @contextual
+    def stack_effect(self) -> StackEffect | None:
+        # IDENTIFIER [':' IDENTIFIER]
+        # TODO: Arrays, conditions
         if tkn := self.expect(lx.IDENTIFIER):
-            return StackEffect(tkn.text)
+            type = ""
+            if self.expect(lx.COLON):
+                type = self.require(lx.IDENTIFIER).text
+            return StackEffect(tkn.text, type)
 
     @contextual
     def super_def(self) -> Super | None:
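For readers unfamiliar with the `ERROR_IF()` convention that the GET_AITER note above relies on: `write_body()` rewrites `ERROR_IF(cond, label)` into a `goto`, and when the instruction still has inputs on the stack it targets a `pop_N_<label>` variant so the error path also pops those inputs (the block itself is responsible for any DECREFs, per the comment in `write_body()`). A minimal standalone sketch of that rewrite (`rewrite_error_if` and the sample line are invented; the regex is the one used above):

```python
import re


def rewrite_error_if(line: str, ninputs: int) -> str:
    # ninputs is how many inputs the error path still needs to pop; in the real
    # generator this is len(input_effects) minus inputs passed through unchanged.
    if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*$", line):
        space, cond, label = m.groups()
        target = f"pop_{ninputs}_{label}" if ninputs else label
        return f"{space}if ({cond}) goto {target};\n"
    return line


print(rewrite_error_if("            ERROR_IF(iter == NULL, error);\n", ninputs=1), end="")
# ->          if (iter == NULL) goto pop_1_error;
```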