summaryrefslogtreecommitdiffstats
path: root/Tools/cases_generator
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>2022-12-08 21:31:27 (GMT)
committerGitHub <noreply@github.com>2022-12-08 21:31:27 (GMT)
commitc85be734d1823f02a3600d7cd9195cecbf51afe8 (patch)
tree676c83c4c423c817c554228d1badf20110f47d32 /Tools/cases_generator
parent35cc0ea736a323119157117d93e5d68d8247e89f (diff)
downloadcpython-c85be734d1823f02a3600d7cd9195cecbf51afe8.zip
cpython-c85be734d1823f02a3600d7cd9195cecbf51afe8.tar.gz
cpython-c85be734d1823f02a3600d7cd9195cecbf51afe8.tar.bz2
GH-98831: Typed stack effects, and more instructions converted (#99764)
Stack effects can now have a type, e.g. `inst(X, (left, right -- jump/uint64_t)) { ... }`. Instructions converted to the non-legacy format: * COMPARE_OP * COMPARE_OP_FLOAT_JUMP * COMPARE_OP_INT_JUMP * COMPARE_OP_STR_JUMP * STORE_ATTR * DELETE_ATTR * STORE_GLOBAL * STORE_ATTR_INSTANCE_VALUE * STORE_ATTR_WITH_HINT * STORE_ATTR_SLOT, and complete the store_attr family * Complete the store_subscr family: STORE_SUBSCR{,DICT,LIST_INT} (STORE_SUBSCR was alread half converted, but wasn't using cache effects yet.) * DELETE_SUBSCR * PRINT_EXPR * INTERPRETER_EXIT (a bit weird, ends in return) * RETURN_VALUE * GET_AITER (had to restructure it some) The original had mysterious `SET_TOP(NULL)` before `goto error`. I assume those just account for `obj` having been decref'ed, so I got rid of them in favor of the cleanup implied by `ERROR_IF()`. * LIST_APPEND (a bit unhappy with it) * SET_ADD (also a bit unhappy with it) Various other improvements/refactorings as well.
Diffstat (limited to 'Tools/cases_generator')
-rw-r--r--Tools/cases_generator/generate_cases.py265
-rw-r--r--Tools/cases_generator/parser.py49
2 files changed, 179 insertions, 135 deletions
diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py
index 2952634..8442722 100644
--- a/Tools/cases_generator/generate_cases.py
+++ b/Tools/cases_generator/generate_cases.py
@@ -13,6 +13,7 @@ import sys
import typing
import parser
+from parser import StackEffect
DEFAULT_INPUT = os.path.relpath(
os.path.join(os.path.dirname(__file__), "../../Python/bytecodes.c")
@@ -22,7 +23,7 @@ DEFAULT_OUTPUT = os.path.relpath(
)
BEGIN_MARKER = "// BEGIN BYTECODES //"
END_MARKER = "// END BYTECODES //"
-RE_PREDICTED = r"(?s)(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);"
+RE_PREDICTED = r"^\s*(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);\s*$"
UNUSED = "unused"
BITS_PER_CODE_UNIT = 16
@@ -73,6 +74,34 @@ class Formatter:
yield
self.emit("}")
+ def stack_adjust(self, diff: int):
+ if diff > 0:
+ self.emit(f"STACK_GROW({diff});")
+ elif diff < 0:
+ self.emit(f"STACK_SHRINK({-diff});")
+
+ def declare(self, dst: StackEffect, src: StackEffect | None):
+ if dst.name == UNUSED:
+ return
+ typ = f"{dst.type} " if dst.type else "PyObject *"
+ init = ""
+ if src:
+ cast = self.cast(dst, src)
+ init = f" = {cast}{src.name}"
+ self.emit(f"{typ}{dst.name}{init};")
+
+ def assign(self, dst: StackEffect, src: StackEffect):
+ if src.name == UNUSED:
+ return
+ cast = self.cast(dst, src)
+ if m := re.match(r"^PEEK\((\d+)\)$", dst.name):
+ self.emit(f"POKE({m.group(1)}, {cast}{src.name});")
+ else:
+ self.emit(f"{dst.name} = {cast}{src.name};")
+
+ def cast(self, dst: StackEffect, src: StackEffect) -> str:
+ return f"({dst.type or 'PyObject *'})" if src.type != dst.type else ""
+
@dataclasses.dataclass
class Instruction:
@@ -83,13 +112,15 @@ class Instruction:
kind: typing.Literal["inst", "op"]
name: str
block: parser.Block
+ block_text: list[str] # Block.text, less curlies, less PREDICT() calls
+ predictions: list[str] # Prediction targets (instruction names)
# Computed by constructor
always_exits: bool
cache_offset: int
cache_effects: list[parser.CacheEffect]
- input_effects: list[parser.StackEffect]
- output_effects: list[parser.StackEffect]
+ input_effects: list[StackEffect]
+ output_effects: list[StackEffect]
# Set later
family: parser.Family | None = None
@@ -100,13 +131,14 @@ class Instruction:
self.kind = inst.kind
self.name = inst.name
self.block = inst.block
- self.always_exits = always_exits(self.block)
+ self.block_text, self.predictions = extract_block_text(self.block)
+ self.always_exits = always_exits(self.block_text)
self.cache_effects = [
effect for effect in inst.inputs if isinstance(effect, parser.CacheEffect)
]
self.cache_offset = sum(c.size for c in self.cache_effects)
self.input_effects = [
- effect for effect in inst.inputs if isinstance(effect, parser.StackEffect)
+ effect for effect in inst.inputs if isinstance(effect, StackEffect)
]
self.output_effects = inst.outputs # For consistency/completeness
@@ -122,42 +154,39 @@ class Instruction:
)
# Write input stack effect variable declarations and initializations
- for i, seffect in enumerate(reversed(self.input_effects), 1):
- if seffect.name != UNUSED:
- out.emit(f"PyObject *{seffect.name} = PEEK({i});")
+ for i, ieffect in enumerate(reversed(self.input_effects), 1):
+ src = StackEffect(f"PEEK({i})", "")
+ out.declare(ieffect, src)
# Write output stack effect variable declarations
- input_names = {seffect.name for seffect in self.input_effects}
- input_names.add(UNUSED)
- for seffect in self.output_effects:
- if seffect.name not in input_names:
- out.emit(f"PyObject *{seffect.name};")
+ input_names = {ieffect.name for ieffect in self.input_effects}
+ for oeffect in self.output_effects:
+ if oeffect.name not in input_names:
+ out.declare(oeffect, None)
self.write_body(out, 0)
# Skip the rest if the block always exits
- if always_exits(self.block):
+ if self.always_exits:
return
# Write net stack growth/shrinkage
diff = len(self.output_effects) - len(self.input_effects)
- if diff > 0:
- out.emit(f"STACK_GROW({diff});")
- elif diff < 0:
- out.emit(f"STACK_SHRINK({-diff});")
+ out.stack_adjust(diff)
# Write output stack effect assignments
- unmoved_names = {UNUSED}
+ unmoved_names: set[str] = set()
for ieffect, oeffect in zip(self.input_effects, self.output_effects):
if ieffect.name == oeffect.name:
unmoved_names.add(ieffect.name)
- for i, seffect in enumerate(reversed(self.output_effects)):
- if seffect.name not in unmoved_names:
- out.emit(f"POKE({i+1}, {seffect.name});")
+ for i, oeffect in enumerate(reversed(self.output_effects), 1):
+ if oeffect.name not in unmoved_names:
+ dst = StackEffect(f"PEEK({i})", "")
+ out.assign(dst, oeffect)
# Write cache effect
if self.cache_offset:
- out.emit(f"next_instr += {self.cache_offset};")
+ out.emit(f"JUMPBY({self.cache_offset});")
def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None:
"""Write the instruction body."""
@@ -171,36 +200,19 @@ class Instruction:
# is always an object pointer.
# If this becomes false, we need a way to specify
# syntactically what type the cache data is.
- type = "PyObject *"
+ typ = "PyObject *"
func = "read_obj"
else:
- type = f"uint{bits}_t "
+ typ = f"uint{bits}_t "
func = f"read_u{bits}"
- out.emit(f"{type}{ceffect.name} = {func}(next_instr + {cache_offset});")
+ out.emit(f"{typ}{ceffect.name} = {func}(next_instr + {cache_offset});")
cache_offset += ceffect.size
assert cache_offset == self.cache_offset + cache_adjust
- # Get lines of text with proper dedent
- blocklines = self.block.to_text(dedent=dedent).splitlines(True)
-
- # Remove blank lines from both ends
- while blocklines and not blocklines[0].strip():
- blocklines.pop(0)
- while blocklines and not blocklines[-1].strip():
- blocklines.pop()
-
- # Remove leading and trailing braces
- assert blocklines and blocklines[0].strip() == "{"
- assert blocklines and blocklines[-1].strip() == "}"
- blocklines.pop()
- blocklines.pop(0)
-
- # Remove trailing blank lines
- while blocklines and not blocklines[-1].strip():
- blocklines.pop()
-
# Write the body, substituting a goto for ERROR_IF()
- for line in blocklines:
+ assert dedent <= 0
+ extra = " " * -dedent
+ for line in self.block_text:
if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*$", line):
space, cond, label = m.groups()
# ERROR_IF() must pop the inputs from the stack.
@@ -215,34 +227,36 @@ class Instruction:
else:
break
if ninputs:
- out.write_raw(f"{space}if ({cond}) goto pop_{ninputs}_{label};\n")
+ out.write_raw(
+ f"{extra}{space}if ({cond}) goto pop_{ninputs}_{label};\n"
+ )
else:
- out.write_raw(f"{space}if ({cond}) goto {label};\n")
+ out.write_raw(f"{extra}{space}if ({cond}) goto {label};\n")
else:
- out.write_raw(line)
+ out.write_raw(extra + line)
InstructionOrCacheEffect = Instruction | parser.CacheEffect
+StackEffectMapping = list[tuple[StackEffect, StackEffect]]
@dataclasses.dataclass
class Component:
instr: Instruction
- input_mapping: dict[str, parser.StackEffect]
- output_mapping: dict[str, parser.StackEffect]
+ input_mapping: StackEffectMapping
+ output_mapping: StackEffectMapping
def write_body(self, out: Formatter, cache_adjust: int) -> None:
with out.block(""):
- for var, ieffect in self.input_mapping.items():
- out.emit(f"PyObject *{ieffect.name} = {var};")
- for oeffect in self.output_mapping.values():
- out.emit(f"PyObject *{oeffect.name};")
- self.instr.write_body(out, dedent=-4, cache_adjust=cache_adjust)
- for var, oeffect in self.output_mapping.items():
- out.emit(f"{var} = {oeffect.name};")
+ for var, ieffect in self.input_mapping:
+ out.declare(ieffect, var)
+ for _, oeffect in self.output_mapping:
+ out.declare(oeffect, None)
+ self.instr.write_body(out, dedent=-4, cache_adjust=cache_adjust)
-# TODO: Use a common base class for {Super,Macro}Instruction
+ for var, oeffect in self.output_mapping:
+ out.assign(var, oeffect)
@dataclasses.dataclass
@@ -250,7 +264,7 @@ class SuperOrMacroInstruction:
"""Common fields for super- and macro instructions."""
name: str
- stack: list[str]
+ stack: list[StackEffect]
initial_sp: int
final_sp: int
@@ -369,7 +383,11 @@ class Analyzer:
def find_predictions(self) -> None:
"""Find the instructions that need PREDICTED() labels."""
for instr in self.instrs.values():
- for target in re.findall(RE_PREDICTED, instr.block.text):
+ targets = set(instr.predictions)
+ for line in instr.block_text:
+ if m := re.match(RE_PREDICTED, line):
+ targets.add(m.group(1))
+ for target in targets:
if target_instr := self.instrs.get(target):
target_instr.predicted = True
else:
@@ -440,24 +458,9 @@ class Analyzer:
stack, initial_sp = self.stack_analysis(components)
sp = initial_sp
parts: list[Component] = []
- for component in components:
- match component:
- case parser.CacheEffect() as ceffect:
- parts.append(ceffect)
- case Instruction() as instr:
- input_mapping = {}
- for ieffect in reversed(instr.input_effects):
- sp -= 1
- if ieffect.name != UNUSED:
- input_mapping[stack[sp]] = ieffect
- output_mapping = {}
- for oeffect in instr.output_effects:
- if oeffect.name != UNUSED:
- output_mapping[stack[sp]] = oeffect
- sp += 1
- parts.append(Component(instr, input_mapping, output_mapping))
- case _:
- typing.assert_never(component)
+ for instr in components:
+ part, sp = self.analyze_instruction(instr, stack, sp)
+ parts.append(part)
final_sp = sp
return SuperInstruction(super.name, stack, initial_sp, final_sp, super, parts)
@@ -471,22 +474,26 @@ class Analyzer:
case parser.CacheEffect() as ceffect:
parts.append(ceffect)
case Instruction() as instr:
- input_mapping = {}
- for ieffect in reversed(instr.input_effects):
- sp -= 1
- if ieffect.name != UNUSED:
- input_mapping[stack[sp]] = ieffect
- output_mapping = {}
- for oeffect in instr.output_effects:
- if oeffect.name != UNUSED:
- output_mapping[stack[sp]] = oeffect
- sp += 1
- parts.append(Component(instr, input_mapping, output_mapping))
+ part, sp = self.analyze_instruction(instr, stack, sp)
+ parts.append(part)
case _:
typing.assert_never(component)
final_sp = sp
return MacroInstruction(macro.name, stack, initial_sp, final_sp, macro, parts)
+ def analyze_instruction(
+ self, instr: Instruction, stack: list[StackEffect], sp: int
+ ) -> tuple[Component, int]:
+ input_mapping: StackEffectMapping = []
+ for ieffect in reversed(instr.input_effects):
+ sp -= 1
+ input_mapping.append((stack[sp], ieffect))
+ output_mapping: StackEffectMapping = []
+ for oeffect in instr.output_effects:
+ output_mapping.append((stack[sp], oeffect))
+ sp += 1
+ return Component(instr, input_mapping, output_mapping), sp
+
def check_super_components(self, super: parser.Super) -> list[Instruction]:
components: list[Instruction] = []
for op in super.ops:
@@ -514,7 +521,7 @@ class Analyzer:
def stack_analysis(
self, components: typing.Iterable[InstructionOrCacheEffect]
- ) -> tuple[list[str], int]:
+ ) -> tuple[list[StackEffect], int]:
"""Analyze a super-instruction or macro.
Print an error if there's a cache effect (which we don't support yet).
@@ -536,7 +543,10 @@ class Analyzer:
# At this point, 'current' is the net stack effect,
# and 'lowest' and 'highest' are the extremes.
# Note that 'lowest' may be negative.
- stack = [f"_tmp_{i+1}" for i in range(highest - lowest)]
+ # TODO: Reverse the numbering.
+ stack = [
+ StackEffect(f"_tmp_{i+1}", "") for i in reversed(range(highest - lowest))
+ ]
return stack, -lowest
def write_instructions(self) -> None:
@@ -561,7 +571,9 @@ class Analyzer:
if instr.predicted:
self.out.emit(f"PREDICTED({name});")
instr.write(self.out)
- if not always_exits(instr.block):
+ if not instr.always_exits:
+ for prediction in instr.predictions:
+ self.out.emit(f"PREDICT({prediction});")
self.out.emit(f"DISPATCH();")
# Write and count super-instructions
@@ -589,11 +601,11 @@ class Analyzer:
for comp in sup.parts:
if not first:
self.out.emit("NEXTOPARG();")
- self.out.emit("next_instr++;")
+ self.out.emit("JUMPBY(1);")
first = False
comp.write_body(self.out, 0)
if comp.instr.cache_offset:
- self.out.emit(f"next_instr += {comp.instr.cache_offset};")
+ self.out.emit(f"JUMPBY({comp.instr.cache_offset});")
def write_macro(self, mac: MacroInstruction) -> None:
"""Write code for a macro instruction."""
@@ -608,43 +620,68 @@ class Analyzer:
cache_adjust += comp.instr.cache_offset
if cache_adjust:
- self.out.emit(f"next_instr += {cache_adjust};")
+ self.out.emit(f"JUMPBY({cache_adjust});")
@contextlib.contextmanager
def wrap_super_or_macro(self, up: SuperOrMacroInstruction):
"""Shared boilerplate for super- and macro instructions."""
+ # TODO: Somewhere (where?) make it so that if one instruction
+ # has an output that is input to another, and the variable names
+ # and types match and don't conflict with other instructions,
+ # that variable is declared with the right name and type in the
+ # outer block, rather than trusting the compiler to optimize it.
self.out.emit("")
with self.out.block(f"TARGET({up.name})"):
- for i, var in enumerate(up.stack):
+ for i, var in reversed(list(enumerate(up.stack))):
+ src = None
if i < up.initial_sp:
- self.out.emit(f"PyObject *{var} = PEEK({up.initial_sp - i});")
- else:
- self.out.emit(f"PyObject *{var};")
+ src = StackEffect(f"PEEK({up.initial_sp - i})", "")
+ self.out.declare(var, src)
yield
- if up.final_sp > up.initial_sp:
- self.out.emit(f"STACK_GROW({up.final_sp - up.initial_sp});")
- elif up.final_sp < up.initial_sp:
- self.out.emit(f"STACK_SHRINK({up.initial_sp - up.final_sp});")
+ self.out.stack_adjust(up.final_sp - up.initial_sp)
for i, var in enumerate(reversed(up.stack[: up.final_sp]), 1):
- self.out.emit(f"POKE({i}, {var});")
+ dst = StackEffect(f"PEEK({i})", "")
+ self.out.assign(dst, var)
self.out.emit(f"DISPATCH();")
-def always_exits(block: parser.Block) -> bool:
+def extract_block_text(block: parser.Block) -> tuple[list[str], list[str]]:
+ # Get lines of text with proper dedent
+ blocklines = block.text.splitlines(True)
+
+ # Remove blank lines from both ends
+ while blocklines and not blocklines[0].strip():
+ blocklines.pop(0)
+ while blocklines and not blocklines[-1].strip():
+ blocklines.pop()
+
+ # Remove leading and trailing braces
+ assert blocklines and blocklines[0].strip() == "{"
+ assert blocklines and blocklines[-1].strip() == "}"
+ blocklines.pop()
+ blocklines.pop(0)
+
+ # Remove trailing blank lines
+ while blocklines and not blocklines[-1].strip():
+ blocklines.pop()
+
+ # Separate PREDICT(...) macros from end
+ predictions: list[str] = []
+ while blocklines and (m := re.match(r"^\s*PREDICT\((\w+)\);\s*$", blocklines[-1])):
+ predictions.insert(0, m.group(1))
+ blocklines.pop()
+
+ return blocklines, predictions
+
+
+def always_exits(lines: list[str]) -> bool:
"""Determine whether a block always ends in a return/goto/etc."""
- text = block.text
- lines = text.splitlines()
- while lines and not lines[-1].strip():
- lines.pop()
- if not lines or lines[-1].strip() != "}":
- return False
- lines.pop()
if not lines:
return False
- line = lines.pop().rstrip()
+ line = lines[-1].rstrip()
# Indent must match exactly (TODO: Do something better)
if line[:12] != " " * 12:
return False
diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py
index 02a7834..d802c73 100644
--- a/Tools/cases_generator/parser.py
+++ b/Tools/cases_generator/parser.py
@@ -62,7 +62,8 @@ class Block(Node):
@dataclass
class StackEffect(Node):
name: str
- # TODO: type, condition
+ type: str = ""
+ # TODO: array, condition
@dataclass
@@ -147,7 +148,7 @@ class Parser(PLexer):
if self.expect(lx.LPAREN) and (tkn := self.expect(lx.IDENTIFIER)):
name = tkn.text
if self.expect(lx.COMMA):
- inp, outp = self.stack_effect()
+ inp, outp = self.io_effect()
if self.expect(lx.RPAREN):
if (tkn := self.peek()) and tkn.kind == lx.LBRACE:
return InstHeader(kind, name, inp, outp)
@@ -156,7 +157,7 @@ class Parser(PLexer):
return InstHeader(kind, name, [], [])
return None
- def stack_effect(self) -> tuple[list[InputEffect], list[OutputEffect]]:
+ def io_effect(self) -> tuple[list[InputEffect], list[OutputEffect]]:
# '(' [inputs] '--' [outputs] ')'
if self.expect(lx.LPAREN):
inputs = self.inputs() or []
@@ -181,23 +182,7 @@ class Parser(PLexer):
@contextual
def input(self) -> InputEffect | None:
- # IDENTIFIER '/' INTEGER (CacheEffect)
- # IDENTIFIER (StackEffect)
- if tkn := self.expect(lx.IDENTIFIER):
- if self.expect(lx.DIVIDE):
- if num := self.expect(lx.NUMBER):
- try:
- size = int(num.text)
- except ValueError:
- raise self.make_syntax_error(
- f"Expected integer, got {num.text!r}"
- )
- else:
- return CacheEffect(tkn.text, size)
- raise self.make_syntax_error("Expected integer")
- else:
- # TODO: Arrays, conditions
- return StackEffect(tkn.text)
+ return self.cache_effect() or self.stack_effect()
def outputs(self) -> list[OutputEffect] | None:
# output (, output)*
@@ -214,8 +199,30 @@ class Parser(PLexer):
@contextual
def output(self) -> OutputEffect | None:
+ return self.stack_effect()
+
+ @contextual
+ def cache_effect(self) -> CacheEffect | None:
+ # IDENTIFIER '/' NUMBER
+ if tkn := self.expect(lx.IDENTIFIER):
+ if self.expect(lx.DIVIDE):
+ num = self.require(lx.NUMBER).text
+ try:
+ size = int(num)
+ except ValueError:
+ raise self.make_syntax_error(f"Expected integer, got {num!r}")
+ else:
+ return CacheEffect(tkn.text, size)
+
+ @contextual
+ def stack_effect(self) -> StackEffect | None:
+ # IDENTIFIER [':' IDENTIFIER]
+ # TODO: Arrays, conditions
if tkn := self.expect(lx.IDENTIFIER):
- return StackEffect(tkn.text)
+ type = ""
+ if self.expect(lx.COLON):
+ type = self.require(lx.IDENTIFIER).text
+ return StackEffect(tkn.text, type)
@contextual
def super_def(self) -> Super | None: