summaryrefslogtreecommitdiffstats
path: root/Tools
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>2023-01-17 23:59:19 (GMT)
committerGitHub <noreply@github.com>2023-01-17 23:59:19 (GMT)
commit80e3e3423cb456ba341f95d6a07e94cf3b16c0be (patch)
tree32ddc8bc516b2010d48a529c9cc03fb68631c80e /Tools
parentf34176b77f222726d901595968a4b44456186da4 (diff)
downloadcpython-80e3e3423cb456ba341f95d6a07e94cf3b16c0be.zip
cpython-80e3e3423cb456ba341f95d6a07e94cf3b16c0be.tar.gz
cpython-80e3e3423cb456ba341f95d6a07e94cf3b16c0be.tar.bz2
GH-98831: Implement array support in cases generator (#100912)
You can now write things like this: ``` inst(BUILD_STRING, (pieces[oparg] -- str)) { ... } inst(LIST_APPEND, (list, unused[oparg-1], v -- list, unused[oparg-1])) { ... } ``` Note that array output effects are only partially supported (they must be named `unused` or correspond to an input effect).
Diffstat (limited to 'Tools')
-rw-r--r--Tools/cases_generator/generate_cases.py173
-rw-r--r--Tools/cases_generator/parser.py60
-rw-r--r--Tools/cases_generator/test_generator.py126
3 files changed, 303 insertions, 56 deletions
diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py
index 4bd99b7..59e2499 100644
--- a/Tools/cases_generator/generate_cases.py
+++ b/Tools/cases_generator/generate_cases.py
@@ -15,14 +15,14 @@ import typing
import parser
from parser import StackEffect
-DEFAULT_INPUT = os.path.relpath(
- os.path.join(os.path.dirname(__file__), "../../Python/bytecodes.c")
-)
-DEFAULT_OUTPUT = os.path.relpath(
- os.path.join(os.path.dirname(__file__), "../../Python/generated_cases.c.h")
-)
+HERE = os.path.dirname(__file__)
+ROOT = os.path.join(HERE, "../..")
+THIS = os.path.relpath(__file__, ROOT)
+
+DEFAULT_INPUT = os.path.relpath(os.path.join(ROOT, "Python/bytecodes.c"))
+DEFAULT_OUTPUT = os.path.relpath(os.path.join(ROOT, "Python/generated_cases.c.h"))
DEFAULT_METADATA_OUTPUT = os.path.relpath(
- os.path.join(os.path.dirname(__file__), "../../Python/opcode_metadata.h")
+ os.path.join(ROOT, "Python/opcode_metadata.h")
)
BEGIN_MARKER = "// BEGIN BYTECODES //"
END_MARKER = "// END BYTECODES //"
@@ -48,6 +48,55 @@ arg_parser.add_argument(
)
+def effect_size(effect: StackEffect) -> tuple[int, str]:
+ """Return the 'size' impact of a stack effect.
+
+ Returns a tuple (numeric, symbolic) where:
+
+ - numeric is an int giving the statically analyzable size of the effect
+ - symbolic is a string representing a variable effect (e.g. 'oparg*2')
+
+ At most one of these will be non-zero / non-empty.
+ """
+ if effect.size:
+ return 0, effect.size
+ else:
+ return 1, ""
+
+
+def maybe_parenthesize(sym: str) -> str:
+ """Add parentheses around a string if it contains an operator.
+
+ An exception is made for '*' which is common and harmless
+ in the context where the symbolic size is used.
+ """
+ if re.match(r"^[\s\w*]+$", sym):
+ return sym
+ else:
+ return f"({sym})"
+
+
+def list_effect_size(effects: list[StackEffect]) -> tuple[int, str]:
+ numeric = 0
+ symbolic: list[str] = []
+ for effect in effects:
+ diff, sym = effect_size(effect)
+ numeric += diff
+ if sym:
+ symbolic.append(maybe_parenthesize(sym))
+ return numeric, " + ".join(symbolic)
+
+
+def string_effect_size(arg: tuple[int, str]) -> str:
+ numeric, symbolic = arg
+ if numeric and symbolic:
+ return f"{numeric} + {symbolic}"
+ elif symbolic:
+ return symbolic
+ else:
+ return str(numeric)
+
+
class Formatter:
"""Wraps an output stream with the ability to indent etc."""
@@ -83,28 +132,43 @@ class Formatter:
yield
self.emit("}")
- def stack_adjust(self, diff: int):
+ def stack_adjust(self, diff: int, input_effects: list[StackEffect], output_effects: list[StackEffect]):
+ # TODO: Get rid of 'diff' parameter
+ shrink, isym = list_effect_size(input_effects)
+ grow, osym = list_effect_size(output_effects)
+ diff += grow - shrink
+ if isym and isym != osym:
+ self.emit(f"STACK_SHRINK({isym});")
+ if diff < 0:
+ self.emit(f"STACK_SHRINK({-diff});")
if diff > 0:
self.emit(f"STACK_GROW({diff});")
- elif diff < 0:
- self.emit(f"STACK_SHRINK({-diff});")
+ if osym and osym != isym:
+ self.emit(f"STACK_GROW({osym});")
def declare(self, dst: StackEffect, src: StackEffect | None):
if dst.name == UNUSED:
return
- typ = f"{dst.type} " if dst.type else "PyObject *"
+ typ = f"{dst.type}" if dst.type else "PyObject *"
init = ""
if src:
cast = self.cast(dst, src)
init = f" = {cast}{src.name}"
- self.emit(f"{typ}{dst.name}{init};")
+ sepa = "" if typ.endswith("*") else " "
+ self.emit(f"{typ}{sepa}{dst.name}{init};")
def assign(self, dst: StackEffect, src: StackEffect):
if src.name == UNUSED:
return
cast = self.cast(dst, src)
- if m := re.match(r"^PEEK\((\d+)\)$", dst.name):
+ if m := re.match(r"^PEEK\((.*)\)$", dst.name):
self.emit(f"POKE({m.group(1)}, {cast}{src.name});")
+ elif m := re.match(r"^&PEEK\(.*\)$", dst.name):
+ # NOTE: MOVE_ITEMS() does not actually exist.
+ # The only supported output array forms are:
+ # - unused[...]
+ # - X[...] where X[...] matches an input array exactly
+ self.emit(f"MOVE_ITEMS({dst.name}, {src.name}, {src.size});")
elif m := re.match(r"^REG\(oparg(\d+)\)$", dst.name):
self.emit(f"Py_XSETREF({dst.name}, {cast}{src.name});")
else:
@@ -172,7 +236,7 @@ class Instruction:
num_dummies = (num_regs // 2) * 2 + 1 - num_regs
fmt = "I" + "B"*num_regs + "X"*num_dummies
else:
- if variable_used(inst.block, "oparg"):
+ if variable_used(inst, "oparg"):
fmt = "IB"
else:
fmt = "IX"
@@ -200,7 +264,6 @@ class Instruction:
def write(self, out: Formatter) -> None:
"""Write one instruction, sans prologue and epilogue."""
# Write a static assertion that a family's cache size is correct
-
if family := self.family:
if self.name == family.members[0]:
if cache_size := family.size:
@@ -211,8 +274,13 @@ class Instruction:
if not self.register:
# Write input stack effect variable declarations and initializations
- for i, ieffect in enumerate(reversed(self.input_effects), 1):
- src = StackEffect(f"PEEK({i})", "")
+ ieffects = list(reversed(self.input_effects))
+ for i, ieffect in enumerate(ieffects):
+ isize = string_effect_size(list_effect_size(ieffects[:i+1]))
+ if ieffect.size:
+ src = StackEffect(f"&PEEK({isize})", "PyObject **")
+ else:
+ src = StackEffect(f"PEEK({isize})", "")
out.declare(ieffect, src)
else:
# Write input register variable declarations and initializations
@@ -236,14 +304,19 @@ class Instruction:
if not self.register:
# Write net stack growth/shrinkage
- diff = len(self.output_effects) - len(self.input_effects)
- out.stack_adjust(diff)
+ out.stack_adjust(0, self.input_effects, self.output_effects)
# Write output stack effect assignments
- for i, oeffect in enumerate(reversed(self.output_effects), 1):
- if oeffect.name not in self.unmoved_names:
- dst = StackEffect(f"PEEK({i})", "")
- out.assign(dst, oeffect)
+ oeffects = list(reversed(self.output_effects))
+ for i, oeffect in enumerate(oeffects):
+ if oeffect.name in self.unmoved_names:
+ continue
+ osize = string_effect_size(list_effect_size(oeffects[:i+1]))
+ if oeffect.size:
+ dst = StackEffect(f"&PEEK({osize})", "PyObject **")
+ else:
+ dst = StackEffect(f"PEEK({osize})", "")
+ out.assign(dst, oeffect)
else:
# Write output register assignments
for oeffect, reg in zip(self.output_effects, self.output_registers):
@@ -287,19 +360,21 @@ class Instruction:
# The code block is responsible for DECREF()ing them.
# NOTE: If the label doesn't exist, just add it to ceval.c.
if not self.register:
- ninputs = len(self.input_effects)
# Don't pop common input/output effects at the bottom!
# These aren't DECREF'ed so they can stay.
- for ieff, oeff in zip(self.input_effects, self.output_effects):
- if ieff.name == oeff.name:
- ninputs -= 1
- else:
- break
+ ieffs = list(self.input_effects)
+ oeffs = list(self.output_effects)
+ while ieffs and oeffs and ieffs[0] == oeffs[0]:
+ ieffs.pop(0)
+ oeffs.pop(0)
+ ninputs, symbolic = list_effect_size(ieffs)
+ if ninputs:
+ label = f"pop_{ninputs}_{label}"
else:
- ninputs = 0
- if ninputs:
+ symbolic = ""
+ if symbolic:
out.write_raw(
- f"{extra}{space}if ({cond}) goto pop_{ninputs}_{label};\n"
+ f"{extra}{space}if ({cond}) {{ STACK_SHRINK({symbolic}); goto {label}; }}\n"
)
else:
out.write_raw(f"{extra}{space}if ({cond}) goto {label};\n")
@@ -635,6 +710,13 @@ class Analyzer:
for thing in components:
match thing:
case Instruction() as instr:
+ if any(eff.size for eff in instr.input_effects + instr.output_effects):
+ # TODO: Eventually this will be needed, at least for macros.
+ self.error(
+ f"Instruction {instr.name!r} has variable-sized stack effect, "
+ "which are not supported in super- or macro instructions",
+ instr.inst, # TODO: Pass name+location of super/macro
+ )
current -= len(instr.input_effects)
lowest = min(lowest, current)
current += len(instr.output_effects)
@@ -673,10 +755,8 @@ class Analyzer:
with open(self.output_filename, "w") as f:
# Write provenance header
- f.write(
- f"// This file is generated by {os.path.relpath(__file__)} --metadata\n"
- )
- f.write(f"// from {os.path.relpath(self.filename)}\n")
+ f.write(f"// This file is generated by {THIS} --metadata\n")
+ f.write(f"// from {os.path.relpath(self.filename, ROOT)}\n")
f.write(f"// Do not edit!\n")
# Create formatter; the rest of the code uses this
@@ -719,8 +799,11 @@ class Analyzer:
n_popped = n_pushed = -1
assert not instr.register
else:
- n_popped = len(instr.input_effects)
- n_pushed = len(instr.output_effects)
+ n_popped, sym_popped = list_effect_size(instr.input_effects)
+ n_pushed, sym_pushed = list_effect_size(instr.output_effects)
+ if sym_popped or sym_pushed:
+ # TODO: Record symbolic effects (how?)
+ n_popped = n_pushed = -1
if instr.register:
directions: list[str] = []
directions.extend("DIR_READ" for _ in instr.input_effects)
@@ -754,8 +837,8 @@ class Analyzer:
"""Write instructions to output file."""
with open(self.output_filename, "w") as f:
# Write provenance header
- f.write(f"// This file is generated by {os.path.relpath(__file__)}\n")
- f.write(f"// from {os.path.relpath(self.filename)}\n")
+ f.write(f"// This file is generated by {THIS}\n")
+ f.write(f"// from {os.path.relpath(self.filename, ROOT)}\n")
f.write(f"// Do not edit!\n")
# Create formatter; the rest of the code uses this
@@ -848,7 +931,9 @@ class Analyzer:
yield
- self.out.stack_adjust(up.final_sp - up.initial_sp)
+ # TODO: Use slices of up.stack instead of numeric values
+ self.out.stack_adjust(up.final_sp - up.initial_sp, [], [])
+
for i, var in enumerate(reversed(up.stack[: up.final_sp]), 1):
dst = StackEffect(f"PEEK({i})", "")
self.out.assign(dst, var)
@@ -899,9 +984,9 @@ def always_exits(lines: list[str]) -> bool:
)
-def variable_used(block: parser.Block, name: str) -> bool:
- """Determine whether a variable with a given name is used in a block."""
- return any(token.kind == "IDENTIFIER" and token.text == name for token in block.tokens)
+def variable_used(node: parser.Node, name: str) -> bool:
+ """Determine whether a variable with a given name is used in a node."""
+ return any(token.kind == "IDENTIFIER" and token.text == name for token in node.tokens)
def main():
diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py
index fad0cb9..c2cebe9 100644
--- a/Tools/cases_generator/parser.py
+++ b/Tools/cases_generator/parser.py
@@ -38,7 +38,7 @@ class Context(NamedTuple):
@dataclass
class Node:
- context: Context | None = field(init=False, default=None)
+ context: Context | None = field(init=False, compare=False, default=None)
@property
def text(self) -> str:
@@ -51,19 +51,37 @@ class Node:
tokens = context.owner.tokens
begin = context.begin
end = context.end
- return lx.to_text(tokens[begin:end], dedent)
+ return lx.to_text(self.tokens, dedent)
+
+ @property
+ def tokens(self) -> list[lx.Token]:
+ context = self.context
+ if not context:
+ return []
+ tokens = context.owner.tokens
+ begin = context.begin
+ end = context.end
+ return tokens[begin:end]
@dataclass
class Block(Node):
- tokens: list[lx.Token]
+ # This just holds a context which has the list of tokens.
+ pass
@dataclass
class StackEffect(Node):
name: str
- type: str = ""
- # TODO: array, condition
+ type: str = "" # Optional `:type`
+ size: str = "" # Optional `[size]`
+ # Note: we can have type or size but not both
+ # TODO: condition (can be combined with type but not size)
+
+
+@dataclass
+class Dimension(Node):
+ size: str
@dataclass
@@ -221,13 +239,31 @@ class Parser(PLexer):
@contextual
def stack_effect(self) -> StackEffect | None:
- # IDENTIFIER [':' IDENTIFIER]
- # TODO: Arrays, conditions
+ # IDENTIFIER
+ # | IDENTIFIER ':' IDENTIFIER
+ # | IDENTIFIER '[' dimension ']'
+ # TODO: Conditions
if tkn := self.expect(lx.IDENTIFIER):
- type = ""
if self.expect(lx.COLON):
- type = self.require(lx.IDENTIFIER).text
- return StackEffect(tkn.text, type)
+ typ = self.require(lx.IDENTIFIER)
+ return StackEffect(tkn.text, typ.text)
+ elif self.expect(lx.LBRACKET):
+ if not (dim := self.dimension()):
+ raise self.make_syntax_error("Expected dimension")
+ self.require(lx.RBRACKET)
+ return StackEffect(tkn.text, "PyObject **", dim.text.strip())
+ else:
+ return StackEffect(tkn.text)
+
+ @contextual
+ def dimension(self) -> Dimension | None:
+ tokens: list[lx.Token] = []
+ while (tkn := self.peek()) and tkn.kind != lx.RBRACKET:
+ tokens.append(tkn)
+ self.next()
+ if not tokens:
+ return None
+ return Dimension(lx.to_text(tokens).strip())
@contextual
def super_def(self) -> Super | None:
@@ -331,8 +367,8 @@ class Parser(PLexer):
@contextual
def block(self) -> Block:
- tokens = self.c_blob()
- return Block(tokens)
+ if self.c_blob():
+ return Block()
def c_blob(self) -> list[lx.Token]:
tokens: list[lx.Token] = []
diff --git a/Tools/cases_generator/test_generator.py b/Tools/cases_generator/test_generator.py
index e707a53..ae0c1e2 100644
--- a/Tools/cases_generator/test_generator.py
+++ b/Tools/cases_generator/test_generator.py
@@ -4,6 +4,36 @@
import tempfile
import generate_cases
+from parser import StackEffect
+
+
+def test_effect_sizes():
+ input_effects = [
+ x := StackEffect("x", "", ""),
+ y := StackEffect("y", "", "oparg"),
+ z := StackEffect("z", "", "oparg*2"),
+ ]
+ output_effects = [
+ a := StackEffect("a", "", ""),
+ b := StackEffect("b", "", "oparg*4"),
+ c := StackEffect("c", "", ""),
+ ]
+ other_effects = [
+ p := StackEffect("p", "", "oparg<<1"),
+ q := StackEffect("q", "", ""),
+ r := StackEffect("r", "", ""),
+ ]
+ assert generate_cases.effect_size(x) == (1, "")
+ assert generate_cases.effect_size(y) == (0, "oparg")
+ assert generate_cases.effect_size(z) == (0, "oparg*2")
+
+ assert generate_cases.list_effect_size(input_effects) == (1, "oparg + oparg*2")
+ assert generate_cases.list_effect_size(output_effects) == (2, "oparg*4")
+ assert generate_cases.list_effect_size(other_effects) == (2, "(oparg<<1)")
+
+ assert generate_cases.string_effect_size(generate_cases.list_effect_size(input_effects)) == "1 + oparg + oparg*2"
+ assert generate_cases.string_effect_size(generate_cases.list_effect_size(output_effects)) == "2 + oparg*4"
+ assert generate_cases.string_effect_size(generate_cases.list_effect_size(other_effects)) == "2 + (oparg<<1)"
def run_cases_test(input: str, expected: str):
@@ -123,6 +153,24 @@ def test_binary_op():
"""
run_cases_test(input, output)
+def test_overlap():
+ input = """
+ inst(OP, (left, right -- left, result)) {
+ spam();
+ }
+ """
+ output = """
+ TARGET(OP) {
+ PyObject *right = PEEK(1);
+ PyObject *left = PEEK(2);
+ PyObject *result;
+ spam();
+ POKE(1, result);
+ DISPATCH();
+ }
+ """
+ run_cases_test(input, output)
+
def test_predictions():
input = """
inst(OP1, (--)) {
@@ -308,3 +356,81 @@ def test_macro_instruction():
}
"""
run_cases_test(input, output)
+
+def test_array_input():
+ input = """
+ inst(OP, (below, values[oparg*2], above --)) {
+ spam();
+ }
+ """
+ output = """
+ TARGET(OP) {
+ PyObject *above = PEEK(1);
+ PyObject **values = &PEEK(1 + oparg*2);
+ PyObject *below = PEEK(2 + oparg*2);
+ spam();
+ STACK_SHRINK(oparg*2);
+ STACK_SHRINK(2);
+ DISPATCH();
+ }
+ """
+ run_cases_test(input, output)
+
+def test_array_output():
+ input = """
+ inst(OP, (-- below, values[oparg*3], above)) {
+ spam();
+ }
+ """
+ output = """
+ TARGET(OP) {
+ PyObject *below;
+ PyObject **values;
+ PyObject *above;
+ spam();
+ STACK_GROW(2);
+ STACK_GROW(oparg*3);
+ POKE(1, above);
+ MOVE_ITEMS(&PEEK(1 + oparg*3), values, oparg*3);
+ POKE(2 + oparg*3, below);
+ DISPATCH();
+ }
+ """
+ run_cases_test(input, output)
+
+def test_array_input_output():
+ input = """
+ inst(OP, (below, values[oparg] -- values[oparg], above)) {
+ spam();
+ }
+ """
+ output = """
+ TARGET(OP) {
+ PyObject **values = &PEEK(oparg);
+ PyObject *below = PEEK(1 + oparg);
+ PyObject *above;
+ spam();
+ POKE(1, above);
+ MOVE_ITEMS(&PEEK(1 + oparg), values, oparg);
+ DISPATCH();
+ }
+ """
+ run_cases_test(input, output)
+
+def test_array_error_if():
+ input = """
+ inst(OP, (extra, values[oparg] --)) {
+ ERROR_IF(oparg == 0, somewhere);
+ }
+ """
+ output = """
+ TARGET(OP) {
+ PyObject **values = &PEEK(oparg);
+ PyObject *extra = PEEK(1 + oparg);
+ if (oparg == 0) { STACK_SHRINK(oparg); goto pop_1_somewhere; }
+ STACK_SHRINK(oparg);
+ STACK_SHRINK(1);
+ DISPATCH();
+ }
+ """
+ run_cases_test(input, output)