diff options
author | Guido van Rossum <guido@python.org> | 2023-01-17 23:59:19 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-17 23:59:19 (GMT) |
commit | 80e3e3423cb456ba341f95d6a07e94cf3b16c0be (patch) | |
tree | 32ddc8bc516b2010d48a529c9cc03fb68631c80e /Tools | |
parent | f34176b77f222726d901595968a4b44456186da4 (diff) | |
download | cpython-80e3e3423cb456ba341f95d6a07e94cf3b16c0be.zip cpython-80e3e3423cb456ba341f95d6a07e94cf3b16c0be.tar.gz cpython-80e3e3423cb456ba341f95d6a07e94cf3b16c0be.tar.bz2 |
GH-98831: Implement array support in cases generator (#100912)
You can now write things like this:
```
inst(BUILD_STRING, (pieces[oparg] -- str)) { ... }
inst(LIST_APPEND, (list, unused[oparg-1], v -- list, unused[oparg-1])) { ... }
```
Note that array output effects are only partially supported (they must be named `unused` or correspond to an input effect).
Diffstat (limited to 'Tools')
-rw-r--r-- | Tools/cases_generator/generate_cases.py | 173 | ||||
-rw-r--r-- | Tools/cases_generator/parser.py | 60 | ||||
-rw-r--r-- | Tools/cases_generator/test_generator.py | 126 |
3 files changed, 303 insertions, 56 deletions
diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 4bd99b7..59e2499 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -15,14 +15,14 @@ import typing import parser from parser import StackEffect -DEFAULT_INPUT = os.path.relpath( - os.path.join(os.path.dirname(__file__), "../../Python/bytecodes.c") -) -DEFAULT_OUTPUT = os.path.relpath( - os.path.join(os.path.dirname(__file__), "../../Python/generated_cases.c.h") -) +HERE = os.path.dirname(__file__) +ROOT = os.path.join(HERE, "../..") +THIS = os.path.relpath(__file__, ROOT) + +DEFAULT_INPUT = os.path.relpath(os.path.join(ROOT, "Python/bytecodes.c")) +DEFAULT_OUTPUT = os.path.relpath(os.path.join(ROOT, "Python/generated_cases.c.h")) DEFAULT_METADATA_OUTPUT = os.path.relpath( - os.path.join(os.path.dirname(__file__), "../../Python/opcode_metadata.h") + os.path.join(ROOT, "Python/opcode_metadata.h") ) BEGIN_MARKER = "// BEGIN BYTECODES //" END_MARKER = "// END BYTECODES //" @@ -48,6 +48,55 @@ arg_parser.add_argument( ) +def effect_size(effect: StackEffect) -> tuple[int, str]: + """Return the 'size' impact of a stack effect. + + Returns a tuple (numeric, symbolic) where: + + - numeric is an int giving the statically analyzable size of the effect + - symbolic is a string representing a variable effect (e.g. 'oparg*2') + + At most one of these will be non-zero / non-empty. + """ + if effect.size: + return 0, effect.size + else: + return 1, "" + + +def maybe_parenthesize(sym: str) -> str: + """Add parentheses around a string if it contains an operator. + + An exception is made for '*' which is common and harmless + in the context where the symbolic size is used. + """ + if re.match(r"^[\s\w*]+$", sym): + return sym + else: + return f"({sym})" + + +def list_effect_size(effects: list[StackEffect]) -> tuple[int, str]: + numeric = 0 + symbolic: list[str] = [] + for effect in effects: + diff, sym = effect_size(effect) + numeric += diff + if sym: + symbolic.append(maybe_parenthesize(sym)) + return numeric, " + ".join(symbolic) + + +def string_effect_size(arg: tuple[int, str]) -> str: + numeric, symbolic = arg + if numeric and symbolic: + return f"{numeric} + {symbolic}" + elif symbolic: + return symbolic + else: + return str(numeric) + + class Formatter: """Wraps an output stream with the ability to indent etc.""" @@ -83,28 +132,43 @@ class Formatter: yield self.emit("}") - def stack_adjust(self, diff: int): + def stack_adjust(self, diff: int, input_effects: list[StackEffect], output_effects: list[StackEffect]): + # TODO: Get rid of 'diff' parameter + shrink, isym = list_effect_size(input_effects) + grow, osym = list_effect_size(output_effects) + diff += grow - shrink + if isym and isym != osym: + self.emit(f"STACK_SHRINK({isym});") + if diff < 0: + self.emit(f"STACK_SHRINK({-diff});") if diff > 0: self.emit(f"STACK_GROW({diff});") - elif diff < 0: - self.emit(f"STACK_SHRINK({-diff});") + if osym and osym != isym: + self.emit(f"STACK_GROW({osym});") def declare(self, dst: StackEffect, src: StackEffect | None): if dst.name == UNUSED: return - typ = f"{dst.type} " if dst.type else "PyObject *" + typ = f"{dst.type}" if dst.type else "PyObject *" init = "" if src: cast = self.cast(dst, src) init = f" = {cast}{src.name}" - self.emit(f"{typ}{dst.name}{init};") + sepa = "" if typ.endswith("*") else " " + self.emit(f"{typ}{sepa}{dst.name}{init};") def assign(self, dst: StackEffect, src: StackEffect): if src.name == UNUSED: return cast = self.cast(dst, src) - if m := re.match(r"^PEEK\((\d+)\)$", dst.name): + if m := re.match(r"^PEEK\((.*)\)$", dst.name): self.emit(f"POKE({m.group(1)}, {cast}{src.name});") + elif m := re.match(r"^&PEEK\(.*\)$", dst.name): + # NOTE: MOVE_ITEMS() does not actually exist. + # The only supported output array forms are: + # - unused[...] + # - X[...] where X[...] matches an input array exactly + self.emit(f"MOVE_ITEMS({dst.name}, {src.name}, {src.size});") elif m := re.match(r"^REG\(oparg(\d+)\)$", dst.name): self.emit(f"Py_XSETREF({dst.name}, {cast}{src.name});") else: @@ -172,7 +236,7 @@ class Instruction: num_dummies = (num_regs // 2) * 2 + 1 - num_regs fmt = "I" + "B"*num_regs + "X"*num_dummies else: - if variable_used(inst.block, "oparg"): + if variable_used(inst, "oparg"): fmt = "IB" else: fmt = "IX" @@ -200,7 +264,6 @@ class Instruction: def write(self, out: Formatter) -> None: """Write one instruction, sans prologue and epilogue.""" # Write a static assertion that a family's cache size is correct - if family := self.family: if self.name == family.members[0]: if cache_size := family.size: @@ -211,8 +274,13 @@ class Instruction: if not self.register: # Write input stack effect variable declarations and initializations - for i, ieffect in enumerate(reversed(self.input_effects), 1): - src = StackEffect(f"PEEK({i})", "") + ieffects = list(reversed(self.input_effects)) + for i, ieffect in enumerate(ieffects): + isize = string_effect_size(list_effect_size(ieffects[:i+1])) + if ieffect.size: + src = StackEffect(f"&PEEK({isize})", "PyObject **") + else: + src = StackEffect(f"PEEK({isize})", "") out.declare(ieffect, src) else: # Write input register variable declarations and initializations @@ -236,14 +304,19 @@ class Instruction: if not self.register: # Write net stack growth/shrinkage - diff = len(self.output_effects) - len(self.input_effects) - out.stack_adjust(diff) + out.stack_adjust(0, self.input_effects, self.output_effects) # Write output stack effect assignments - for i, oeffect in enumerate(reversed(self.output_effects), 1): - if oeffect.name not in self.unmoved_names: - dst = StackEffect(f"PEEK({i})", "") - out.assign(dst, oeffect) + oeffects = list(reversed(self.output_effects)) + for i, oeffect in enumerate(oeffects): + if oeffect.name in self.unmoved_names: + continue + osize = string_effect_size(list_effect_size(oeffects[:i+1])) + if oeffect.size: + dst = StackEffect(f"&PEEK({osize})", "PyObject **") + else: + dst = StackEffect(f"PEEK({osize})", "") + out.assign(dst, oeffect) else: # Write output register assignments for oeffect, reg in zip(self.output_effects, self.output_registers): @@ -287,19 +360,21 @@ class Instruction: # The code block is responsible for DECREF()ing them. # NOTE: If the label doesn't exist, just add it to ceval.c. if not self.register: - ninputs = len(self.input_effects) # Don't pop common input/output effects at the bottom! # These aren't DECREF'ed so they can stay. - for ieff, oeff in zip(self.input_effects, self.output_effects): - if ieff.name == oeff.name: - ninputs -= 1 - else: - break + ieffs = list(self.input_effects) + oeffs = list(self.output_effects) + while ieffs and oeffs and ieffs[0] == oeffs[0]: + ieffs.pop(0) + oeffs.pop(0) + ninputs, symbolic = list_effect_size(ieffs) + if ninputs: + label = f"pop_{ninputs}_{label}" else: - ninputs = 0 - if ninputs: + symbolic = "" + if symbolic: out.write_raw( - f"{extra}{space}if ({cond}) goto pop_{ninputs}_{label};\n" + f"{extra}{space}if ({cond}) {{ STACK_SHRINK({symbolic}); goto {label}; }}\n" ) else: out.write_raw(f"{extra}{space}if ({cond}) goto {label};\n") @@ -635,6 +710,13 @@ class Analyzer: for thing in components: match thing: case Instruction() as instr: + if any(eff.size for eff in instr.input_effects + instr.output_effects): + # TODO: Eventually this will be needed, at least for macros. + self.error( + f"Instruction {instr.name!r} has variable-sized stack effect, " + "which are not supported in super- or macro instructions", + instr.inst, # TODO: Pass name+location of super/macro + ) current -= len(instr.input_effects) lowest = min(lowest, current) current += len(instr.output_effects) @@ -673,10 +755,8 @@ class Analyzer: with open(self.output_filename, "w") as f: # Write provenance header - f.write( - f"// This file is generated by {os.path.relpath(__file__)} --metadata\n" - ) - f.write(f"// from {os.path.relpath(self.filename)}\n") + f.write(f"// This file is generated by {THIS} --metadata\n") + f.write(f"// from {os.path.relpath(self.filename, ROOT)}\n") f.write(f"// Do not edit!\n") # Create formatter; the rest of the code uses this @@ -719,8 +799,11 @@ class Analyzer: n_popped = n_pushed = -1 assert not instr.register else: - n_popped = len(instr.input_effects) - n_pushed = len(instr.output_effects) + n_popped, sym_popped = list_effect_size(instr.input_effects) + n_pushed, sym_pushed = list_effect_size(instr.output_effects) + if sym_popped or sym_pushed: + # TODO: Record symbolic effects (how?) + n_popped = n_pushed = -1 if instr.register: directions: list[str] = [] directions.extend("DIR_READ" for _ in instr.input_effects) @@ -754,8 +837,8 @@ class Analyzer: """Write instructions to output file.""" with open(self.output_filename, "w") as f: # Write provenance header - f.write(f"// This file is generated by {os.path.relpath(__file__)}\n") - f.write(f"// from {os.path.relpath(self.filename)}\n") + f.write(f"// This file is generated by {THIS}\n") + f.write(f"// from {os.path.relpath(self.filename, ROOT)}\n") f.write(f"// Do not edit!\n") # Create formatter; the rest of the code uses this @@ -848,7 +931,9 @@ class Analyzer: yield - self.out.stack_adjust(up.final_sp - up.initial_sp) + # TODO: Use slices of up.stack instead of numeric values + self.out.stack_adjust(up.final_sp - up.initial_sp, [], []) + for i, var in enumerate(reversed(up.stack[: up.final_sp]), 1): dst = StackEffect(f"PEEK({i})", "") self.out.assign(dst, var) @@ -899,9 +984,9 @@ def always_exits(lines: list[str]) -> bool: ) -def variable_used(block: parser.Block, name: str) -> bool: - """Determine whether a variable with a given name is used in a block.""" - return any(token.kind == "IDENTIFIER" and token.text == name for token in block.tokens) +def variable_used(node: parser.Node, name: str) -> bool: + """Determine whether a variable with a given name is used in a node.""" + return any(token.kind == "IDENTIFIER" and token.text == name for token in node.tokens) def main(): diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py index fad0cb9..c2cebe9 100644 --- a/Tools/cases_generator/parser.py +++ b/Tools/cases_generator/parser.py @@ -38,7 +38,7 @@ class Context(NamedTuple): @dataclass class Node: - context: Context | None = field(init=False, default=None) + context: Context | None = field(init=False, compare=False, default=None) @property def text(self) -> str: @@ -51,19 +51,37 @@ class Node: tokens = context.owner.tokens begin = context.begin end = context.end - return lx.to_text(tokens[begin:end], dedent) + return lx.to_text(self.tokens, dedent) + + @property + def tokens(self) -> list[lx.Token]: + context = self.context + if not context: + return [] + tokens = context.owner.tokens + begin = context.begin + end = context.end + return tokens[begin:end] @dataclass class Block(Node): - tokens: list[lx.Token] + # This just holds a context which has the list of tokens. + pass @dataclass class StackEffect(Node): name: str - type: str = "" - # TODO: array, condition + type: str = "" # Optional `:type` + size: str = "" # Optional `[size]` + # Note: we can have type or size but not both + # TODO: condition (can be combined with type but not size) + + +@dataclass +class Dimension(Node): + size: str @dataclass @@ -221,13 +239,31 @@ class Parser(PLexer): @contextual def stack_effect(self) -> StackEffect | None: - # IDENTIFIER [':' IDENTIFIER] - # TODO: Arrays, conditions + # IDENTIFIER + # | IDENTIFIER ':' IDENTIFIER + # | IDENTIFIER '[' dimension ']' + # TODO: Conditions if tkn := self.expect(lx.IDENTIFIER): - type = "" if self.expect(lx.COLON): - type = self.require(lx.IDENTIFIER).text - return StackEffect(tkn.text, type) + typ = self.require(lx.IDENTIFIER) + return StackEffect(tkn.text, typ.text) + elif self.expect(lx.LBRACKET): + if not (dim := self.dimension()): + raise self.make_syntax_error("Expected dimension") + self.require(lx.RBRACKET) + return StackEffect(tkn.text, "PyObject **", dim.text.strip()) + else: + return StackEffect(tkn.text) + + @contextual + def dimension(self) -> Dimension | None: + tokens: list[lx.Token] = [] + while (tkn := self.peek()) and tkn.kind != lx.RBRACKET: + tokens.append(tkn) + self.next() + if not tokens: + return None + return Dimension(lx.to_text(tokens).strip()) @contextual def super_def(self) -> Super | None: @@ -331,8 +367,8 @@ class Parser(PLexer): @contextual def block(self) -> Block: - tokens = self.c_blob() - return Block(tokens) + if self.c_blob(): + return Block() def c_blob(self) -> list[lx.Token]: tokens: list[lx.Token] = [] diff --git a/Tools/cases_generator/test_generator.py b/Tools/cases_generator/test_generator.py index e707a53..ae0c1e2 100644 --- a/Tools/cases_generator/test_generator.py +++ b/Tools/cases_generator/test_generator.py @@ -4,6 +4,36 @@ import tempfile import generate_cases +from parser import StackEffect + + +def test_effect_sizes(): + input_effects = [ + x := StackEffect("x", "", ""), + y := StackEffect("y", "", "oparg"), + z := StackEffect("z", "", "oparg*2"), + ] + output_effects = [ + a := StackEffect("a", "", ""), + b := StackEffect("b", "", "oparg*4"), + c := StackEffect("c", "", ""), + ] + other_effects = [ + p := StackEffect("p", "", "oparg<<1"), + q := StackEffect("q", "", ""), + r := StackEffect("r", "", ""), + ] + assert generate_cases.effect_size(x) == (1, "") + assert generate_cases.effect_size(y) == (0, "oparg") + assert generate_cases.effect_size(z) == (0, "oparg*2") + + assert generate_cases.list_effect_size(input_effects) == (1, "oparg + oparg*2") + assert generate_cases.list_effect_size(output_effects) == (2, "oparg*4") + assert generate_cases.list_effect_size(other_effects) == (2, "(oparg<<1)") + + assert generate_cases.string_effect_size(generate_cases.list_effect_size(input_effects)) == "1 + oparg + oparg*2" + assert generate_cases.string_effect_size(generate_cases.list_effect_size(output_effects)) == "2 + oparg*4" + assert generate_cases.string_effect_size(generate_cases.list_effect_size(other_effects)) == "2 + (oparg<<1)" def run_cases_test(input: str, expected: str): @@ -123,6 +153,24 @@ def test_binary_op(): """ run_cases_test(input, output) +def test_overlap(): + input = """ + inst(OP, (left, right -- left, result)) { + spam(); + } + """ + output = """ + TARGET(OP) { + PyObject *right = PEEK(1); + PyObject *left = PEEK(2); + PyObject *result; + spam(); + POKE(1, result); + DISPATCH(); + } + """ + run_cases_test(input, output) + def test_predictions(): input = """ inst(OP1, (--)) { @@ -308,3 +356,81 @@ def test_macro_instruction(): } """ run_cases_test(input, output) + +def test_array_input(): + input = """ + inst(OP, (below, values[oparg*2], above --)) { + spam(); + } + """ + output = """ + TARGET(OP) { + PyObject *above = PEEK(1); + PyObject **values = &PEEK(1 + oparg*2); + PyObject *below = PEEK(2 + oparg*2); + spam(); + STACK_SHRINK(oparg*2); + STACK_SHRINK(2); + DISPATCH(); + } + """ + run_cases_test(input, output) + +def test_array_output(): + input = """ + inst(OP, (-- below, values[oparg*3], above)) { + spam(); + } + """ + output = """ + TARGET(OP) { + PyObject *below; + PyObject **values; + PyObject *above; + spam(); + STACK_GROW(2); + STACK_GROW(oparg*3); + POKE(1, above); + MOVE_ITEMS(&PEEK(1 + oparg*3), values, oparg*3); + POKE(2 + oparg*3, below); + DISPATCH(); + } + """ + run_cases_test(input, output) + +def test_array_input_output(): + input = """ + inst(OP, (below, values[oparg] -- values[oparg], above)) { + spam(); + } + """ + output = """ + TARGET(OP) { + PyObject **values = &PEEK(oparg); + PyObject *below = PEEK(1 + oparg); + PyObject *above; + spam(); + POKE(1, above); + MOVE_ITEMS(&PEEK(1 + oparg), values, oparg); + DISPATCH(); + } + """ + run_cases_test(input, output) + +def test_array_error_if(): + input = """ + inst(OP, (extra, values[oparg] --)) { + ERROR_IF(oparg == 0, somewhere); + } + """ + output = """ + TARGET(OP) { + PyObject **values = &PEEK(oparg); + PyObject *extra = PEEK(1 + oparg); + if (oparg == 0) { STACK_SHRINK(oparg); goto pop_1_somewhere; } + STACK_SHRINK(oparg); + STACK_SHRINK(1); + DISPATCH(); + } + """ + run_cases_test(input, output) |