author     Ken Jin <kenjin@python.org>              2024-01-12 17:30:27 (GMT)
committer  GitHub <noreply@github.com>              2024-01-12 17:30:27 (GMT)
commit     ac92527c08d917dffdb9c0a218d06f21114614a2 (patch)
tree       702ac2873cca646b59c49fdb9e591e31fb23b151 /Tools
parent     79970792fd2c70f77c38e08c7b3a9daf6a11bde1 (diff)
gh-113710: Add types to the interpreter DSL (#113711)
Co-authored-by: Jules <57632293+JuliaPoo@users.noreply.github.com>
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
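Note: this commit extends the cases-generator DSL so a stack effect can carry an optional type annotation of the form `&(TYPE)` or `&(TYPE + refinement)`, for consumption by the tier 2 optimizer's abstract interpreter. A minimal sketch of the parsed shape follows; the `StackEffect` class here is a simplified stand-in for the real node in `Tools/cases_generator/parsing.py` (which uses `field(default_factory=...)` to keep `type_prop` out of comparisons and hashing), and the operand names are illustrative:

```python
# Simplified stand-in for parsing.StackEffect, showing where the new
# (type, refinement) annotation lands after parsing.
from dataclasses import dataclass


@dataclass
class StackEffect:
    name: str
    type: str = ""  # C type, e.g. "PyObject *"
    cond: str = ""  # optional `if (...)` condition
    size: str = ""  # optional `[size]`
    type_prop: None | tuple[str, None | str] = None  # optional `(type, refinement)`


# A DSL effect like `owner : &(GUARD_TYPE_VERSION_TYPE + type_version)`
# (illustrative names) would parse to:
owner = StackEffect("owner", type_prop=("GUARD_TYPE_VERSION_TYPE", "type_version"))
# and a bare `null : &(NULL_TYPE)` to:
null = StackEffect("null", type_prop=("NULL_TYPE", None))
print(owner.type_prop, null.type_prop)
```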
Diffstat (limited to 'Tools')
-rw-r--r--  Tools/cases_generator/analyzer.py                   48
-rw-r--r--  Tools/cases_generator/generators_common.py           8
-rw-r--r--  Tools/cases_generator/interpreter_definition.md     37
-rw-r--r--  Tools/cases_generator/lexer.py                      12
-rw-r--r--  Tools/cases_generator/opcode_metadata_generator.py   2
-rw-r--r--  Tools/cases_generator/parsing.py                    36
-rw-r--r--  Tools/cases_generator/stack.py                      18
7 files changed, 131 insertions, 30 deletions
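Two of the derived properties added in `analyzer.py` below are worth spelling out: an op is *passthrough* when it is infallible and its stack effect only peeks (every input reappears unchanged as an output), and a passthrough op that can `DEOPT_IF` is a *guard*: it may reject its operands, but never transforms them. A self-contained sketch of that classification, using a simplified stand-in for the analyzer's stack-item type:

```python
# Self-contained sketch of the passthrough/guard classification from
# analyzer.py; `Effect` is a simplified stand-in for the real StackItem.
from dataclasses import dataclass


@dataclass
class Effect:
    name: str
    type: str = ""
    cond: str = ""
    size: str = "1"


def only_peeks(inputs: list[Effect], outputs: list[Effect]) -> bool:
    """True if every input reappears as an output: same name, type, and
    size, with no conditional effects on either side."""
    if not inputs or len(inputs) != len(outputs):
        return False
    if any(e.cond for e in inputs + outputs):
        return False
    return all(
        i.name == o.name and i.type == o.type and i.size == o.size
        for i, o in zip(inputs, outputs)
    )


# A type guard peeks `left` and `right`, cannot raise, and may DEOPT_IF:
ins = [Effect("left"), Effect("right")]
outs = [Effect("left"), Effect("right")]
infallible, deopts = True, True

passthrough = only_peeks(ins, outs) and infallible
guard = passthrough and deopts
print(passthrough, guard)  # True True
```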
```diff
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index 82ef888..7ed3b57 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -1,4 +1,4 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 import lexer
 import parser
 from typing import Optional
@@ -22,6 +22,10 @@ class Properties:
     uses_locals: bool
     has_free: bool
 
+    pure: bool
+    passthrough: bool
+    guard: bool
+
     def dump(self, indent: str) -> None:
         print(indent, end="")
         text = ", ".join([f"{key}: {value}" for (key, value) in self.__dict__.items()])
@@ -45,6 +49,9 @@ class Properties:
             uses_co_names=any(p.uses_co_names for p in properties),
             uses_locals=any(p.uses_locals for p in properties),
             has_free=any(p.has_free for p in properties),
+            pure=all(p.pure for p in properties),
+            passthrough=all(p.passthrough for p in properties),
+            guard=all(p.guard for p in properties),
         )
 
 
@@ -64,6 +71,9 @@ SKIP_PROPERTIES = Properties(
     uses_co_names=False,
     uses_locals=False,
     has_free=False,
+    pure=False,
+    passthrough=False,
+    guard=False,
 )
 
 
@@ -88,6 +98,9 @@ class StackItem:
     condition: str | None
     size: str
     peek: bool = False
+    type_prop: None | tuple[str, None | str] = field(
+        default_factory=lambda: None, init=True, compare=False, hash=False
+    )
 
     def __str__(self) -> str:
         cond = f" if ({self.condition})" if self.condition else ""
@@ -259,7 +272,9 @@ def override_error(
 
 
 def convert_stack_item(item: parser.StackEffect) -> StackItem:
-    return StackItem(item.name, item.type, item.cond, (item.size or "1"))
+    return StackItem(
+        item.name, item.type, item.cond, (item.size or "1"), type_prop=item.type_prop
+    )
 
 
 def analyze_stack(op: parser.InstDef) -> StackEffect:
@@ -377,7 +392,6 @@ def makes_escaping_api_call(instr: parser.InstDef) -> bool:
     return False
 
 
-
 EXITS = {
     "DISPATCH",
     "GO_TO_INSTRUCTION",
@@ -417,16 +431,33 @@ def always_exits(op: parser.InstDef) -> bool:
     return False
 
 
+def stack_effect_only_peeks(instr: parser.InstDef) -> bool:
+    stack_inputs = [s for s in instr.inputs if not isinstance(s, parser.CacheEffect)]
+    if len(stack_inputs) != len(instr.outputs):
+        return False
+    if len(stack_inputs) == 0:
+        return False
+    if any(s.cond for s in stack_inputs) or any(s.cond for s in instr.outputs):
+        return False
+    return all(
+        (s.name == other.name and s.type == other.type and s.size == other.size)
+        for s, other in zip(stack_inputs, instr.outputs)
+    )
+
+
 def compute_properties(op: parser.InstDef) -> Properties:
     has_free = (
         variable_used(op, "PyCell_New")
         or variable_used(op, "PyCell_GET")
         or variable_used(op, "PyCell_SET")
     )
+    infallible = is_infallible(op)
+    deopts = variable_used(op, "DEOPT_IF")
+    passthrough = stack_effect_only_peeks(op) and infallible
     return Properties(
         escapes=makes_escaping_api_call(op),
-        infallible=is_infallible(op),
-        deopts=variable_used(op, "DEOPT_IF"),
+        infallible=infallible,
+        deopts=deopts,
         oparg=variable_used(op, "oparg"),
         jumps=variable_used(op, "JUMPBY"),
         eval_breaker=variable_used(op, "CHECK_EVAL_BREAKER"),
@@ -440,6 +471,9 @@ def compute_properties(op: parser.InstDef) -> Properties:
         uses_locals=(variable_used(op, "GETLOCAL") or variable_used(op, "SETLOCAL"))
         and not has_free,
         has_free=has_free,
+        pure="pure" in op.annotations,
+        passthrough=passthrough,
+        guard=passthrough and deopts,
     )
 
 
@@ -686,9 +720,7 @@ def analyze_forest(forest: list[parser.AstNode]) -> Analysis:
     inst = instructions["BINARY_OP_INPLACE_ADD_UNICODE"]
     inst.family = families["BINARY_OP"]
     families["BINARY_OP"].members.append(inst)
-    opmap, first_arg, min_instrumented = assign_opcodes(
-        instructions, families, pseudos
-    )
+    opmap, first_arg, min_instrumented = assign_opcodes(instructions, families, pseudos)
     return Analysis(
         instructions, uops, families, pseudos, opmap, first_arg, min_instrumented
     )
diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py
index 5a42a05..c6c602c 100644
--- a/Tools/cases_generator/generators_common.py
+++ b/Tools/cases_generator/generators_common.py
@@ -26,7 +26,9 @@ def root_relative_path(filename: str) -> str:
     return filename
 
 
-def write_header(generator: str, sources: list[str], outfile: TextIO, comment: str = "//") -> None:
+def write_header(
+    generator: str, sources: list[str], outfile: TextIO, comment: str = "//"
+) -> None:
     outfile.write(
         f"""{comment} This file is generated by {root_relative_path(generator)}
 {comment} from:
@@ -209,6 +211,10 @@ def cflags(p: Properties) -> str:
         flags.append("HAS_ERROR_FLAG")
     if p.escapes:
         flags.append("HAS_ESCAPES_FLAG")
+    if p.pure:
+        flags.append("HAS_PURE_FLAG")
+    if p.passthrough:
+        flags.append("HAS_PASSTHROUGH_FLAG")
     if flags:
         return " | ".join(flags)
     else:
diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md
index 5c42387..e5a4899 100644
--- a/Tools/cases_generator/interpreter_definition.md
+++ b/Tools/cases_generator/interpreter_definition.md
@@ -15,6 +15,7 @@ These tools would be used to:
 * Generate the tier 2 interpreter
 * Generate documentation for instructions
 * Generate metadata about instructions, such as stack use (done).
+* Generate the tier 2 optimizer's abstract interpreter.
 
 Having a single definition file ensures that there is a single source of truth
 for bytecode semantics.
@@ -108,7 +109,10 @@ and a piece of C code describing its semantics::
        NAME [":" type] [ "if" "(" C-expression ")" ]
 
     type:
-       NAME ["*"]
+       NAME ["*"] | type_prop
+
+    type_prop:
+       "&" "(" NAME ["+" NAME] ")"
 
     stream:
        NAME "/" size
@@ -138,7 +142,27 @@ The following definitions may occur:
 The optional `type` in an `object` is the C type. It defaults to `PyObject *`.
 The objects before the "--" are the objects on top of the stack at the start of
 the instruction. Those after the "--" are the objects on top of the stack at the
-end of the instruction.
+end of the instruction. When prefixed by a `&`, the `type` production rule follows the
+`type_prop` production rule. This indicates the type of the value is of that specific type
+after the operation. In this case, the type may also contain 64-bit refinement information
+that is fetched from a previously defined operand in the instruction header, such as
+a type version tag. This follows the format `type + refinement`. The list of possible types
+and their refinements are below. They obey the following predicates:
+
+
+* `PYLONG_TYPE`: `Py_TYPE(val) == &PyLong_Type`
+* `PYFLOAT_TYPE`: `Py_TYPE(val) == &PyFloat_Type`
+* `PYUNICODE_TYPE`: `Py_TYPE(val) == &PYUNICODE_TYPE`
+* `NULL_TYPE`: `val == NULL`
+* `GUARD_TYPE_VERSION_TYPE`: `type->tp_version_tag == auxillary`
+* `GUARD_DORV_VALUES_TYPE`: `_PyDictOrValues_IsValues(obj)`
+* `GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE`:
+  `_PyDictOrValues_IsValues(obj) || _PyObject_MakeInstanceAttributesFromDict(obj, dorv)`
+* `GUARD_KEYS_VERSION_TYPE`: `owner_heap_type->ht_cached_keys->dk_version == auxillary`
+* `PYMETHOD_TYPE`: `Py_TYPE(val) == &PyMethod_Type`
+* `PYFUNCTION_TYPE_VERSION_TYPE`:
+  `PyFunction_Check(callable) && func->func_version == auxillary && code->co_argcount == oparg + (self_or_null != NULL)`
+
 
 An `inst` without `stack_effect` is a transitional form to allow the original C code
 definitions to be copied. It lacks information to generate anything other than the
@@ -158,6 +182,15 @@ By convention cache effects (`stream`) must precede the input effects.
 
 The name `oparg` is pre-defined as a 32 bit value fetched from the instruction stream.
 
+### Special instruction annotations
+
+Instruction headers may be prefixed by one or more annotations. The non-exhaustive
+list of annotations and their meanings are as follows:
+
+* `override`. For external use by other interpreter definitions to override the current
+  instruction definition.
+* `pure`. This instruction has no side effects.
+
 ### Special functions/macros
 
 The C code may include special functions that are understood by the tools as
diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py
index c3c2954..4f8d01c 100644
--- a/Tools/cases_generator/lexer.py
+++ b/Tools/cases_generator/lexer.py
@@ -216,7 +216,13 @@ kwds.append(MACRO)
 keywords = {name.lower(): name for name in kwds}
 
 ANNOTATION = "ANNOTATION"
-annotations = {"specializing", "guard", "override", "register", "replaced"}
+annotations = {
+    "specializing",
+    "override",
+    "register",
+    "replaced",
+    "pure",
+}
 
 __all__ = []
 __all__.extend(kwds)
@@ -324,7 +330,9 @@ def tokenize(src: str, line: int = 1, filename: str = "") -> Iterator[Token]:
         else:
             begin = line, start - linestart
         if kind != "\n":
-            yield Token(filename, kind, text, begin, (line, start - linestart + len(text)))
+            yield Token(
+                filename, kind, text, begin, (line, start - linestart + len(text))
+            )
 
 
 def to_text(tkns: list[Token], dedent: int = 0) -> str:
diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py
index 9b7df9a..1826a0b 100644
--- a/Tools/cases_generator/opcode_metadata_generator.py
+++ b/Tools/cases_generator/opcode_metadata_generator.py
@@ -50,6 +50,8 @@ FLAGS = [
     "DEOPT",
     "ERROR",
     "ESCAPES",
+    "PURE",
+    "PASSTHROUGH",
 ]
diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py
index 60c185d..307919c 100644
--- a/Tools/cases_generator/parsing.py
+++ b/Tools/cases_generator/parsing.py
@@ -75,6 +75,11 @@ class StackEffect(Node):
     size: str = ""  # Optional `[size]`
     # Note: size cannot be combined with type or cond
 
+    # Optional `(type, refinement)`
+    type_prop: None | tuple[str, None | str] = field(
+        default_factory=lambda: None, init=True, compare=False, hash=False
+    )
+
    def __repr__(self) -> str:
         items = [self.name, self.type, self.cond, self.size]
         while items and items[-1] == "":
@@ -138,11 +143,13 @@ class Family(Node):
 @dataclass
 class Pseudo(Node):
     name: str
-    flags: list[str]   # instr flags to set on the pseudo instruction
-    targets: list[str]  # opcodes this can be replaced by
+    flags: list[str]  # instr flags to set on the pseudo instruction
+    targets: list[str]  # opcodes this can be replaced by
+
 
 AstNode = InstDef | Macro | Pseudo | Family
 
+
 class Parser(PLexer):
     @contextual
     def definition(self) -> AstNode | None:
@@ -253,14 +260,25 @@ class Parser(PLexer):
 
     @contextual
     def stack_effect(self) -> StackEffect | None:
-        # IDENTIFIER [':' IDENTIFIER [TIMES]] ['if' '(' expression ')']
+        # IDENTIFIER [':' [IDENTIFIER [TIMES]] ['&' '(' IDENTIFIER ['+' IDENTIFIER] ')']] ['if' '(' expression ')']
         # | IDENTIFIER '[' expression ']'
         if tkn := self.expect(lx.IDENTIFIER):
             type_text = ""
+            type_prop = None
             if self.expect(lx.COLON):
-                type_text = self.require(lx.IDENTIFIER).text.strip()
-                if self.expect(lx.TIMES):
-                    type_text += " *"
+                if i := self.expect(lx.IDENTIFIER):
+                    type_text = i.text.strip()
+                    if self.expect(lx.TIMES):
+                        type_text += " *"
+                if self.expect(lx.AND):
+                    consumed_bracket = self.expect(lx.LPAREN) is not None
+                    type_prop_text = self.require(lx.IDENTIFIER).text.strip()
+                    refinement = None
+                    if self.expect(lx.PLUS):
+                        refinement = self.require(lx.IDENTIFIER).text.strip()
+                    type_prop = (type_prop_text, refinement)
+                    if consumed_bracket:
+                        self.require(lx.RPAREN)
             cond_text = ""
             if self.expect(lx.IF):
                 self.require(lx.LPAREN)
@@ -277,7 +295,7 @@ class Parser(PLexer):
                 self.require(lx.RBRACKET)
                 type_text = "PyObject **"
                 size_text = size.text.strip()
-            return StackEffect(tkn.text, type_text, cond_text, size_text)
+            return StackEffect(tkn.text, type_text, cond_text, size_text, type_prop)
         return None
 
     @contextual
@@ -364,7 +382,9 @@ class Parser(PLexer):
                 if self.expect(lx.COMMA):
                     if not (size := self.expect(lx.IDENTIFIER)):
                         if not (size := self.expect(lx.NUMBER)):
-                            raise self.make_syntax_error("Expected identifier or number")
+                            raise self.make_syntax_error(
+                                "Expected identifier or number"
+                            )
                 if self.expect(lx.RPAREN):
                     if self.expect(lx.EQUALS):
                         if not self.expect(lx.LBRACE):
diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py
index d351037..6633950 100644
--- a/Tools/cases_generator/stack.py
+++ b/Tools/cases_generator/stack.py
@@ -3,6 +3,8 @@ from analyzer import StackItem, Instruction, Uop
 from dataclasses import dataclass
 from cwriter import CWriter
 
+UNUSED = {"unused"}
+
 
 def maybe_parenthesize(sym: str) -> str:
     """Add parentheses around a string if it contains an operator
@@ -29,6 +31,7 @@ def var_size(var: StackItem) -> str:
     else:
         return var.size
 
+
 @dataclass
 class StackOffset:
     "The stack offset of the virtual base of the stack from the physical stack pointer"
@@ -47,10 +50,7 @@ class StackOffset:
         self.pushed.append(var_size(item))
 
     def __sub__(self, other: "StackOffset") -> "StackOffset":
-        return StackOffset(
-            self.popped + other.pushed,
-            self.pushed + other.popped
-        )
+        return StackOffset(self.popped + other.pushed, self.pushed + other.popped)
 
     def __neg__(self) -> "StackOffset":
         return StackOffset(self.pushed, self.popped)
@@ -134,18 +134,18 @@ class Stack:
                 )
             if popped.name == var.name:
                 return ""
-            elif popped.name == "unused":
+            elif popped.name in UNUSED:
                 self.defined.add(var.name)
                 return (
                     f"{var.name} = {indirect}stack_pointer[{self.top_offset.to_c()}];\n"
                 )
-            elif var.name == "unused":
+            elif var.name in UNUSED:
                 return ""
             else:
                 self.defined.add(var.name)
                 return f"{var.name} = {popped.name};\n"
         self.base_offset.pop(var)
-        if var.name == "unused":
+        if var.name in UNUSED:
             return ""
         else:
             self.defined.add(var.name)
@@ -159,7 +159,7 @@ class Stack:
 
     def push(self, var: StackItem) -> str:
         self.variables.append(var)
-        if var.is_array() and var.name not in self.defined and var.name != "unused":
+        if var.is_array() and var.name not in self.defined and var.name not in UNUSED:
             c_offset = self.top_offset.to_c()
             self.top_offset.push(var)
             self.defined.add(var.name)
@@ -172,7 +172,7 @@ class Stack:
         for var in self.variables:
             if not var.peek:
                 cast = "(PyObject *)" if var.type else ""
-                if var.name != "unused" and not var.is_array():
+                if var.name not in UNUSED and not var.is_array():
                     if var.condition:
                         out.emit(f"if ({var.condition}) ")
                     out.emit(
```
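As a usage note on the grammar introduced above: the annotation accepts a bare name or a `name + refinement` pair, and the real token-based parser even tolerates a missing opening parenthesis (it only requires the closing one when the opening one was consumed). A regex-based sketch of just the parenthesized `type_prop` production, independent of the real parser:

```python
import re

# Matches the type_prop production from interpreter_definition.md:
#     "&" "(" NAME ["+" NAME] ")"
TYPE_PROP = re.compile(r"&\s*\(\s*(\w+)\s*(?:\+\s*(\w+)\s*)?\)")


def parse_type_prop(text: str) -> tuple[str, str | None] | None:
    """Return (type, refinement) for an annotation, or None if it doesn't match."""
    m = TYPE_PROP.fullmatch(text.strip())
    return (m.group(1), m.group(2)) if m else None


print(parse_type_prop("&(PYLONG_TYPE)"))
# ('PYLONG_TYPE', None)
print(parse_type_prop("&(GUARD_TYPE_VERSION_TYPE + type_version)"))
# ('GUARD_TYPE_VERSION_TYPE', 'type_version')
```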