author     Ken Jin <kenjin@python.org>              2024-01-12 17:30:27 (GMT)
committer  GitHub <noreply@github.com>              2024-01-12 17:30:27 (GMT)
commit     ac92527c08d917dffdb9c0a218d06f21114614a2 (patch)
tree       702ac2873cca646b59c49fdb9e591e31fb23b151 /Tools
parent     79970792fd2c70f77c38e08c7b3a9daf6a11bde1 (diff)
gh-113710: Add types to the interpreter DSL (#113711)
Co-authored-by: Jules <57632293+JuliaPoo@users.noreply.github.com>
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
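Note: this commit extends the cases-generator DSL so a stack effect can carry an optional type annotation of the form `&(TYPE)` or `&(TYPE + refinement)`, for consumption by the tier 2 optimizer's abstract interpreter. A minimal sketch of the parsed shape follows; the `StackEffect` class here is a simplified stand-in for the real node in `Tools/cases_generator/parsing.py` (which uses `field(default_factory=...)` to keep `type_prop` out of comparisons and hashing), and the operand names are illustrative:

```python
# Simplified stand-in for parsing.StackEffect, showing where the new
# (type, refinement) annotation lands after parsing.
from dataclasses import dataclass


@dataclass
class StackEffect:
    name: str
    type: str = ""  # C type, e.g. "PyObject *"
    cond: str = ""  # optional `if (...)` condition
    size: str = ""  # optional `[size]`
    type_prop: None | tuple[str, None | str] = None  # optional `(type, refinement)`


# A DSL effect like `owner : &(GUARD_TYPE_VERSION_TYPE + type_version)`
# (illustrative names) would parse to:
owner = StackEffect("owner", type_prop=("GUARD_TYPE_VERSION_TYPE", "type_version"))
# and a bare `null : &(NULL_TYPE)` to:
null = StackEffect("null", type_prop=("NULL_TYPE", None))
print(owner.type_prop, null.type_prop)
```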
Diffstat (limited to 'Tools')
-rw-r--r--  Tools/cases_generator/analyzer.py                   48
-rw-r--r--  Tools/cases_generator/generators_common.py           8
-rw-r--r--  Tools/cases_generator/interpreter_definition.md     37
-rw-r--r--  Tools/cases_generator/lexer.py                      12
-rw-r--r--  Tools/cases_generator/opcode_metadata_generator.py   2
-rw-r--r--  Tools/cases_generator/parsing.py                    36
-rw-r--r--  Tools/cases_generator/stack.py                      18
7 files changed, 131 insertions, 30 deletions
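Two of the derived properties added in `analyzer.py` below are worth spelling out: an op is *passthrough* when it is infallible and its stack effect only peeks (every input reappears unchanged as an output), and a passthrough op that can `DEOPT_IF` is a *guard*: it may reject its operands, but never transforms them. A self-contained sketch of that classification, using a simplified stand-in for the analyzer's stack-item type:

```python
# Self-contained sketch of the passthrough/guard classification from
# analyzer.py; `Effect` is a simplified stand-in for the real StackItem.
from dataclasses import dataclass


@dataclass
class Effect:
    name: str
    type: str = ""
    cond: str = ""
    size: str = "1"


def only_peeks(inputs: list[Effect], outputs: list[Effect]) -> bool:
    """True if every input reappears as an output: same name, type, and
    size, with no conditional effects on either side."""
    if not inputs or len(inputs) != len(outputs):
        return False
    if any(e.cond for e in inputs + outputs):
        return False
    return all(
        i.name == o.name and i.type == o.type and i.size == o.size
        for i, o in zip(inputs, outputs)
    )


# A type guard peeks `left` and `right`, cannot raise, and may DEOPT_IF:
ins = [Effect("left"), Effect("right")]
outs = [Effect("left"), Effect("right")]
infallible, deopts = True, True

passthrough = only_peeks(ins, outs) and infallible
guard = passthrough and deopts
print(passthrough, guard)  # True True
```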
```diff
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index 82ef888..7ed3b57 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -1,4 +1,4 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 import lexer
 import parser
 from typing import Optional
@@ -22,6 +22,10 @@ class Properties:
     uses_locals: bool
     has_free: bool
 
+    pure: bool
+    passthrough: bool
+    guard: bool
+
     def dump(self, indent: str) -> None:
         print(indent, end="")
         text = ", ".join([f"{key}: {value}" for (key, value) in self.__dict__.items()])
@@ -45,6 +49,9 @@ class Properties:
             uses_co_names=any(p.uses_co_names for p in properties),
             uses_locals=any(p.uses_locals for p in properties),
             has_free=any(p.has_free for p in properties),
+            pure=all(p.pure for p in properties),
+            passthrough=all(p.passthrough for p in properties),
+            guard=all(p.guard for p in properties),
         )
 
 
@@ -64,6 +71,9 @@ SKIP_PROPERTIES = Properties(
     uses_co_names=False,
     uses_locals=False,
     has_free=False,
+    pure=False,
+    passthrough=False,
+    guard=False,
 )
 
 
@@ -88,6 +98,9 @@ class StackItem:
     condition: str | None
     size: str
     peek: bool = False
+    type_prop: None | tuple[str, None | str] = field(
+        default_factory=lambda: None, init=True, compare=False, hash=False
+    )
 
     def __str__(self) -> str:
         cond = f" if ({self.condition})" if self.condition else ""
@@ -259,7 +272,9 @@ def override_error(
 
 
 def convert_stack_item(item: parser.StackEffect) -> StackItem:
-    return StackItem(item.name, item.type, item.cond, (item.size or "1"))
+    return StackItem(
+        item.name, item.type, item.cond, (item.size or "1"), type_prop=item.type_prop
+    )
 
 
 def analyze_stack(op: parser.InstDef) -> StackEffect:
@@ -377,7 +392,6 @@ def makes_escaping_api_call(instr: parser.InstDef) -> bool:
     return False
 
 
-
 EXITS = {
     "DISPATCH",
     "GO_TO_INSTRUCTION",
@@ -417,16 +431,33 @@ def always_exits(op: parser.InstDef) -> bool:
     return False
 
 
+def stack_effect_only_peeks(instr: parser.InstDef) -> bool:
+    stack_inputs = [s for s in instr.inputs if not isinstance(s, parser.CacheEffect)]
+    if len(stack_inputs) != len(instr.outputs):
+        return False
+    if len(stack_inputs) == 0:
+        return False
+    if any(s.cond for s in stack_inputs) or any(s.cond for s in instr.outputs):
+        return False
+    return all(
+        (s.name == other.name and s.type == other.type and s.size == other.size)
+        for s, other in zip(stack_inputs, instr.outputs)
+    )
+
+
 def compute_properties(op: parser.InstDef) -> Properties:
     has_free = (
         variable_used(op, "PyCell_New")
         or variable_used(op, "PyCell_GET")
         or variable_used(op, "PyCell_SET")
     )
+    infallible = is_infallible(op)
+    deopts = variable_used(op, "DEOPT_IF")
+    passthrough = stack_effect_only_peeks(op) and infallible
     return Properties(
         escapes=makes_escaping_api_call(op),
-        infallible=is_infallible(op),
-        deopts=variable_used(op, "DEOPT_IF"),
+        infallible=infallible,
+        deopts=deopts,
         oparg=variable_used(op, "oparg"),
         jumps=variable_used(op, "JUMPBY"),
         eval_breaker=variable_used(op, "CHECK_EVAL_BREAKER"),
@@ -440,6 +471,9 @@ def compute_properties(op: parser.InstDef) -> Properties:
         uses_locals=(variable_used(op, "GETLOCAL") or variable_used(op, "SETLOCAL"))
         and not has_free,
         has_free=has_free,
+        pure="pure" in op.annotations,
+        passthrough=passthrough,
+        guard=passthrough and deopts,
     )
 
 
@@ -686,9 +720,7 @@ def analyze_forest(forest: list[parser.AstNode]) -> Analysis:
     inst = instructions["BINARY_OP_INPLACE_ADD_UNICODE"]
     inst.family = families["BINARY_OP"]
     families["BINARY_OP"].members.append(inst)
-    opmap, first_arg, min_instrumented = assign_opcodes(
-        instructions, families, pseudos
-    )
+    opmap, first_arg, min_instrumented = assign_opcodes(instructions, families, pseudos)
     return Analysis(
         instructions, uops, families, pseudos, opmap, first_arg, min_instrumented
     )
diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py
index 5a42a05..c6c602c 100644
--- a/Tools/cases_generator/generators_common.py
+++ b/Tools/cases_generator/generators_common.py
@@ -26,7 +26,9 @@ def root_relative_path(filename: str) -> str:
     return filename
 
 
-def write_header(generator: str, sources: list[str], outfile: TextIO, comment: str = "//") -> None:
+def write_header(
+    generator: str, sources: list[str], outfile: TextIO, comment: str = "//"
+) -> None:
     outfile.write(
         f"""{comment} This file is generated by {root_relative_path(generator)}
 {comment} from:
@@ -209,6 +211,10 @@ def cflags(p: Properties) -> str:
         flags.append("HAS_ERROR_FLAG")
     if p.escapes:
         flags.append("HAS_ESCAPES_FLAG")
+    if p.pure:
+        flags.append("HAS_PURE_FLAG")
+    if p.passthrough:
+        flags.append("HAS_PASSTHROUGH_FLAG")
     if flags:
         return " | ".join(flags)
     else:
diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md
index 5c42387..e5a4899 100644
--- a/Tools/cases_generator/interpreter_definition.md
+++ b/Tools/cases_generator/interpreter_definition.md
@@ -15,6 +15,7 @@ These tools would be used to:
 * Generate the tier 2 interpreter
 * Generate documentation for instructions
 * Generate metadata about instructions, such as stack use (done).
+* Generate the tier 2 optimizer's abstract interpreter.
 
 Having a single definition file ensures that there is a single source of truth
 for bytecode semantics.
@@ -108,7 +109,10 @@ and a piece of C code describing its semantics::
        NAME [":" type] [ "if" "(" C-expression ")" ]
 
     type:
-       NAME ["*"]
+       NAME ["*"] | type_prop
+
+    type_prop:
+       "&" "(" NAME ["+" NAME] ")"
 
     stream:
        NAME "/" size
@@ -138,7 +142,27 @@ The following definitions may occur:
 The optional `type` in an `object` is the C type. It defaults to `PyObject *`.
 The objects before the "--" are the objects on top of the stack at the start of
 the instruction. Those after the "--" are the objects on top of the stack at the
-end of the instruction.
+end of the instruction. When prefixed by a `&`, the `type` production rule follows the
+`type_prop` production rule. This indicates the type of the value is of that specific type
+after the operation. In this case, the type may also contain 64-bit refinement information
+that is fetched from a previously defined operand in the instruction header, such as
+a type version tag. This follows the format `type + refinement`. The list of possible types
+and their refinements are below. They obey the following predicates:
+
+
+* `PYLONG_TYPE`: `Py_TYPE(val) == &PyLong_Type`
+* `PYFLOAT_TYPE`: `Py_TYPE(val) == &PyFloat_Type`
+* `PYUNICODE_TYPE`: `Py_TYPE(val) == &PYUNICODE_TYPE`
+* `NULL_TYPE`: `val == NULL`
+* `GUARD_TYPE_VERSION_TYPE`: `type->tp_version_tag == auxillary`
+* `GUARD_DORV_VALUES_TYPE`: `_PyDictOrValues_IsValues(obj)`
+* `GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE`:
+  `_PyDictOrValues_IsValues(obj) || _PyObject_MakeInstanceAttributesFromDict(obj, dorv)`
+* `GUARD_KEYS_VERSION_TYPE`: `owner_heap_type->ht_cached_keys->dk_version == auxillary`
+* `PYMETHOD_TYPE`: `Py_TYPE(val) == &PyMethod_Type`
+* `PYFUNCTION_TYPE_VERSION_TYPE`:
+  `PyFunction_Check(callable) && func->func_version == auxillary && code->co_argcount == oparg + (self_or_null != NULL)`
+
 
 An `inst` without `stack_effect` is a transitional form to allow the original C code
 definitions to be copied. It lacks information to generate anything other than the
@@ -158,6 +182,15 @@ By convention cache effects (`stream`) must precede the input effects.
 
 The name `oparg` is pre-defined as a 32 bit value fetched from the instruction stream.
 
+### Special instruction annotations
+
+Instruction headers may be prefixed by one or more annotations. The non-exhaustive
+list of annotations and their meanings are as follows:
+
+* `override`. For external use by other interpreter definitions to override the current
+  instruction definition.
+* `pure`. This instruction has no side effects.
+
 ### Special functions/macros
 
 The C code may include special functions that are understood by the tools as
diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py
index c3c2954..4f8d01c 100644
--- a/Tools/cases_generator/lexer.py
+++ b/Tools/cases_generator/lexer.py
@@ -216,7 +216,13 @@ kwds.append(MACRO)
 keywords = {name.lower(): name for name in kwds}
 
 ANNOTATION = "ANNOTATION"
-annotations = {"specializing", "guard", "override", "register", "replaced"}
+annotations = {
+    "specializing",
+    "override",
+    "register",
+    "replaced",
+    "pure",
+}
 
 __all__ = []
 __all__.extend(kwds)
@@ -324,7 +330,9 @@ def tokenize(src: str, line: int = 1, filename: str = "") -> Iterator[Token]:
         else:
             begin = line, start - linestart
         if kind != "\n":
-            yield Token(filename, kind, text, begin, (line, start - linestart + len(text)))
+            yield Token(
+                filename, kind, text, begin, (line, start - linestart + len(text))
+            )
 
 
 def to_text(tkns: list[Token], dedent: int = 0) -> str:
diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py
index 9b7df9a..1826a0b 100644
--- a/Tools/cases_generator/opcode_metadata_generator.py
+++ b/Tools/cases_generator/opcode_metadata_generator.py
@@ -50,6 +50,8 @@ FLAGS = [
     "DEOPT",
     "ERROR",
     "ESCAPES",
+    "PURE",
+    "PASSTHROUGH",
 ]
diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py
index 60c185d..307919c 100644
--- a/Tools/cases_generator/parsing.py
+++ b/Tools/cases_generator/parsing.py
@@ -75,6 +75,11 @@ class StackEffect(Node):
     size: str = ""  # Optional `[size]`
     # Note: size cannot be combined with type or cond
 
+    # Optional `(type, refinement)`
+    type_prop: None | tuple[str, None | str] = field(
+        default_factory=lambda: None, init=True, compare=False, hash=False
+    )
+
    def __repr__(self) -> str:
         items = [self.name, self.type, self.cond, self.size]
         while items and items[-1] == "":
@@ -138,11 +143,13 @@ class Family(Node):
 @dataclass
 class Pseudo(Node):
     name: str
-    flags: list[str]   # instr flags to set on the pseudo instruction
-    targets: list[str]  # opcodes this can be replaced by
+    flags: list[str]  # instr flags to set on the pseudo instruction
+    targets: list[str]  # opcodes this can be replaced by
+
 
 AstNode = InstDef | Macro | Pseudo | Family
 
+
 class Parser(PLexer):
     @contextual
     def definition(self) -> AstNode | None:
@@ -253,14 +260,25 @@ class Parser(PLexer):
 
     @contextual
     def stack_effect(self) -> StackEffect | None:
-        # IDENTIFIER [':' IDENTIFIER [TIMES]] ['if' '(' expression ')']
+        # IDENTIFIER [':' [IDENTIFIER [TIMES]] ['&' '(' IDENTIFIER ['+' IDENTIFIER] ')']] ['if' '(' expression ')']
         # | IDENTIFIER '[' expression ']'
         if tkn := self.expect(lx.IDENTIFIER):
             type_text = ""
+            type_prop = None
             if self.expect(lx.COLON):
-                type_text = self.require(lx.IDENTIFIER).text.strip()
-                if self.expect(lx.TIMES):
-                    type_text += " *"
+                if i := self.expect(lx.IDENTIFIER):
+                    type_text = i.text.strip()
+                    if self.expect(lx.TIMES):
+                        type_text += " *"
+                if self.expect(lx.AND):
+                    consumed_bracket = self.expect(lx.LPAREN) is not None
+                    type_prop_text = self.require(lx.IDENTIFIER).text.strip()
+                    refinement = None
+                    if self.expect(lx.PLUS):
+                        refinement = self.require(lx.IDENTIFIER).text.strip()
+                    type_prop = (type_prop_text, refinement)
+                    if consumed_bracket:
+                        self.require(lx.RPAREN)
             cond_text = ""
             if self.expect(lx.IF):
                 self.require(lx.LPAREN)
@@ -277,7 +295,7 @@ class Parser(PLexer):
                 self.require(lx.RBRACKET)
                 type_text = "PyObject **"
                 size_text = size.text.strip()
-            return StackEffect(tkn.text, type_text, cond_text, size_text)
+            return StackEffect(tkn.text, type_text, cond_text, size_text, type_prop)
         return None
 
     @contextual
@@ -364,7 +382,9 @@ class Parser(PLexer):
                 if self.expect(lx.COMMA):
                     if not (size := self.expect(lx.IDENTIFIER)):
                         if not (size := self.expect(lx.NUMBER)):
-                            raise self.make_syntax_error("Expected identifier or number")
+                            raise self.make_syntax_error(
+                                "Expected identifier or number"
+                            )
                 if self.expect(lx.RPAREN):
                     if self.expect(lx.EQUALS):
                         if not self.expect(lx.LBRACE):
diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py
index d351037..6633950 100644
--- a/Tools/cases_generator/stack.py
+++ b/Tools/cases_generator/stack.py
@@ -3,6 +3,8 @@ from analyzer import StackItem, Instruction, Uop
 from dataclasses import dataclass
 from cwriter import CWriter
 
+UNUSED = {"unused"}
+
 
 def maybe_parenthesize(sym: str) -> str:
     """Add parentheses around a string if it contains an operator
@@ -29,6 +31,7 @@ def var_size(var: StackItem) -> str:
     else:
         return var.size
 
+
 @dataclass
 class StackOffset:
     "The stack offset of the virtual base of the stack from the physical stack pointer"
@@ -47,10 +50,7 @@ class StackOffset:
         self.pushed.append(var_size(item))
 
     def __sub__(self, other: "StackOffset") -> "StackOffset":
-        return StackOffset(
-            self.popped + other.pushed,
-            self.pushed + other.popped
-        )
+        return StackOffset(self.popped + other.pushed, self.pushed + other.popped)
 
     def __neg__(self) -> "StackOffset":
         return StackOffset(self.pushed, self.popped)
@@ -134,18 +134,18 @@ class Stack:
                 )
             if popped.name == var.name:
                 return ""
-            elif popped.name == "unused":
+            elif popped.name in UNUSED:
                 self.defined.add(var.name)
                 return (
                     f"{var.name} = {indirect}stack_pointer[{self.top_offset.to_c()}];\n"
                 )
-            elif var.name == "unused":
+            elif var.name in UNUSED:
                 return ""
             else:
                 self.defined.add(var.name)
                 return f"{var.name} = {popped.name};\n"
         self.base_offset.pop(var)
-        if var.name == "unused":
+        if var.name in UNUSED:
             return ""
         else:
             self.defined.add(var.name)
@@ -159,7 +159,7 @@ class Stack:
 
     def push(self, var: StackItem) -> str:
         self.variables.append(var)
-        if var.is_array() and var.name not in self.defined and var.name != "unused":
+        if var.is_array() and var.name not in self.defined and var.name not in UNUSED:
             c_offset = self.top_offset.to_c()
             self.top_offset.push(var)
             self.defined.add(var.name)
@@ -172,7 +172,7 @@ class Stack:
         for var in self.variables:
             if not var.peek:
                 cast = "(PyObject *)" if var.type else ""
-                if var.name != "unused" and not var.is_array():
+                if var.name not in UNUSED and not var.is_array():
                     if var.condition:
                         out.emit(f"if ({var.condition}) ")
                     out.emit(
```
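As a usage note on the grammar introduced above: the annotation accepts a bare name or a `name + refinement` pair, and the real token-based parser even tolerates a missing opening parenthesis (it only requires the closing one when the opening one was consumed). A regex-based sketch of just the parenthesized `type_prop` production, independent of the real parser:

```python
import re

# Matches the type_prop production from interpreter_definition.md:
#     "&" "(" NAME ["+" NAME] ")"
TYPE_PROP = re.compile(r"&\s*\(\s*(\w+)\s*(?:\+\s*(\w+)\s*)?\)")


def parse_type_prop(text: str) -> tuple[str, str | None] | None:
    """Return (type, refinement) for an annotation, or None if it doesn't match."""
    m = TYPE_PROP.fullmatch(text.strip())
    return (m.group(1), m.group(2)) if m else None


print(parse_type_prop("&(PYLONG_TYPE)"))
# ('PYLONG_TYPE', None)
print(parse_type_prop("&(GUARD_TYPE_VERSION_TYPE + type_version)"))
# ('GUARD_TYPE_VERSION_TYPE', 'type_version')
```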