author     Ken Jin <kenjin@python.org>     2024-01-12 17:30:27 (GMT)
committer  GitHub <noreply@github.com>     2024-01-12 17:30:27 (GMT)
commit     ac92527c08d917dffdb9c0a218d06f21114614a2 (patch)
tree       702ac2873cca646b59c49fdb9e591e31fb23b151 /Tools
parent     79970792fd2c70f77c38e08c7b3a9daf6a11bde1 (diff)
gh-113710: Add types to the interpreter DSL (#113711)
Co-authored-by: Jules <57632293+JuliaPoo@users.noreply.github.com>
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
Diffstat (limited to 'Tools')
-rw-r--r--  Tools/cases_generator/analyzer.py                   48
-rw-r--r--  Tools/cases_generator/generators_common.py           8
-rw-r--r--  Tools/cases_generator/interpreter_definition.md     37
-rw-r--r--  Tools/cases_generator/lexer.py                      12
-rw-r--r--  Tools/cases_generator/opcode_metadata_generator.py   2
-rw-r--r--  Tools/cases_generator/parsing.py                    36
-rw-r--r--  Tools/cases_generator/stack.py                      18
7 files changed, 131 insertions(+), 30 deletions(-)
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index 82ef888..7ed3b57 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -1,4 +1,4 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
import lexer
import parser
from typing import Optional
@@ -22,6 +22,10 @@ class Properties:
uses_locals: bool
has_free: bool
+ pure: bool
+ passthrough: bool
+ guard: bool
+
def dump(self, indent: str) -> None:
print(indent, end="")
text = ", ".join([f"{key}: {value}" for (key, value) in self.__dict__.items()])
@@ -45,6 +49,9 @@ class Properties:
uses_co_names=any(p.uses_co_names for p in properties),
uses_locals=any(p.uses_locals for p in properties),
has_free=any(p.has_free for p in properties),
+ pure=all(p.pure for p in properties),
+ passthrough=all(p.passthrough for p in properties),
+ guard=all(p.guard for p in properties),
)
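Note the aggregation here: the pre-existing properties combine with `any` (a
macro escapes if any component uop escapes), while the three new ones combine
with `all` (a macro is pure, passthrough, or a guard only if every component
uop is). A minimal sketch of the distinction, using hypothetical component
values rather than real Properties objects:

    # (escapes, pure) for two hypothetical component uops
    parts = [(False, True), (True, False)]
    escapes = any(e for e, _ in parts)  # True: the second component escapes
    pure = all(p for _, p in parts)     # False: the second component is impure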
@@ -64,6 +71,9 @@ SKIP_PROPERTIES = Properties(
uses_co_names=False,
uses_locals=False,
has_free=False,
+ pure=False,
+ passthrough=False,
+ guard=False,
)
@@ -88,6 +98,9 @@ class StackItem:
condition: str | None
size: str
peek: bool = False
+ type_prop: None | tuple[str, None | str] = field(
+ default_factory=lambda: None, init=True, compare=False, hash=False
+ )
def __str__(self) -> str:
cond = f" if ({self.condition})" if self.condition else ""
@@ -259,7 +272,9 @@ def override_error(
def convert_stack_item(item: parser.StackEffect) -> StackItem:
- return StackItem(item.name, item.type, item.cond, (item.size or "1"))
+ return StackItem(
+ item.name, item.type, item.cond, (item.size or "1"), type_prop=item.type_prop
+ )
def analyze_stack(op: parser.InstDef) -> StackEffect:
@@ -377,7 +392,6 @@ def makes_escaping_api_call(instr: parser.InstDef) -> bool:
return False
-
EXITS = {
"DISPATCH",
"GO_TO_INSTRUCTION",
@@ -417,16 +431,33 @@ def always_exits(op: parser.InstDef) -> bool:
return False
+def stack_effect_only_peeks(instr: parser.InstDef) -> bool:
+ stack_inputs = [s for s in instr.inputs if not isinstance(s, parser.CacheEffect)]
+ if len(stack_inputs) != len(instr.outputs):
+ return False
+ if len(stack_inputs) == 0:
+ return False
+ if any(s.cond for s in stack_inputs) or any(s.cond for s in instr.outputs):
+ return False
+ return all(
+ (s.name == other.name and s.type == other.type and s.size == other.size)
+ for s, other in zip(stack_inputs, instr.outputs)
+ )
+
+
def compute_properties(op: parser.InstDef) -> Properties:
has_free = (
variable_used(op, "PyCell_New")
or variable_used(op, "PyCell_GET")
or variable_used(op, "PyCell_SET")
)
+ infallible = is_infallible(op)
+ deopts = variable_used(op, "DEOPT_IF")
+ passthrough = stack_effect_only_peeks(op) and infallible
return Properties(
escapes=makes_escaping_api_call(op),
- infallible=is_infallible(op),
- deopts=variable_used(op, "DEOPT_IF"),
+ infallible=infallible,
+ deopts=deopts,
oparg=variable_used(op, "oparg"),
jumps=variable_used(op, "JUMPBY"),
eval_breaker=variable_used(op, "CHECK_EVAL_BREAKER"),
@@ -440,6 +471,9 @@ def compute_properties(op: parser.InstDef) -> Properties:
uses_locals=(variable_used(op, "GETLOCAL") or variable_used(op, "SETLOCAL"))
and not has_free,
has_free=has_free,
+ pure="pure" in op.annotations,
+ passthrough=passthrough,
+ guard=passthrough and deopts,
)
@@ -686,9 +720,7 @@ def analyze_forest(forest: list[parser.AstNode]) -> Analysis:
inst = instructions["BINARY_OP_INPLACE_ADD_UNICODE"]
inst.family = families["BINARY_OP"]
families["BINARY_OP"].members.append(inst)
- opmap, first_arg, min_instrumented = assign_opcodes(
- instructions, families, pseudos
- )
+ opmap, first_arg, min_instrumented = assign_opcodes(instructions, families, pseudos)
return Analysis(
instructions, uops, families, pseudos, opmap, first_arg, min_instrumented
)
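To make the classification concrete, here is a small self-contained sketch
(using stand-in dataclasses, not the real parser types) of how the new pieces
compose for a guard-style uop whose stack effect only peeks:

    from dataclasses import dataclass

    @dataclass
    class FakeEffect:  # stand-in for parser.StackEffect
        name: str
        type: str = ""
        size: str = ""
        cond: str = ""

    # inputs == outputs, no conditions: the uop only peeks at the stack
    inputs = [FakeEffect("left"), FakeEffect("right")]
    outputs = [FakeEffect("left"), FakeEffect("right")]

    only_peeks = (
        len(inputs) == len(outputs) > 0
        and not any(s.cond for s in inputs + outputs)
        and all(
            a.name == b.name and a.type == b.type and a.size == b.size
            for a, b in zip(inputs, outputs)
        )
    )
    infallible = True  # body has no error exits
    deopts = True      # body uses DEOPT_IF
    passthrough = only_peeks and infallible
    guard = passthrough and deopts  # a deoptimizing passthrough is a guard
    assert only_peeks and passthrough and guard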
diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py
index 5a42a05..c6c602c 100644
--- a/Tools/cases_generator/generators_common.py
+++ b/Tools/cases_generator/generators_common.py
@@ -26,7 +26,9 @@ def root_relative_path(filename: str) -> str:
return filename
-def write_header(generator: str, sources: list[str], outfile: TextIO, comment: str = "//") -> None:
+def write_header(
+ generator: str, sources: list[str], outfile: TextIO, comment: str = "//"
+) -> None:
outfile.write(
f"""{comment} This file is generated by {root_relative_path(generator)}
{comment} from:
@@ -209,6 +211,10 @@ def cflags(p: Properties) -> str:
flags.append("HAS_ERROR_FLAG")
if p.escapes:
flags.append("HAS_ESCAPES_FLAG")
+ if p.pure:
+ flags.append("HAS_PURE_FLAG")
+ if p.passthrough:
+ flags.append("HAS_PASSTHROUGH_FLAG")
if flags:
return " | ".join(flags)
else:
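For an op whose properties mark it pure and passthrough, the two new branches
extend the returned flag string. A rough sketch of the resulting value
(assuming, as the truncated `else:` suggests, a "0" fallback when no flags
are set):

    flags = ["HAS_PURE_FLAG", "HAS_PASSTHROUGH_FLAG"]
    print(" | ".join(flags) if flags else "0")
    # -> HAS_PURE_FLAG | HAS_PASSTHROUGH_FLAG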
diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md
index 5c42387..e5a4899 100644
--- a/Tools/cases_generator/interpreter_definition.md
+++ b/Tools/cases_generator/interpreter_definition.md
@@ -15,6 +15,7 @@ These tools would be used to:
* Generate the tier 2 interpreter
* Generate documentation for instructions
* Generate metadata about instructions, such as stack use (done).
+* Generate the tier 2 optimizer's abstract interpreter.
Having a single definition file ensures that there is a single source
of truth for bytecode semantics.
@@ -108,7 +109,10 @@ and a piece of C code describing its semantics::
NAME [":" type] [ "if" "(" C-expression ")" ]
type:
- NAME ["*"]
+ NAME ["*"] | type_prop
+
+ type_prop:
+ "&" "(" NAME ["+" NAME] ")"
stream:
NAME "/" size
@@ -138,7 +142,27 @@ The following definitions may occur:
The optional `type` in an `object` is the C type. It defaults to `PyObject *`.
The objects before the "--" are the objects on top of the stack at the start of
the instruction. Those after the "--" are the objects on top of the stack at the
-end of the instruction.
+end of the instruction. When prefixed by `&`, the `type` follows the `type_prop`
+production rule instead. This declares that the value has that specific type after
+the operation. The type may also carry 64-bit refinement information fetched from a
+previously defined operand in the instruction header, such as a type version tag,
+written as `type + refinement`. The possible types and their refinements are listed
+below, with the predicate each one guarantees (an example follows the list):
+
+
+* `PYLONG_TYPE`: `Py_TYPE(val) == &PyLong_Type`
+* `PYFLOAT_TYPE`: `Py_TYPE(val) == &PyFloat_Type`
+* `PYUNICODE_TYPE`: `Py_TYPE(val) == &PyUnicode_Type`
+* `NULL_TYPE`: `val == NULL`
+* `GUARD_TYPE_VERSION_TYPE`: `type->tp_version_tag == auxiliary`
+* `GUARD_DORV_VALUES_TYPE`: `_PyDictOrValues_IsValues(obj)`
+* `GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE`:
+ `_PyDictOrValues_IsValues(obj) || _PyObject_MakeInstanceAttributesFromDict(obj, dorv)`
+* `GUARD_KEYS_VERSION_TYPE`: `owner_heap_type->ht_cached_keys->dk_version == auxiliary`
+* `PYMETHOD_TYPE`: `Py_TYPE(val) == &PyMethod_Type`
+* `PYFUNCTION_TYPE_VERSION_TYPE`:
+ `PyFunction_Check(callable) && func->func_version == auxiliary && code->co_argcount == oparg + (self_or_null != NULL)`
+
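+For illustration only (a hypothetical definition, not taken from the real
+instruction set), a guard that peeks at two operands and refines both to
+`PYLONG_TYPE` could be written as:
+
+    op(_GUARD_BOTH_INT, (left, right -- left: &(PYLONG_TYPE), right: &(PYLONG_TYPE))) {
+        DEOPT_IF(!PyLong_CheckExact(left));
+        DEOPT_IF(!PyLong_CheckExact(right));
+    }
+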
An `inst` without `stack_effect` is a transitional form to allow the original C code
definitions to be copied. It lacks information to generate anything other than the
@@ -158,6 +182,15 @@ By convention cache effects (`stream`) must precede the input effects.
The name `oparg` is pre-defined as a 32 bit value fetched from the instruction stream.
+### Special instruction annotations
+
+Instruction headers may be prefixed by one or more annotations. A non-exhaustive
+list of annotations and their meanings follows (an example appears after the list):
+
+* `override`. For external use by other interpreter definitions to override the current
+ instruction definition.
+* `pure`. This instruction has no side effects.
+
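+For example (a hypothetical instruction, for illustration only), an annotated
+header might look like:
+
+    pure inst(EXAMPLE_LOAD, (-- value)) {
+        /* hypothetical side-effect-free body */
+        value = GETITEM(FRAME_CO_CONSTS, oparg);
+    }
+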
### Special functions/macros
The C code may include special functions that are understood by the tools as
diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py
index c3c2954..4f8d01c 100644
--- a/Tools/cases_generator/lexer.py
+++ b/Tools/cases_generator/lexer.py
@@ -216,7 +216,13 @@ kwds.append(MACRO)
keywords = {name.lower(): name for name in kwds}
ANNOTATION = "ANNOTATION"
-annotations = {"specializing", "guard", "override", "register", "replaced"}
+annotations = {
+ "specializing",
+ "override",
+ "register",
+ "replaced",
+ "pure",
+}
__all__ = []
__all__.extend(kwds)
@@ -324,7 +330,9 @@ def tokenize(src: str, line: int = 1, filename: str = "") -> Iterator[Token]:
else:
begin = line, start - linestart
if kind != "\n":
- yield Token(filename, kind, text, begin, (line, start - linestart + len(text)))
+ yield Token(
+ filename, kind, text, begin, (line, start - linestart + len(text))
+ )
def to_text(tkns: list[Token], dedent: int = 0) -> str:
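Assuming words in `annotations` are lexed with the ANNOTATION kind (that
branch of the lexer lies outside this hunk, so this is an inference, not a
quote), the new entry can be sanity-checked roughly like so:

    import lexer  # Tools/cases_generator/lexer.py

    kinds = [t.kind for t in lexer.tokenize("pure op")]
    # expected: ["ANNOTATION", "IDENTIFIER"]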
diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py
index 9b7df9a..1826a0b 100644
--- a/Tools/cases_generator/opcode_metadata_generator.py
+++ b/Tools/cases_generator/opcode_metadata_generator.py
@@ -50,6 +50,8 @@ FLAGS = [
"DEOPT",
"ERROR",
"ESCAPES",
+ "PURE",
+ "PASSTHROUGH",
]
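Presumably these surface in the generated metadata under the same
`HAS_<NAME>_FLAG` naming that `cflags` in generators_common.py emits; the
actual mask values are computed elsewhere in the generator and are not shown
in this hunk. A trivial sketch of the naming scheme only:

    for flag in ["PURE", "PASSTHROUGH"]:
        print(f"HAS_{flag}_FLAG")  # HAS_PURE_FLAG, HAS_PASSTHROUGH_FLAG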
diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py
index 60c185d..307919c 100644
--- a/Tools/cases_generator/parsing.py
+++ b/Tools/cases_generator/parsing.py
@@ -75,6 +75,11 @@ class StackEffect(Node):
size: str = "" # Optional `[size]`
# Note: size cannot be combined with type or cond
+ # Optional `(type, refinement)`
+ type_prop: None | tuple[str, None | str] = field(
+ default_factory=lambda: None, init=True, compare=False, hash=False
+ )
+
def __repr__(self) -> str:
items = [self.name, self.type, self.cond, self.size]
while items and items[-1] == "":
@@ -138,11 +143,13 @@ class Family(Node):
@dataclass
class Pseudo(Node):
name: str
- flags: list[str] # instr flags to set on the pseudo instruction
- targets: list[str] # opcodes this can be replaced by
+ flags: list[str] # instr flags to set on the pseudo instruction
+ targets: list[str] # opcodes this can be replaced by
+
AstNode = InstDef | Macro | Pseudo | Family
+
class Parser(PLexer):
@contextual
def definition(self) -> AstNode | None:
@@ -253,14 +260,25 @@ class Parser(PLexer):
@contextual
def stack_effect(self) -> StackEffect | None:
- # IDENTIFIER [':' IDENTIFIER [TIMES]] ['if' '(' expression ')']
+ # IDENTIFIER [':' [IDENTIFIER [TIMES]] ['&' '(' IDENTIFIER ['+' IDENTIFIER] ')']] ['if' '(' expression ')']
# | IDENTIFIER '[' expression ']'
if tkn := self.expect(lx.IDENTIFIER):
type_text = ""
+ type_prop = None
if self.expect(lx.COLON):
- type_text = self.require(lx.IDENTIFIER).text.strip()
- if self.expect(lx.TIMES):
- type_text += " *"
+ if i := self.expect(lx.IDENTIFIER):
+ type_text = i.text.strip()
+ if self.expect(lx.TIMES):
+ type_text += " *"
+ if self.expect(lx.AND):
+ consumed_bracket = self.expect(lx.LPAREN) is not None
+ type_prop_text = self.require(lx.IDENTIFIER).text.strip()
+ refinement = None
+ if self.expect(lx.PLUS):
+ refinement = self.require(lx.IDENTIFIER).text.strip()
+ type_prop = (type_prop_text, refinement)
+ if consumed_bracket:
+ self.require(lx.RPAREN)
cond_text = ""
if self.expect(lx.IF):
self.require(lx.LPAREN)
@@ -277,7 +295,7 @@ class Parser(PLexer):
self.require(lx.RBRACKET)
type_text = "PyObject **"
size_text = size.text.strip()
- return StackEffect(tkn.text, type_text, cond_text, size_text)
+ return StackEffect(tkn.text, type_text, cond_text, size_text, type_prop)
return None
@contextual
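A sketch of what the extended `stack_effect` should produce for the new
syntax (hypothetical input; field values inferred from the branches above):

    from parsing import StackEffect

    # Parsing "val: &(PYLONG_TYPE + version)" takes the COLON branch with no
    # leading type identifier, so `type` stays "" and only type_prop is set:
    expected = StackEffect("val", "", "", "", ("PYLONG_TYPE", "version"))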
@@ -364,7 +382,9 @@ class Parser(PLexer):
if self.expect(lx.COMMA):
if not (size := self.expect(lx.IDENTIFIER)):
if not (size := self.expect(lx.NUMBER)):
- raise self.make_syntax_error("Expected identifier or number")
+ raise self.make_syntax_error(
+ "Expected identifier or number"
+ )
if self.expect(lx.RPAREN):
if self.expect(lx.EQUALS):
if not self.expect(lx.LBRACE):
diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py
index d351037..6633950 100644
--- a/Tools/cases_generator/stack.py
+++ b/Tools/cases_generator/stack.py
@@ -3,6 +3,8 @@ from analyzer import StackItem, Instruction, Uop
from dataclasses import dataclass
from cwriter import CWriter
+UNUSED = {"unused"}
+
def maybe_parenthesize(sym: str) -> str:
"""Add parentheses around a string if it contains an operator
@@ -29,6 +31,7 @@ def var_size(var: StackItem) -> str:
else:
return var.size
+
@dataclass
class StackOffset:
"The stack offset of the virtual base of the stack from the physical stack pointer"
@@ -47,10 +50,7 @@ class StackOffset:
self.pushed.append(var_size(item))
def __sub__(self, other: "StackOffset") -> "StackOffset":
- return StackOffset(
- self.popped + other.pushed,
- self.pushed + other.popped
- )
+ return StackOffset(self.popped + other.pushed, self.pushed + other.popped)
def __neg__(self) -> "StackOffset":
return StackOffset(self.pushed, self.popped)
@@ -134,18 +134,18 @@ class Stack:
)
if popped.name == var.name:
return ""
- elif popped.name == "unused":
+ elif popped.name in UNUSED:
self.defined.add(var.name)
return (
f"{var.name} = {indirect}stack_pointer[{self.top_offset.to_c()}];\n"
)
- elif var.name == "unused":
+ elif var.name in UNUSED:
return ""
else:
self.defined.add(var.name)
return f"{var.name} = {popped.name};\n"
self.base_offset.pop(var)
- if var.name == "unused":
+ if var.name in UNUSED:
return ""
else:
self.defined.add(var.name)
@@ -159,7 +159,7 @@ class Stack:
def push(self, var: StackItem) -> str:
self.variables.append(var)
- if var.is_array() and var.name not in self.defined and var.name != "unused":
+ if var.is_array() and var.name not in self.defined and var.name not in UNUSED:
c_offset = self.top_offset.to_c()
self.top_offset.push(var)
self.defined.add(var.name)
@@ -172,7 +172,7 @@ class Stack:
for var in self.variables:
if not var.peek:
cast = "(PyObject *)" if var.type else ""
- if var.name != "unused" and not var.is_array():
+ if var.name not in UNUSED and not var.is_array():
if var.condition:
out.emit(f"if ({var.condition}) ")
out.emit(
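The change throughout this file is mechanical: string equality against
"unused" becomes membership in the UNUSED set. With a single element the
behavior is identical; any future placeholder name would only need to be
added in one place. A micro-example:

    UNUSED = {"unused"}
    assert "unused" in UNUSED   # matches, exactly as `== "unused"` did
    assert "res" not in UNUSED  # ordinary variable names are unaffected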