author     Ken Jin <kenjin@python.org>    2024-02-13 13:24:48 (GMT)
committer  GitHub <noreply@github.com>    2024-02-13 13:24:48 (GMT)
commit     7cce8576226249461baa91c4a89770a1823b44a4
tree       c9df9d9fa2fa090706b11982d014b07c2221fcce
parent     ccc76c3e88647e416184bb1f5210b4e8946ae358
gh-114058: Foundations of the Tier2 redundancy eliminator (GH-115085)
---------

Co-authored-by: Mark Shannon <9448417+markshannon@users.noreply.github.com>
Co-authored-by: Jules <57632293+JuliaPoo@users.noreply.github.com>
Co-authored-by: Guido van Rossum <gvanrossum@users.noreply.github.com>
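The override file is written in the same DSL as Python/bytecodes.c. As a minimal sketch (the uop name and body here are hypothetical; only sym_new_unknown and the ctx/out_of_space conventions are taken from the generator added at the end of this diff), an entry in Python/tier2_redundancy_eliminator_bytecodes.c looks roughly like:

    /* Hypothetical override: the name and body are illustrative only.
       Stack values are _Py_UOpsSymType * symbols, not PyObject *. */
    op(_EXAMPLE_BINARY_OP, (left, right -- res)) {
        /* A real override would propagate known types or constants here;
           absent better information, fall back to an unknown symbol. */
        res = sym_new_unknown(ctx);
        if (res == NULL) goto out_of_space;
    }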
Diffstat (limited to 'Tools')
-rw-r--r--  Tools/c-analyzer/cpython/_parser.py                  2
-rw-r--r--  Tools/c-analyzer/cpython/ignored.tsv                 2
-rw-r--r--  Tools/cases_generator/README.md                      3
-rw-r--r--  Tools/cases_generator/analyzer.py                    6
-rw-r--r--  Tools/cases_generator/interpreter_definition.md     26
-rw-r--r--  Tools/cases_generator/parsing.py                    26
-rw-r--r--  Tools/cases_generator/stack.py                       4
-rw-r--r--  Tools/cases_generator/tier2_abstract_generator.py  235
8 files changed, 251 insertions, 53 deletions
diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py
index 444063d..be89a26 100644
--- a/Tools/c-analyzer/cpython/_parser.py
+++ b/Tools/c-analyzer/cpython/_parser.py
@@ -83,9 +83,11 @@ Python/deepfreeze/*.c
Python/frozen_modules/*.h
Python/generated_cases.c.h
Python/executor_cases.c.h
+Python/tier2_redundancy_eliminator_cases.c.h
# not actually source
Python/bytecodes.c
+Python/tier2_redundancy_eliminator_bytecodes.c
# mimalloc
Objects/mimalloc/*.c
diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv
index c75aff8..14bcd85 100644
--- a/Tools/c-analyzer/cpython/ignored.tsv
+++ b/Tools/c-analyzer/cpython/ignored.tsv
@@ -734,6 +734,6 @@ Modules/expat/xmlrole.c - error -
## other
Modules/_io/_iomodule.c - _PyIO_Module -
Modules/_sqlite/module.c - _sqlite3module -
-Python/optimizer_analysis.c - _Py_PartitionRootNode_Type -
+Python/optimizer_analysis.c - _Py_UOpsAbstractFrame_Type -
Python/optimizer_analysis.c - _Py_UOpsAbstractInterpContext_Type -
Modules/clinic/md5module.c.h _md5_md5 _keywords -
diff --git a/Tools/cases_generator/README.md b/Tools/cases_generator/README.md
index 7fec8a8..d35a868 100644
--- a/Tools/cases_generator/README.md
+++ b/Tools/cases_generator/README.md
@@ -13,6 +13,9 @@ What's currently here:
- `parser.py` helper for interactions with `parsing.py`
- `tierN_generator.py`: a couple of driver scripts to read `Python/bytecodes.c` and
write `Python/generated_cases.c.h` (and several other files)
+- `tier2_abstract_generator.py`: reads `Python/bytecodes.c` and
+ `Python/tier2_redundancy_eliminator_bytecodes.c` and writes
+ `Python/tier2_redundancy_eliminator_cases.c.h`
- `stack.py`: code to handle generalized stack effects
- `cwriter.py`: code which understands tokens and how to format C code;
main class: `CWriter`
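Judging from the generator added at the end of this diff, a default (non-overridden) case in the generated Python/tier2_redundancy_eliminator_cases.c.h comes out roughly as follows (the uop name is hypothetical, and the exact stack-pointer bookkeeping is produced by stack.py):

    case _EXAMPLE_UOP: {
        _Py_UOpsSymType *res;
        /* emit_default(): every live output becomes an unknown symbol */
        res = sym_new_unknown(ctx);
        if (res == NULL) goto out_of_space;
        stack_pointer[-1] = res;
        break;
    }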
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index b80fa66..3497b7f 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -24,7 +24,6 @@ class Properties:
pure: bool
passthrough: bool
- guard: bool
def dump(self, indent: str) -> None:
print(indent, end="")
@@ -51,7 +50,6 @@ class Properties:
has_free=any(p.has_free for p in properties),
pure=all(p.pure for p in properties),
passthrough=all(p.passthrough for p in properties),
- guard=all(p.guard for p in properties),
)
@@ -73,7 +71,6 @@ SKIP_PROPERTIES = Properties(
has_free=False,
pure=False,
passthrough=False,
- guard=False,
)
@@ -273,7 +270,7 @@ def override_error(
def convert_stack_item(item: parser.StackEffect) -> StackItem:
return StackItem(
- item.name, item.type, item.cond, (item.size or "1"), type_prop=item.type_prop
+ item.name, item.type, item.cond, (item.size or "1")
)
@@ -473,7 +470,6 @@ def compute_properties(op: parser.InstDef) -> Properties:
has_free=has_free,
pure="pure" in op.annotations,
passthrough=passthrough,
- guard=passthrough and deopts,
)
diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md
index e87aff4..9b57335 100644
--- a/Tools/cases_generator/interpreter_definition.md
+++ b/Tools/cases_generator/interpreter_definition.md
@@ -109,10 +109,7 @@ and a piece of C code describing its semantics::
NAME [":" type] [ "if" "(" C-expression ")" ]
type:
- NAME ["*"] | type_prop
-
- type_prop:
- "&" "(" NAME ["+" NAME] ")"
+ NAME ["*"]
stream:
NAME "/" size
@@ -142,26 +139,7 @@ The following definitions may occur:
The optional `type` in an `object` is the C type. It defaults to `PyObject *`.
The objects before the "--" are the objects on top of the stack at the start of
the instruction. Those after the "--" are the objects on top of the stack at the
-end of the instruction. When prefixed by a `&`, the `type` production rule follows the
-`type_prop` production rule. This indicates the type of the value is of that specific type
-after the operation. In this case, the type may also contain 64-bit refinement information
-that is fetched from a previously defined operand in the instruction header, such as
-a type version tag. This follows the format `type + refinement`. The list of possible types
-and their refinements are below. They obey the following predicates:
-
-
-* `PYLONG_TYPE`: `Py_TYPE(val) == &PyLong_Type`
-* `PYFLOAT_TYPE`: `Py_TYPE(val) == &PyFloat_Type`
-* `PYUNICODE_TYPE`: `Py_TYPE(val) == &PYUNICODE_TYPE`
-* `NULL_TYPE`: `val == NULL`
-* `GUARD_TYPE_VERSION_TYPE`: `type->tp_version_tag == auxillary`
-* `GUARD_DORV_VALUES_TYPE`: `_PyDictOrValues_IsValues(obj)`
-* `GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE`:
- `_PyDictOrValues_IsValues(obj) || _PyObject_MakeInstanceAttributesFromDict(obj, dorv)`
-* `GUARD_KEYS_VERSION_TYPE`: `owner_heap_type->ht_cached_keys->dk_version == auxillary`
-* `PYMETHOD_TYPE`: `Py_TYPE(val) == &PyMethod_Type`
-* `PYFUNCTION_TYPE_VERSION_TYPE`:
- `PyFunction_Check(callable) && func->func_version == auxillary && code->co_argcount == oparg + (self_or_null != NULL)`
+end of the instruction.
An `inst` without `stack_effect` is a transitional form to allow the original C code
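Concretely, a stack effect after this change is limited to a name, an optional C type, an optional condition, or a size. A sketch of the permitted forms (the instruction name is hypothetical):

    /* name, optional ": type", optional "if (cond)"; "name[size]" for arrays.
       A size cannot be combined with a type or a condition. */
    inst(_EXAMPLE, (left: PyLongObject *, args[oparg] -- res if (oparg & 1))) {
        /* body */
    }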
diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py
index 307919c..a8961f2 100644
--- a/Tools/cases_generator/parsing.py
+++ b/Tools/cases_generator/parsing.py
@@ -75,11 +75,6 @@ class StackEffect(Node):
size: str = "" # Optional `[size]`
# Note: size cannot be combined with type or cond
- # Optional `(type, refinement)`
- type_prop: None | tuple[str, None | str] = field(
- default_factory=lambda: None, init=True, compare=False, hash=False
- )
-
def __repr__(self) -> str:
items = [self.name, self.type, self.cond, self.size]
while items and items[-1] == "":
@@ -260,25 +255,14 @@ class Parser(PLexer):
@contextual
def stack_effect(self) -> StackEffect | None:
- # IDENTIFIER [':' [IDENTIFIER [TIMES]] ['&' '(' IDENTIFIER ['+' IDENTIFIER] ')']] ['if' '(' expression ')']
+ # IDENTIFIER [':' IDENTIFIER [TIMES]] ['if' '(' expression ')']
# | IDENTIFIER '[' expression ']'
if tkn := self.expect(lx.IDENTIFIER):
type_text = ""
- type_prop = None
if self.expect(lx.COLON):
- if i := self.expect(lx.IDENTIFIER):
- type_text = i.text.strip()
- if self.expect(lx.TIMES):
- type_text += " *"
- if self.expect(lx.AND):
- consumed_bracket = self.expect(lx.LPAREN) is not None
- type_prop_text = self.require(lx.IDENTIFIER).text.strip()
- refinement = None
- if self.expect(lx.PLUS):
- refinement = self.require(lx.IDENTIFIER).text.strip()
- type_prop = (type_prop_text, refinement)
- if consumed_bracket:
- self.require(lx.RPAREN)
+ type_text = self.require(lx.IDENTIFIER).text.strip()
+ if self.expect(lx.TIMES):
+ type_text += " *"
cond_text = ""
if self.expect(lx.IF):
self.require(lx.LPAREN)
@@ -295,7 +279,7 @@ class Parser(PLexer):
self.require(lx.RBRACKET)
type_text = "PyObject **"
size_text = size.text.strip()
- return StackEffect(tkn.text, type_text, cond_text, size_text, type_prop)
+ return StackEffect(tkn.text, type_text, cond_text, size_text)
return None
@contextual
diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py
index f62ece4..97a3011 100644
--- a/Tools/cases_generator/stack.py
+++ b/Tools/cases_generator/stack.py
@@ -168,11 +168,11 @@ class Stack:
self.top_offset.push(var)
return ""
- def flush(self, out: CWriter) -> None:
+ def flush(self, out: CWriter, cast_type: str = "PyObject *") -> None:
out.start_line()
for var in self.variables:
if not var.peek:
- cast = "(PyObject *)" if var.type else ""
+ cast = f"({cast_type})" if var.type else ""
if var.name not in UNUSED and not var.is_array():
if var.condition:
out.emit(f"if ({var.condition}) ")
diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py
new file mode 100644
index 0000000..cc29b16
--- /dev/null
+++ b/Tools/cases_generator/tier2_abstract_generator.py
@@ -0,0 +1,235 @@
+"""Generate the cases for the tier 2 redundancy eliminator/abstract interpreter.
+Reads the instruction definitions from bytecodes.c. and tier2_redundancy_eliminator.bytecodes.c
+Writes the cases to tier2_redundancy_eliminator_cases.c.h, which is #included in Python/optimizer_analysis.c.
+"""
+
+import argparse
+import os.path
+import sys
+
+from analyzer import (
+ Analysis,
+ Instruction,
+ Uop,
+ Part,
+ analyze_files,
+ Skip,
+ StackItem,
+ analysis_error,
+)
+from generators_common import (
+ DEFAULT_INPUT,
+ ROOT,
+ write_header,
+ emit_tokens,
+ emit_to,
+ replace_sync_sp,
+)
+from cwriter import CWriter
+from typing import TextIO, Iterator
+from lexer import Token
+from stack import StackOffset, Stack, SizeMismatch, UNUSED
+
+DEFAULT_OUTPUT = ROOT / "Python/tier2_redundancy_eliminator_cases.c.h"
+DEFAULT_ABSTRACT_INPUT = ROOT / "Python/tier2_redundancy_eliminator_bytecodes.c"
+
+
+def validate_uop(override: Uop, uop: Uop) -> None:
+ # To do
+ pass
+
+
+def type_name(var: StackItem) -> str:
+ if var.is_array():
+ return f"_Py_UOpsSymType **"
+ if var.type:
+ return var.type
+ return f"_Py_UOpsSymType *"
+
+
+def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None:
+ variables = {"unused"}
+ if not skip_inputs:
+ for var in reversed(uop.stack.inputs):
+ if var.name not in variables:
+ variables.add(var.name)
+ if var.condition:
+ out.emit(f"{type_name(var)}{var.name} = NULL;\n")
+ else:
+ out.emit(f"{type_name(var)}{var.name};\n")
+ for var in uop.stack.outputs:
+ if var.peek:
+ continue
+ if var.name not in variables:
+ variables.add(var.name)
+ if var.condition:
+ out.emit(f"{type_name(var)}{var.name} = NULL;\n")
+ else:
+ out.emit(f"{type_name(var)}{var.name};\n")
+
+
+def decref_inputs(
+ out: CWriter,
+ tkn: Token,
+ tkn_iter: Iterator[Token],
+ uop: Uop,
+ stack: Stack,
+ inst: Instruction | None,
+) -> None:
+ next(tkn_iter)
+ next(tkn_iter)
+ next(tkn_iter)
+ out.emit_at("", tkn)
+
+
+def emit_default(out: CWriter, uop: Uop) -> None:
+ for i, var in enumerate(uop.stack.outputs):
+ if var.name != "unused" and not var.peek:
+ if var.is_array():
+ out.emit(f"for (int _i = {var.size}; --_i >= 0;) {{\n")
+ out.emit(f"{var.name}[_i] = sym_new_unknown(ctx);\n")
+ out.emit(f"if ({var.name}[_i] == NULL) goto out_of_space;\n")
+ out.emit("}\n")
+ elif var.name == "null":
+ out.emit(f"{var.name} = sym_new_null(ctx);\n")
+ out.emit(f"if ({var.name} == NULL) goto out_of_space;\n")
+ else:
+ out.emit(f"{var.name} = sym_new_unknown(ctx);\n")
+ out.emit(f"if ({var.name} == NULL) goto out_of_space;\n")
+
+
+def write_uop(
+ override: Uop | None,
+ uop: Uop,
+ out: CWriter,
+ stack: Stack,
+ debug: bool,
+ skip_inputs: bool,
+) -> None:
+ try:
+ prototype = override if override else uop
+ is_override = override is not None
+ out.start_line()
+ for var in reversed(prototype.stack.inputs):
+ res = stack.pop(var)
+ if not skip_inputs:
+ out.emit(res)
+ if not prototype.properties.stores_sp:
+ for i, var in enumerate(prototype.stack.outputs):
+ res = stack.push(var)
+ if not var.peek or is_override:
+ out.emit(res)
+ if debug:
+ args = []
+ for var in prototype.stack.inputs:
+ if not var.peek or is_override:
+ args.append(var.name)
+ out.emit(f'DEBUG_PRINTF({", ".join(args)});\n')
+ if override:
+ for cache in uop.caches:
+ if cache.name != "unused":
+ if cache.size == 4:
+ type = cast = "PyObject *"
+ else:
+ type = f"uint{cache.size*16}_t "
+ cast = f"uint{cache.size*16}_t"
+ out.emit(f"{type}{cache.name} = ({cast})this_instr->operand;\n")
+ if override:
+ replacement_funcs = {
+ "DECREF_INPUTS": decref_inputs,
+ "SYNC_SP": replace_sync_sp,
+ }
+ emit_tokens(out, override, stack, None, replacement_funcs)
+ else:
+ emit_default(out, uop)
+
+ if prototype.properties.stores_sp:
+ for i, var in enumerate(prototype.stack.outputs):
+ if not var.peek or is_override:
+ out.emit(stack.push(var))
+ out.start_line()
+ stack.flush(out, cast_type="_Py_UOpsSymType *")
+ except SizeMismatch as ex:
+ raise analysis_error(ex.args[0], uop.body[0])
+
+
+SKIPS = ("_EXTENDED_ARG",)
+
+
+def generate_abstract_interpreter(
+ filenames: list[str],
+ abstract: Analysis,
+ base: Analysis,
+ outfile: TextIO,
+ debug: bool,
+) -> None:
+ write_header(__file__, filenames, outfile)
+ out = CWriter(outfile, 2, False)
+ out.emit("\n")
+ base_uop_names = set([uop.name for uop in base.uops.values()])
+ for abstract_uop_name in abstract.uops:
+ assert abstract_uop_name in base_uop_names,\
+ f"All abstract uops should override base uops, but {abstract_uop_name} is not."
+
+ for uop in base.uops.values():
+ override: Uop | None = None
+ if uop.name in abstract.uops:
+ override = abstract.uops[uop.name]
+ validate_uop(override, uop)
+ if uop.properties.tier_one_only:
+ continue
+ if uop.is_super():
+ continue
+ if not uop.is_viable():
+ out.emit(f"/* {uop.name} is not a viable micro-op for tier 2 */\n\n")
+ continue
+ out.emit(f"case {uop.name}: {{\n")
+ if override:
+ declare_variables(override, out, skip_inputs=False)
+ else:
+ declare_variables(uop, out, skip_inputs=True)
+ stack = Stack()
+ write_uop(override, uop, out, stack, debug, skip_inputs=(override is None))
+ out.start_line()
+ out.emit("break;\n")
+ out.emit("}")
+ out.emit("\n\n")
+
+
+def generate_tier2_abstract_from_files(
+ filenames: list[str], outfilename: str, debug: bool=False
+) -> None:
+ assert len(filenames) == 2, "Need a base file and an abstract cases file."
+ base = analyze_files([filenames[0]])
+ abstract = analyze_files([filenames[1]])
+ with open(outfilename, "w") as outfile:
+ generate_abstract_interpreter(filenames, abstract, base, outfile, debug)
+
+
+arg_parser = argparse.ArgumentParser(
+ description="Generate the code for the tier 2 interpreter.",
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+)
+
+arg_parser.add_argument(
+ "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
+)
+
+
+arg_parser.add_argument("input", nargs=1, help="Abstract interpreter definition file")
+
+arg_parser.add_argument(
+ "base", nargs=argparse.REMAINDER, help="The base instruction definition file(s)"
+)
+
+arg_parser.add_argument("-d", "--debug", help="Insert debug calls", action="store_true")
+
+if __name__ == "__main__":
+ args = arg_parser.parse_args()
+ if len(args.base) == 0:
+ args.input.append(DEFAULT_INPUT)
+ args.input.append(DEFAULT_ABSTRACT_INPUT)
+ abstract = analyze_files(args.input)
+ base = analyze_files(args.base)
+ with open(args.output, "w") as outfile:
+ generate_abstract_interpreter(args.input, abstract, base, outfile, args.debug)
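For an overridden uop that carries a cache entry, write_uop() above materializes the operand from this_instr->operand, and the DECREF_INPUTS replacement erases refcounting from the body. A generated case then comes out roughly like this (the uop name, cache name, and body are hypothetical; a 2-entry cache yields uint32_t per the size*16 rule):

    case _EXAMPLE_GUARD: {
        _Py_UOpsSymType *value;
        value = stack_pointer[-1];
        uint32_t version = (uint32_t)this_instr->operand;
        /* override body from tier2_redundancy_eliminator_bytecodes.c;
           any DECREF_INPUTS() in it is emitted as nothing */
        stack_pointer[-1] = (_Py_UOpsSymType *)value;
        break;
    }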