author | Ken Jin <kenjin@python.org> | 2024-02-13 13:24:48 (GMT)
---|---|---
committer | GitHub <noreply@github.com> | 2024-02-13 13:24:48 (GMT)
commit | 7cce8576226249461baa91c4a89770a1823b44a4 (patch)
tree | c9df9d9fa2fa090706b11982d014b07c2221fcce /Tools
parent | ccc76c3e88647e416184bb1f5210b4e8946ae358 (diff)
gh-114058: Foundations of the Tier2 redundancy eliminator (GH-115085)
---------
Co-authored-by: Mark Shannon <9448417+markshannon@users.noreply.github.com>
Co-authored-by: Jules <57632293+JuliaPoo@users.noreply.github.com>
Co-authored-by: Guido van Rossum <gvanrossum@users.noreply.github.com>
Diffstat (limited to 'Tools')
-rw-r--r-- | Tools/c-analyzer/cpython/_parser.py | 2
-rw-r--r-- | Tools/c-analyzer/cpython/ignored.tsv | 2
-rw-r--r-- | Tools/cases_generator/README.md | 3
-rw-r--r-- | Tools/cases_generator/analyzer.py | 6
-rw-r--r-- | Tools/cases_generator/interpreter_definition.md | 26
-rw-r--r-- | Tools/cases_generator/parsing.py | 26
-rw-r--r-- | Tools/cases_generator/stack.py | 4
-rw-r--r-- | Tools/cases_generator/tier2_abstract_generator.py | 235
8 files changed, 251 insertions, 53 deletions
```diff
diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py
index 444063d..be89a26 100644
--- a/Tools/c-analyzer/cpython/_parser.py
+++ b/Tools/c-analyzer/cpython/_parser.py
@@ -83,9 +83,11 @@ Python/deepfreeze/*.c
 Python/frozen_modules/*.h
 Python/generated_cases.c.h
 Python/executor_cases.c.h
+Python/tier2_redundancy_eliminator_cases.c.h
 
 # not actually source
 Python/bytecodes.c
+Python/tier2_redundancy_eliminator_bytecodes.c
 
 # mimalloc
 Objects/mimalloc/*.c
```

```diff
diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv
index c75aff8..14bcd85 100644
--- a/Tools/c-analyzer/cpython/ignored.tsv
+++ b/Tools/c-analyzer/cpython/ignored.tsv
@@ -734,6 +734,6 @@ Modules/expat/xmlrole.c - error -
 ## other
 Modules/_io/_iomodule.c - _PyIO_Module -
 Modules/_sqlite/module.c - _sqlite3module -
-Python/optimizer_analysis.c - _Py_PartitionRootNode_Type -
+Python/optimizer_analysis.c - _Py_UOpsAbstractFrame_Type -
 Python/optimizer_analysis.c - _Py_UOpsAbstractInterpContext_Type -
 Modules/clinic/md5module.c.h _md5_md5 _keywords -
```

```diff
diff --git a/Tools/cases_generator/README.md b/Tools/cases_generator/README.md
index 7fec8a8..d35a868 100644
--- a/Tools/cases_generator/README.md
+++ b/Tools/cases_generator/README.md
@@ -13,6 +13,9 @@ What's currently here:
 - `parser.py` helper for interactions with `parsing.py`
 - `tierN_generator.py`: a couple of driver scripts to read `Python/bytecodes.c` and
   write `Python/generated_cases.c.h` (and several other files)
+- `tier2_abstract_generator.py`: reads `Python/bytecodes.c` and
+  `Python/tier2_redundancy_eliminator_bytecodes.c` and writes
+  `Python/tier2_redundancy_eliminator_cases.c.h`
 - `stack.py`: code to handle generalized stack effects
 - `cwriter.py`: code which understands tokens and how to format C code;
   main class: `CWriter`
```

```diff
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index b80fa66..3497b7f 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -24,7 +24,6 @@ class Properties:
     pure: bool
     passthrough: bool
-    guard: bool
 
     def dump(self, indent: str) -> None:
         print(indent, end="")
@@ -51,7 +50,6 @@
             has_free=any(p.has_free for p in properties),
             pure=all(p.pure for p in properties),
             passthrough=all(p.passthrough for p in properties),
-            guard=all(p.guard for p in properties),
         )
@@ -73,7 +71,6 @@
     has_free=False,
     pure=False,
     passthrough=False,
-    guard=False,
 )
@@ -273,7 +270,7 @@ def override_error(
 def convert_stack_item(item: parser.StackEffect) -> StackItem:
     return StackItem(
-        item.name, item.type, item.cond, (item.size or "1"), type_prop=item.type_prop
+        item.name, item.type, item.cond, (item.size or "1")
     )
@@ -473,7 +470,6 @@
         has_free=has_free,
         pure="pure" in op.annotations,
         passthrough=passthrough,
-        guard=passthrough and deopts,
     )
```

```diff
diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md
index e87aff4..9b57335 100644
--- a/Tools/cases_generator/interpreter_definition.md
+++ b/Tools/cases_generator/interpreter_definition.md
@@ -109,10 +109,7 @@ and a piece of C code describing its semantics::
       NAME [":" type] [ "if" "(" C-expression ")" ]
 
     type:
-      NAME ["*"] | type_prop
-
-    type_prop:
-      "&" "(" NAME ["+" NAME] ")"
+      NAME ["*"]
 
     stream:
       NAME "/" size
@@ -142,26 +139,7 @@ The following definitions may occur:
 The optional `type` in an `object` is the C type. It defaults to `PyObject *`.
 The objects before the "--" are the objects on top of the stack at the start of
 the instruction. Those after the "--" are the objects on top of the stack at the
-end of the instruction. When prefixed by a `&`, the `type` production rule follows the
-`type_prop` production rule. This indicates the type of the value is of that specific type
-after the operation. In this case, the type may also contain 64-bit refinement information
-that is fetched from a previously defined operand in the instruction header, such as
-a type version tag. This follows the format `type + refinement`. The list of possible types
-and their refinements are below. They obey the following predicates:
-
-
-* `PYLONG_TYPE`: `Py_TYPE(val) == &PyLong_Type`
-* `PYFLOAT_TYPE`: `Py_TYPE(val) == &PyFloat_Type`
-* `PYUNICODE_TYPE`: `Py_TYPE(val) == &PYUNICODE_TYPE`
-* `NULL_TYPE`: `val == NULL`
-* `GUARD_TYPE_VERSION_TYPE`: `type->tp_version_tag == auxillary`
-* `GUARD_DORV_VALUES_TYPE`: `_PyDictOrValues_IsValues(obj)`
-* `GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE`:
-  `_PyDictOrValues_IsValues(obj) || _PyObject_MakeInstanceAttributesFromDict(obj, dorv)`
-* `GUARD_KEYS_VERSION_TYPE`: `owner_heap_type->ht_cached_keys->dk_version == auxillary`
-* `PYMETHOD_TYPE`: `Py_TYPE(val) == &PyMethod_Type`
-* `PYFUNCTION_TYPE_VERSION_TYPE`:
-  `PyFunction_Check(callable) && func->func_version == auxillary && code->co_argcount == oparg + (self_or_null != NULL)`
+end of the instruction.
 
 An `inst` without `stack_effect` is a transitional form to allow the original C code
```
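After this change a stack effect is just a name with an optional C type and an optional condition. A minimal sketch of what the simplified rule accepts, assuming it is run from inside `Tools/cases_generator` so that `parsing` is importable; the effect string and printed values are illustrative, not from the commit:

```python
# Hypothetical round-trip through the simplified stack_effect rule.
# Parser comes from Tools/cases_generator/parsing.py; its PLexer base
# class takes (src, filename).
from parsing import Parser

# NAME ":" type ["*"] ["if" "(" C-expression ")"] -- the old
# `&(TYPE + refinement)` type_prop branch no longer exists.
effect = Parser("res: PyObject * if (oparg & 1)", filename="<example>").stack_effect()
assert effect is not None
print(effect.name)  # res
print(effect.type)  # PyObject *
print(effect.cond)  # oparg & 1
```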
```diff
diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py
index 307919c..a8961f2 100644
--- a/Tools/cases_generator/parsing.py
+++ b/Tools/cases_generator/parsing.py
@@ -75,11 +75,6 @@ class StackEffect(Node):
     size: str = ""  # Optional `[size]`
     # Note: size cannot be combined with type or cond
 
-    # Optional `(type, refinement)`
-    type_prop: None | tuple[str, None | str] = field(
-        default_factory=lambda: None, init=True, compare=False, hash=False
-    )
-
     def __repr__(self) -> str:
         items = [self.name, self.type, self.cond, self.size]
         while items and items[-1] == "":
@@ -260,25 +255,14 @@ class Parser(PLexer):
     @contextual
     def stack_effect(self) -> StackEffect | None:
-        # IDENTIFIER [':' [IDENTIFIER [TIMES]] ['&' '(' IDENTIFIER ['+' IDENTIFIER] ')']] ['if' '(' expression ')']
+        # IDENTIFIER [':' IDENTIFIER [TIMES]] ['if' '(' expression ')']
         # | IDENTIFIER '[' expression ']'
         if tkn := self.expect(lx.IDENTIFIER):
             type_text = ""
-            type_prop = None
             if self.expect(lx.COLON):
-                if i := self.expect(lx.IDENTIFIER):
-                    type_text = i.text.strip()
-                    if self.expect(lx.TIMES):
-                        type_text += " *"
-                if self.expect(lx.AND):
-                    consumed_bracket = self.expect(lx.LPAREN) is not None
-                    type_prop_text = self.require(lx.IDENTIFIER).text.strip()
-                    refinement = None
-                    if self.expect(lx.PLUS):
-                        refinement = self.require(lx.IDENTIFIER).text.strip()
-                    type_prop = (type_prop_text, refinement)
-                    if consumed_bracket:
-                        self.require(lx.RPAREN)
+                type_text = self.require(lx.IDENTIFIER).text.strip()
+                if self.expect(lx.TIMES):
+                    type_text += " *"
             cond_text = ""
             if self.expect(lx.IF):
                 self.require(lx.LPAREN)
@@ -295,7 +279,7 @@
                 self.require(lx.RBRACKET)
                 type_text = "PyObject **"
                 size_text = size.text.strip()
-            return StackEffect(tkn.text, type_text, cond_text, size_text, type_prop)
+            return StackEffect(tkn.text, type_text, cond_text, size_text)
         return None
 
     @contextual
```

```diff
diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py
index f62ece4..97a3011 100644
--- a/Tools/cases_generator/stack.py
+++ b/Tools/cases_generator/stack.py
@@ -168,11 +168,11 @@ class Stack:
             self.top_offset.push(var)
         return ""
 
-    def flush(self, out: CWriter) -> None:
+    def flush(self, out: CWriter, cast_type: str = "PyObject *") -> None:
         out.start_line()
         for var in self.variables:
             if not var.peek:
-                cast = "(PyObject *)" if var.type else ""
+                cast = f"({cast_type})" if var.type else ""
                 if var.name not in UNUSED and not var.is_array():
                     if var.condition:
                         out.emit(f"if ({var.condition}) ")
```
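The new `cast_type` parameter is what lets the abstract-interpreter generator reuse `Stack.flush` while spilling `_Py_UOpsSymType *` values instead of `PyObject *`. A sketch of the two call styles, assuming a `CWriter` over `sys.stdout` constructed the same way the generators construct theirs:

```python
import sys
from cwriter import CWriter
from stack import Stack

out = CWriter(sys.stdout, 2, False)  # (outfile, indent, line_directives)
stack = Stack()
# ... stack.pop()/stack.push() calls for a uop's stack effect go here ...

stack.flush(out)  # tier 1/2 generators: variables are cast to (PyObject *)
stack.flush(out, cast_type="_Py_UOpsSymType *")  # abstract interpreter cases
```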
```diff
diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py
new file mode 100644
index 0000000..cc29b16
--- /dev/null
+++ b/Tools/cases_generator/tier2_abstract_generator.py
@@ -0,0 +1,235 @@
+"""Generate the cases for the tier 2 redundancy eliminator/abstract interpreter.
+Reads the instruction definitions from bytecodes.c and tier2_redundancy_eliminator_bytecodes.c.
+Writes the cases to tier2_redundancy_eliminator_cases.c.h, which is #included in Python/optimizer_analysis.c.
+"""
+
+import argparse
+import os.path
+import sys
+
+from analyzer import (
+    Analysis,
+    Instruction,
+    Uop,
+    Part,
+    analyze_files,
+    Skip,
+    StackItem,
+    analysis_error,
+)
+from generators_common import (
+    DEFAULT_INPUT,
+    ROOT,
+    write_header,
+    emit_tokens,
+    emit_to,
+    replace_sync_sp,
+)
+from cwriter import CWriter
+from typing import TextIO, Iterator
+from lexer import Token
+from stack import StackOffset, Stack, SizeMismatch, UNUSED
+
+DEFAULT_OUTPUT = ROOT / "Python/tier2_redundancy_eliminator_cases.c.h"
+DEFAULT_ABSTRACT_INPUT = ROOT / "Python/tier2_redundancy_eliminator_bytecodes.c"
+
+
+def validate_uop(override: Uop, uop: Uop) -> None:
+    # To do
+    pass
+
+
+def type_name(var: StackItem) -> str:
+    if var.is_array():
+        return f"_Py_UOpsSymType **"
+    if var.type:
+        return var.type
+    return f"_Py_UOpsSymType *"
+
+
+def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None:
+    variables = {"unused"}
+    if not skip_inputs:
+        for var in reversed(uop.stack.inputs):
+            if var.name not in variables:
+                variables.add(var.name)
+                if var.condition:
+                    out.emit(f"{type_name(var)}{var.name} = NULL;\n")
+                else:
+                    out.emit(f"{type_name(var)}{var.name};\n")
+    for var in uop.stack.outputs:
+        if var.peek:
+            continue
+        if var.name not in variables:
+            variables.add(var.name)
+            if var.condition:
+                out.emit(f"{type_name(var)}{var.name} = NULL;\n")
+            else:
+                out.emit(f"{type_name(var)}{var.name};\n")
+
+
+def decref_inputs(
+    out: CWriter,
+    tkn: Token,
+    tkn_iter: Iterator[Token],
+    uop: Uop,
+    stack: Stack,
+    inst: Instruction | None,
+) -> None:
+    next(tkn_iter)
+    next(tkn_iter)
+    next(tkn_iter)
+    out.emit_at("", tkn)
+
+
+def emit_default(out: CWriter, uop: Uop) -> None:
+    for i, var in enumerate(uop.stack.outputs):
+        if var.name != "unused" and not var.peek:
+            if var.is_array():
+                out.emit(f"for (int _i = {var.size}; --_i >= 0;) {{\n")
+                out.emit(f"{var.name}[_i] = sym_new_unknown(ctx);\n")
+                out.emit(f"if ({var.name}[_i] == NULL) goto out_of_space;\n")
+                out.emit("}\n")
+            elif var.name == "null":
+                out.emit(f"{var.name} = sym_new_null(ctx);\n")
+                out.emit(f"if ({var.name} == NULL) goto out_of_space;\n")
+            else:
+                out.emit(f"{var.name} = sym_new_unknown(ctx);\n")
+                out.emit(f"if ({var.name} == NULL) goto out_of_space;\n")
+
+
+def write_uop(
+    override: Uop | None,
+    uop: Uop,
+    out: CWriter,
+    stack: Stack,
+    debug: bool,
+    skip_inputs: bool,
+) -> None:
+    try:
+        prototype = override if override else uop
+        is_override = override is not None
+        out.start_line()
+        for var in reversed(prototype.stack.inputs):
+            res = stack.pop(var)
+            if not skip_inputs:
+                out.emit(res)
+        if not prototype.properties.stores_sp:
+            for i, var in enumerate(prototype.stack.outputs):
+                res = stack.push(var)
+                if not var.peek or is_override:
+                    out.emit(res)
+        if debug:
+            args = []
+            for var in prototype.stack.inputs:
+                if not var.peek or is_override:
+                    args.append(var.name)
+            out.emit(f'DEBUG_PRINTF({", ".join(args)});\n')
+        if override:
+            for cache in uop.caches:
+                if cache.name != "unused":
+                    if cache.size == 4:
+                        type = cast = "PyObject *"
+                    else:
+                        type = f"uint{cache.size*16}_t "
+                        cast = f"uint{cache.size*16}_t"
+                    out.emit(f"{type}{cache.name} = ({cast})this_instr->operand;\n")
+        if override:
+            replacement_funcs = {
+                "DECREF_INPUTS": decref_inputs,
+                "SYNC_SP": replace_sync_sp,
+            }
+            emit_tokens(out, override, stack, None, replacement_funcs)
+        else:
+            emit_default(out, uop)
+
+        if prototype.properties.stores_sp:
+            for i, var in enumerate(prototype.stack.outputs):
+                if not var.peek or is_override:
+                    out.emit(stack.push(var))
+        out.start_line()
+        stack.flush(out, cast_type="_Py_UOpsSymType *")
+    except SizeMismatch as ex:
+        raise analysis_error(ex.args[0], uop.body[0])
+
+
+SKIPS = ("_EXTENDED_ARG",)
+
+
+def generate_abstract_interpreter(
+    filenames: list[str],
+    abstract: Analysis,
+    base: Analysis,
+    outfile: TextIO,
+    debug: bool,
+) -> None:
+    write_header(__file__, filenames, outfile)
+    out = CWriter(outfile, 2, False)
+    out.emit("\n")
+    base_uop_names = set([uop.name for uop in base.uops.values()])
+    for abstract_uop_name in abstract.uops:
+        assert abstract_uop_name in base_uop_names,\
+            f"All abstract uops should override base uops, but {abstract_uop_name} is not."
+
+    for uop in base.uops.values():
+        override: Uop | None = None
+        if uop.name in abstract.uops:
+            override = abstract.uops[uop.name]
+            validate_uop(override, uop)
+        if uop.properties.tier_one_only:
+            continue
+        if uop.is_super():
+            continue
+        if not uop.is_viable():
+            out.emit(f"/* {uop.name} is not a viable micro-op for tier 2 */\n\n")
+            continue
+        out.emit(f"case {uop.name}: {{\n")
+        if override:
+            declare_variables(override, out, skip_inputs=False)
+        else:
+            declare_variables(uop, out, skip_inputs=True)
+        stack = Stack()
+        write_uop(override, uop, out, stack, debug, skip_inputs=(override is None))
+        out.start_line()
+        out.emit("break;\n")
+        out.emit("}")
+        out.emit("\n\n")
+
+
+def generate_tier2_abstract_from_files(
+    filenames: list[str], outfilename: str, debug: bool=False
+) -> None:
+    assert len(filenames) == 2, "Need a base file and an abstract cases file."
+    base = analyze_files([filenames[0]])
+    abstract = analyze_files([filenames[1]])
+    with open(outfilename, "w") as outfile:
+        generate_abstract_interpreter(filenames, abstract, base, outfile, debug)
+
+
+arg_parser = argparse.ArgumentParser(
+    description="Generate the code for the tier 2 interpreter.",
+    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+)
+
+arg_parser.add_argument(
+    "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
+)
+
+
+arg_parser.add_argument("input", nargs=1, help="Abstract interpreter definition file")
+
+arg_parser.add_argument(
+    "base", nargs=argparse.REMAINDER, help="The base instruction definition file(s)"
+)
+
+arg_parser.add_argument("-d", "--debug", help="Insert debug calls", action="store_true")
+
+if __name__ == "__main__":
+    args = arg_parser.parse_args()
+    if len(args.base) == 0:
+        args.input.append(DEFAULT_INPUT)
+        args.input.append(DEFAULT_ABSTRACT_INPUT)
+    abstract = analyze_files(args.input)
+    base = analyze_files(args.base)
+    with open(args.output, "w") as outfile:
+        generate_abstract_interpreter(args.input, abstract, base, outfile, args.debug)
```
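For reference, a sketch of driving the new generator programmatically instead of from the build; the paths assume the CPython source root is the working directory, and the argument order (base file first, abstract file second) follows the assert in `generate_tier2_abstract_from_files`:

```python
import sys

# Make the cases generator importable; hypothetical setup for this sketch.
sys.path.insert(0, "Tools/cases_generator")

from tier2_abstract_generator import generate_tier2_abstract_from_files

# Regenerates the cases header that optimizer_analysis.c #includes.
generate_tier2_abstract_from_files(
    ["Python/bytecodes.c", "Python/tier2_redundancy_eliminator_bytecodes.c"],
    "Python/tier2_redundancy_eliminator_cases.c.h",
    debug=False,
)
```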