gh-126612: Include stack effects of uops when computing maximum stack depth (#126894)

author: mpage <mpage@meta.com> 2024-11-26 00:53:49 (GMT)
committer: GitHub <noreply@github.com> 2024-11-26 00:53:49 (GMT)
commit: 193890c1ccab4b398a218c6c8e91831477aa2ebb (patch)
tree: 28dc52defa3f5059b964dbcd3657dec47f88d2dd /Tools
parent: 26ff32b30553e1f7b0cc822835ad2da8890c180c (diff)
download: cpython-193890c1ccab4b398a218c6c8e91831477aa2ebb.zip
cpython-193890c1ccab4b398a218c6c8e91831477aa2ebb.tar.gz
cpython-193890c1ccab4b398a218c6c8e91831477aa2ebb.tar.bz2
2 files changed, 134 insertions, 22 deletions
diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py
index 2ad7604..1a9849c 100644
--- a/Tools/cases_generator/opcode_metadata_generator.py
+++ b/Tools/cases_generator/opcode_metadata_generator.py
@@ -19,8 +19,9 @@ from generators_common import (
     cflags,
 )
 from cwriter import CWriter
+from dataclasses import dataclass
 from typing import TextIO
-from stack import get_stack_effect
+from stack import Stack, get_stack_effect, get_stack_effects
 
 # Constants used instead of size for macro expansions.
 # Note: 1, 2, 4 must match actual cache entry sizes.
@@ -107,6 +108,101 @@ def generate_stack_effect_functions(analysis: Analysis, out: CWriter) -> None:
     emit_stack_effect_function(out, "popped", sorted(popped_data))
     emit_stack_effect_function(out, "pushed", sorted(pushed_data))
 
+    generate_max_stack_effect_function(analysis, out)
+
+
+def emit_max_stack_effect_function(
+    out: CWriter, effects: list[tuple[str, list[str]]]
+) -> None:
+    out.emit("extern int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect);\n")
+    out.emit("#ifdef NEED_OPCODE_METADATA\n")
+    out.emit(f"int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect)  {{\n")
+    out.emit("switch(opcode) {\n")
+    for name, exprs in effects:
+        out.emit(f"case {name}: {{\n")
+        if len(exprs) == 1:
+            out.emit(f"*effect = {exprs[0]};\n")
+        elif len(exprs) == 2:
+            out.emit(f"*effect = Py_MAX({exprs[0]}, {exprs[1]});\n")
+        else:
+            assert len(exprs) > 2
+            out.emit(f"int max_eff = Py_MAX({exprs[0]}, {exprs[1]});\n")
+            for expr in exprs[2:]:
+                out.emit(f"max_eff = Py_MAX(max_eff, {expr});\n")
+            out.emit(f"*effect = max_eff;\n")
+        out.emit(f"return 0;\n")
+        out.emit("}\n")
+    out.emit("default:\n")
+    out.emit("    return -1;\n")
+    out.emit("}\n")
+    out.emit("}\n\n")
+    out.emit("#endif\n\n")
+
+
+@dataclass
+class MaxStackEffectSet:
+    int_effect: int | None
+    cond_effects: set[str]
+
+    def __init__(self) -> None:
+        self.int_effect = None
+        self.cond_effects = set()
+
+    def add(self, stack: Stack) -> None:
+        top_off = stack.top_offset
+        top_off_int = top_off.as_int()
+        if top_off_int is not None:
+            if self.int_effect is None or top_off_int > self.int_effect:
+                self.int_effect = top_off_int
+        else:
+            self.cond_effects.add(top_off.to_c())
+
+    def update(self, other: "MaxStackEffectSet") -> None:
+        if self.int_effect is None:
+            if other.int_effect is not None:
+                self.int_effect = other.int_effect
+        elif other.int_effect is not None:
+            self.int_effect = max(self.int_effect, other.int_effect)
+        self.cond_effects.update(other.cond_effects)
+
+
+def generate_max_stack_effect_function(analysis: Analysis, out: CWriter) -> None:
+    """Generate a function that returns the maximum stack effect of an
+    instruction while it is executing.
+
+    Specialized instructions that are composed of uops may have a greater stack
+    effect during instruction execution than the net stack effect of the
+    instruction if the uops pass values on the stack.
+    """
+    effects: dict[str, MaxStackEffectSet] = {}
+
+    def add(inst: Instruction | PseudoInstruction) -> None:
+        inst_effect = MaxStackEffectSet()
+        for stack in get_stack_effects(inst):
+            inst_effect.add(stack)
+        effects[inst.name] = inst_effect
+
+    # Collect unique stack effects for each instruction
+    for inst in analysis.instructions.values():
+        add(inst)
+    for pseudo in analysis.pseudos.values():
+        add(pseudo)
+
+    # Merge the effects of all specializations in a family into the generic
+    # instruction
+    for family in analysis.families.values():
+        for inst in family.members:
+            effects[family.name].update(effects[inst.name])
+
+    data: list[tuple[str, list[str]]] = []
+    for name, effs in sorted(effects.items(), key=lambda kv: kv[0]):
+        exprs = []
+        if effs.int_effect is not None:
+            exprs.append(str(effs.int_effect))
+        exprs.extend(sorted(effs.cond_effects))
+        data.append((name, exprs))
+    emit_max_stack_effect_function(out, data)
+
 
 def generate_is_pseudo(analysis: Analysis, out: CWriter) -> None:
     """Write the IS_PSEUDO_INSTR macro"""
diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py
index a954bed..286f47d 100644
--- a/Tools/cases_generator/stack.py
+++ b/Tools/cases_generator/stack.py
@@ -1,8 +1,9 @@
 import re
 from analyzer import StackItem, StackEffect, Instruction, Uop, PseudoInstruction
+from collections import defaultdict
 from dataclasses import dataclass
 from cwriter import CWriter
-from typing import Iterator
+from typing import Iterator, Tuple
 
 UNUSED = {"unused"}
 
@@ -385,31 +386,46 @@ class Stack:
         self.align(other, out)
 
 
+def stacks(inst: Instruction | PseudoInstruction) -> Iterator[StackEffect]:
+    if isinstance(inst, Instruction):
+        for uop in inst.parts:
+            if isinstance(uop, Uop):
+                yield uop.stack
+    else:
+        assert isinstance(inst, PseudoInstruction)
+        yield inst.stack
+
+
+def apply_stack_effect(stack: Stack, effect: StackEffect) -> None:
+    locals: dict[str, Local] = {}
+    for var in reversed(effect.inputs):
+        _, local = stack.pop(var)
+        if var.name != "unused":
+            locals[local.name] = local
+    for var in effect.outputs:
+        if var.name in locals:
+            local = locals[var.name]
+        else:
+            local = Local.unused(var)
+        stack.push(local)
+
+
 def get_stack_effect(inst: Instruction | PseudoInstruction) -> Stack:
     stack = Stack()
+    for s in stacks(inst):
+        apply_stack_effect(stack, s)
+    return stack
 
-    def stacks(inst: Instruction | PseudoInstruction) -> Iterator[StackEffect]:
-        if isinstance(inst, Instruction):
-            for uop in inst.parts:
-                if isinstance(uop, Uop):
-                    yield uop.stack
-        else:
-            assert isinstance(inst, PseudoInstruction)
-            yield inst.stack
 
+def get_stack_effects(inst: Instruction | PseudoInstruction) -> list[Stack]:
+    """Returns a list of stack effects after each uop"""
+    result = []
+    stack = Stack()
     for s in stacks(inst):
-        locals: dict[str, Local] = {}
-        for var in reversed(s.inputs):
-            _, local = stack.pop(var)
-            if var.name != "unused":
-                locals[local.name] = local
-        for var in s.outputs:
-            if var.name in locals:
-                local = locals[var.name]
-            else:
-                local = Local.unused(var)
-            stack.push(local)
-    return stack
+        apply_stack_effect(stack, s)
+        result.append(stack.copy())
+    return result
+
 
 @dataclass
 class Storage:
author	mpage <mpage@meta.com>	2024-11-26 00:53:49 (GMT)
committer	GitHub <noreply@github.com>	2024-11-26 00:53:49 (GMT)
commit	193890c1ccab4b398a218c6c8e91831477aa2ebb (patch)
tree	28dc52defa3f5059b964dbcd3657dec47f88d2dd /Tools
parent	26ff32b30553e1f7b0cc822835ad2da8890c180c (diff)
download	cpython-193890c1ccab4b398a218c6c8e91831477aa2ebb.zip cpython-193890c1ccab4b398a218c6c8e91831477aa2ebb.tar.gz cpython-193890c1ccab4b398a218c6c8e91831477aa2ebb.tar.bz2