summary refs log tree commit diff stats
path: root/Tools
diff options
context:
space:
mode:
authorSam Gross <colesbury@gmail.com>2024-08-07 17:23:53 (GMT)
committerGitHub <noreply@github.com>2024-08-07 17:23:53 (GMT)
commit3e753c689a802d2e6d909cce3e22173977b2edbf (patch)
tree7330f998c15e42dbf12622dffccd773e5c5520dd /Tools
parent967a4f1d180d4cd669d5c6e3ac5ba99af4e72d4e (diff)
downloadcpython-3e753c689a802d2e6d909cce3e22173977b2edbf.zip
cpython-3e753c689a802d2e6d909cce3e22173977b2edbf.tar.gz
cpython-3e753c689a802d2e6d909cce3e22173977b2edbf.tar.bz2
gh-118926: Spill deferred references to stack in cases generator (#122748)
This automatically spills the results from `PyStackRef_FromPyObjectNew` to the in-memory stack so that the deferred references are visible to the GC before we make any possibly escaping call. Co-authored-by: Ken Jin <kenjin@python.org>
Diffstat (limited to 'Tools')
-rw-r--r--Tools/cases_generator/analyzer.py45
-rw-r--r--Tools/cases_generator/generators_common.py25
-rw-r--r--Tools/cases_generator/lexer.py2
-rw-r--r--Tools/cases_generator/stack.py43
-rw-r--r--Tools/cases_generator/tier1_generator.py24
-rw-r--r--Tools/cases_generator/tier2_generator.py18
6 files changed, 132 insertions, 25 deletions
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index f6625a3..3dc9838 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -157,6 +157,7 @@ class Uop:
annotations: list[str]
stack: StackEffect
caches: list[CacheEntry]
+ deferred_refs: dict[lexer.Token, str | None]
body: list[lexer.Token]
properties: Properties
_size: int = -1
@@ -352,6 +353,47 @@ def analyze_caches(inputs: list[parser.InputEffect]) -> list[CacheEntry]:
return [CacheEntry(i.name, int(i.size)) for i in caches]
+def analyze_deferred_refs(node: parser.InstDef) -> dict[lexer.Token, str | None]:
+ """Look for PyStackRef_FromPyObjectNew() calls"""
+
+ def find_assignment_target(idx: int) -> list[lexer.Token]:
+ """Find the tokens that make up the left-hand side of an assignment"""
+ offset = 1
+ for tkn in reversed(node.block.tokens[:idx-1]):
+ if tkn.kind == "SEMI" or tkn.kind == "LBRACE" or tkn.kind == "RBRACE":
+ return node.block.tokens[idx-offset:idx-1]
+ offset += 1
+ return []
+
+ refs: dict[lexer.Token, str | None] = {}
+ for idx, tkn in enumerate(node.block.tokens):
+ if tkn.kind != "IDENTIFIER" or tkn.text != "PyStackRef_FromPyObjectNew":
+ continue
+
+ if idx == 0 or node.block.tokens[idx-1].kind != "EQUALS":
+ raise analysis_error("Expected '=' before PyStackRef_FromPyObjectNew", tkn)
+
+ lhs = find_assignment_target(idx)
+ if len(lhs) == 0:
+ raise analysis_error("PyStackRef_FromPyObjectNew() must be assigned to an output", tkn)
+
+ if lhs[0].kind == "TIMES" or any(t.kind == "ARROW" or t.kind == "LBRACKET" for t in lhs[1:]):
+ # Don't handle: *ptr = ..., ptr->field = ..., or ptr[field] = ...
+ # Assume that they are visible to the GC.
+ refs[tkn] = None
+ continue
+
+ if len(lhs) != 1 or lhs[0].kind != "IDENTIFIER":
+ raise analysis_error("PyStackRef_FromPyObjectNew() must be assigned to an output", tkn)
+
+ name = lhs[0].text
+ if not any(var.name == name for var in node.outputs):
+ raise analysis_error(f"PyStackRef_FromPyObjectNew() must be assigned to an output, not '{name}'", tkn)
+
+ refs[tkn] = name
+
+ return refs
+
def variable_used(node: parser.InstDef, name: str) -> bool:
"""Determine whether a variable with a given name is used in a node."""
return any(
@@ -632,6 +674,7 @@ def make_uop(name: str, op: parser.InstDef, inputs: list[parser.InputEffect], uo
annotations=op.annotations,
stack=analyze_stack(op),
caches=analyze_caches(inputs),
+ deferred_refs=analyze_deferred_refs(op),
body=op.block.tokens,
properties=compute_properties(op),
)
@@ -649,6 +692,7 @@ def make_uop(name: str, op: parser.InstDef, inputs: list[parser.InputEffect], uo
annotations=op.annotations,
stack=analyze_stack(op, bit),
caches=analyze_caches(inputs),
+ deferred_refs=analyze_deferred_refs(op),
body=op.block.tokens,
properties=properties,
)
@@ -671,6 +715,7 @@ def make_uop(name: str, op: parser.InstDef, inputs: list[parser.InputEffect], uo
annotations=op.annotations,
stack=analyze_stack(op),
caches=analyze_caches(inputs),
+ deferred_refs=analyze_deferred_refs(op),
body=op.block.tokens,
properties=properties,
)
diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py
index 2a339f8..37060e2 100644
--- a/Tools/cases_generator/generators_common.py
+++ b/Tools/cases_generator/generators_common.py
@@ -6,6 +6,7 @@ from analyzer import (
Uop,
Properties,
StackItem,
+ analysis_error,
)
from cwriter import CWriter
from typing import Callable, Mapping, TextIO, Iterator
@@ -75,6 +76,7 @@ class Emitter:
"DECREF_INPUTS": self.decref_inputs,
"CHECK_EVAL_BREAKER": self.check_eval_breaker,
"SYNC_SP": self.sync_sp,
+ "PyStackRef_FromPyObjectNew": self.py_stack_ref_from_py_object_new,
}
self.out = out
@@ -203,6 +205,29 @@ class Emitter:
if not uop.properties.ends_with_eval_breaker:
self.out.emit_at("CHECK_EVAL_BREAKER();", tkn)
+ def py_stack_ref_from_py_object_new(
+ self,
+ tkn: Token,
+ tkn_iter: Iterator[Token],
+ uop: Uop,
+ stack: Stack,
+ inst: Instruction | None,
+ ) -> None:
+ self.out.emit(tkn)
+ emit_to(self.out, tkn_iter, "SEMI")
+ self.out.emit(";\n")
+
+ target = uop.deferred_refs[tkn]
+ if target is None:
+ # An assignment we don't handle, such as to a pointer or array.
+ return
+
+ # Flush the assignment to the stack. Note that we don't flush the
+ # stack pointer here, and instead are currently relying on initializing
+ # unused portions of the stack to NULL.
+ stack.flush_single_var(self.out, target, uop.stack.outputs)
+
+
def emit_tokens(
self,
uop: Uop,
diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py
index 13aee94..d583159 100644
--- a/Tools/cases_generator/lexer.py
+++ b/Tools/cases_generator/lexer.py
@@ -242,7 +242,7 @@ def make_syntax_error(
return SyntaxError(message, (filename, line, column, line_text))
-@dataclass(slots=True)
+@dataclass(slots=True, frozen=True)
class Token:
filename: str
kind: str
diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py
index d2d598a..b44e48a 100644
--- a/Tools/cases_generator/stack.py
+++ b/Tools/cases_generator/stack.py
@@ -257,20 +257,25 @@ class Stack:
return "\n".join(res)
@staticmethod
+ def _do_emit(out: CWriter, var: StackItem, base_offset: StackOffset,
+ cast_type: str = "uintptr_t", extract_bits: bool = False) -> None:
+ cast = f"({cast_type})" if var.type else ""
+ bits = ".bits" if cast and not extract_bits else ""
+ if var.condition == "0":
+ return
+ if var.condition and var.condition != "1":
+ out.emit(f"if ({var.condition}) ")
+ out.emit(
+ f"stack_pointer[{base_offset.to_c()}]{bits} = {cast}{var.name};\n"
+ )
+
+ @staticmethod
def _do_flush(out: CWriter, variables: list[Local], base_offset: StackOffset, top_offset: StackOffset,
cast_type: str = "uintptr_t", extract_bits: bool = False) -> None:
out.start_line()
for var in variables:
if var.cached and not var.in_memory and not var.item.peek and not var.name in UNUSED:
- cast = f"({cast_type})" if var.item.type else ""
- bits = ".bits" if cast and not extract_bits else ""
- if var.condition == "0":
- continue
- if var.condition and var.condition != "1":
- out.emit(f"if ({var.condition}) ")
- out.emit(
- f"stack_pointer[{base_offset.to_c()}]{bits} = {cast}{var.name};\n"
- )
+ Stack._do_emit(out, var.item, base_offset, cast_type, extract_bits)
base_offset.push(var.item)
if base_offset.to_c() != top_offset.to_c():
print("base", base_offset, "top", top_offset)
@@ -290,6 +295,26 @@ class Stack:
self.base_offset.clear()
self.top_offset.clear()
+ def flush_single_var(self, out: CWriter, var_name: str, outputs: list[StackItem],
+ cast_type: str = "uintptr_t", extract_bits: bool = False) -> None:
+ assert any(var.name == var_name for var in outputs)
+ base_offset = self.base_offset.copy()
+ top_offset = self.top_offset.copy()
+ for var in self.variables:
+ base_offset.push(var.item)
+ for var in outputs:
+ if any(var == v.item for v in self.variables):
+ # The variable is already on the stack, such as a peeked value
+ # in the tier1 generator
+ continue
+ if var.name == var_name:
+ Stack._do_emit(out, var, base_offset, cast_type, extract_bits)
+ base_offset.push(var)
+ top_offset.push(var)
+ if base_offset.to_c() != top_offset.to_c():
+ print("base", base_offset, "top", top_offset)
+ assert False
+
def peek_offset(self) -> str:
return self.top_offset.to_c()
diff --git a/Tools/cases_generator/tier1_generator.py b/Tools/cases_generator/tier1_generator.py
index 6c13d1f..1ea31a0 100644
--- a/Tools/cases_generator/tier1_generator.py
+++ b/Tools/cases_generator/tier1_generator.py
@@ -93,6 +93,16 @@ def write_uop(
if braces:
emitter.emit("{\n")
emitter.out.emit(stack.define_output_arrays(uop.stack.outputs))
+ outputs: list[Local] = []
+ for var in uop.stack.outputs:
+ if not var.peek:
+ if var.name in locals:
+ local = locals[var.name]
+ elif var.name == "unused":
+ local = Local.unused(var)
+ else:
+ local = Local.local(var)
+ outputs.append(local)
for cache in uop.caches:
if cache.name != "unused":
@@ -109,15 +119,11 @@ def write_uop(
emitter.emit(f"(void){cache.name};\n")
offset += cache.size
emitter.emit_tokens(uop, stack, inst)
- for i, var in enumerate(uop.stack.outputs):
- if not var.peek:
- if var.name in locals:
- local = locals[var.name]
- elif var.name == "unused":
- local = Local.unused(var)
- else:
- local = Local.local(var)
- emitter.emit(stack.push(local))
+ for output in outputs:
+ if output.name in uop.deferred_refs.values():
+ # We've already spilled this when emitting tokens
+ output.cached = False
+ emitter.emit(stack.push(output))
if braces:
emitter.out.start_line()
emitter.emit("}\n")
diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py
index 8c212f7..461375c 100644
--- a/Tools/cases_generator/tier2_generator.py
+++ b/Tools/cases_generator/tier2_generator.py
@@ -166,6 +166,13 @@ def write_uop(uop: Uop, emitter: Emitter, stack: Stack) -> None:
if local.defined:
locals[local.name] = local
emitter.emit(stack.define_output_arrays(uop.stack.outputs))
+ outputs: list[Local] = []
+ for var in uop.stack.outputs:
+ if var.name in locals:
+ local = locals[var.name]
+ else:
+ local = Local.local(var)
+ outputs.append(local)
for cache in uop.caches:
if cache.name != "unused":
if cache.size == 4:
@@ -175,12 +182,11 @@ def write_uop(uop: Uop, emitter: Emitter, stack: Stack) -> None:
cast = f"uint{cache.size*16}_t"
emitter.emit(f"{type}{cache.name} = ({cast})CURRENT_OPERAND();\n")
emitter.emit_tokens(uop, stack, None)
- for i, var in enumerate(uop.stack.outputs):
- if var.name in locals:
- local = locals[var.name]
- else:
- local = Local.local(var)
- emitter.emit(stack.push(local))
+ for output in outputs:
+ if output.name in uop.deferred_refs.values():
+ # We've already spilled this when emitting tokens
+ output.cached = False
+ emitter.emit(stack.push(output))
except StackError as ex:
raise analysis_error(ex.args[0], uop.body[0]) from None