diff options
author | Sam Gross <colesbury@gmail.com> | 2024-08-07 17:23:53 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-08-07 17:23:53 (GMT) |
commit | 3e753c689a802d2e6d909cce3e22173977b2edbf (patch) | |
tree | 7330f998c15e42dbf12622dffccd773e5c5520dd /Tools | |
parent | 967a4f1d180d4cd669d5c6e3ac5ba99af4e72d4e (diff) | |
download | cpython-3e753c689a802d2e6d909cce3e22173977b2edbf.zip cpython-3e753c689a802d2e6d909cce3e22173977b2edbf.tar.gz cpython-3e753c689a802d2e6d909cce3e22173977b2edbf.tar.bz2 |
gh-118926: Spill deferred references to stack in cases generator (#122748)
This automatically spills the results from `_PyStackRef_FromPyObjectNew`
to the in-memory stack so that the deferred references are visible to
the GC before we make any possibly escaping call.
Co-authored-by: Ken Jin <kenjin@python.org>
Diffstat (limited to 'Tools')
-rw-r--r-- | Tools/cases_generator/analyzer.py | 45 | ||||
-rw-r--r-- | Tools/cases_generator/generators_common.py | 25 | ||||
-rw-r--r-- | Tools/cases_generator/lexer.py | 2 | ||||
-rw-r--r-- | Tools/cases_generator/stack.py | 43 | ||||
-rw-r--r-- | Tools/cases_generator/tier1_generator.py | 24 | ||||
-rw-r--r-- | Tools/cases_generator/tier2_generator.py | 18 |
6 files changed, 132 insertions, 25 deletions
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index f6625a3..3dc9838 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -157,6 +157,7 @@ class Uop: annotations: list[str] stack: StackEffect caches: list[CacheEntry] + deferred_refs: dict[lexer.Token, str | None] body: list[lexer.Token] properties: Properties _size: int = -1 @@ -352,6 +353,47 @@ def analyze_caches(inputs: list[parser.InputEffect]) -> list[CacheEntry]: return [CacheEntry(i.name, int(i.size)) for i in caches] +def analyze_deferred_refs(node: parser.InstDef) -> dict[lexer.Token, str | None]: + """Look for PyStackRef_FromPyObjectNew() calls""" + + def find_assignment_target(idx: int) -> list[lexer.Token]: + """Find the tokens that make up the left-hand side of an assignment""" + offset = 1 + for tkn in reversed(node.block.tokens[:idx-1]): + if tkn.kind == "SEMI" or tkn.kind == "LBRACE" or tkn.kind == "RBRACE": + return node.block.tokens[idx-offset:idx-1] + offset += 1 + return [] + + refs: dict[lexer.Token, str | None] = {} + for idx, tkn in enumerate(node.block.tokens): + if tkn.kind != "IDENTIFIER" or tkn.text != "PyStackRef_FromPyObjectNew": + continue + + if idx == 0 or node.block.tokens[idx-1].kind != "EQUALS": + raise analysis_error("Expected '=' before PyStackRef_FromPyObjectNew", tkn) + + lhs = find_assignment_target(idx) + if len(lhs) == 0: + raise analysis_error("PyStackRef_FromPyObjectNew() must be assigned to an output", tkn) + + if lhs[0].kind == "TIMES" or any(t.kind == "ARROW" or t.kind == "LBRACKET" for t in lhs[1:]): + # Don't handle: *ptr = ..., ptr->field = ..., or ptr[field] = ... + # Assume that they are visible to the GC. + refs[tkn] = None + continue + + if len(lhs) != 1 or lhs[0].kind != "IDENTIFIER": + raise analysis_error("PyStackRef_FromPyObjectNew() must be assigned to an output", tkn) + + name = lhs[0].text + if not any(var.name == name for var in node.outputs): + raise analysis_error(f"PyStackRef_FromPyObjectNew() must be assigned to an output, not '{name}'", tkn) + + refs[tkn] = name + + return refs + def variable_used(node: parser.InstDef, name: str) -> bool: """Determine whether a variable with a given name is used in a node.""" return any( @@ -632,6 +674,7 @@ def make_uop(name: str, op: parser.InstDef, inputs: list[parser.InputEffect], uo annotations=op.annotations, stack=analyze_stack(op), caches=analyze_caches(inputs), + deferred_refs=analyze_deferred_refs(op), body=op.block.tokens, properties=compute_properties(op), ) @@ -649,6 +692,7 @@ def make_uop(name: str, op: parser.InstDef, inputs: list[parser.InputEffect], uo annotations=op.annotations, stack=analyze_stack(op, bit), caches=analyze_caches(inputs), + deferred_refs=analyze_deferred_refs(op), body=op.block.tokens, properties=properties, ) @@ -671,6 +715,7 @@ def make_uop(name: str, op: parser.InstDef, inputs: list[parser.InputEffect], uo annotations=op.annotations, stack=analyze_stack(op), caches=analyze_caches(inputs), + deferred_refs=analyze_deferred_refs(op), body=op.block.tokens, properties=properties, ) diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 2a339f8..37060e2 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -6,6 +6,7 @@ from analyzer import ( Uop, Properties, StackItem, + analysis_error, ) from cwriter import CWriter from typing import Callable, Mapping, TextIO, Iterator @@ -75,6 +76,7 @@ class Emitter: "DECREF_INPUTS": self.decref_inputs, "CHECK_EVAL_BREAKER": self.check_eval_breaker, "SYNC_SP": self.sync_sp, + "PyStackRef_FromPyObjectNew": self.py_stack_ref_from_py_object_new, } self.out = out @@ -203,6 +205,29 @@ class Emitter: if not uop.properties.ends_with_eval_breaker: self.out.emit_at("CHECK_EVAL_BREAKER();", tkn) + def py_stack_ref_from_py_object_new( + self, + tkn: Token, + tkn_iter: Iterator[Token], + uop: Uop, + stack: Stack, + inst: Instruction | None, + ) -> None: + self.out.emit(tkn) + emit_to(self.out, tkn_iter, "SEMI") + self.out.emit(";\n") + + target = uop.deferred_refs[tkn] + if target is None: + # An assignment we don't handle, such as to a pointer or array. + return + + # Flush the assignment to the stack. Note that we don't flush the + # stack pointer here, and instead are currently relying on initializing + # unused portions of the stack to NULL. + stack.flush_single_var(self.out, target, uop.stack.outputs) + + def emit_tokens( self, uop: Uop, diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py index 13aee94..d583159 100644 --- a/Tools/cases_generator/lexer.py +++ b/Tools/cases_generator/lexer.py @@ -242,7 +242,7 @@ def make_syntax_error( return SyntaxError(message, (filename, line, column, line_text)) -@dataclass(slots=True) +@dataclass(slots=True, frozen=True) class Token: filename: str kind: str diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index d2d598a..b44e48a 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -257,20 +257,25 @@ class Stack: return "\n".join(res) @staticmethod + def _do_emit(out: CWriter, var: StackItem, base_offset: StackOffset, + cast_type: str = "uintptr_t", extract_bits: bool = False) -> None: + cast = f"({cast_type})" if var.type else "" + bits = ".bits" if cast and not extract_bits else "" + if var.condition == "0": + return + if var.condition and var.condition != "1": + out.emit(f"if ({var.condition}) ") + out.emit( + f"stack_pointer[{base_offset.to_c()}]{bits} = {cast}{var.name};\n" + ) + + @staticmethod def _do_flush(out: CWriter, variables: list[Local], base_offset: StackOffset, top_offset: StackOffset, cast_type: str = "uintptr_t", extract_bits: bool = False) -> None: out.start_line() for var in variables: if var.cached and not var.in_memory and not var.item.peek and not var.name in UNUSED: - cast = f"({cast_type})" if var.item.type else "" - bits = ".bits" if cast and not extract_bits else "" - if var.condition == "0": - continue - if var.condition and var.condition != "1": - out.emit(f"if ({var.condition}) ") - out.emit( - f"stack_pointer[{base_offset.to_c()}]{bits} = {cast}{var.name};\n" - ) + Stack._do_emit(out, var.item, base_offset, cast_type, extract_bits) base_offset.push(var.item) if base_offset.to_c() != top_offset.to_c(): print("base", base_offset, "top", top_offset) @@ -290,6 +295,26 @@ class Stack: self.base_offset.clear() self.top_offset.clear() + def flush_single_var(self, out: CWriter, var_name: str, outputs: list[StackItem], + cast_type: str = "uintptr_t", extract_bits: bool = False) -> None: + assert any(var.name == var_name for var in outputs) + base_offset = self.base_offset.copy() + top_offset = self.top_offset.copy() + for var in self.variables: + base_offset.push(var.item) + for var in outputs: + if any(var == v.item for v in self.variables): + # The variable is already on the stack, such as a peeked value + # in the tier1 generator + continue + if var.name == var_name: + Stack._do_emit(out, var, base_offset, cast_type, extract_bits) + base_offset.push(var) + top_offset.push(var) + if base_offset.to_c() != top_offset.to_c(): + print("base", base_offset, "top", top_offset) + assert False + def peek_offset(self) -> str: return self.top_offset.to_c() diff --git a/Tools/cases_generator/tier1_generator.py b/Tools/cases_generator/tier1_generator.py index 6c13d1f..1ea31a0 100644 --- a/Tools/cases_generator/tier1_generator.py +++ b/Tools/cases_generator/tier1_generator.py @@ -93,6 +93,16 @@ def write_uop( if braces: emitter.emit("{\n") emitter.out.emit(stack.define_output_arrays(uop.stack.outputs)) + outputs: list[Local] = [] + for var in uop.stack.outputs: + if not var.peek: + if var.name in locals: + local = locals[var.name] + elif var.name == "unused": + local = Local.unused(var) + else: + local = Local.local(var) + outputs.append(local) for cache in uop.caches: if cache.name != "unused": @@ -109,15 +119,11 @@ def write_uop( emitter.emit(f"(void){cache.name};\n") offset += cache.size emitter.emit_tokens(uop, stack, inst) - for i, var in enumerate(uop.stack.outputs): - if not var.peek: - if var.name in locals: - local = locals[var.name] - elif var.name == "unused": - local = Local.unused(var) - else: - local = Local.local(var) - emitter.emit(stack.push(local)) + for output in outputs: + if output.name in uop.deferred_refs.values(): + # We've already spilled this when emitting tokens + output.cached = False + emitter.emit(stack.push(output)) if braces: emitter.out.start_line() emitter.emit("}\n") diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py index 8c212f7..461375c 100644 --- a/Tools/cases_generator/tier2_generator.py +++ b/Tools/cases_generator/tier2_generator.py @@ -166,6 +166,13 @@ def write_uop(uop: Uop, emitter: Emitter, stack: Stack) -> None: if local.defined: locals[local.name] = local emitter.emit(stack.define_output_arrays(uop.stack.outputs)) + outputs: list[Local] = [] + for var in uop.stack.outputs: + if var.name in locals: + local = locals[var.name] + else: + local = Local.local(var) + outputs.append(local) for cache in uop.caches: if cache.name != "unused": if cache.size == 4: @@ -175,12 +182,11 @@ def write_uop(uop: Uop, emitter: Emitter, stack: Stack) -> None: cast = f"uint{cache.size*16}_t" emitter.emit(f"{type}{cache.name} = ({cast})CURRENT_OPERAND();\n") emitter.emit_tokens(uop, stack, None) - for i, var in enumerate(uop.stack.outputs): - if var.name in locals: - local = locals[var.name] - else: - local = Local.local(var) - emitter.emit(stack.push(local)) + for output in outputs: + if output.name in uop.deferred_refs.values(): + # We've already spilled this when emitting tokens + output.cached = False + emitter.emit(stack.push(output)) except StackError as ex: raise analysis_error(ex.args[0], uop.body[0]) from None |