Diffstat (limited to 'Tools')
-rw-r--r--  Tools/cases_generator/analyzer.py            371
-rw-r--r--  Tools/cases_generator/cwriter.py               5
-rw-r--r--  Tools/cases_generator/generators_common.py   385
-rw-r--r--  Tools/cases_generator/lexer.py                 5
-rw-r--r--  Tools/cases_generator/optimizer_generator.py  78
-rw-r--r--  Tools/cases_generator/stack.py               433
-rw-r--r--  Tools/cases_generator/tier1_generator.py      54
-rw-r--r--  Tools/cases_generator/tier2_generator.py      90
8 files changed, 1040 insertions, 381 deletions
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index a4ce207..9c2981a 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -1,13 +1,13 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+import itertools
import lexer
import parser
import re
from typing import Optional
-
@dataclass
class Properties:
- escapes: bool
+ escaping_calls: dict[lexer.Token, tuple[lexer.Token, lexer.Token]]
error_with_pop: bool
error_without_pop: bool
deopts: bool
@@ -29,14 +29,21 @@ class Properties:
needs_prev: bool = False
def dump(self, indent: str) -> None:
- print(indent, end="")
- text = ", ".join([f"{key}: {value}" for (key, value) in self.__dict__.items()])
+ simple_properties = self.__dict__.copy()
+ del simple_properties["escaping_calls"]
+ text = "escaping_calls:\n"
+ for tkns in self.escaping_calls.values():
+ text += f"{indent} {tkns}\n"
+ text += ", ".join([f"{key}: {value}" for (key, value) in simple_properties.items()])
print(indent, text, sep="")
@staticmethod
def from_list(properties: list["Properties"]) -> "Properties":
+ escaping_calls: dict[lexer.Token, tuple[lexer.Token, lexer.Token]] = {}
+ for p in properties:
+ escaping_calls.update(p.escaping_calls)
return Properties(
- escapes=any(p.escapes for p in properties),
+ escaping_calls=escaping_calls,
error_with_pop=any(p.error_with_pop for p in properties),
error_without_pop=any(p.error_without_pop for p in properties),
deopts=any(p.deopts for p in properties),
@@ -59,9 +66,12 @@ class Properties:
def infallible(self) -> bool:
return not self.error_with_pop and not self.error_without_pop
+ @property
+ def escapes(self) -> bool:
+ return bool(self.escaping_calls)
SKIP_PROPERTIES = Properties(
- escapes=False,
+ escaping_calls={},
error_with_pop=False,
error_without_pop=False,
deopts=False,
@@ -156,6 +166,7 @@ class Uop:
stack: StackEffect
caches: list[CacheEntry]
deferred_refs: dict[lexer.Token, str | None]
+ output_stores: list[lexer.Token]
body: list[lexer.Token]
properties: Properties
_size: int = -1
@@ -322,11 +333,24 @@ def analyze_stack(
]
# Mark variables with matching names at the base of the stack as "peek"
modified = False
- for input, output in zip(inputs, outputs):
- if input.name == output.name and not modified:
- input.peek = output.peek = True
+ input_names: dict[str, lexer.Token] = { i.name : i.first_token for i in op.inputs if i.name != "unused" }
+ for input, output in itertools.zip_longest(inputs, outputs):
+ if output is None:
+ pass
+ elif input is None:
+ if output.name in input_names:
+ raise analysis_error(
+ f"Reuse of variable '{output.name}' at different stack location",
+ input_names[output.name])
+ elif input.name == output.name:
+ if not modified:
+ input.peek = output.peek = True
else:
modified = True
+ if output.name in input_names:
+ raise analysis_error(
+ f"Reuse of variable '{output.name}' at different stack location",
+ input_names[output.name])
if isinstance(op, parser.InstDef):
output_names = [out.name for out in outputs]
for input in inputs:
@@ -354,21 +378,46 @@ def analyze_caches(inputs: list[parser.InputEffect]) -> list[CacheEntry]:
return [CacheEntry(i.name, int(i.size)) for i in caches]
+def find_assignment_target(node: parser.InstDef, idx: int) -> list[lexer.Token]:
+ """Find the tokens that make up the left-hand side of an assignment"""
+ offset = 0
+ for tkn in reversed(node.block.tokens[: idx]):
+ if tkn.kind in {"SEMI", "LBRACE", "RBRACE"}:
+ return node.block.tokens[idx - offset : idx]
+ offset += 1
+ return []
+
+
+def find_stores_outputs(node: parser.InstDef) -> list[lexer.Token]:
+ res: list[lexer.Token] = []
+ outnames = { out.name for out in node.outputs }
+ innames = { inp.name for inp in node.inputs }
+ for idx, tkn in enumerate(node.block.tokens):
+ if tkn.kind == "AND":
+ name = node.block.tokens[idx+1]
+ if name.text in outnames:
+ res.append(name)
+ if tkn.kind != "EQUALS":
+ continue
+ lhs = find_assignment_target(node, idx)
+ assert lhs
+ while lhs and lhs[0].kind == "COMMENT":
+ lhs = lhs[1:]
+ if len(lhs) != 1 or lhs[0].kind != "IDENTIFIER":
+ continue
+ name = lhs[0]
+ if name.text in innames:
+ raise analysis_error(f"Cannot assign to input variable '{name.text}'", name)
+ if name.text in outnames:
+ res.append(name)
+ return res
+
def analyze_deferred_refs(node: parser.InstDef) -> dict[lexer.Token, str | None]:
"""Look for PyStackRef_FromPyObjectNew() calls"""
- def find_assignment_target(idx: int) -> list[lexer.Token]:
- """Find the tokens that make up the left-hand side of an assignment"""
- offset = 1
- for tkn in reversed(node.block.tokens[: idx - 1]):
- if tkn.kind == "SEMI" or tkn.kind == "LBRACE" or tkn.kind == "RBRACE":
- return node.block.tokens[idx - offset : idx - 1]
- offset += 1
- return []
-
def in_frame_push(idx: int) -> bool:
for tkn in reversed(node.block.tokens[: idx - 1]):
- if tkn.kind == "SEMI" or tkn.kind == "LBRACE" or tkn.kind == "RBRACE":
+ if tkn.kind in {"SEMI", "LBRACE", "RBRACE"}:
return False
if tkn.kind == "IDENTIFIER" and tkn.text == "_PyFrame_PushUnchecked":
return True
@@ -386,7 +435,7 @@ def analyze_deferred_refs(node: parser.InstDef) -> dict[lexer.Token, str | None]
continue
raise analysis_error("Expected '=' before PyStackRef_FromPyObjectNew", tkn)
- lhs = find_assignment_target(idx)
+ lhs = find_assignment_target(node, idx - 1)
if len(lhs) == 0:
raise analysis_error(
"PyStackRef_FromPyObjectNew() must be assigned to an output", tkn
@@ -406,9 +455,13 @@ def analyze_deferred_refs(node: parser.InstDef) -> dict[lexer.Token, str | None]
)
name = lhs[0].text
- if not any(var.name == name for var in node.outputs):
+ match = (
+ any(var.name == name for var in node.inputs)
+ or any(var.name == name for var in node.outputs)
+ )
+ if not match:
raise analysis_error(
- f"PyStackRef_FromPyObjectNew() must be assigned to an output, not '{name}'",
+ f"PyStackRef_FromPyObjectNew() must be assigned to an input or output, not '{name}'",
tkn,
)
@@ -461,125 +514,203 @@ def has_error_without_pop(op: parser.InstDef) -> bool:
NON_ESCAPING_FUNCTIONS = (
- "PyStackRef_FromPyObjectSteal",
+ "PyCFunction_GET_FLAGS",
+ "PyCFunction_GET_FUNCTION",
+ "PyCFunction_GET_SELF",
+ "PyCell_GetRef",
+ "PyCell_New",
+ "PyCell_SwapTakeRef",
+ "PyExceptionInstance_Class",
+ "PyException_GetCause",
+ "PyException_GetContext",
+ "PyException_GetTraceback",
+ "PyFloat_AS_DOUBLE",
+ "PyFloat_FromDouble",
+ "PyFunction_GET_CODE",
+ "PyFunction_GET_GLOBALS",
+ "PyList_GET_ITEM",
+ "PyList_GET_SIZE",
+ "PyList_SET_ITEM",
+ "PyLong_AsLong",
+ "PyLong_FromLong",
+ "PyLong_FromSsize_t",
+ "PySlice_New",
"PyStackRef_AsPyObjectBorrow",
+ "PyStackRef_AsPyObjectNew",
"PyStackRef_AsPyObjectSteal",
+ "PyStackRef_CLEAR",
"PyStackRef_CLOSE",
"PyStackRef_DUP",
- "PyStackRef_CLEAR",
+ "PyStackRef_False",
+ "PyStackRef_FromPyObjectImmortal",
+ "PyStackRef_FromPyObjectNew",
+ "PyStackRef_FromPyObjectSteal",
+ "PyStackRef_Is",
"PyStackRef_IsNull",
+ "PyStackRef_None",
"PyStackRef_TYPE",
- "PyStackRef_False",
"PyStackRef_True",
- "PyStackRef_None",
- "PyStackRef_Is",
- "PyStackRef_FromPyObjectNew",
- "PyStackRef_AsPyObjectNew",
- "PyStackRef_FromPyObjectImmortal",
- "Py_INCREF",
- "_PyManagedDictPointer_IsValues",
- "_PyObject_GetManagedDict",
- "_PyObject_ManagedDictPointer",
- "_PyObject_InlineValues",
- "_PyDictValues_AddToInsertionOrder",
- "Py_DECREF",
- "Py_XDECREF",
- "_Py_DECREF_SPECIALIZED",
- "DECREF_INPUTS_AND_REUSE_FLOAT",
- "PyUnicode_Append",
- "_PyLong_IsZero",
- "Py_SIZE",
- "Py_TYPE",
- "PyList_GET_ITEM",
- "PyList_SET_ITEM",
"PyTuple_GET_ITEM",
- "PyList_GET_SIZE",
"PyTuple_GET_SIZE",
+ "PyType_HasFeature",
+ "PyUnicode_Append",
+ "PyUnicode_Concat",
+ "PyUnicode_GET_LENGTH",
+ "PyUnicode_READ_CHAR",
"Py_ARRAY_LENGTH",
+ "Py_CLEAR",
+ "Py_DECREF",
+ "Py_FatalError",
+ "Py_INCREF",
+ "Py_IS_TYPE",
+ "Py_NewRef",
+ "Py_REFCNT",
+ "Py_SIZE",
+ "Py_TYPE",
+ "Py_UNREACHABLE",
"Py_Unicode_GET_LENGTH",
- "PyUnicode_READ_CHAR",
- "_Py_SINGLETON",
- "PyUnicode_GET_LENGTH",
- "_PyLong_IsCompact",
- "_PyLong_IsNonNegativeCompact",
+ "Py_XDECREF",
+ "_PyCode_CODE",
+ "_PyDictValues_AddToInsertionOrder",
+ "_PyErr_Occurred",
+ "_PyEval_FrameClearAndPop",
+ "_PyFrame_GetCode",
+ "_PyFrame_IsIncomplete",
+ "_PyFrame_PushUnchecked",
+ "_PyFrame_SetStackPointer",
+ "_PyFrame_StackPush",
+ "_PyFunction_SetVersion",
+ "_PyGen_GetGeneratorFromFrame",
+ "_PyInterpreterState_GET",
+ "_PyList_AppendTakeRef",
+ "_PyList_FromStackRefSteal",
+ "_PyList_ITEMS",
+ "_PyLong_Add",
"_PyLong_CompactValue",
"_PyLong_DigitCount",
- "_Py_NewRef",
- "_Py_IsImmortal",
- "PyLong_FromLong",
- "_Py_STR",
- "_PyLong_Add",
+ "_PyLong_IsCompact",
+ "_PyLong_IsNonNegativeCompact",
+ "_PyLong_IsZero",
"_PyLong_Multiply",
"_PyLong_Subtract",
- "Py_NewRef",
- "_PyList_ITEMS",
- "_PyTuple_ITEMS",
- "_PyList_AppendTakeRef",
- "_Py_atomic_load_uintptr_relaxed",
- "_PyFrame_GetCode",
+ "_PyManagedDictPointer_IsValues",
+ "_PyObject_GC_IS_TRACKED",
+ "_PyObject_GC_MAY_BE_TRACKED",
+ "_PyObject_GC_TRACK",
+ "_PyObject_GetManagedDict",
+ "_PyObject_InlineValues",
+ "_PyObject_ManagedDictPointer",
"_PyThreadState_HasStackSpace",
- "_PyUnicode_Equal",
- "_PyFrame_SetStackPointer",
+ "_PyTuple_FromArraySteal",
+ "_PyTuple_FromStackRefSteal",
+ "_PyTuple_ITEMS",
"_PyType_HasFeature",
- "PyUnicode_Concat",
- "PySlice_New",
+ "_PyType_NewManagedObject",
+ "_PyUnicode_Equal",
+ "_PyUnicode_JoinArray",
+ "_Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY",
+ "_Py_DECREF_NO_DEALLOC",
+ "_Py_DECREF_SPECIALIZED",
+ "_Py_EnterRecursiveCallTstateUnchecked",
+ "_Py_ID",
+ "_Py_IsImmortal",
+ "_Py_IsImmortalLoose",
"_Py_LeaveRecursiveCallPy",
- "CALL_STAT_INC",
- "STAT_INC",
+ "_Py_LeaveRecursiveCallTstate",
+ "_Py_NewRef",
+ "_Py_SINGLETON",
+ "_Py_STR",
+ "_Py_atomic_load_uintptr_relaxed",
+ "_Py_set_eval_breaker_bit",
+ "advance_backoff_counter",
+ "assert",
+ "backoff_counter_triggers",
+ "initial_temperature_backoff_counter",
"maybe_lltrace_resume_frame",
- "_PyUnicode_JoinArray",
- "_PyEval_FrameClearAndPop",
- "_PyFrame_StackPush",
- "PyCell_New",
- "PyFloat_AS_DOUBLE",
- "_PyFrame_PushUnchecked",
- "Py_FatalError",
- "STACKREFS_TO_PYOBJECTS",
- "STACKREFS_TO_PYOBJECTS_CLEANUP",
- "CONVERSION_FAILED",
- "_PyList_FromStackRefSteal",
- "_PyTuple_FromArraySteal",
- "_PyTuple_FromStackRefSteal",
- "_Py_set_eval_breaker_bit"
+ "restart_backoff_counter",
)
-ESCAPING_FUNCTIONS = (
- "import_name",
- "import_from",
-)
-
-
-def makes_escaping_api_call(instr: parser.InstDef) -> bool:
- if "CALL_INTRINSIC" in instr.name:
- return True
- if instr.name == "_BINARY_OP":
- return True
- tkns = iter(instr.tokens)
- for tkn in tkns:
- if tkn.kind != lexer.IDENTIFIER:
- continue
+def find_stmt_start(node: parser.InstDef, idx: int) -> lexer.Token:
+ assert idx < len(node.block.tokens)
+ while True:
+ tkn = node.block.tokens[idx-1]
+ if tkn.kind in {"SEMI", "LBRACE", "RBRACE"}:
+ break
+ idx -= 1
+ assert idx > 0
+ while node.block.tokens[idx].kind == "COMMENT":
+ idx += 1
+ return node.block.tokens[idx]
+
+
+def find_stmt_end(node: parser.InstDef, idx: int) -> lexer.Token:
+ assert idx < len(node.block.tokens)
+ while True:
+ idx += 1
+ tkn = node.block.tokens[idx]
+ if tkn.kind == "SEMI":
+ return node.block.tokens[idx+1]
+
+def check_escaping_calls(instr: parser.InstDef, escapes: dict[lexer.Token, tuple[lexer.Token, lexer.Token]]) -> None:
+ calls = {escapes[t][0] for t in escapes}
+ in_if = 0
+ tkn_iter = iter(instr.block.tokens)
+ for tkn in tkn_iter:
+ if tkn.kind == "IF":
+ next(tkn_iter)
+ in_if = 1
+ if tkn.kind == "IDENTIFIER" and tkn.text in ("DEOPT_IF", "ERROR_IF"):
+ next(tkn_iter)
+ in_if = 1
+ elif tkn.kind == "LPAREN" and in_if:
+ in_if += 1
+ elif tkn.kind == "RPAREN":
+ if in_if:
+ in_if -= 1
+ elif tkn in calls and in_if:
+ raise analysis_error(f"Escaping call '{tkn.text} in condition", tkn)
+
+def find_escaping_api_calls(instr: parser.InstDef) -> dict[lexer.Token, tuple[lexer.Token, lexer.Token]]:
+ result: dict[lexer.Token, tuple[lexer.Token, lexer.Token]] = {}
+ tokens = instr.block.tokens
+ for idx, tkn in enumerate(tokens):
try:
- next_tkn = next(tkns)
- except StopIteration:
- return False
+ next_tkn = tokens[idx+1]
+ except IndexError:
+ break
+ if tkn.kind == "SWITCH":
+ raise analysis_error(f"switch statements are not supported due to their complex flow control. Sorry.", tkn)
if next_tkn.kind != lexer.LPAREN:
continue
- if tkn.text in ESCAPING_FUNCTIONS:
- return True
- if tkn.text == "tp_vectorcall":
- return True
- if not tkn.text.startswith("Py") and not tkn.text.startswith("_Py"):
- continue
- if tkn.text.endswith("Check"):
- continue
- if tkn.text.startswith("Py_Is"):
- continue
- if tkn.text.endswith("CheckExact"):
- continue
- if tkn.text in NON_ESCAPING_FUNCTIONS:
+ if tkn.kind == lexer.IDENTIFIER:
+ if tkn.text.upper() == tkn.text:
+ # simple macro
+ continue
+ #if not tkn.text.startswith(("Py", "_Py", "monitor")):
+ # continue
+ if tkn.text.startswith(("sym_", "optimize_")):
+ # Optimize functions
+ continue
+ if tkn.text.endswith("Check"):
+ continue
+ if tkn.text.startswith("Py_Is"):
+ continue
+ if tkn.text.endswith("CheckExact"):
+ continue
+ if tkn.text in NON_ESCAPING_FUNCTIONS:
+ continue
+ elif tkn.kind == "RPAREN":
+ prev = tokens[idx-1]
+ if prev.text.endswith("_t") or prev.text == "*" or prev.text == "int":
+ #cast
+ continue
+ elif tkn.kind != "RBRACKET":
continue
- return True
- return False
+ start = find_stmt_start(instr, idx)
+ end = find_stmt_end(instr, idx)
+ result[start] = tkn, end
+ check_escaping_calls(instr, result)
+ return result
EXITS = {
@@ -651,6 +782,7 @@ def effect_depends_on_oparg_1(op: parser.InstDef) -> bool:
def compute_properties(op: parser.InstDef) -> Properties:
+ escaping_calls = find_escaping_api_calls(op)
has_free = (
variable_used(op, "PyCell_New")
or variable_used(op, "PyCell_GetRef")
@@ -671,7 +803,7 @@ def compute_properties(op: parser.InstDef) -> Properties:
error_with_pop = has_error_with_pop(op)
error_without_pop = has_error_without_pop(op)
return Properties(
- escapes=makes_escaping_api_call(op),
+ escaping_calls=escaping_calls,
error_with_pop=error_with_pop,
error_without_pop=error_without_pop,
deopts=deopts_if,
@@ -706,6 +838,7 @@ def make_uop(
stack=analyze_stack(op),
caches=analyze_caches(inputs),
deferred_refs=analyze_deferred_refs(op),
+ output_stores=find_stores_outputs(op),
body=op.block.tokens,
properties=compute_properties(op),
)
@@ -726,6 +859,7 @@ def make_uop(
stack=analyze_stack(op, bit),
caches=analyze_caches(inputs),
deferred_refs=analyze_deferred_refs(op),
+ output_stores=find_stores_outputs(op),
body=op.block.tokens,
properties=properties,
)
@@ -749,6 +883,7 @@ def make_uop(
stack=analyze_stack(op),
caches=analyze_caches(inputs),
deferred_refs=analyze_deferred_refs(op),
+ output_stores=find_stores_outputs(op),
body=op.block.tokens,
properties=properties,
)
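
The escaping-call analysis above hinges on finding statement boundaries: find_stmt_start walks left until it passes a ';', '{' or '}', then skips leading comments, and find_stmt_end walks right to the next ';'. A minimal standalone sketch of that scan, using a simplified token type in place of the real lexer.Token:

    from dataclasses import dataclass

    @dataclass
    class Tok:
        kind: str
        text: str

    def stmt_start(tokens: list[Tok], idx: int) -> int:
        # Walk left until the previous token closes a statement or block.
        while idx > 0 and tokens[idx - 1].kind not in {"SEMI", "LBRACE", "RBRACE"}:
            idx -= 1
        while tokens[idx].kind == "COMMENT":
            idx += 1
        return idx

    toks = [Tok("IDENTIFIER", "res"), Tok("EQUALS", "="),
            Tok("IDENTIFIER", "call"), Tok("LPAREN", "("),
            Tok("RPAREN", ")"), Tok("SEMI", ";")]
    # The escaping call at index 2 belongs to the statement starting at 'res'.
    assert stmt_start(toks, 2) == 0
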
diff --git a/Tools/cases_generator/cwriter.py b/Tools/cases_generator/cwriter.py
index 069f017..8cba912 100644
--- a/Tools/cases_generator/cwriter.py
+++ b/Tools/cases_generator/cwriter.py
@@ -18,8 +18,9 @@ class CWriter:
def set_position(self, tkn: Token) -> None:
if self.last_token is not None:
- if self.last_token.line < tkn.line:
+ if self.last_token.end_line < tkn.line:
self.out.write("\n")
+ if self.last_token.line < tkn.line:
if self.line_directives:
self.out.write(f'#line {tkn.line} "{tkn.filename}"\n')
self.out.write(" " * self.indents[-1])
@@ -91,6 +92,8 @@ class CWriter:
self.maybe_dedent(tkn.text)
self.set_position(tkn)
self.emit_text(tkn.text)
+ if tkn.kind == "CMACRO":
+ self.newline = True
self.maybe_indent(tkn.text)
def emit_str(self, txt: str) -> None:
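
The set_position() change distinguishes a token's start line from its end line, so that multi-line tokens (such as the broader CMACRO kind introduced in the lexer below) don't produce spurious blank lines. A sketch of the rule, with illustrative names:

    def emits_newline(prev_end_line: int, next_start_line: int) -> bool:
        # A newline is written only when the previous token *ends*
        # before the next one starts.
        return prev_end_line < next_start_line

    assert emits_newline(3, 4)        # next token on a later line
    assert not emits_newline(4, 4)    # same line: nothing written
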
diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py
index 4cfd4ad..f32a20b 100644
--- a/Tools/cases_generator/generators_common.py
+++ b/Tools/cases_generator/generators_common.py
@@ -9,10 +9,39 @@ from analyzer import (
analysis_error,
)
from cwriter import CWriter
-from typing import Callable, Mapping, TextIO, Iterator
+from typing import Callable, Mapping, TextIO, Iterator, Iterable
from lexer import Token
-from stack import Stack
+from stack import Stack, Local, Storage, StackError
+# Set this to true for voluminous output showing state of stack and locals
+PRINT_STACKS = False
+
+class TokenIterator:
+
+ look_ahead: Token | None
+ iterator: Iterator[Token]
+
+ def __init__(self, tkns: Iterable[Token]):
+ self.iterator = iter(tkns)
+ self.look_ahead = None
+
+ def __iter__(self) -> "TokenIterator":
+ return self
+
+ def __next__(self) -> Token:
+ if self.look_ahead is None:
+ return next(self.iterator)
+ else:
+ res = self.look_ahead
+ self.look_ahead = None
+ return res
+
+ def peek(self) -> Token | None:
+ if self.look_ahead is None:
+ for tkn in self.iterator:
+ self.look_ahead = tkn
+ break
+ return self.look_ahead
ROOT = Path(__file__).parent.parent.parent
DEFAULT_INPUT = (ROOT / "Python/bytecodes.c").absolute().as_posix()
@@ -47,22 +76,28 @@ def write_header(
)
-def emit_to(out: CWriter, tkn_iter: Iterator[Token], end: str) -> None:
+def emit_to(out: CWriter, tkn_iter: TokenIterator, end: str) -> Token:
parens = 0
for tkn in tkn_iter:
if tkn.kind == end and parens == 0:
- return
+ return tkn
if tkn.kind == "LPAREN":
parens += 1
if tkn.kind == "RPAREN":
parens -= 1
out.emit(tkn)
+ raise analysis_error(f"Expecting {end}. Reached end of file", tkn)
ReplacementFunctionType = Callable[
- [Token, Iterator[Token], Uop, Stack, Instruction | None], None
+ [Token, TokenIterator, Uop, Storage, Instruction | None], bool
]
+def always_true(tkn: Token | None) -> bool:
+ if tkn is None:
+ return False
+ return tkn.text in {"true", "1"}
+
class Emitter:
out: CWriter
@@ -75,21 +110,41 @@ class Emitter:
"ERROR_IF": self.error_if,
"ERROR_NO_POP": self.error_no_pop,
"DECREF_INPUTS": self.decref_inputs,
+ "DEAD": self.kill,
+ "INPUTS_DEAD": self.kill_inputs,
"SYNC_SP": self.sync_sp,
- "PyStackRef_FromPyObjectNew": self.py_stack_ref_from_py_object_new,
+ "SAVE_STACK": self.save_stack,
+ "RELOAD_STACK": self.reload_stack,
+ "PyStackRef_CLOSE": self.stackref_close,
+ "PyStackRef_AsPyObjectSteal": self.stackref_steal,
+ "DISPATCH": self.dispatch
}
self.out = out
+ def dispatch(
+ self,
+ tkn: Token,
+ tkn_iter: TokenIterator,
+ uop: Uop,
+ storage: Storage,
+ inst: Instruction | None,
+ ) -> bool:
+ self.emit(tkn)
+ return False
+
def deopt_if(
self,
tkn: Token,
- tkn_iter: Iterator[Token],
+ tkn_iter: TokenIterator,
uop: Uop,
- unused: Stack,
+ storage: Storage,
inst: Instruction | None,
- ) -> None:
+ ) -> bool:
self.out.emit_at("DEOPT_IF", tkn)
- self.out.emit(next(tkn_iter))
+ lparen = next(tkn_iter)
+ self.emit(lparen)
+ assert lparen.kind == "LPAREN"
+ first_tkn = tkn_iter.peek()
emit_to(self.out, tkn_iter, "RPAREN")
next(tkn_iter) # Semi colon
self.out.emit(", ")
@@ -97,25 +152,30 @@ class Emitter:
assert inst.family is not None
self.out.emit(inst.family.name)
self.out.emit(");\n")
+ return not always_true(first_tkn)
exit_if = deopt_if
def error_if(
self,
tkn: Token,
- tkn_iter: Iterator[Token],
+ tkn_iter: TokenIterator,
uop: Uop,
- stack: Stack,
+ storage: Storage,
inst: Instruction | None,
- ) -> None:
+ ) -> bool:
self.out.emit_at("if ", tkn)
- self.out.emit(next(tkn_iter))
+ lparen = next(tkn_iter)
+ self.emit(lparen)
+ assert lparen.kind == "LPAREN"
+ first_tkn = tkn_iter.peek()
emit_to(self.out, tkn_iter, "COMMA")
label = next(tkn_iter).text
next(tkn_iter) # RPAREN
next(tkn_iter) # Semi colon
self.out.emit(") ")
- c_offset = stack.peek_offset()
+ storage.clear_inputs("at ERROR_IF")
+ c_offset = storage.stack.peek_offset()
try:
offset = -int(c_offset)
except ValueError:
@@ -130,33 +190,35 @@ class Emitter:
self.out.emit(";\n")
else:
self.out.emit("{\n")
- stack.flush_locally(self.out)
+ storage.copy().flush(self.out)
self.out.emit("goto ")
self.out.emit(label)
self.out.emit(";\n")
self.out.emit("}\n")
+ return not always_true(first_tkn)
def error_no_pop(
self,
tkn: Token,
- tkn_iter: Iterator[Token],
+ tkn_iter: TokenIterator,
uop: Uop,
- stack: Stack,
+ storage: Storage,
inst: Instruction | None,
- ) -> None:
+ ) -> bool:
next(tkn_iter) # LPAREN
next(tkn_iter) # RPAREN
next(tkn_iter) # Semi colon
self.out.emit_at("goto error;", tkn)
+ return False
def decref_inputs(
self,
tkn: Token,
- tkn_iter: Iterator[Token],
+ tkn_iter: TokenIterator,
uop: Uop,
- stack: Stack,
+ storage: Storage,
inst: Instruction | None,
- ) -> None:
+ ) -> bool:
next(tkn_iter)
next(tkn_iter)
next(tkn_iter)
@@ -178,59 +240,278 @@ class Emitter:
self.out.emit(f"PyStackRef_XCLOSE({var.name});\n")
else:
self.out.emit(f"PyStackRef_CLOSE({var.name});\n")
+ for input in storage.inputs:
+ input.defined = False
+ return True
+
+ def kill_inputs(
+ self,
+ tkn: Token,
+ tkn_iter: TokenIterator,
+ uop: Uop,
+ storage: Storage,
+ inst: Instruction | None,
+ ) -> bool:
+ next(tkn_iter)
+ next(tkn_iter)
+ next(tkn_iter)
+ for var in storage.inputs:
+ var.defined = False
+ return True
+
+ def kill(
+ self,
+ tkn: Token,
+ tkn_iter: TokenIterator,
+ uop: Uop,
+ storage: Storage,
+ inst: Instruction | None,
+ ) -> bool:
+ next(tkn_iter)
+ name_tkn = next(tkn_iter)
+ name = name_tkn.text
+ next(tkn_iter)
+ next(tkn_iter)
+ for var in storage.inputs:
+ if var.name == name:
+ var.defined = False
+ break
+ else:
+ raise analysis_error(f"'{name}' is not a live input-only variable", name_tkn)
+ return True
+
+ def stackref_close(
+ self,
+ tkn: Token,
+ tkn_iter: TokenIterator,
+ uop: Uop,
+ storage: Storage,
+ inst: Instruction | None,
+ ) -> bool:
+ self.out.emit(tkn)
+ tkn = next(tkn_iter)
+ assert tkn.kind == "LPAREN"
+ self.out.emit(tkn)
+ name = next(tkn_iter)
+ self.out.emit(name)
+ if name.kind == "IDENTIFIER":
+ for var in storage.inputs:
+ if var.name == name.text:
+ var.defined = False
+ rparen = emit_to(self.out, tkn_iter, "RPAREN")
+ self.emit(rparen)
+ return True
+
+ stackref_steal = stackref_close
def sync_sp(
self,
tkn: Token,
- tkn_iter: Iterator[Token],
+ tkn_iter: TokenIterator,
uop: Uop,
- stack: Stack,
+ storage: Storage,
inst: Instruction | None,
- ) -> None:
+ ) -> bool:
next(tkn_iter)
next(tkn_iter)
next(tkn_iter)
- stack.flush(self.out)
+ storage.clear_inputs("when syncing stack")
+ storage.flush(self.out)
+ self._print_storage(storage)
+ return True
+
+ def emit_save(self, storage: Storage) -> None:
+ storage.save(self.out)
+ self._print_storage(storage)
- def py_stack_ref_from_py_object_new(
+ def save_stack(
self,
tkn: Token,
- tkn_iter: Iterator[Token],
+ tkn_iter: TokenIterator,
uop: Uop,
- stack: Stack,
+ storage: Storage,
inst: Instruction | None,
- ) -> None:
- target = uop.deferred_refs[tkn]
- if target is None:
- # An assignment we don't handle, such as to a pointer or array.
- self.out.emit(tkn)
- return
+ ) -> bool:
+ next(tkn_iter)
+ next(tkn_iter)
+ next(tkn_iter)
+ self.emit_save(storage)
+ return True
+
+ def emit_reload(self, storage: Storage) -> None:
+ storage.reload(self.out)
+ self._print_storage(storage)
+
+ def reload_stack(
+ self,
+ tkn: Token,
+ tkn_iter: TokenIterator,
+ uop: Uop,
+ storage: Storage,
+ inst: Instruction | None,
+ ) -> bool:
+ next(tkn_iter)
+ next(tkn_iter)
+ next(tkn_iter)
+ self.emit_reload(storage)
+ return True
+ def _print_storage(self, storage: Storage) -> None:
+ if PRINT_STACKS:
+ self.out.start_line()
+ self.emit(storage.as_comment())
+ self.out.start_line()
+
+ def _emit_if(
+ self,
+ tkn_iter: TokenIterator,
+ uop: Uop,
+ storage: Storage,
+ inst: Instruction | None,
+ ) -> tuple[bool, Token, Storage]:
+ """Returns (reachable?, closing '}', stack)."""
+ tkn = next(tkn_iter)
+ assert tkn.kind == "LPAREN"
self.out.emit(tkn)
- emit_to(self.out, tkn_iter, "SEMI")
- self.out.emit(";\n")
+ rparen = emit_to(self.out, tkn_iter, "RPAREN")
+ self.emit(rparen)
+ if_storage = storage.copy()
+ reachable, rbrace, if_storage = self._emit_block(tkn_iter, uop, if_storage, inst, True)
+ try:
+ maybe_else = tkn_iter.peek()
+ if maybe_else and maybe_else.kind == "ELSE":
+ self._print_storage(storage)
+ self.emit(rbrace)
+ self.emit(next(tkn_iter))
+ maybe_if = tkn_iter.peek()
+ if maybe_if and maybe_if.kind == "IF":
+ #Emit extra braces around the if to get scoping right
+ self.emit(" {\n")
+ self.emit(next(tkn_iter))
+ else_reachable, rbrace, else_storage = self._emit_if(tkn_iter, uop, storage, inst)
+ self.out.start_line()
+ self.emit("}\n")
+ else:
+ else_reachable, rbrace, else_storage = self._emit_block(tkn_iter, uop, storage, inst, True)
+ if not reachable:
+ # Discard the if storage
+ reachable = else_reachable
+ storage = else_storage
+ elif not else_reachable:
+ # Discard the else storage
+ storage = if_storage
+ reachable = True
+ else:
+ if PRINT_STACKS:
+ self.emit("/* Merge */\n")
+ else_storage.merge(if_storage, self.out)
+ storage = else_storage
+ self._print_storage(storage)
+ else:
+ if reachable:
+ if PRINT_STACKS:
+ self.emit("/* Merge */\n")
+ if_storage.merge(storage, self.out)
+ storage = if_storage
+ self._print_storage(storage)
+ else:
+ # Discard the if storage
+ reachable = True
+ except StackError as ex:
+ self._print_storage(if_storage)
+ raise analysis_error(ex.args[0], rbrace) # from None
+ return reachable, rbrace, storage
+
+ def _emit_block(
+ self,
+ tkn_iter: TokenIterator,
+ uop: Uop,
+ storage: Storage,
+ inst: Instruction | None,
+ emit_first_brace: bool
+ ) -> tuple[bool, Token, Storage]:
+ """ Returns (reachable?, closing '}', stack)."""
+ braces = 1
+ out_stores = set(uop.output_stores)
+ tkn = next(tkn_iter)
+ reload: Token | None = None
+ try:
+ reachable = True
+ line : int = -1
+ if tkn.kind != "LBRACE":
+ raise analysis_error(f"PEP 7: expected '{{', found: {tkn.text}", tkn)
+ escaping_calls = uop.properties.escaping_calls
+ if emit_first_brace:
+ self.emit(tkn)
+ self._print_storage(storage)
+ for tkn in tkn_iter:
+ if PRINT_STACKS and tkn.line != line:
+ self.out.start_line()
+ self.emit(storage.as_comment())
+ self.out.start_line()
+ line = tkn.line
+ if tkn in escaping_calls:
+ if tkn != reload:
+ self.emit_save(storage)
+ _, reload = escaping_calls[tkn]
+ elif tkn == reload:
+ self.emit_reload(storage)
+ if tkn.kind == "LBRACE":
+ self.out.emit(tkn)
+ braces += 1
+ elif tkn.kind == "RBRACE":
+ self._print_storage(storage)
+ braces -= 1
+ if braces == 0:
+ return reachable, tkn, storage
+ self.out.emit(tkn)
+ elif tkn.kind == "GOTO":
+ reachable = False
+ self.out.emit(tkn)
+ elif tkn.kind == "IDENTIFIER":
+ if tkn.text in self._replacers:
+ if not self._replacers[tkn.text](tkn, tkn_iter, uop, storage, inst):
+ reachable = False
+ else:
+ if tkn in out_stores:
+ for out in storage.outputs:
+ if out.name == tkn.text:
+ out.defined = True
+ out.in_memory = False
+ break
+ if tkn.text.startswith("DISPATCH"):
+ self._print_storage(storage)
+ reachable = False
+ self.out.emit(tkn)
+ elif tkn.kind == "IF":
+ self.out.emit(tkn)
+ if_reachable, rbrace, storage = self._emit_if(tkn_iter, uop, storage, inst)
+ if reachable:
+ reachable = if_reachable
+ self.out.emit(rbrace)
+ else:
+ self.out.emit(tkn)
+ except StackError as ex:
+ raise analysis_error(ex.args[0], tkn) from None
+ raise analysis_error("Expecting closing brace. Reached end of file", tkn)
- # Flush the assignment to the stack. Note that we don't flush the
- # stack pointer here, and instead are currently relying on initializing
- # unused portions of the stack to NULL.
- stack.flush_single_var(self.out, target, uop.stack.outputs)
def emit_tokens(
self,
uop: Uop,
- stack: Stack,
+ storage: Storage,
inst: Instruction | None,
- ) -> None:
- tkns = uop.body[1:-1]
- if not tkns:
- return
- tkn_iter = iter(tkns)
+ ) -> Storage:
+ tkn_iter = TokenIterator(uop.body)
self.out.start_line()
- for tkn in tkn_iter:
- if tkn.kind == "IDENTIFIER" and tkn.text in self._replacers:
- self._replacers[tkn.text](tkn, tkn_iter, uop, stack, inst)
- else:
- self.out.emit(tkn)
+ _, rbrace, storage = self._emit_block(tkn_iter, uop, storage, inst, False)
+ try:
+ self._print_storage(storage)
+ storage.push_outputs()
+ self._print_storage(storage)
+ except StackError as ex:
+ raise analysis_error(ex.args[0], rbrace)
+ return storage
def emit(self, txt: str | Token) -> None:
self.out.emit(txt)
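
TokenIterator adds one-token lookahead on top of a plain iterator, which the replacement handlers above use to peek at the first token of a condition without consuming it. A standalone restatement of the contract (the real class yields lexer Tokens; PeekIter here is an illustrative stand-in):

    from typing import Iterable, Iterator

    class PeekIter:
        def __init__(self, items: Iterable[str]) -> None:
            self._it: Iterator[str] = iter(items)
            self._ahead: str | None = None

        def __iter__(self) -> "PeekIter":
            return self

        def __next__(self) -> str:
            if self._ahead is not None:
                res, self._ahead = self._ahead, None
                return res
            return next(self._it)

        def peek(self) -> str | None:
            if self._ahead is None:
                self._ahead = next(self._it, None)
            return self._ahead

    it = PeekIter(["DEOPT_IF", "(", "true", ")"])
    assert it.peek() == "DEOPT_IF"   # peek() never consumes...
    assert next(it) == "DEOPT_IF"    # ...so next() still sees the token
    assert next(it) == "(" and it.peek() == "true"
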
diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py
index d583159..37f9639 100644
--- a/Tools/cases_generator/lexer.py
+++ b/Tools/cases_generator/lexer.py
@@ -79,7 +79,7 @@ for op in operators:
opmap = {pattern.replace("\\", "") or "\\": op for op, pattern in operators.items()}
# Macros
-macro = r"# *(ifdef|ifndef|undef|define|error|endif|if|else|include|#)"
+macro = r"#.*\n"
CMACRO = "CMACRO"
id_re = r"[a-zA-Z_][0-9a-zA-Z_]*"
@@ -333,6 +333,9 @@ def tokenize(src: str, line: int = 1, filename: str = "") -> Iterator[Token]:
line += newlines
else:
begin = line, start - linestart
+ if kind == CMACRO:
+ linestart = end
+ line += 1
if kind != "\n":
yield Token(
filename, kind, text, begin, (line, start - linestart + len(text))
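
The new macro pattern treats an entire '#...' line, newline included, as one CMACRO token, and the tokenizer then advances its line bookkeeping past it. A quick demonstration of what the pattern matches:

    import re

    macro = re.compile(r"#.*\n")   # the new CMACRO pattern
    src = "#define FOO(x) ((x) + 1)\nint y;\n"
    m = macro.match(src)
    # '.' stops at the newline, so the whole directive line is one match.
    assert m is not None and m.group() == "#define FOO(x) ((x) + 1)\n"
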
diff --git a/Tools/cases_generator/optimizer_generator.py b/Tools/cases_generator/optimizer_generator.py
index b74f627..7a1dfe1 100644
--- a/Tools/cases_generator/optimizer_generator.py
+++ b/Tools/cases_generator/optimizer_generator.py
@@ -18,11 +18,12 @@ from generators_common import (
ROOT,
write_header,
Emitter,
+ TokenIterator,
)
from cwriter import CWriter
from typing import TextIO, Iterator
from lexer import Token
-from stack import Local, Stack, StackError
+from stack import Local, Stack, StackError, Storage
DEFAULT_OUTPUT = ROOT / "Python/optimizer_cases.c.h"
DEFAULT_ABSTRACT_INPUT = (ROOT / "Python/optimizer_bytecodes.c").absolute().as_posix()
@@ -45,7 +46,7 @@ def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None:
variables = {"unused"}
if not skip_inputs:
for var in reversed(uop.stack.inputs):
- if var.name not in variables:
+ if var.used and var.name not in variables:
variables.add(var.name)
if var.condition:
out.emit(f"{type_name(var)}{var.name} = NULL;\n")
@@ -65,7 +66,7 @@ def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None:
def decref_inputs(
out: CWriter,
tkn: Token,
- tkn_iter: Iterator[Token],
+ tkn_iter: TokenIterator,
uop: Uop,
stack: Stack,
inst: Instruction | None,
@@ -76,13 +77,27 @@ def decref_inputs(
out.emit_at("", tkn)
-def emit_default(out: CWriter, uop: Uop) -> None:
- for i, var in enumerate(uop.stack.outputs):
+def emit_default(out: CWriter, uop: Uop, stack: Stack) -> None:
+ for var in reversed(uop.stack.inputs):
+ stack.pop(var)
+ top_offset = stack.top_offset.copy()
+ for var in uop.stack.outputs:
+ if var.is_array() and not var.peek and not var.name == "unused":
+ c_offset = top_offset.to_c()
+ out.emit(f"{var.name} = &stack_pointer[{c_offset}];\n")
+ top_offset.push(var)
+ for var in uop.stack.outputs:
+ local = Local.undefined(var)
+ stack.push(local)
if var.name != "unused" and not var.peek:
+ local.defined = True
if var.is_array():
- out.emit(f"for (int _i = {var.size}; --_i >= 0;) {{\n")
- out.emit(f"{var.name}[_i] = sym_new_not_null(ctx);\n")
- out.emit("}\n")
+ if var.size == "1":
+ out.emit(f"{var.name}[0] = sym_new_not_null(ctx);\n")
+ else:
+ out.emit(f"for (int _i = {var.size}; --_i >= 0;) {{\n")
+ out.emit(f"{var.name}[_i] = sym_new_not_null(ctx);\n")
+ out.emit("}\n")
elif var.name == "null":
out.emit(f"{var.name} = sym_new_null(ctx);\n")
else:
@@ -90,7 +105,12 @@ def emit_default(out: CWriter, uop: Uop) -> None:
class OptimizerEmitter(Emitter):
- pass
+
+ def emit_save(self, storage: Storage) -> None:
+ storage.flush(self.out)
+
+ def emit_reload(self, storage: Storage) -> None:
+ pass
def write_uop(
@@ -102,22 +122,18 @@ def write_uop(
skip_inputs: bool,
) -> None:
locals: dict[str, Local] = {}
+ prototype = override if override else uop
try:
- prototype = override if override else uop
- is_override = override is not None
out.start_line()
- for var in reversed(prototype.stack.inputs):
- code, local = stack.pop(var, extract_bits=True)
- if not skip_inputs:
+ if override:
+ code_list, storage = Storage.for_uop(stack, prototype, extract_bits=False)
+ for code in code_list:
out.emit(code)
- if local.defined:
- locals[local.name] = local
- out.emit(stack.define_output_arrays(prototype.stack.outputs))
if debug:
args = []
- for var in prototype.stack.inputs:
- if not var.peek or is_override:
- args.append(var.name)
+ for input in prototype.stack.inputs:
+ if not input.peek or override:
+ args.append(input.name)
out.emit(f'DEBUG_PRINTF({", ".join(args)});\n')
if override:
for cache in uop.caches:
@@ -130,20 +146,18 @@ def write_uop(
out.emit(f"{type}{cache.name} = ({cast})this_instr->operand;\n")
if override:
emitter = OptimizerEmitter(out)
- emitter.emit_tokens(override, stack, None)
+ # No reference management of inputs needed.
+ for var in storage.inputs: # type: ignore[possibly-undefined]
+ var.defined = False
+ storage = emitter.emit_tokens(override, storage, None)
+ out.start_line()
+ storage.flush(out, cast_type="_Py_UopsSymbol *", extract_bits=False)
else:
- emit_default(out, uop)
-
- for var in prototype.stack.outputs:
- if var.name in locals:
- local = locals[var.name]
- else:
- local = Local.local(var)
- stack.push(local)
- out.start_line()
- stack.flush(out, cast_type="_Py_UopsSymbol *", extract_bits=True)
+ emit_default(out, uop, stack)
+ out.start_line()
+ stack.flush(out, cast_type="_Py_UopsSymbol *", extract_bits=False)
except StackError as ex:
- raise analysis_error(ex.args[0], uop.body[0])
+ raise analysis_error(ex.args[0], prototype.body[0]) # from None
SKIPS = ("_EXTENDED_ARG",)
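
emit_default() now special-cases array outputs of constant size one, emitting a single store instead of a one-iteration loop. A sketch of that branch, with an illustrative string-building emitter in place of the real CWriter:

    def default_store(name: str, size: str) -> str:
        # Size-1 arrays get a direct store; larger arrays keep the loop.
        if size == "1":
            return f"{name}[0] = sym_new_not_null(ctx);\n"
        return (f"for (int _i = {size}; --_i >= 0;) {{\n"
                f"{name}[_i] = sym_new_not_null(ctx);\n"
                "}\n")

    assert default_store("values", "1") == "values[0] = sym_new_not_null(ctx);\n"
    assert "for (int _i = oparg;" in default_store("values", "oparg")
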
diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py
index de4d900..a954bed 100644
--- a/Tools/cases_generator/stack.py
+++ b/Tools/cases_generator/stack.py
@@ -46,20 +46,41 @@ class Local:
in_memory: bool
defined: bool
+ def __repr__(self) -> str:
+ return f"Local('{self.item.name}', mem={self.in_memory}, defined={self.defined}, array={self.is_array()})"
+
+ def compact_str(self) -> str:
+ mtag = "M" if self.in_memory else ""
+ dtag = "D" if self.defined else ""
+ atag = "A" if self.is_array() else ""
+ return f"'{self.item.name}'{mtag}{dtag}{atag}"
+
@staticmethod
def unused(defn: StackItem) -> "Local":
return Local(defn, False, defn.is_array(), False)
@staticmethod
- def local(defn: StackItem) -> "Local":
+ def undefined(defn: StackItem) -> "Local":
array = defn.is_array()
- return Local(defn, not array, array, True)
+ return Local(defn, not array, array, False)
@staticmethod
def redefinition(var: StackItem, prev: "Local") -> "Local":
assert var.is_array() == prev.is_array()
return Local(var, prev.cached, prev.in_memory, True)
+ @staticmethod
+ def from_memory(defn: StackItem) -> "Local":
+ return Local(defn, True, True, True)
+
+ def copy(self) -> "Local":
+ return Local(
+ self.item,
+ self.cached,
+ self.in_memory,
+ self.defined
+ )
+
@property
def size(self) -> str:
return self.item.size
@@ -75,6 +96,16 @@ class Local:
def is_array(self) -> bool:
return self.item.is_array()
+ def __eq__(self, other: object) -> bool:
+ if not isinstance(other, Local):
+ return NotImplemented
+ return (
+ self.item is other.item
+ and self.cached is other.cached
+ and self.in_memory is other.in_memory
+ and self.defined is other.defined
+ )
+
@dataclass
class StackOffset:
@@ -156,10 +187,34 @@ class StackOffset:
res = "-" + res[3:]
return res
+ def as_int(self) -> int | None:
+ self.simplify()
+ int_offset = 0
+ for item in self.popped:
+ try:
+ int_offset -= int(item)
+ except ValueError:
+ return None
+ for item in self.pushed:
+ try:
+ int_offset += int(item)
+ except ValueError:
+ return None
+ return int_offset
+
def clear(self) -> None:
self.popped = []
self.pushed = []
+ def __bool__(self) -> bool:
+ self.simplify()
+ return bool(self.popped) or bool(self.pushed)
+
+ def __eq__(self, other: object) -> bool:
+ if not isinstance(other, StackOffset):
+ return NotImplemented
+ return self.to_c() == other.to_c()
+
class StackError(Exception):
pass
@@ -174,7 +229,7 @@ class Stack:
self.variables: list[Local] = []
self.defined: set[str] = set()
- def pop(self, var: StackItem, extract_bits: bool = False) -> tuple[str, Local]:
+ def pop(self, var: StackItem, extract_bits: bool = True) -> tuple[str, Local]:
self.top_offset.pop(var)
indirect = "&" if var.is_array() else ""
if self.variables:
@@ -192,7 +247,7 @@ class Stack:
if var.name in UNUSED:
if popped.name not in UNUSED and popped.name in self.defined:
raise StackError(
- f"Value is declared unused, but is already cached by prior operation"
+ f"Value is declared unused, but is already cached by prior operation as '{popped.name}'"
)
return "", popped
if not var.used:
@@ -208,6 +263,7 @@ class Stack:
defn = f"{var.name} = &stack_pointer[{self.top_offset.to_c()}];\n"
else:
defn = f"{var.name} = stack_pointer[{self.top_offset.to_c()}];\n"
+ popped.in_memory = True
return defn, Local.redefinition(var, popped)
self.base_offset.pop(var)
@@ -215,7 +271,7 @@ class Stack:
return "", Local.unused(var)
self.defined.add(var.name)
cast = f"({var.type})" if (not indirect and var.type) else ""
- bits = ".bits" if cast and not extract_bits else ""
+ bits = ".bits" if cast and extract_bits else ""
assign = f"{var.name} = {cast}{indirect}stack_pointer[{self.base_offset.to_c()}]{bits};"
if var.condition:
if var.condition == "1":
@@ -226,27 +282,14 @@ class Stack:
assign = f"if ({var.condition}) {{ {assign} }}\n"
else:
assign = f"{assign}\n"
- in_memory = var.is_array() or var.peek
- return assign, Local(var, not var.is_array(), in_memory, True)
+ return assign, Local.from_memory(var)
def push(self, var: Local) -> None:
+ assert var not in self.variables
self.variables.append(var)
self.top_offset.push(var.item)
if var.item.used:
self.defined.add(var.name)
- var.defined = True
-
- def define_output_arrays(self, outputs: list[StackItem]) -> str:
- res = []
- top_offset = self.top_offset.copy()
- for var in outputs:
- if var.is_array() and var.used and not var.peek:
- c_offset = top_offset.to_c()
- top_offset.push(var)
- res.append(f"{var.name} = &stack_pointer[{c_offset}];\n")
- else:
- top_offset.push(var)
- return "\n".join(res)
@staticmethod
def _do_emit(
@@ -254,102 +297,92 @@ class Stack:
var: StackItem,
base_offset: StackOffset,
cast_type: str = "uintptr_t",
- extract_bits: bool = False,
+ extract_bits: bool = True,
) -> None:
cast = f"({cast_type})" if var.type else ""
- bits = ".bits" if cast and not extract_bits else ""
+ bits = ".bits" if cast and extract_bits else ""
if var.condition == "0":
return
if var.condition and var.condition != "1":
out.emit(f"if ({var.condition}) ")
out.emit(f"stack_pointer[{base_offset.to_c()}]{bits} = {cast}{var.name};\n")
- @staticmethod
- def _do_flush(
- out: CWriter,
- variables: list[Local],
- base_offset: StackOffset,
- top_offset: StackOffset,
- cast_type: str = "uintptr_t",
- extract_bits: bool = False,
- ) -> None:
- out.start_line()
- for var in variables:
- if (
- var.cached
- and not var.in_memory
- and not var.item.peek
- and not var.name in UNUSED
- ):
- Stack._do_emit(out, var.item, base_offset, cast_type, extract_bits)
- base_offset.push(var.item)
- if base_offset.to_c() != top_offset.to_c():
- print("base", base_offset, "top", top_offset)
- assert False
- number = base_offset.to_c()
+ def _adjust_stack_pointer(self, out: CWriter, number: str) -> None:
if number != "0":
+ out.start_line()
out.emit(f"stack_pointer += {number};\n")
out.emit("assert(WITHIN_STACK_BOUNDS());\n")
- out.start_line()
-
- def flush_locally(
- self, out: CWriter, cast_type: str = "uintptr_t", extract_bits: bool = False
- ) -> None:
- self._do_flush(
- out,
- self.variables[:],
- self.base_offset.copy(),
- self.top_offset.copy(),
- cast_type,
- extract_bits,
- )
def flush(
- self, out: CWriter, cast_type: str = "uintptr_t", extract_bits: bool = False
+ self, out: CWriter, cast_type: str = "uintptr_t", extract_bits: bool = True
) -> None:
- self._do_flush(
- out,
- self.variables,
- self.base_offset,
- self.top_offset,
- cast_type,
- extract_bits,
- )
- self.variables = []
- self.base_offset.clear()
+ out.start_line()
+ var_offset = self.base_offset.copy()
+ for var in self.variables:
+ if (
+ var.defined and
+ not var.in_memory
+ ):
+ Stack._do_emit(out, var.item, var_offset, cast_type, extract_bits)
+ var.in_memory = True
+ var_offset.push(var.item)
+ number = self.top_offset.to_c()
+ self._adjust_stack_pointer(out, number)
+ self.base_offset -= self.top_offset
self.top_offset.clear()
+ out.start_line()
- def flush_single_var(
- self,
- out: CWriter,
- var_name: str,
- outputs: list[StackItem],
- cast_type: str = "uintptr_t",
- extract_bits: bool = False,
- ) -> None:
- assert any(var.name == var_name for var in outputs)
- base_offset = self.base_offset.copy()
- top_offset = self.top_offset.copy()
- for var in self.variables:
- base_offset.push(var.item)
- for output in outputs:
- if any(output == v.item for v in self.variables):
- # The variable is already on the stack, such as a peeked value
- # in the tier1 generator
- continue
- if output.name == var_name:
- Stack._do_emit(out, output, base_offset, cast_type, extract_bits)
- base_offset.push(output)
- top_offset.push(output)
- if base_offset.to_c() != top_offset.to_c():
- print("base", base_offset, "top", top_offset)
- assert False
+ def is_flushed(self) -> bool:
+ return not self.variables and not self.base_offset and not self.top_offset
def peek_offset(self) -> str:
return self.top_offset.to_c()
def as_comment(self) -> str:
- return f"/* Variables: {[v.name for v in self.variables]}. Base offset: {self.base_offset.to_c()}. Top offset: {self.top_offset.to_c()} */"
+ variables = ", ".join([v.compact_str() for v in self.variables])
+ return (
+ f"/* Variables: {variables}. base: {self.base_offset.to_c()}. top: {self.top_offset.to_c()} */"
+ )
+
+ def copy(self) -> "Stack":
+ other = Stack()
+ other.top_offset = self.top_offset.copy()
+ other.base_offset = self.base_offset.copy()
+ other.variables = [var.copy() for var in self.variables]
+ other.defined = set(self.defined)
+ return other
+
+ def __eq__(self, other: object) -> bool:
+ if not isinstance(other, Stack):
+ return NotImplemented
+ return (
+ self.top_offset == other.top_offset
+ and self.base_offset == other.base_offset
+ and self.variables == other.variables
+ )
+
+ def align(self, other: "Stack", out: CWriter) -> None:
+ if len(self.variables) != len(other.variables):
+ raise StackError("Cannot align stacks: differing variables")
+ if self.top_offset == other.top_offset:
+ return
+ diff = self.top_offset - other.top_offset
+ try:
+ self.top_offset -= diff
+ self.base_offset -= diff
+ self._adjust_stack_pointer(out, diff.to_c())
+ except ValueError:
+ raise StackError("Cannot align stacks: cannot adjust stack pointer")
+
+ def merge(self, other: "Stack", out: CWriter) -> None:
+ if len(self.variables) != len(other.variables):
+ raise StackError("Cannot merge stacks: differing variables")
+ for self_var, other_var in zip(self.variables, other.variables):
+ if self_var.name != other_var.name:
+ raise StackError(f"Mismatched variables on stack: {self_var.name} and {other_var.name}")
+ self_var.defined = self_var.defined and other_var.defined
+ self_var.in_memory = self_var.in_memory and other_var.in_memory
+ self.align(other, out)
def get_stack_effect(inst: Instruction | PseudoInstruction) -> Stack:
@@ -377,3 +410,213 @@ def get_stack_effect(inst: Instruction | PseudoInstruction) -> Stack:
local = Local.unused(var)
stack.push(local)
return stack
+
+@dataclass
+class Storage:
+
+ stack: Stack
+ inputs: list[Local]
+ outputs: list[Local]
+ peeks: list[Local]
+ spilled: int = 0
+
+ @staticmethod
+ def needs_defining(var: Local) -> bool:
+ return (
+ not var.defined and
+ not var.is_array() and
+ var.name != "unused"
+ )
+
+ @staticmethod
+ def is_live(var: Local) -> bool:
+ return (
+ var.defined and
+ var.name != "unused"
+ )
+
+ def first_input_not_cleared(self) -> str:
+ for input in self.inputs:
+ if input.defined:
+ return input.name
+ return ""
+
+ def clear_inputs(self, reason:str) -> None:
+ while self.inputs:
+ tos = self.inputs.pop()
+ if self.is_live(tos) and not tos.is_array():
+ raise StackError(
+ f"Input '{tos.name}' is still live {reason}"
+ )
+ self.stack.pop(tos.item)
+
+ def clear_dead_inputs(self) -> None:
+ live = ""
+ while self.inputs:
+ tos = self.inputs[-1]
+ if self.is_live(tos):
+ live = tos.name
+ break
+ self.inputs.pop()
+ self.stack.pop(tos.item)
+ for var in self.inputs:
+ if not var.defined and not var.is_array() and var.name != "unused":
+ raise StackError(
+ f"Input '{var.name}' is not live, but '{live}' is"
+ )
+
+ def _push_defined_outputs(self) -> None:
+ defined_output = ""
+ for output in self.outputs:
+ if output.defined and not output.in_memory:
+ defined_output = output.name
+ if not defined_output:
+ return
+ self.clear_inputs(f"when output '{defined_output}' is defined")
+ undefined = ""
+ for out in self.outputs:
+ if out.defined:
+ if undefined:
+ f"Locals not defined in stack order. "
+ f"Expected '{undefined}' to be defined before '{out.name}'"
+ else:
+ undefined = out.name
+ while self.outputs and not self.needs_defining(self.outputs[0]):
+ out = self.outputs.pop(0)
+ self.stack.push(out)
+
+ def locals_cached(self) -> bool:
+ for out in self.outputs:
+ if out.defined:
+ return True
+ return False
+
+ def flush(self, out: CWriter, cast_type: str = "uintptr_t", extract_bits: bool = True) -> None:
+ self.clear_dead_inputs()
+ self._push_defined_outputs()
+ self.stack.flush(out, cast_type, extract_bits)
+
+ def save(self, out: CWriter) -> None:
+ assert self.spilled >= 0
+ if self.spilled == 0:
+ self.flush(out)
+ out.start_line()
+ out.emit("_PyFrame_SetStackPointer(frame, stack_pointer);\n")
+ self.spilled += 1
+
+ def reload(self, out: CWriter) -> None:
+ if self.spilled == 0:
+ raise StackError("Cannot reload stack as it hasn't been saved")
+ assert self.spilled > 0
+ self.spilled -= 1
+ if self.spilled == 0:
+ out.start_line()
+ out.emit("stack_pointer = _PyFrame_GetStackPointer(frame);\n")
+
+ @staticmethod
+ def for_uop(stack: Stack, uop: Uop, extract_bits: bool = True) -> tuple[list[str], "Storage"]:
+ code_list: list[str] = []
+ inputs: list[Local] = []
+ peeks: list[Local] = []
+ for input in reversed(uop.stack.inputs):
+ code, local = stack.pop(input, extract_bits)
+ code_list.append(code)
+ if input.peek:
+ peeks.append(local)
+ else:
+ inputs.append(local)
+ inputs.reverse()
+ peeks.reverse()
+ for peek in peeks:
+ stack.push(peek)
+ top_offset = stack.top_offset.copy()
+ for output in uop.stack.outputs:
+ if output.is_array() and output.used and not output.peek:
+ c_offset = top_offset.to_c()
+ top_offset.push(output)
+ code_list.append(f"{output.name} = &stack_pointer[{c_offset}];\n")
+ else:
+ top_offset.push(output)
+ for var in inputs:
+ stack.push(var)
+ outputs = [ Local.undefined(var) for var in uop.stack.outputs if not var.peek ]
+ return code_list, Storage(stack, inputs, outputs, peeks)
+
+ @staticmethod
+ def copy_list(arg: list[Local]) -> list[Local]:
+ return [ l.copy() for l in arg ]
+
+ def copy(self) -> "Storage":
+ new_stack = self.stack.copy()
+ variables = { var.name: var for var in new_stack.variables }
+ inputs = [ variables[var.name] for var in self.inputs]
+ assert [v.name for v in inputs] == [v.name for v in self.inputs], (inputs, self.inputs)
+ return Storage(
+ new_stack, inputs,
+ self.copy_list(self.outputs), self.copy_list(self.peeks)
+ )
+
+ def sanity_check(self) -> None:
+ names: set[str] = set()
+ for var in self.inputs:
+ if var.name in names:
+ raise StackError(f"Duplicate name {var.name}")
+ names.add(var.name)
+ names = set()
+ for var in self.outputs:
+ if var.name in names:
+ raise StackError(f"Duplicate name {var.name}")
+ names.add(var.name)
+ names = set()
+ for var in self.stack.variables:
+ if var.name in names:
+ raise StackError(f"Duplicate name {var.name}")
+ names.add(var.name)
+
+ def is_flushed(self) -> bool:
+ for var in self.outputs:
+ if var.defined and not var.in_memory:
+ return False
+ return self.stack.is_flushed()
+
+ def merge(self, other: "Storage", out: CWriter) -> None:
+ self.sanity_check()
+ if len(self.inputs) != len(other.inputs):
+ self.clear_dead_inputs()
+ other.clear_dead_inputs()
+ if len(self.inputs) != len(other.inputs):
+ diff = self.inputs[-1] if len(self.inputs) > len(other.inputs) else other.inputs[-1]
+ raise StackError(f"Unmergeable inputs. Differing state of '{diff.name}'")
+ for var, other_var in zip(self.inputs, other.inputs):
+ if var.defined != other_var.defined:
+ raise StackError(f"'{var.name}' is cleared on some paths, but not all")
+ if len(self.outputs) != len(other.outputs):
+ self._push_defined_outputs()
+ other._push_defined_outputs()
+ if len(self.outputs) != len(other.outputs):
+ var = self.outputs[0] if len(self.outputs) > len(other.outputs) else other.outputs[0]
+ raise StackError(f"'{var.name}' is set on some paths, but not all")
+ self.stack.merge(other.stack, out)
+ self.sanity_check()
+
+ def push_outputs(self) -> None:
+ if self.spilled:
+ raise StackError(f"Unbalanced stack spills")
+ self.clear_inputs("at the end of the micro-op")
+ if self.inputs:
+ raise StackError(f"Input variable '{self.inputs[-1].name}' is still live")
+ self._push_defined_outputs()
+ if self.outputs:
+ for out in self.outputs:
+ if self.needs_defining(out):
+ raise StackError(f"Output variable '{self.outputs[0].name}' is not defined")
+ self.stack.push(out)
+ self.outputs = []
+
+ def as_comment(self) -> str:
+ stack_comment = self.stack.as_comment()
+ next_line = "\n "
+ inputs = ", ".join([var.compact_str() for var in self.inputs])
+ outputs = ", ".join([var.compact_str() for var in self.outputs])
+ peeks = ", ".join([var.name for var in self.peeks])
+ return f"{stack_comment[:-2]}{next_line}inputs: {inputs}{next_line}outputs: {outputs}{next_line}peeks: {peeks} */"
diff --git a/Tools/cases_generator/tier1_generator.py b/Tools/cases_generator/tier1_generator.py
index c749896..1b116a5 100644
--- a/Tools/cases_generator/tier1_generator.py
+++ b/Tools/cases_generator/tier1_generator.py
@@ -22,10 +22,11 @@ from generators_common import (
write_header,
type_and_null,
Emitter,
+ TokenIterator,
)
from cwriter import CWriter
from typing import TextIO
-from stack import Local, Stack, StackError, get_stack_effect
+from stack import Local, Stack, StackError, get_stack_effect, Storage
DEFAULT_OUTPUT = ROOT / "Python/generated_cases.c.h"
@@ -47,7 +48,7 @@ def declare_variables(inst: Instruction, out: CWriter) -> None:
try:
stack = get_stack_effect(inst)
except StackError as ex:
- raise analysis_error(ex.args[0], inst.where)
+ raise analysis_error(ex.args[0], inst.where) from None
required = set(stack.defined)
required.discard("unused")
for part in inst.parts:
@@ -70,46 +71,26 @@ def write_uop(
stack: Stack,
inst: Instruction,
braces: bool,
-) -> int:
+) -> tuple[int, Stack]:
# out.emit(stack.as_comment() + "\n")
if isinstance(uop, Skip):
entries = "entries" if uop.size > 1 else "entry"
emitter.emit(f"/* Skip {uop.size} cache {entries} */\n")
- return offset + uop.size
+ return (offset + uop.size), stack
if isinstance(uop, Flush):
emitter.emit(f"// flush\n")
stack.flush(emitter.out)
- return offset
+ return offset, stack
try:
locals: dict[str, Local] = {}
emitter.out.start_line()
if braces:
emitter.out.emit(f"// {uop.name}\n")
- peeks: list[Local] = []
- for var in reversed(uop.stack.inputs):
- code, local = stack.pop(var)
- emitter.emit(code)
- if var.peek:
- peeks.append(local)
- if local.defined:
- locals[local.name] = local
- # Push back the peeks, so that they remain on the logical
- # stack, but their values are cached.
- while peeks:
- stack.push(peeks.pop())
- if braces:
emitter.emit("{\n")
- emitter.out.emit(stack.define_output_arrays(uop.stack.outputs))
- outputs: list[Local] = []
- for var in uop.stack.outputs:
- if not var.peek:
- if var.name in locals:
- local = locals[var.name]
- elif var.name == "unused":
- local = Local.unused(var)
- else:
- local = Local.local(var)
- outputs.append(local)
+ code_list, storage = Storage.for_uop(stack, uop)
+ emitter._print_storage(storage)
+ for code in code_list:
+ emitter.emit(code)
for cache in uop.caches:
if cache.name != "unused":
@@ -125,17 +106,13 @@ def write_uop(
if inst.family is None:
emitter.emit(f"(void){cache.name};\n")
offset += cache.size
- emitter.emit_tokens(uop, stack, inst)
- for output in outputs:
- if output.name in uop.deferred_refs.values():
- # We've already spilled this when emitting tokens
- output.cached = False
- stack.push(output)
+
+ storage = emitter.emit_tokens(uop, storage, inst)
if braces:
emitter.out.start_line()
emitter.emit("}\n")
# emitter.emit(stack.as_comment() + "\n")
- return offset
+ return offset, storage.stack
except StackError as ex:
raise analysis_error(ex.args[0], uop.body[0])
@@ -197,10 +174,11 @@ def generate_tier1(
for part in inst.parts:
# Only emit braces if more than one uop
insert_braces = len([p for p in inst.parts if isinstance(p, Uop)]) > 1
- offset = write_uop(part, emitter, offset, stack, inst, insert_braces)
+ offset, stack = write_uop(part, emitter, offset, stack, inst, insert_braces)
out.start_line()
+
+ stack.flush(out)
if not inst.parts[-1].properties.always_exits:
- stack.flush(out)
out.emit("DISPATCH();\n")
out.start_line()
out.emit("}")
diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py
index b7c70fd..634848c 100644
--- a/Tools/cases_generator/tier2_generator.py
+++ b/Tools/cases_generator/tier2_generator.py
@@ -20,11 +20,13 @@ from generators_common import (
write_header,
type_and_null,
Emitter,
+ TokenIterator,
+ always_true,
)
from cwriter import CWriter
from typing import TextIO, Iterator
from lexer import Token
-from stack import Local, Stack, StackError, get_stack_effect
+from stack import Local, Stack, StackError, Storage
DEFAULT_OUTPUT = ROOT / "Python/executor_cases.c.h"
@@ -32,7 +34,7 @@ DEFAULT_OUTPUT = ROOT / "Python/executor_cases.c.h"
def declare_variable(
var: StackItem, uop: Uop, required: set[str], out: CWriter
) -> None:
- if var.name not in required:
+ if not var.used or var.name not in required:
return
required.remove(var.name)
type, null = type_and_null(var)
@@ -52,7 +54,7 @@ def declare_variables(uop: Uop, out: CWriter) -> None:
for var in reversed(uop.stack.inputs):
stack.pop(var)
for var in uop.stack.outputs:
- stack.push(Local.unused(var))
+ stack.push(Local.undefined(var))
required = set(stack.defined)
required.discard("unused")
for var in reversed(uop.stack.inputs):
@@ -69,88 +71,103 @@ class Tier2Emitter(Emitter):
def error_if(
self,
tkn: Token,
- tkn_iter: Iterator[Token],
+ tkn_iter: TokenIterator,
uop: Uop,
- stack: Stack,
+ storage: Storage,
inst: Instruction | None,
- ) -> None:
+ ) -> bool:
self.out.emit_at("if ", tkn)
- self.emit(next(tkn_iter))
+ lparen = next(tkn_iter)
+ self.emit(lparen)
+ assert lparen.kind == "LPAREN"
+ first_tkn = next(tkn_iter)
+ self.out.emit(first_tkn)
emit_to(self.out, tkn_iter, "COMMA")
label = next(tkn_iter).text
next(tkn_iter) # RPAREN
next(tkn_iter) # Semi colon
self.emit(") JUMP_TO_ERROR();\n")
+ return not always_true(first_tkn)
+
def error_no_pop(
self,
tkn: Token,
- tkn_iter: Iterator[Token],
+ tkn_iter: TokenIterator,
uop: Uop,
- stack: Stack,
+ storage: Storage,
inst: Instruction | None,
- ) -> None:
+ ) -> bool:
next(tkn_iter) # LPAREN
next(tkn_iter) # RPAREN
next(tkn_iter) # Semi colon
self.out.emit_at("JUMP_TO_ERROR();", tkn)
+ return False
def deopt_if(
self,
tkn: Token,
- tkn_iter: Iterator[Token],
+ tkn_iter: TokenIterator,
uop: Uop,
- unused: Stack,
+ storage: Storage,
inst: Instruction | None,
- ) -> None:
+ ) -> bool:
self.out.emit_at("if ", tkn)
- self.emit(next(tkn_iter))
+ lparen = next(tkn_iter)
+ self.emit(lparen)
+ assert lparen.kind == "LPAREN"
+ first_tkn = tkn_iter.peek()
emit_to(self.out, tkn_iter, "RPAREN")
next(tkn_iter) # Semi colon
self.emit(") {\n")
self.emit("UOP_STAT_INC(uopcode, miss);\n")
self.emit("JUMP_TO_JUMP_TARGET();\n")
self.emit("}\n")
+ return not always_true(first_tkn)
def exit_if( # type: ignore[override]
self,
tkn: Token,
- tkn_iter: Iterator[Token],
+ tkn_iter: TokenIterator,
uop: Uop,
- unused: Stack,
+ storage: Storage,
inst: Instruction | None,
- ) -> None:
+ ) -> bool:
self.out.emit_at("if ", tkn)
- self.emit(next(tkn_iter))
+ lparen = next(tkn_iter)
+ self.emit(lparen)
+ first_tkn = tkn_iter.peek()
emit_to(self.out, tkn_iter, "RPAREN")
next(tkn_iter) # Semi colon
self.emit(") {\n")
self.emit("UOP_STAT_INC(uopcode, miss);\n")
self.emit("JUMP_TO_JUMP_TARGET();\n")
self.emit("}\n")
+ return not always_true(first_tkn)
def oparg(
self,
tkn: Token,
- tkn_iter: Iterator[Token],
+ tkn_iter: TokenIterator,
uop: Uop,
- unused: Stack,
+ storage: Storage,
inst: Instruction | None,
- ) -> None:
+ ) -> bool:
if not uop.name.endswith("_0") and not uop.name.endswith("_1"):
self.emit(tkn)
- return
+ return True
amp = next(tkn_iter)
if amp.text != "&":
self.emit(tkn)
self.emit(amp)
- return
+ return True
one = next(tkn_iter)
assert one.text == "1"
self.out.emit_at(uop.name[-1], tkn)
+ return True
-def write_uop(uop: Uop, emitter: Emitter, stack: Stack) -> None:
+def write_uop(uop: Uop, emitter: Emitter, stack: Stack) -> Stack:
locals: dict[str, Local] = {}
try:
emitter.out.start_line()
@@ -160,19 +177,9 @@ def write_uop(uop: Uop, emitter: Emitter, stack: Stack) -> None:
elif uop.properties.const_oparg >= 0:
emitter.emit(f"oparg = {uop.properties.const_oparg};\n")
emitter.emit(f"assert(oparg == CURRENT_OPARG());\n")
- for var in reversed(uop.stack.inputs):
- code, local = stack.pop(var)
+ code_list, storage = Storage.for_uop(stack, uop)
+ for code in code_list:
emitter.emit(code)
- if local.defined:
- locals[local.name] = local
- emitter.emit(stack.define_output_arrays(uop.stack.outputs))
- outputs: list[Local] = []
- for var in uop.stack.outputs:
- if var.name in locals:
- local = locals[var.name]
- else:
- local = Local.local(var)
- outputs.append(local)
for cache in uop.caches:
if cache.name != "unused":
if cache.size == 4:
@@ -181,15 +188,10 @@ def write_uop(uop: Uop, emitter: Emitter, stack: Stack) -> None:
type = f"uint{cache.size*16}_t "
cast = f"uint{cache.size*16}_t"
emitter.emit(f"{type}{cache.name} = ({cast})CURRENT_OPERAND();\n")
- emitter.emit_tokens(uop, stack, None)
- for output in outputs:
- if output.name in uop.deferred_refs.values():
- # We've already spilled this when emitting tokens
- output.cached = False
- stack.push(output)
+ storage = emitter.emit_tokens(uop, storage, None)
except StackError as ex:
raise analysis_error(ex.args[0], uop.body[0]) from None
-
+ return storage.stack
SKIPS = ("_EXTENDED_ARG",)
@@ -226,7 +228,7 @@ def generate_tier2(
out.emit(f"case {uop.name}: {{\n")
declare_variables(uop, out)
stack = Stack()
- write_uop(uop, emitter, stack)
+ stack = write_uop(uop, emitter, stack)
out.start_line()
if not uop.properties.always_exits:
stack.flush(out)
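
The boolean returned by each replacement handler feeds the reachability tracking in _emit_block(): a handler returns False when control cannot continue past it, and deopt_if/error_if consult always_true() so that a statically-true guard such as DEOPT_IF(true) marks the rest of the uop unreachable. A sketch of that predicate over token text (the real always_true in generators_common inspects tkn.text):

    def always_true(text: str | None) -> bool:
        if text is None:
            return False
        return text in {"true", "1"}

    assert always_true("true") and always_true("1")
    assert not always_true("err != 0")   # a real condition: code after stays reachable
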