summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Shannon <mark@hotpy.org>2024-01-15 11:41:06 (GMT)
committerGitHub <noreply@github.com>2024-01-15 11:41:06 (GMT)
commitac10947ba79a15bfdaa3ca92c6864214648ab364 (patch)
tree1853aea09cbd74fdaf1d4d3b999ba250f43ff6ae
parent2010d45327128594aed332befa687c8aead010bc (diff)
downloadcpython-ac10947ba79a15bfdaa3ca92c6864214648ab364.zip
cpython-ac10947ba79a15bfdaa3ca92c6864214648ab364.tar.gz
cpython-ac10947ba79a15bfdaa3ca92c6864214648ab364.tar.bz2
GH-112354: `_GUARD_IS_TRUE_POP` side-exits to target the next instruction, not themselves. (GH-114078)
-rw-r--r--Include/internal/pycore_uop_metadata.h2
-rw-r--r--Python/bytecodes.c19
-rw-r--r--Python/executor_cases.c.h15
-rw-r--r--Python/optimizer.c7
-rw-r--r--Tools/cases_generator/analyzer.py2
-rw-r--r--Tools/cases_generator/generators_common.py6
-rw-r--r--Tools/cases_generator/interpreter_definition.md13
-rw-r--r--Tools/cases_generator/stack.py2
8 files changed, 40 insertions, 26 deletions
diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h
index 3b251d3..9bfb4f4 100644
--- a/Include/internal/pycore_uop_metadata.h
+++ b/Include/internal/pycore_uop_metadata.h
@@ -169,7 +169,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
[_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
[_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
- [_PUSH_FRAME] = 0,
+ [_PUSH_FRAME] = HAS_ESCAPES_FLAG,
[_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG,
[_CALL_STR_1] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_CALL_TUPLE_1] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 6df99d6..c48f0a1 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -805,7 +805,8 @@ dummy_func(
#if TIER_ONE
assert(frame != &entry_frame);
#endif
- STORE_SP();
+ SYNC_SP();
+ _PyFrame_SetStackPointer(frame, stack_pointer);
assert(EMPTY());
_Py_LeaveRecursiveCallPy(tstate);
// GH-99729: We need to unlink the frame *before* clearing it:
@@ -3154,7 +3155,8 @@ dummy_func(
// Write it out explicitly because it's subtly different.
// Eventually this should be the only occurrence of this code.
assert(tstate->interp->eval_frame == NULL);
- STORE_SP();
+ SYNC_SP();
+ _PyFrame_SetStackPointer(frame, stack_pointer);
new_frame->previous = frame;
CALL_STAT_INC(inlined_py_calls);
frame = tstate->current_frame = new_frame;
@@ -4013,20 +4015,27 @@ dummy_func(
///////// Tier-2 only opcodes /////////
op (_GUARD_IS_TRUE_POP, (flag -- )) {
- DEOPT_IF(Py_IsFalse(flag));
+ SYNC_SP();
+ DEOPT_IF(!Py_IsTrue(flag));
assert(Py_IsTrue(flag));
}
op (_GUARD_IS_FALSE_POP, (flag -- )) {
- DEOPT_IF(Py_IsTrue(flag));
+ SYNC_SP();
+ DEOPT_IF(!Py_IsFalse(flag));
assert(Py_IsFalse(flag));
}
op (_GUARD_IS_NONE_POP, (val -- )) {
- DEOPT_IF(!Py_IsNone(val));
+ SYNC_SP();
+ if (!Py_IsNone(val)) {
+ Py_DECREF(val);
+ DEOPT_IF(1);
+ }
}
op (_GUARD_IS_NOT_NONE_POP, (val -- )) {
+ SYNC_SP();
DEOPT_IF(Py_IsNone(val));
Py_DECREF(val);
}
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 6060beb..2b4399b 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -3318,35 +3318,38 @@
case _GUARD_IS_TRUE_POP: {
PyObject *flag;
flag = stack_pointer[-1];
- if (Py_IsFalse(flag)) goto deoptimize;
- assert(Py_IsTrue(flag));
stack_pointer += -1;
+ if (!Py_IsTrue(flag)) goto deoptimize;
+ assert(Py_IsTrue(flag));
break;
}
case _GUARD_IS_FALSE_POP: {
PyObject *flag;
flag = stack_pointer[-1];
- if (Py_IsTrue(flag)) goto deoptimize;
- assert(Py_IsFalse(flag));
stack_pointer += -1;
+ if (!Py_IsFalse(flag)) goto deoptimize;
+ assert(Py_IsFalse(flag));
break;
}
case _GUARD_IS_NONE_POP: {
PyObject *val;
val = stack_pointer[-1];
- if (!Py_IsNone(val)) goto deoptimize;
stack_pointer += -1;
+ if (!Py_IsNone(val)) {
+ Py_DECREF(val);
+ if (1) goto deoptimize;
+ }
break;
}
case _GUARD_IS_NOT_NONE_POP: {
PyObject *val;
val = stack_pointer[-1];
+ stack_pointer += -1;
if (Py_IsNone(val)) goto deoptimize;
Py_DECREF(val);
- stack_pointer += -1;
break;
}
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 236ae26..1551a5e 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -481,18 +481,19 @@ top: // Jump here after _PUSH_FRAME or likely branches
goto done;
}
uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_likely];
- _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
DPRINTF(2, "%s(%d): counter=%x, bitcount=%d, likely=%d, confidence=%d, uopcode=%s\n",
_PyOpcode_OpName[opcode], oparg,
counter, bitcount, jump_likely, confidence, _PyUOpName(uopcode));
- ADD_TO_TRACE(uopcode, max_length, 0, target);
+ _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
+ _Py_CODEUNIT *target_instr = next_instr + oparg;
if (jump_likely) {
- _Py_CODEUNIT *target_instr = next_instr + oparg;
DPRINTF(2, "Jump likely (%x = %d bits), continue at byte offset %d\n",
instr[1].cache, bitcount, 2 * INSTR_IP(target_instr, code));
instr = target_instr;
+ ADD_TO_TRACE(uopcode, max_length, 0, INSTR_IP(next_instr, code));
goto top;
}
+ ADD_TO_TRACE(uopcode, max_length, 0, INSTR_IP(target_instr, code));
break;
}
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index 7ed3b57..b80fa66 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -464,7 +464,7 @@ def compute_properties(op: parser.InstDef) -> Properties:
ends_with_eval_breaker=eval_breaker_at_end(op),
needs_this=variable_used(op, "this_instr"),
always_exits=always_exits(op),
- stores_sp=variable_used(op, "STORE_SP"),
+ stores_sp=variable_used(op, "SYNC_SP"),
tier_one_only=variable_used(op, "TIER_ONE_ONLY"),
uses_co_consts=variable_used(op, "FRAME_CO_CONSTS"),
uses_co_names=variable_used(op, "FRAME_CO_NAMES"),
diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py
index c6c602c..2fc2ab1 100644
--- a/Tools/cases_generator/generators_common.py
+++ b/Tools/cases_generator/generators_common.py
@@ -124,7 +124,7 @@ def replace_decrefs(
out.emit(f"Py_DECREF({var.name});\n")
-def replace_store_sp(
+def replace_sync_sp(
out: CWriter,
tkn: Token,
tkn_iter: Iterator[Token],
@@ -135,9 +135,7 @@ def replace_store_sp(
next(tkn_iter)
next(tkn_iter)
next(tkn_iter)
- out.emit_at("", tkn)
stack.flush(out)
- out.emit("_PyFrame_SetStackPointer(frame, stack_pointer);\n")
def replace_check_eval_breaker(
@@ -160,7 +158,7 @@ REPLACEMENT_FUNCTIONS = {
"ERROR_IF": replace_error,
"DECREF_INPUTS": replace_decrefs,
"CHECK_EVAL_BREAKER": replace_check_eval_breaker,
- "STORE_SP": replace_store_sp,
+ "SYNC_SP": replace_sync_sp,
}
ReplacementFunctionType = Callable[
diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md
index e5a4899..e87aff4 100644
--- a/Tools/cases_generator/interpreter_definition.md
+++ b/Tools/cases_generator/interpreter_definition.md
@@ -6,7 +6,7 @@ The CPython interpreter is defined in C, meaning that the semantics of the
bytecode instructions, the dispatching mechanism, error handling, and
tracing and instrumentation are all intermixed.
-This document proposes defining a custom C-like DSL for defining the
+This document proposes defining a custom C-like DSL for defining the
instruction semantics and tools for generating the code deriving from
the instruction definitions.
@@ -46,7 +46,7 @@ passes from the semantic definition, reducing errors.
As we improve the performance of CPython, we need to optimize larger regions
of code, use more complex optimizations and, ultimately, translate to machine
-code.
+code.
All of these steps introduce the possibility of more bugs, and require more code
to be written. One way to mitigate this is through the use of code generators.
@@ -62,7 +62,7 @@ blocks as the instructions for the tier 1 (PEP 659) interpreter.
Rewriting all the instructions is tedious and error-prone, and changing the
instructions is a maintenance headache as both versions need to be kept in sync.
-By using a code generator and using a common source for the instructions, or
+By using a code generator and using a common source for the instructions, or
parts of instructions, we can reduce the potential for errors considerably.
@@ -75,7 +75,7 @@ We update it as the need arises.
Each op definition has a kind, a name, a stack and instruction stream effect,
and a piece of C code describing its semantics::
-
+
```
file:
(definition | family | pseudo)+
@@ -86,7 +86,7 @@ and a piece of C code describing its semantics::
"op" "(" NAME "," stack_effect ")" "{" C-code "}"
|
"macro" "(" NAME ")" "=" uop ("+" uop)* ";"
-
+
stack_effect:
"(" [inputs] "--" [outputs] ")"
@@ -201,6 +201,7 @@ Those functions include:
* `DEOPT_IF(cond, instruction)`. Deoptimize if `cond` is met.
* `ERROR_IF(cond, label)`. Jump to error handler at `label` if `cond` is true.
* `DECREF_INPUTS()`. Generate `Py_DECREF()` calls for the input stack effects.
+* `SYNC_SP()`. Synchronizes the physical stack pointer with the stack effects.
Note that the use of `DECREF_INPUTS()` is optional -- manual calls
to `Py_DECREF()` or other approaches are also acceptable
@@ -445,7 +446,7 @@ rather than popping and pushing, such that `LOAD_ATTR_SLOT` would look something
stack_pointer += 1;
}
s1 = res;
- }
+ }
next_instr += (1 + 1 + 2 + 1 + 4);
stack_pointer[-1] = s1;
DISPATCH();
diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py
index 6633950..f62ece4 100644
--- a/Tools/cases_generator/stack.py
+++ b/Tools/cases_generator/stack.py
@@ -169,6 +169,7 @@ class Stack:
return ""
def flush(self, out: CWriter) -> None:
+ out.start_line()
for var in self.variables:
if not var.peek:
cast = "(PyObject *)" if var.type else ""
@@ -189,6 +190,7 @@ class Stack:
self.base_offset.clear()
self.top_offset.clear()
self.peek_offset.clear()
+ out.start_line()
def as_comment(self) -> str:
return f"/* Variables: {[v.name for v in self.variables]}. Base offset: {self.base_offset.to_c()}. Top offset: {self.top_offset.to_c()} */"