author    Dennis Sweeney <36520290+sweeneyde@users.noreply.github.com>    2022-10-20 22:27:41 (GMT)
committer GitHub <noreply@github.com>    2022-10-20 22:27:41 (GMT)
commit    39bc70e267929600057d62103739b7160e69dc8b (patch)
tree      fa4bd575ea2c39ddcf759af111747bd4f48d9924 /Python
parent    6f15ca8c7afa23e1adc87f2b66b958b721f9acab (diff)
gh-97912: Avoid quadratic behavior when adding LOAD_FAST_CHECK (GH-97952)
* The compiler analyzes the usage of the first 64 local variables all at once using bit masks.
* Local variables beyond the first 64 are only partially analyzed, achieving linear time.
Diffstat (limited to 'Python')
-rw-r--r--  Python/compile.c  199
1 file changed, 135 insertions(+), 64 deletions(-)
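
The heart of the change is representing the "maybe uninitialized" status of the first 64 locals as bits of a single uint64_t, so one sweep over a basicblock updates every tracked local at once. Below is a minimal standalone sketch of the per-instruction mask update; the opcode tags and helper names are illustrative, not CPython's:

#include <stdint.h>
#include <stdio.h>

/* Illustrative opcode tags standing in for DELETE_FAST, STORE_FAST,
   and LOAD_FAST. */
enum { OP_LOAD, OP_STORE, OP_DELETE };

typedef struct { int op; int local; } toy_instr;

/* Update a 64-bit "maybe uninitialized" mask across one instruction,
   reporting when a load would need the checked LOAD_FAST_CHECK form. */
static uint64_t
step(uint64_t unsafe_mask, const toy_instr *i, int *needs_check)
{
    if (i->local >= 64) {
        return unsafe_mask;          /* handled by the fallback pass */
    }
    uint64_t bit = (uint64_t)1 << i->local;
    switch (i->op) {
    case OP_DELETE:
        return unsafe_mask | bit;    /* local may now be unbound */
    case OP_STORE:
        return unsafe_mask & ~bit;   /* definitely bound afterwards */
    case OP_LOAD:
        *needs_check = (unsafe_mask & bit) != 0;
        return unsafe_mask & ~bit;   /* if the load didn't raise, it's bound */
    }
    return unsafe_mask;
}

int main(void)
{
    toy_instr prog[] = {
        { OP_STORE, 0 }, { OP_DELETE, 0 }, { OP_LOAD, 0 }, { OP_LOAD, 0 },
    };
    uint64_t mask = 0;
    for (int k = 0; k < 4; k++) {
        int check = 0;
        mask = step(mask, &prog[k], &check);
        printf("instr %d: needs LOAD_FAST_CHECK = %d\n", k, check);
    }
    return 0;  /* only the load right after the delete needs the check */
}

The same three transitions appear verbatim in the switch inside scan_block_for_locals in the diff below.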
diff --git a/Python/compile.c b/Python/compile.c
index 70f38d4..676558b 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -114,6 +114,13 @@
(opcode) == RAISE_VARARGS || \
(opcode) == RERAISE)
+#define IS_SUPERINSTRUCTION_OPCODE(opcode) \
+ ((opcode) == LOAD_FAST__LOAD_FAST || \
+ (opcode) == LOAD_FAST__LOAD_CONST || \
+ (opcode) == LOAD_CONST__LOAD_FAST || \
+ (opcode) == STORE_FAST__LOAD_FAST || \
+ (opcode) == STORE_FAST__STORE_FAST)
+
#define IS_TOP_LEVEL_AWAIT(c) ( \
(c->c_flags->cf_flags & PyCF_ALLOW_TOP_LEVEL_AWAIT) \
&& (c->u->u_ste->ste_type == ModuleBlock))
@@ -258,6 +265,8 @@ typedef struct basicblock_ {
int b_iused;
/* length of instruction array (b_instr) */
int b_ialloc;
+ /* Used by add_checks_for_loads_of_uninitialized_variables */
+ uint64_t b_unsafe_locals_mask;
/* Number of predecessors that a block has. */
int b_predecessors;
/* depth of stack upon entry of block, computed by stackdepth() */
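
The new b_unsafe_locals_mask field is what lets the pass analyze all 64 tracked locals per block instead of re-walking the CFG once per local. The key property is monotonicity: maybe_push (in the next hunk) only re-queues a block when its mask gains bits, and a mask holds at most 64 bits, so every block re-enters the worklist a bounded number of times; that is where the linear time bound comes from. A toy demonstration of that monotone convergence on a two-block loop, with the CFG reduced to plain mask arithmetic:

#include <stdint.h>
#include <stdio.h>

/* Block A deletes local 3, block B stores local 3, and the two blocks
   form a loop.  Masks only ever gain bits, so iterating the transfer
   functions reaches a fixed point quickly. */
int main(void)
{
    uint64_t mask_a = 0, mask_b = 0;
    for (int rounds = 0; ; rounds++) {
        uint64_t out_a = mask_a | ((uint64_t)1 << 3);   /* DELETE_FAST 3 */
        uint64_t out_b = mask_b & ~((uint64_t)1 << 3);  /* STORE_FAST 3 */
        uint64_t new_a = mask_a | out_b;  /* edge B -> A */
        uint64_t new_b = mask_b | out_a;  /* edge A -> B */
        if (new_a == mask_a && new_b == mask_b) {
            printf("fixed point after %d rounds\n", rounds);
            break;
        }
        mask_a = new_a;
        mask_b = new_b;
    }
    return 0;
}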
@@ -8052,103 +8061,165 @@ assemble_jump_offsets(basicblock *entryblock)
}
-// Ensure each basicblock is only put onto the stack once.
-#define MAYBE_PUSH(B) do { \
- if ((B)->b_visited == 0) { \
- *(*stack_top)++ = (B); \
- (B)->b_visited = 1; \
- } \
- } while (0)
+// helper functions for add_checks_for_loads_of_uninitialized_variables
+static inline void
+maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
+{
+ // Push b if the unsafe mask is giving us any new information.
+ // To avoid overflowing the stack, only allow each block once.
+ // Use b->b_visited=1 to mean that b is currently on the stack.
+ uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
+ if (b->b_unsafe_locals_mask != both) {
+ b->b_unsafe_locals_mask = both;
+ // More work left to do.
+ if (!b->b_visited) {
+ // not on the stack, so push it.
+ *(*sp)++ = b;
+ b->b_visited = 1;
+ }
+ }
+}
static void
-scan_block_for_local(int target, basicblock *b, bool unsafe_to_start,
- basicblock ***stack_top)
+scan_block_for_locals(basicblock *b, basicblock ***sp)
{
- bool unsafe = unsafe_to_start;
+ // bit i is set if local i is potentially uninitialized
+ uint64_t unsafe_mask = b->b_unsafe_locals_mask;
for (int i = 0; i < b->b_iused; i++) {
struct instr *instr = &b->b_instr[i];
assert(instr->i_opcode != EXTENDED_ARG);
assert(instr->i_opcode != EXTENDED_ARG_QUICK);
- assert(instr->i_opcode != LOAD_FAST__LOAD_FAST);
- assert(instr->i_opcode != STORE_FAST__LOAD_FAST);
- assert(instr->i_opcode != LOAD_CONST__LOAD_FAST);
- assert(instr->i_opcode != STORE_FAST__STORE_FAST);
- assert(instr->i_opcode != LOAD_FAST__LOAD_CONST);
- if (unsafe && instr->i_except != NULL) {
- MAYBE_PUSH(instr->i_except);
- }
- if (instr->i_oparg != target) {
+ assert(!IS_SUPERINSTRUCTION_OPCODE(instr->i_opcode));
+ if (instr->i_except != NULL) {
+ maybe_push(instr->i_except, unsafe_mask, sp);
+ }
+ if (instr->i_oparg >= 64) {
continue;
}
+ assert(instr->i_oparg >= 0);
+ uint64_t bit = (uint64_t)1 << instr->i_oparg;
switch (instr->i_opcode) {
+ case DELETE_FAST:
+ unsafe_mask |= bit;
+ break;
+ case STORE_FAST:
+ unsafe_mask &= ~bit;
+ break;
case LOAD_FAST_CHECK:
- // if this doesn't raise, then var is defined
- unsafe = false;
+ // If this doesn't raise, then the local is defined.
+ unsafe_mask &= ~bit;
break;
case LOAD_FAST:
- if (unsafe) {
+ if (unsafe_mask & bit) {
instr->i_opcode = LOAD_FAST_CHECK;
}
- unsafe = false;
- break;
- case STORE_FAST:
- unsafe = false;
- break;
- case DELETE_FAST:
- unsafe = true;
+ unsafe_mask &= ~bit;
break;
}
}
- if (unsafe) {
- // unsafe at end of this block,
- // so unsafe at start of next blocks
- if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
- MAYBE_PUSH(b->b_next);
- }
- struct instr *last = basicblock_last_instr(b);
- if (last != NULL) {
- if (is_jump(last)) {
- assert(last->i_target != NULL);
- MAYBE_PUSH(last->i_target);
+ if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
+ maybe_push(b->b_next, unsafe_mask, sp);
+ }
+ struct instr *last = basicblock_last_instr(b);
+ if (last && is_jump(last)) {
+ assert(last->i_target != NULL);
+ maybe_push(last->i_target, unsafe_mask, sp);
+ }
+}
+
+static int
+fast_scan_many_locals(basicblock *entryblock, int nlocals)
+{
+ assert(nlocals > 64);
+ Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
+ if (states == NULL) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ Py_ssize_t blocknum = 0;
+ // states[i - 64] == blocknum if local i is guaranteed to
+ // be initialized, i.e., if it has had a previous LOAD_FAST or
+ // STORE_FAST within that basicblock (not followed by DELETE_FAST).
+ for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+ blocknum++;
+ for (int i = 0; i < b->b_iused; i++) {
+ struct instr *instr = &b->b_instr[i];
+ assert(instr->i_opcode != EXTENDED_ARG);
+ assert(instr->i_opcode != EXTENDED_ARG_QUICK);
+ assert(!IS_SUPERINSTRUCTION_OPCODE(instr->i_opcode));
+ int arg = instr->i_oparg;
+ if (arg < 64) {
+ continue;
+ }
+ assert(arg >= 0);
+ switch (instr->i_opcode) {
+ case DELETE_FAST:
+ states[arg - 64] = blocknum - 1;
+ break;
+ case STORE_FAST:
+ states[arg - 64] = blocknum;
+ break;
+ case LOAD_FAST:
+ if (states[arg - 64] != blocknum) {
+ instr->i_opcode = LOAD_FAST_CHECK;
+ }
+ states[arg - 64] = blocknum;
+ break;
+ case LOAD_FAST_CHECK:
+ Py_UNREACHABLE();
}
}
}
+ PyMem_Free(states);
+ return 0;
}
-#undef MAYBE_PUSH
static int
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
struct compiler *c)
{
+ int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames);
+ if (nlocals == 0) {
+ return 0;
+ }
+ if (nlocals > 64) {
+ // To avoid O(nlocals**2) compilation, locals beyond the first
+ // 64 are only analyzed one basicblock at a time: initialization
+ // info is not passed between basicblocks.
+ if (fast_scan_many_locals(entryblock, nlocals) < 0) {
+ return -1;
+ }
+ nlocals = 64;
+ }
basicblock **stack = make_cfg_traversal_stack(entryblock);
if (stack == NULL) {
return -1;
}
- Py_ssize_t nparams = PyList_GET_SIZE(c->u->u_ste->ste_varnames);
- int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames);
- for (int target = 0; target < nlocals; target++) {
- for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
- b->b_visited = 0;
- }
- basicblock **stack_top = stack;
+ basicblock **sp = stack;
- // First pass: find the relevant DFS starting points:
- // the places where "being uninitialized" originates,
- // which are the entry block and any DELETE_FAST statements.
- if (target >= nparams) {
- // only non-parameter locals start out uninitialized.
- *(stack_top++) = entryblock;
- entryblock->b_visited = 1;
- }
- for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
- scan_block_for_local(target, b, false, &stack_top);
- }
+ // First origin of being uninitialized:
+ // The non-parameter locals in the entry block.
+ int nparams = (int)PyList_GET_SIZE(c->u->u_ste->ste_varnames);
+ uint64_t start_mask = 0;
+ for (int i = nparams; i < nlocals; i++) {
+ start_mask |= (uint64_t)1 << i;
+ }
+ maybe_push(entryblock, start_mask, &sp);
- // Second pass: Depth-first search to propagate uncertainty
- while (stack_top > stack) {
- basicblock *b = *--stack_top;
- scan_block_for_local(target, b, true, &stack_top);
- }
+ // Second origin of being uninitialized:
+ // There could be DELETE_FAST somewhere, so
+ // be sure to scan each basicblock at least once.
+ for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+ scan_block_for_locals(b, &sp);
+ }
+
+ // Now propagate the uncertainty from the origins we found: Use
+ // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
+ while (sp > stack) {
+ basicblock *b = *--sp;
+ // mark as no longer on stack
+ b->b_visited = 0;
+ scan_block_for_locals(b, &sp);
}
PyMem_Free(stack);
return 0;
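
For locals past the first 64, fast_scan_many_locals above keeps only per-block information, and the block-number stamp is what makes that cheap: an entry in states is trusted only if it equals the current blocknum, so the array never has to be cleared between blocks. A standalone sketch of the stamping idea (toy driver; the real pass walks struct instr arrays):

#include <stdio.h>
#include <stdlib.h>

/* states[i] == current block number  <=>  local (i + 64) was bound
   earlier in this same block.  Any other value means "unknown", so a
   load must take the checked form. */
int main(void)
{
    enum { NLOCALS = 66, BASE = 64 };
    long *states = calloc(NLOCALS - BASE, sizeof(long));
    if (states == NULL) {
        return 1;
    }
    long blocknum = 0;

    /* block 1: a store, then a load of local 65 -> plain LOAD_FAST */
    blocknum++;
    states[65 - BASE] = blocknum;                    /* STORE_FAST 65 */
    printf("block 1 load needs check: %d\n",
           states[65 - BASE] != blocknum);           /* prints 0 */

    /* block 2: load with no store first -> needs LOAD_FAST_CHECK,
       even though block 1 stored it (no cross-block propagation) */
    blocknum++;
    printf("block 2 load needs check: %d\n",
           states[65 - BASE] != blocknum);           /* prints 1 */

    free(states);
    return 0;
}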