| author | Dennis Sweeney <36520290+sweeneyde@users.noreply.github.com> | 2022-10-20 22:27:41 (GMT) |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-10-20 22:27:41 (GMT) |
| commit | 39bc70e267929600057d62103739b7160e69dc8b (patch) | |
| tree | fa4bd575ea2c39ddcf759af111747bd4f48d9924 /Python | |
| parent | 6f15ca8c7afa23e1adc87f2b66b958b721f9acab (diff) | |
gh-97912: Avoid quadratic behavior when adding LOAD_FAST_CHECK (GH-97952)
* The compiler analyzes the usage of the first 64 local variables all at once using bit masks (see the first sketch below).
* Local variables beyond the first 64 are only partially analyzed, achieving linear time (see the second sketch, after the diff).
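
The bitmask idea from the first bullet can be seen in miniature below: one `uint64_t` tracks the "maybe uninitialized" status of up to 64 locals at once, so a single pass over the instructions replaces the old one-pass-per-local loop. This is a standalone sketch with hypothetical `op`/`ins` types, not the real compile.c data structures (those are in the diff that follows):

```c
#include <stdint.h>
#include <stdio.h>

enum op { OP_STORE, OP_LOAD, OP_DELETE };
struct ins { enum op op; int arg; int needs_check; };

static void
scan(struct ins *code, int n, uint64_t unsafe)
{
    /* Bit i of `unsafe` is set while local i may be uninitialized. */
    for (int i = 0; i < n; i++) {
        if (code[i].arg >= 64) {
            continue;               /* beyond the mask: needs a different pass */
        }
        uint64_t bit = (uint64_t)1 << code[i].arg;
        switch (code[i].op) {
        case OP_STORE:
            unsafe &= ~bit;         /* definitely defined from here on */
            break;
        case OP_DELETE:
            unsafe |= bit;          /* definitely undefined from here on */
            break;
        case OP_LOAD:
            code[i].needs_check = (unsafe & bit) != 0;
            unsafe &= ~bit;         /* a load that didn't raise proves definedness */
            break;
        }
    }
}

int main(void)
{
    /* local 0 is stored before its load; local 1 is loaded first */
    struct ins code[] = {
        {OP_STORE, 0, 0}, {OP_LOAD, 0, 0}, {OP_LOAD, 1, 0},
    };
    scan(code, 3, ~(uint64_t)0);    /* no parameters: all 64 bits start unsafe */
    for (int i = 0; i < 3; i++) {
        printf("instruction %d: needs_check=%d\n", i, code[i].needs_check);
    }
    return 0;
}
```

Only the intra-block scan is shown here; the real pass also propagates each block's final mask to its successors (via `maybe_push` in the diff below) until a fixed point is reached.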
Diffstat (limited to 'Python')
-rw-r--r-- | Python/compile.c | 199 |
1 file changed, 135 insertions(+), 64 deletions(-)
diff --git a/Python/compile.c b/Python/compile.c
index 70f38d4..676558b 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -114,6 +114,13 @@
     (opcode) == RAISE_VARARGS || \
     (opcode) == RERAISE)
 
+#define IS_SUPERINSTRUCTION_OPCODE(opcode) \
+    ((opcode) == LOAD_FAST__LOAD_FAST || \
+     (opcode) == LOAD_FAST__LOAD_CONST || \
+     (opcode) == LOAD_CONST__LOAD_FAST || \
+     (opcode) == STORE_FAST__LOAD_FAST || \
+     (opcode) == STORE_FAST__STORE_FAST)
+
 #define IS_TOP_LEVEL_AWAIT(c) ( \
     (c->c_flags->cf_flags & PyCF_ALLOW_TOP_LEVEL_AWAIT) \
     && (c->u->u_ste->ste_type == ModuleBlock))
@@ -258,6 +265,8 @@ typedef struct basicblock_ {
     int b_iused;
     /* length of instruction array (b_instr) */
     int b_ialloc;
+    /* Used by add_checks_for_loads_of_unknown_variables */
+    uint64_t b_unsafe_locals_mask;
     /* Number of predecessors that a block has. */
     int b_predecessors;
     /* depth of stack upon entry of block, computed by stackdepth() */
@@ -8052,103 +8061,165 @@ assemble_jump_offsets(basicblock *entryblock)
 }
 
-// Ensure each basicblock is only put onto the stack once.
-#define MAYBE_PUSH(B) do { \
-        if ((B)->b_visited == 0) { \
-            *(*stack_top)++ = (B); \
-            (B)->b_visited = 1; \
-        } \
-    } while (0)
+// helper functions for add_checks_for_loads_of_unknown_variables
+static inline void
+maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
+{
+    // Push b if the unsafe mask is giving us any new information.
+    // To avoid overflowing the stack, only allow each block once.
+    // Use b->b_visited=1 to mean that b is currently on the stack.
+    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
+    if (b->b_unsafe_locals_mask != both) {
+        b->b_unsafe_locals_mask = both;
+        // More work left to do.
+        if (!b->b_visited) {
+            // not on the stack, so push it.
+            *(*sp)++ = b;
+            b->b_visited = 1;
+        }
+    }
+}
 
 static void
-scan_block_for_local(int target, basicblock *b, bool unsafe_to_start,
-                     basicblock ***stack_top)
+scan_block_for_locals(basicblock *b, basicblock ***sp)
 {
-    bool unsafe = unsafe_to_start;
+    // bit i is set if local i is potentially uninitialized
+    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
     for (int i = 0; i < b->b_iused; i++) {
         struct instr *instr = &b->b_instr[i];
         assert(instr->i_opcode != EXTENDED_ARG);
         assert(instr->i_opcode != EXTENDED_ARG_QUICK);
-        assert(instr->i_opcode != LOAD_FAST__LOAD_FAST);
-        assert(instr->i_opcode != STORE_FAST__LOAD_FAST);
-        assert(instr->i_opcode != LOAD_CONST__LOAD_FAST);
-        assert(instr->i_opcode != STORE_FAST__STORE_FAST);
-        assert(instr->i_opcode != LOAD_FAST__LOAD_CONST);
-        if (unsafe && instr->i_except != NULL) {
-            MAYBE_PUSH(instr->i_except);
-        }
-        if (instr->i_oparg != target) {
+        assert(!IS_SUPERINSTRUCTION_OPCODE(instr->i_opcode));
+        if (instr->i_except != NULL) {
+            maybe_push(instr->i_except, unsafe_mask, sp);
+        }
+        if (instr->i_oparg >= 64) {
             continue;
         }
+        assert(instr->i_oparg >= 0);
+        uint64_t bit = (uint64_t)1 << instr->i_oparg;
         switch (instr->i_opcode) {
+            case DELETE_FAST:
+                unsafe_mask |= bit;
+                break;
+            case STORE_FAST:
+                unsafe_mask &= ~bit;
+                break;
             case LOAD_FAST_CHECK:
-                // if this doesn't raise, then var is defined
-                unsafe = false;
+                // If this doesn't raise, then the local is defined.
+                unsafe_mask &= ~bit;
                 break;
             case LOAD_FAST:
-                if (unsafe) {
+                if (unsafe_mask & bit) {
                     instr->i_opcode = LOAD_FAST_CHECK;
                 }
-                unsafe = false;
-                break;
-            case STORE_FAST:
-                unsafe = false;
-                break;
-            case DELETE_FAST:
-                unsafe = true;
+                unsafe_mask &= ~bit;
                 break;
         }
     }
-    if (unsafe) {
-        // unsafe at end of this block,
-        // so unsafe at start of next blocks
-        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
-            MAYBE_PUSH(b->b_next);
-        }
-        struct instr *last = basicblock_last_instr(b);
-        if (last != NULL) {
-            if (is_jump(last)) {
-                assert(last->i_target != NULL);
-                MAYBE_PUSH(last->i_target);
+    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
+        maybe_push(b->b_next, unsafe_mask, sp);
+    }
+    struct instr *last = basicblock_last_instr(b);
+    if (last && is_jump(last)) {
+        assert(last->i_target != NULL);
+        maybe_push(last->i_target, unsafe_mask, sp);
+    }
+}
+
+static int
+fast_scan_many_locals(basicblock *entryblock, int nlocals)
+{
+    assert(nlocals > 64);
+    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
+    if (states == NULL) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    Py_ssize_t blocknum = 0;
+    // state[i - 64] == blocknum if local i is guaranteed to
+    // be initialized, i.e., if it has had a previous LOAD_FAST or
+    // STORE_FAST within that basicblock (not followed by DELETE_FAST).
+    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+        blocknum++;
+        for (int i = 0; i < b->b_iused; i++) {
+            struct instr *instr = &b->b_instr[i];
+            assert(instr->i_opcode != EXTENDED_ARG);
+            assert(instr->i_opcode != EXTENDED_ARG_QUICK);
+            assert(!IS_SUPERINSTRUCTION_OPCODE(instr->i_opcode));
+            int arg = instr->i_oparg;
+            if (arg < 64) {
+                continue;
+            }
+            assert(arg >= 0);
+            switch (instr->i_opcode) {
+                case DELETE_FAST:
+                    states[arg - 64] = blocknum - 1;
+                    break;
+                case STORE_FAST:
+                    states[arg - 64] = blocknum;
+                    break;
+                case LOAD_FAST:
+                    if (states[arg - 64] != blocknum) {
+                        instr->i_opcode = LOAD_FAST_CHECK;
+                    }
+                    states[arg - 64] = blocknum;
+                    break;
+                case LOAD_FAST_CHECK:
+                    Py_UNREACHABLE();
             }
         }
     }
+    PyMem_Free(states);
+    return 0;
 }
-#undef MAYBE_PUSH
 
 static int
 add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
                                                 struct compiler *c)
 {
+    int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames);
+    if (nlocals == 0) {
+        return 0;
+    }
+    if (nlocals > 64) {
+        // To avoid O(nlocals**2) compilation, locals beyond the first
+        // 64 are only analyzed one basicblock at a time: initialization
+        // info is not passed between basicblocks.
+        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
+            return -1;
+        }
+        nlocals = 64;
+    }
     basicblock **stack = make_cfg_traversal_stack(entryblock);
     if (stack == NULL) {
        return -1;
    }
-    Py_ssize_t nparams = PyList_GET_SIZE(c->u->u_ste->ste_varnames);
-    int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames);
-    for (int target = 0; target < nlocals; target++) {
-        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-            b->b_visited = 0;
-        }
-        basicblock **stack_top = stack;
+    basicblock **sp = stack;
 
-        // First pass: find the relevant DFS starting points:
-        // the places where "being uninitialized" originates,
-        // which are the entry block and any DELETE_FAST statements.
-        if (target >= nparams) {
-            // only non-parameter locals start out uninitialized.
-            *(stack_top++) = entryblock;
-            entryblock->b_visited = 1;
-        }
-        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-            scan_block_for_local(target, b, false, &stack_top);
-        }
+    // First origin of being uninitialized:
+    // The non-parameter locals in the entry block.
+    int nparams = (int)PyList_GET_SIZE(c->u->u_ste->ste_varnames);
+    uint64_t start_mask = 0;
+    for (int i = nparams; i < nlocals; i++) {
+        start_mask |= (uint64_t)1 << i;
+    }
+    maybe_push(entryblock, start_mask, &sp);
 
-        // Second pass: Depth-first search to propagate uncertainty
-        while (stack_top > stack) {
-            basicblock *b = *--stack_top;
-            scan_block_for_local(target, b, true, &stack_top);
-        }
+    // Second origin of being uninitialized:
+    // There could be DELETE_FAST somewhere, so
+    // be sure to scan each basicblock at least once.
+    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+        scan_block_for_locals(b, &sp);
+    }
+
+    // Now propagate the uncertainty from the origins we found: Use
+    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
+    while (sp > stack) {
+        basicblock *b = *--sp;
+        // mark as no longer on stack
+        b->b_visited = 0;
+        scan_block_for_locals(b, &sp);
     }
     PyMem_Free(stack);
     return 0;
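
For locals numbered 64 and above, `fast_scan_many_locals` (added in the diff above) avoids re-clearing a per-local array at every block boundary: `states[i - 64]` holds the number of the basicblock in which local `i` was last proven defined, so entering a new block invalidates every entry at once just by incrementing `blocknum`. Below is a minimal sketch of that trick, using a hypothetical two-block instruction sequence rather than the real CFG types:

```c
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    int nlocals = 100;              /* more than 64 locals */
    long *states = calloc(nlocals, sizeof *states);
    if (states == NULL) {
        return 1;
    }
    long blocknum = 0;              /* 0 never equals a real block number */

    /* Block 1: a store makes local 70 provably defined within this block. */
    blocknum++;
    states[70] = blocknum;          /* STORE_FAST 70 */
    printf("load in block 1 needs check: %d\n", states[70] != blocknum);

    /* Block 2: incrementing blocknum makes every states[] entry stale at
       once, since no slot can already hold the new block number. */
    blocknum++;
    printf("load in block 2 needs check: %d\n", states[70] != blocknum);
    states[70] = blocknum;          /* the checked load proves definedness */

    free(states);
    return 0;
}
```

Because definedness never carries across blocks in this mode, a load in a later block is always rewritten to LOAD_FAST_CHECK even when an earlier block stored the local, which is the "partially analyzed" trade-off the commit message describes.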