diff options
author | Irit Katriel <1055913+iritkatriel@users.noreply.github.com> | 2022-11-11 10:53:43 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-11 10:53:43 (GMT) |
commit | 3dd6ee2c0022cb49e5cb8862a569bdd35b6a72bc (patch) | |
tree | 088c87ecddc5a792051c109e1cef49d3ba44d92a | |
parent | faf7dfa656bd52959156fed39a4c680b2b13e032 (diff) | |
download | cpython-3dd6ee2c0022cb49e5cb8862a569bdd35b6a72bc.zip cpython-3dd6ee2c0022cb49e5cb8862a569bdd35b6a72bc.tar.gz cpython-3dd6ee2c0022cb49e5cb8862a569bdd35b6a72bc.tar.bz2 |
gh-99254: remove all unused consts from code objects (GH-99255)
-rw-r--r-- | Lib/importlib/_bootstrap_external.py | 3 | ||||
-rw-r--r-- | Lib/test/test_compile.py | 38 | ||||
-rw-r--r-- | Lib/test/test_dis.py | 22 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2022-11-08-17-47-10.gh-issue-99254.RSvyFt.rst | 1 | ||||
-rw-r--r-- | Python/compile.c | 111 |
5 files changed, 142 insertions, 33 deletions
diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index 8cbc962..f4dbbeb 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -425,6 +425,7 @@ _code_type = type(_write_atomic.__code__) # Python 3.12a1 3509 (Conditional jumps only jump forward) # Python 3.12a1 3510 (FOR_ITER leaves iterator on the stack) # Python 3.12a1 3511 (Add STOPITERATION_ERROR instruction) +# Python 3.12a1 3512 (Remove all unused consts from code objects) # Python 3.13 will start with 3550 @@ -437,7 +438,7 @@ _code_type = type(_write_atomic.__code__) # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array # in PC/launcher.c must also be updated. -MAGIC_NUMBER = (3511).to_bytes(2, 'little') + b'\r\n' +MAGIC_NUMBER = (3512).to_bytes(2, 'little') + b'\r\n' _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index f7847a3..a14509a 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -670,7 +670,7 @@ if 1: self.assertIs(f1.__code__.co_linetable, f2.__code__.co_linetable) @support.cpython_only - def test_strip_unused_consts(self): + def test_remove_unused_consts(self): def f(): "docstring" if True: @@ -679,7 +679,41 @@ if 1: return "unused" self.assertEqual(f.__code__.co_consts, - ("docstring", True, "used")) + ("docstring", "used")) + + @support.cpython_only + def test_remove_unused_consts_no_docstring(self): + # the first item (None for no docstring in this case) is + # always retained. + def f(): + if True: + return "used" + else: + return "unused" + + self.assertEqual(f.__code__.co_consts, + (None, "used")) + + @support.cpython_only + def test_remove_unused_consts_extended_args(self): + N = 1000 + code = ["def f():\n"] + code.append("\ts = ''\n") + code.append("\tfor i in range(1):\n") + for i in range(N): + code.append(f"\t\tif True: s += 't{i}'\n") + code.append(f"\t\tif False: s += 'f{i}'\n") + code.append("\treturn s\n") + + code = "".join(code) + g = {} + eval(compile(code, "file.py", "exec"), g) + exec(code, g) + f = g['f'] + expected = tuple([None, '', 1] + [f't{i}' for i in range(N)]) + self.assertEqual(f.__code__.co_consts, expected) + expected = "".join(expected[3:]) + self.assertEqual(expected, f()) # Stripping unused constants is not a strict requirement for the # Python semantics, it's a more an implementation detail. diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 5640bf2..950af3c 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -168,13 +168,13 @@ dis_bug1333982 = """\ %3d RESUME 0 %3d LOAD_ASSERTION_ERROR - LOAD_CONST 2 (<code object <listcomp> at 0x..., file "%s", line %d>) + LOAD_CONST 1 (<code object <listcomp> at 0x..., file "%s", line %d>) MAKE_FUNCTION 0 LOAD_FAST 0 (x) GET_ITER CALL 0 -%3d LOAD_CONST 3 (1) +%3d LOAD_CONST 2 (1) %3d BINARY_OP 0 (+) CALL 0 @@ -1446,9 +1446,9 @@ def jumpy(): # End fodder for opinfo generation tests expected_outer_line = 1 _line_offset = outer.__code__.co_firstlineno - 1 -code_object_f = outer.__code__.co_consts[3] +code_object_f = outer.__code__.co_consts[1] expected_f_line = code_object_f.co_firstlineno - _line_offset -code_object_inner = code_object_f.co_consts[3] +code_object_inner = code_object_f.co_consts[1] expected_inner_line = code_object_inner.co_firstlineno - _line_offset expected_jumpy_line = 1 @@ -1485,21 +1485,21 @@ expected_opinfo_outer = [ Instruction(opname='MAKE_CELL', opcode=135, arg=0, argval='a', argrepr='a', offset=0, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='MAKE_CELL', opcode=135, arg=1, argval='b', argrepr='b', offset=2, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='RESUME', opcode=151, arg=0, argval=0, argrepr='', offset=4, starts_line=1, is_jump_target=False, positions=None), - Instruction(opname='LOAD_CONST', opcode=100, arg=7, argval=(3, 4), argrepr='(3, 4)', offset=6, starts_line=2, is_jump_target=False, positions=None), + Instruction(opname='LOAD_CONST', opcode=100, arg=5, argval=(3, 4), argrepr='(3, 4)', offset=6, starts_line=2, is_jump_target=False, positions=None), Instruction(opname='LOAD_CLOSURE', opcode=136, arg=0, argval='a', argrepr='a', offset=8, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='LOAD_CLOSURE', opcode=136, arg=1, argval='b', argrepr='b', offset=10, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='BUILD_TUPLE', opcode=102, arg=2, argval=2, argrepr='', offset=12, starts_line=None, is_jump_target=False, positions=None), - Instruction(opname='LOAD_CONST', opcode=100, arg=3, argval=code_object_f, argrepr=repr(code_object_f), offset=14, starts_line=None, is_jump_target=False, positions=None), + Instruction(opname='LOAD_CONST', opcode=100, arg=1, argval=code_object_f, argrepr=repr(code_object_f), offset=14, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='MAKE_FUNCTION', opcode=132, arg=9, argval=9, argrepr='defaults, closure', offset=16, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='STORE_FAST', opcode=125, arg=2, argval='f', argrepr='f', offset=18, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='LOAD_GLOBAL', opcode=116, arg=1, argval='print', argrepr='NULL + print', offset=20, starts_line=7, is_jump_target=False, positions=None), Instruction(opname='LOAD_DEREF', opcode=137, arg=0, argval='a', argrepr='a', offset=32, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='LOAD_DEREF', opcode=137, arg=1, argval='b', argrepr='b', offset=34, starts_line=None, is_jump_target=False, positions=None), - Instruction(opname='LOAD_CONST', opcode=100, arg=4, argval='', argrepr="''", offset=36, starts_line=None, is_jump_target=False, positions=None), - Instruction(opname='LOAD_CONST', opcode=100, arg=5, argval=1, argrepr='1', offset=38, starts_line=None, is_jump_target=False, positions=None), + Instruction(opname='LOAD_CONST', opcode=100, arg=2, argval='', argrepr="''", offset=36, starts_line=None, is_jump_target=False, positions=None), + Instruction(opname='LOAD_CONST', opcode=100, arg=3, argval=1, argrepr='1', offset=38, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='BUILD_LIST', opcode=103, arg=0, argval=0, argrepr='', offset=40, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='BUILD_MAP', opcode=105, arg=0, argval=0, argrepr='', offset=42, starts_line=None, is_jump_target=False, positions=None), - Instruction(opname='LOAD_CONST', opcode=100, arg=6, argval='Hello world!', argrepr="'Hello world!'", offset=44, starts_line=None, is_jump_target=False, positions=None), + Instruction(opname='LOAD_CONST', opcode=100, arg=4, argval='Hello world!', argrepr="'Hello world!'", offset=44, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='CALL', opcode=171, arg=7, argval=7, argrepr='', offset=46, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='POP_TOP', opcode=1, arg=None, argval=None, argrepr='', offset=56, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='LOAD_FAST', opcode=124, arg=2, argval='f', argrepr='f', offset=58, starts_line=8, is_jump_target=False, positions=None), @@ -1511,13 +1511,13 @@ expected_opinfo_f = [ Instruction(opname='MAKE_CELL', opcode=135, arg=0, argval='c', argrepr='c', offset=2, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='MAKE_CELL', opcode=135, arg=1, argval='d', argrepr='d', offset=4, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='RESUME', opcode=151, arg=0, argval=0, argrepr='', offset=6, starts_line=2, is_jump_target=False, positions=None), - Instruction(opname='LOAD_CONST', opcode=100, arg=4, argval=(5, 6), argrepr='(5, 6)', offset=8, starts_line=3, is_jump_target=False, positions=None), + Instruction(opname='LOAD_CONST', opcode=100, arg=2, argval=(5, 6), argrepr='(5, 6)', offset=8, starts_line=3, is_jump_target=False, positions=None), Instruction(opname='LOAD_CLOSURE', opcode=136, arg=3, argval='a', argrepr='a', offset=10, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='LOAD_CLOSURE', opcode=136, arg=4, argval='b', argrepr='b', offset=12, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='LOAD_CLOSURE', opcode=136, arg=0, argval='c', argrepr='c', offset=14, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='LOAD_CLOSURE', opcode=136, arg=1, argval='d', argrepr='d', offset=16, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='BUILD_TUPLE', opcode=102, arg=4, argval=4, argrepr='', offset=18, starts_line=None, is_jump_target=False, positions=None), - Instruction(opname='LOAD_CONST', opcode=100, arg=3, argval=code_object_inner, argrepr=repr(code_object_inner), offset=20, starts_line=None, is_jump_target=False, positions=None), + Instruction(opname='LOAD_CONST', opcode=100, arg=1, argval=code_object_inner, argrepr=repr(code_object_inner), offset=20, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='MAKE_FUNCTION', opcode=132, arg=9, argval=9, argrepr='defaults, closure', offset=22, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='STORE_FAST', opcode=125, arg=2, argval='inner', argrepr='inner', offset=24, starts_line=None, is_jump_target=False, positions=None), Instruction(opname='LOAD_GLOBAL', opcode=116, arg=1, argval='print', argrepr='NULL + print', offset=26, starts_line=5, is_jump_target=False, positions=None), diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-11-08-17-47-10.gh-issue-99254.RSvyFt.rst b/Misc/NEWS.d/next/Core and Builtins/2022-11-08-17-47-10.gh-issue-99254.RSvyFt.rst new file mode 100644 index 0000000..e3adbcc --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-11-08-17-47-10.gh-issue-99254.RSvyFt.rst @@ -0,0 +1 @@ +The compiler now removes all unused constants from code objects (except the first one, which may be a docstring). diff --git a/Python/compile.c b/Python/compile.c index c71563f..01ab7ef 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -8472,7 +8472,7 @@ static int optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache); static int -trim_unused_consts(basicblock *entryblock, PyObject *consts); +remove_unused_consts(basicblock *entryblock, PyObject *consts); /* Duplicates exit BBs, so that line numbers can be propagated to them */ static int @@ -8813,6 +8813,9 @@ assemble(struct compiler *c, int addNone) if (add_checks_for_loads_of_uninitialized_variables(g->g_entryblock, c) < 0) { goto error; } + if (remove_unused_consts(g->g_entryblock, consts)) { + goto error; + } /** line numbers (TODO: move this before optimization stage) */ if (duplicate_exits_without_lineno(g) < 0) { @@ -8844,10 +8847,6 @@ assemble(struct compiler *c, int addNone) /* Can't modify the bytecode after computing jump offsets. */ assemble_jump_offsets(g->g_entryblock); - if (trim_unused_consts(g->g_entryblock, consts)) { - goto error; - } - /* Create assembler */ if (!assemble_init(&a, c->u->u_firstlineno)) goto error; @@ -9706,32 +9705,106 @@ optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache) return 0; } -// Remove trailing unused constants. + static int -trim_unused_consts(basicblock *entryblock, PyObject *consts) +remove_unused_consts(basicblock *entryblock, PyObject *consts) { assert(PyList_CheckExact(consts)); + Py_ssize_t nconsts = PyList_GET_SIZE(consts); + if (nconsts == 0) { + return 0; /* nothing to do */ + } + Py_ssize_t *index_map = NULL; + Py_ssize_t *reverse_index_map = NULL; + int err = 1; + + index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t)); + if (index_map == NULL) { + goto end; + } + for (Py_ssize_t i = 1; i < nconsts; i++) { + index_map[i] = -1; + } // The first constant may be docstring; keep it always. - int max_const_index = 0; + index_map[0] = 0; + + /* mark used consts */ for (basicblock *b = entryblock; b != NULL; b = b->b_next) { for (int i = 0; i < b->b_iused; i++) { - if ((b->b_instr[i].i_opcode == LOAD_CONST || - b->b_instr[i].i_opcode == KW_NAMES) && - b->b_instr[i].i_oparg > max_const_index) { - max_const_index = b->b_instr[i].i_oparg; + if (b->b_instr[i].i_opcode == LOAD_CONST || + b->b_instr[i].i_opcode == KW_NAMES) { + + int index = b->b_instr[i].i_oparg; + index_map[index] = index; } } } - if (max_const_index+1 < PyList_GET_SIZE(consts)) { - //fprintf(stderr, "removing trailing consts: max=%d, size=%d\n", - // max_const_index, (int)PyList_GET_SIZE(consts)); - if (PyList_SetSlice(consts, max_const_index+1, - PyList_GET_SIZE(consts), NULL) < 0) { - return 1; + /* now index_map[i] == i if consts[i] is used, -1 otherwise */ + + /* condense consts */ + Py_ssize_t n_used_consts = 0; + for (int i = 0; i < nconsts; i++) { + if (index_map[i] != -1) { + assert(index_map[i] == i); + index_map[n_used_consts++] = index_map[i]; } } - return 0; + if (n_used_consts == nconsts) { + /* nothing to do */ + err = 0; + goto end; + } + + /* move all used consts to the beginning of the consts list */ + assert(n_used_consts < nconsts); + for (Py_ssize_t i = 0; i < n_used_consts; i++) { + Py_ssize_t old_index = index_map[i]; + assert(i <= old_index && old_index < nconsts); + if (i != old_index) { + PyObject *value = PyList_GET_ITEM(consts, index_map[i]); + assert(value != NULL); + PyList_SetItem(consts, i, Py_NewRef(value)); + } + } + + /* truncate the consts list at its new size */ + if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) { + goto end; + } + + /* adjust const indices in the bytecode */ + reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t)); + if (reverse_index_map == NULL) { + goto end; + } + for (Py_ssize_t i = 0; i < nconsts; i++) { + reverse_index_map[i] = -1; + } + for (Py_ssize_t i = 0; i < n_used_consts; i++) { + assert(index_map[i] != -1); + assert(reverse_index_map[index_map[i]] == -1); + reverse_index_map[index_map[i]] = i; + } + + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + for (int i = 0; i < b->b_iused; i++) { + if (b->b_instr[i].i_opcode == LOAD_CONST || + b->b_instr[i].i_opcode == KW_NAMES) { + + int index = b->b_instr[i].i_oparg; + assert(reverse_index_map[index] >= 0); + assert(reverse_index_map[index] < n_used_consts); + b->b_instr[i].i_oparg = (int)reverse_index_map[index]; + } + } + } + + err = 0; +end: + PyMem_Free(index_map); + PyMem_Free(reverse_index_map); + return err; } static inline int |