summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIrit Katriel <1055913+iritkatriel@users.noreply.github.com>2022-11-11 10:53:43 (GMT)
committerGitHub <noreply@github.com>2022-11-11 10:53:43 (GMT)
commit3dd6ee2c0022cb49e5cb8862a569bdd35b6a72bc (patch)
tree088c87ecddc5a792051c109e1cef49d3ba44d92a
parentfaf7dfa656bd52959156fed39a4c680b2b13e032 (diff)
downloadcpython-3dd6ee2c0022cb49e5cb8862a569bdd35b6a72bc.zip
cpython-3dd6ee2c0022cb49e5cb8862a569bdd35b6a72bc.tar.gz
cpython-3dd6ee2c0022cb49e5cb8862a569bdd35b6a72bc.tar.bz2
gh-99254: remove all unused consts from code objects (GH-99255)
-rw-r--r--Lib/importlib/_bootstrap_external.py3
-rw-r--r--Lib/test/test_compile.py38
-rw-r--r--Lib/test/test_dis.py22
-rw-r--r--Misc/NEWS.d/next/Core and Builtins/2022-11-08-17-47-10.gh-issue-99254.RSvyFt.rst1
-rw-r--r--Python/compile.c111
5 files changed, 142 insertions, 33 deletions
diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py
index 8cbc962..f4dbbeb 100644
--- a/Lib/importlib/_bootstrap_external.py
+++ b/Lib/importlib/_bootstrap_external.py
@@ -425,6 +425,7 @@ _code_type = type(_write_atomic.__code__)
# Python 3.12a1 3509 (Conditional jumps only jump forward)
# Python 3.12a1 3510 (FOR_ITER leaves iterator on the stack)
# Python 3.12a1 3511 (Add STOPITERATION_ERROR instruction)
+# Python 3.12a1 3512 (Remove all unused consts from code objects)
# Python 3.13 will start with 3550
@@ -437,7 +438,7 @@ _code_type = type(_write_atomic.__code__)
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
# in PC/launcher.c must also be updated.
-MAGIC_NUMBER = (3511).to_bytes(2, 'little') + b'\r\n'
+MAGIC_NUMBER = (3512).to_bytes(2, 'little') + b'\r\n'
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
index f7847a3..a14509a 100644
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -670,7 +670,7 @@ if 1:
self.assertIs(f1.__code__.co_linetable, f2.__code__.co_linetable)
@support.cpython_only
- def test_strip_unused_consts(self):
+ def test_remove_unused_consts(self):
def f():
"docstring"
if True:
@@ -679,7 +679,41 @@ if 1:
return "unused"
self.assertEqual(f.__code__.co_consts,
- ("docstring", True, "used"))
+ ("docstring", "used"))
+
+ @support.cpython_only
+ def test_remove_unused_consts_no_docstring(self):
+ # the first item (None for no docstring in this case) is
+ # always retained.
+ def f():
+ if True:
+ return "used"
+ else:
+ return "unused"
+
+ self.assertEqual(f.__code__.co_consts,
+ (None, "used"))
+
+ @support.cpython_only
+ def test_remove_unused_consts_extended_args(self):
+ N = 1000
+ code = ["def f():\n"]
+ code.append("\ts = ''\n")
+ code.append("\tfor i in range(1):\n")
+ for i in range(N):
+ code.append(f"\t\tif True: s += 't{i}'\n")
+ code.append(f"\t\tif False: s += 'f{i}'\n")
+ code.append("\treturn s\n")
+
+ code = "".join(code)
+ g = {}
+ eval(compile(code, "file.py", "exec"), g)
+ exec(code, g)
+ f = g['f']
+ expected = tuple([None, '', 1] + [f't{i}' for i in range(N)])
+ self.assertEqual(f.__code__.co_consts, expected)
+ expected = "".join(expected[3:])
+ self.assertEqual(expected, f())
# Stripping unused constants is not a strict requirement for the
# Python semantics, it's a more an implementation detail.
diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py
index 5640bf2..950af3c 100644
--- a/Lib/test/test_dis.py
+++ b/Lib/test/test_dis.py
@@ -168,13 +168,13 @@ dis_bug1333982 = """\
%3d RESUME 0
%3d LOAD_ASSERTION_ERROR
- LOAD_CONST 2 (<code object <listcomp> at 0x..., file "%s", line %d>)
+ LOAD_CONST 1 (<code object <listcomp> at 0x..., file "%s", line %d>)
MAKE_FUNCTION 0
LOAD_FAST 0 (x)
GET_ITER
CALL 0
-%3d LOAD_CONST 3 (1)
+%3d LOAD_CONST 2 (1)
%3d BINARY_OP 0 (+)
CALL 0
@@ -1446,9 +1446,9 @@ def jumpy():
# End fodder for opinfo generation tests
expected_outer_line = 1
_line_offset = outer.__code__.co_firstlineno - 1
-code_object_f = outer.__code__.co_consts[3]
+code_object_f = outer.__code__.co_consts[1]
expected_f_line = code_object_f.co_firstlineno - _line_offset
-code_object_inner = code_object_f.co_consts[3]
+code_object_inner = code_object_f.co_consts[1]
expected_inner_line = code_object_inner.co_firstlineno - _line_offset
expected_jumpy_line = 1
@@ -1485,21 +1485,21 @@ expected_opinfo_outer = [
Instruction(opname='MAKE_CELL', opcode=135, arg=0, argval='a', argrepr='a', offset=0, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='MAKE_CELL', opcode=135, arg=1, argval='b', argrepr='b', offset=2, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='RESUME', opcode=151, arg=0, argval=0, argrepr='', offset=4, starts_line=1, is_jump_target=False, positions=None),
- Instruction(opname='LOAD_CONST', opcode=100, arg=7, argval=(3, 4), argrepr='(3, 4)', offset=6, starts_line=2, is_jump_target=False, positions=None),
+ Instruction(opname='LOAD_CONST', opcode=100, arg=5, argval=(3, 4), argrepr='(3, 4)', offset=6, starts_line=2, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=0, argval='a', argrepr='a', offset=8, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=1, argval='b', argrepr='b', offset=10, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='BUILD_TUPLE', opcode=102, arg=2, argval=2, argrepr='', offset=12, starts_line=None, is_jump_target=False, positions=None),
- Instruction(opname='LOAD_CONST', opcode=100, arg=3, argval=code_object_f, argrepr=repr(code_object_f), offset=14, starts_line=None, is_jump_target=False, positions=None),
+ Instruction(opname='LOAD_CONST', opcode=100, arg=1, argval=code_object_f, argrepr=repr(code_object_f), offset=14, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='MAKE_FUNCTION', opcode=132, arg=9, argval=9, argrepr='defaults, closure', offset=16, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='STORE_FAST', opcode=125, arg=2, argval='f', argrepr='f', offset=18, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_GLOBAL', opcode=116, arg=1, argval='print', argrepr='NULL + print', offset=20, starts_line=7, is_jump_target=False, positions=None),
Instruction(opname='LOAD_DEREF', opcode=137, arg=0, argval='a', argrepr='a', offset=32, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_DEREF', opcode=137, arg=1, argval='b', argrepr='b', offset=34, starts_line=None, is_jump_target=False, positions=None),
- Instruction(opname='LOAD_CONST', opcode=100, arg=4, argval='', argrepr="''", offset=36, starts_line=None, is_jump_target=False, positions=None),
- Instruction(opname='LOAD_CONST', opcode=100, arg=5, argval=1, argrepr='1', offset=38, starts_line=None, is_jump_target=False, positions=None),
+ Instruction(opname='LOAD_CONST', opcode=100, arg=2, argval='', argrepr="''", offset=36, starts_line=None, is_jump_target=False, positions=None),
+ Instruction(opname='LOAD_CONST', opcode=100, arg=3, argval=1, argrepr='1', offset=38, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='BUILD_LIST', opcode=103, arg=0, argval=0, argrepr='', offset=40, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='BUILD_MAP', opcode=105, arg=0, argval=0, argrepr='', offset=42, starts_line=None, is_jump_target=False, positions=None),
- Instruction(opname='LOAD_CONST', opcode=100, arg=6, argval='Hello world!', argrepr="'Hello world!'", offset=44, starts_line=None, is_jump_target=False, positions=None),
+ Instruction(opname='LOAD_CONST', opcode=100, arg=4, argval='Hello world!', argrepr="'Hello world!'", offset=44, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='CALL', opcode=171, arg=7, argval=7, argrepr='', offset=46, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='POP_TOP', opcode=1, arg=None, argval=None, argrepr='', offset=56, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_FAST', opcode=124, arg=2, argval='f', argrepr='f', offset=58, starts_line=8, is_jump_target=False, positions=None),
@@ -1511,13 +1511,13 @@ expected_opinfo_f = [
Instruction(opname='MAKE_CELL', opcode=135, arg=0, argval='c', argrepr='c', offset=2, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='MAKE_CELL', opcode=135, arg=1, argval='d', argrepr='d', offset=4, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='RESUME', opcode=151, arg=0, argval=0, argrepr='', offset=6, starts_line=2, is_jump_target=False, positions=None),
- Instruction(opname='LOAD_CONST', opcode=100, arg=4, argval=(5, 6), argrepr='(5, 6)', offset=8, starts_line=3, is_jump_target=False, positions=None),
+ Instruction(opname='LOAD_CONST', opcode=100, arg=2, argval=(5, 6), argrepr='(5, 6)', offset=8, starts_line=3, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=3, argval='a', argrepr='a', offset=10, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=4, argval='b', argrepr='b', offset=12, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=0, argval='c', argrepr='c', offset=14, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=1, argval='d', argrepr='d', offset=16, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='BUILD_TUPLE', opcode=102, arg=4, argval=4, argrepr='', offset=18, starts_line=None, is_jump_target=False, positions=None),
- Instruction(opname='LOAD_CONST', opcode=100, arg=3, argval=code_object_inner, argrepr=repr(code_object_inner), offset=20, starts_line=None, is_jump_target=False, positions=None),
+ Instruction(opname='LOAD_CONST', opcode=100, arg=1, argval=code_object_inner, argrepr=repr(code_object_inner), offset=20, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='MAKE_FUNCTION', opcode=132, arg=9, argval=9, argrepr='defaults, closure', offset=22, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='STORE_FAST', opcode=125, arg=2, argval='inner', argrepr='inner', offset=24, starts_line=None, is_jump_target=False, positions=None),
Instruction(opname='LOAD_GLOBAL', opcode=116, arg=1, argval='print', argrepr='NULL + print', offset=26, starts_line=5, is_jump_target=False, positions=None),
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-11-08-17-47-10.gh-issue-99254.RSvyFt.rst b/Misc/NEWS.d/next/Core and Builtins/2022-11-08-17-47-10.gh-issue-99254.RSvyFt.rst
new file mode 100644
index 0000000..e3adbcc
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-11-08-17-47-10.gh-issue-99254.RSvyFt.rst
@@ -0,0 +1 @@
+The compiler now removes all unused constants from code objects (except the first one, which may be a docstring).
diff --git a/Python/compile.c b/Python/compile.c
index c71563f..01ab7ef 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -8472,7 +8472,7 @@ static int
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache);
static int
-trim_unused_consts(basicblock *entryblock, PyObject *consts);
+remove_unused_consts(basicblock *entryblock, PyObject *consts);
/* Duplicates exit BBs, so that line numbers can be propagated to them */
static int
@@ -8813,6 +8813,9 @@ assemble(struct compiler *c, int addNone)
if (add_checks_for_loads_of_uninitialized_variables(g->g_entryblock, c) < 0) {
goto error;
}
+ if (remove_unused_consts(g->g_entryblock, consts)) {
+ goto error;
+ }
/** line numbers (TODO: move this before optimization stage) */
if (duplicate_exits_without_lineno(g) < 0) {
@@ -8844,10 +8847,6 @@ assemble(struct compiler *c, int addNone)
/* Can't modify the bytecode after computing jump offsets. */
assemble_jump_offsets(g->g_entryblock);
- if (trim_unused_consts(g->g_entryblock, consts)) {
- goto error;
- }
-
/* Create assembler */
if (!assemble_init(&a, c->u->u_firstlineno))
goto error;
@@ -9706,32 +9705,106 @@ optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache)
return 0;
}
-// Remove trailing unused constants.
+
static int
-trim_unused_consts(basicblock *entryblock, PyObject *consts)
+remove_unused_consts(basicblock *entryblock, PyObject *consts)
{
assert(PyList_CheckExact(consts));
+ Py_ssize_t nconsts = PyList_GET_SIZE(consts);
+ if (nconsts == 0) {
+ return 0; /* nothing to do */
+ }
+ Py_ssize_t *index_map = NULL;
+ Py_ssize_t *reverse_index_map = NULL;
+ int err = 1;
+
+ index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
+ if (index_map == NULL) {
+ goto end;
+ }
+ for (Py_ssize_t i = 1; i < nconsts; i++) {
+ index_map[i] = -1;
+ }
// The first constant may be docstring; keep it always.
- int max_const_index = 0;
+ index_map[0] = 0;
+
+ /* mark used consts */
for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
for (int i = 0; i < b->b_iused; i++) {
- if ((b->b_instr[i].i_opcode == LOAD_CONST ||
- b->b_instr[i].i_opcode == KW_NAMES) &&
- b->b_instr[i].i_oparg > max_const_index) {
- max_const_index = b->b_instr[i].i_oparg;
+ if (b->b_instr[i].i_opcode == LOAD_CONST ||
+ b->b_instr[i].i_opcode == KW_NAMES) {
+
+ int index = b->b_instr[i].i_oparg;
+ index_map[index] = index;
}
}
}
- if (max_const_index+1 < PyList_GET_SIZE(consts)) {
- //fprintf(stderr, "removing trailing consts: max=%d, size=%d\n",
- // max_const_index, (int)PyList_GET_SIZE(consts));
- if (PyList_SetSlice(consts, max_const_index+1,
- PyList_GET_SIZE(consts), NULL) < 0) {
- return 1;
+ /* now index_map[i] == i if consts[i] is used, -1 otherwise */
+
+ /* condense consts */
+ Py_ssize_t n_used_consts = 0;
+ for (int i = 0; i < nconsts; i++) {
+ if (index_map[i] != -1) {
+ assert(index_map[i] == i);
+ index_map[n_used_consts++] = index_map[i];
}
}
- return 0;
+ if (n_used_consts == nconsts) {
+ /* nothing to do */
+ err = 0;
+ goto end;
+ }
+
+ /* move all used consts to the beginning of the consts list */
+ assert(n_used_consts < nconsts);
+ for (Py_ssize_t i = 0; i < n_used_consts; i++) {
+ Py_ssize_t old_index = index_map[i];
+ assert(i <= old_index && old_index < nconsts);
+ if (i != old_index) {
+ PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
+ assert(value != NULL);
+ PyList_SetItem(consts, i, Py_NewRef(value));
+ }
+ }
+
+ /* truncate the consts list at its new size */
+ if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
+ goto end;
+ }
+
+ /* adjust const indices in the bytecode */
+ reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
+ if (reverse_index_map == NULL) {
+ goto end;
+ }
+ for (Py_ssize_t i = 0; i < nconsts; i++) {
+ reverse_index_map[i] = -1;
+ }
+ for (Py_ssize_t i = 0; i < n_used_consts; i++) {
+ assert(index_map[i] != -1);
+ assert(reverse_index_map[index_map[i]] == -1);
+ reverse_index_map[index_map[i]] = i;
+ }
+
+ for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+ for (int i = 0; i < b->b_iused; i++) {
+ if (b->b_instr[i].i_opcode == LOAD_CONST ||
+ b->b_instr[i].i_opcode == KW_NAMES) {
+
+ int index = b->b_instr[i].i_oparg;
+ assert(reverse_index_map[index] >= 0);
+ assert(reverse_index_map[index] < n_used_consts);
+ b->b_instr[i].i_oparg = (int)reverse_index_map[index];
+ }
+ }
+ }
+
+ err = 0;
+end:
+ PyMem_Free(index_map);
+ PyMem_Free(reverse_index_map);
+ return err;
}
static inline int