summaryrefslogtreecommitdiffstats
path: root/Python/peephole.c
diff options
context:
space:
mode:
authorJeffrey Yasskin <jyasskin@gmail.com>2009-02-25 02:25:04 (GMT)
committerJeffrey Yasskin <jyasskin@gmail.com>2009-02-25 02:25:04 (GMT)
commit9de7ec7868554e92700e4bda6c5f9d8fcad634dc (patch)
treece9c4c821286769b6057d94ea20e337ea726f60f /Python/peephole.c
parent0a68b01d649168f096fddd3cd8d433f2b8b6bb9e (diff)
downloadcpython-9de7ec7868554e92700e4bda6c5f9d8fcad634dc.zip
cpython-9de7ec7868554e92700e4bda6c5f9d8fcad634dc.tar.gz
cpython-9de7ec7868554e92700e4bda6c5f9d8fcad634dc.tar.bz2
http://bugs.python.org/issue4715
This patch by Antoine Pitrou optimizes the bytecode for conditional branches by merging the following "POP_TOP" instruction into the conditional jump. For example, the list comprehension "[x for x in l if not x]" produced the following bytecode: 1 0 BUILD_LIST 0 3 LOAD_FAST 0 (.0) >> 6 FOR_ITER 23 (to 32) 9 STORE_FAST 1 (x) 12 LOAD_FAST 1 (x) 15 JUMP_IF_TRUE 10 (to 28) 18 POP_TOP 19 LOAD_FAST 1 (x) 22 LIST_APPEND 2 25 JUMP_ABSOLUTE 6 >> 28 POP_TOP 29 JUMP_ABSOLUTE 6 >> 32 RETURN_VALUE but after the patch it produces the following bytecode: 1 0 BUILD_LIST 0 3 LOAD_FAST 0 (.0) >> 6 FOR_ITER 18 (to 27) 9 STORE_FAST 1 (x) 12 LOAD_FAST 1 (x) 15 POP_JUMP_IF_TRUE 6 18 LOAD_FAST 1 (x) 21 LIST_APPEND 2 24 JUMP_ABSOLUTE 6 >> 27 RETURN_VALUE Notice that not only the code is shorter, but the conditional jump (POP_JUMP_IF_TRUE) jumps right to the start of the loop instead of going through the JUMP_ABSOLUTE at the end. "continue" statements are helped similarly. Furthermore, the old jump opcodes (JUMP_IF_FALSE, JUMP_IF_TRUE) have been replaced by two new opcodes: - JUMP_IF_TRUE_OR_POP, which jumps if true and pops otherwise - JUMP_IF_FALSE_OR_POP, which jumps if false and pops otherwise
Diffstat (limited to 'Python/peephole.c')
-rw-r--r--Python/peephole.c102
1 files changed, 58 insertions, 44 deletions
diff --git a/Python/peephole.c b/Python/peephole.c
index d31c89b..c413fc8 100644
--- a/Python/peephole.c
+++ b/Python/peephole.c
@@ -13,7 +13,12 @@
#define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1]))
#define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD)
-#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP)
+#define CONDITIONAL_JUMP(op) (op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
+ || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
+#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP \
+ || op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
+ || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
+#define JUMPS_ON_TRUE(op) (op==POP_JUMP_IF_TRUE || op==JUMP_IF_TRUE_OR_POP)
#define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3))
#define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255
#define CODESIZE(op) (HAS_ARG(op) ? 3 : 1)
@@ -237,8 +242,10 @@ markblocks(unsigned char *code, Py_ssize_t len)
switch (opcode) {
case FOR_ITER:
case JUMP_FORWARD:
- case JUMP_IF_FALSE:
- case JUMP_IF_TRUE:
+ case JUMP_IF_FALSE_OR_POP:
+ case JUMP_IF_TRUE_OR_POP:
+ case POP_JUMP_IF_FALSE:
+ case POP_JUMP_IF_TRUE:
case JUMP_ABSOLUTE:
case CONTINUE_LOOP:
case SETUP_LOOP:
@@ -363,29 +370,24 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
assert(PyList_Check(consts));
for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
+ reoptimize_current:
opcode = codestr[i];
lastlc = cumlc;
cumlc = 0;
switch (opcode) {
-
- /* Replace UNARY_NOT JUMP_IF_FALSE POP_TOP with
- with JUMP_IF_TRUE POP_TOP */
+ /* Replace UNARY_NOT POP_JUMP_IF_FALSE
+ with POP_JUMP_IF_TRUE */
case UNARY_NOT:
- if (codestr[i+1] != JUMP_IF_FALSE ||
- codestr[i+4] != POP_TOP ||
- !ISBASICBLOCK(blocks,i,5))
- continue;
- tgt = GETJUMPTGT(codestr, (i+1));
- if (codestr[tgt] != POP_TOP)
+ if (codestr[i+1] != POP_JUMP_IF_FALSE
+ || !ISBASICBLOCK(blocks,i,4))
continue;
- j = GETARG(codestr, i+1) + 1;
- codestr[i] = JUMP_IF_TRUE;
+ j = GETARG(codestr, i+1);
+ codestr[i] = POP_JUMP_IF_TRUE;
SETARG(codestr, i, j);
- codestr[i+3] = POP_TOP;
- codestr[i+4] = NOP;
- break;
+ codestr[i+3] = NOP;
+ goto reoptimize_current;
/* not a is b --> a is not b
not a in b --> a not in b
@@ -417,29 +419,19 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
break;
/* Skip over LOAD_CONST trueconst
- JUMP_IF_FALSE xx POP_TOP */
+ POP_JUMP_IF_FALSE xx. This improves
+ "while 1" performance. */
case LOAD_CONST:
cumlc = lastlc + 1;
j = GETARG(codestr, i);
- if (codestr[i+3] != JUMP_IF_FALSE ||
- codestr[i+6] != POP_TOP ||
+ if (codestr[i+3] != POP_JUMP_IF_FALSE ||
!ISBASICBLOCK(blocks,i,7) ||
!PyObject_IsTrue(PyList_GET_ITEM(consts, j)))
continue;
- memset(codestr+i, NOP, 7);
+ memset(codestr+i, NOP, 6);
cumlc = 0;
break;
- /* Replace POP_TOP JUMP_FORWARD 1 POP_TOP
- with NOP NOP NOP NOP POP_TOP. */
- case POP_TOP:
- if (UNCONDITIONAL_JUMP(codestr[i+1]) &&
- GETJUMPTGT(codestr, i+1) == i+5 &&
- codestr[i+4] == POP_TOP &&
- ISBASICBLOCK(blocks,i,4))
- memset(codestr+i, NOP, 4);
- break;
-
/* Try to fold tuples of constants (includes a case for lists
which are only used for "in" and "not in" tests).
Skip over BUILD_SEQN 1 UNPACK_SEQN 1.
@@ -524,27 +516,47 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
"if a or b:"
"a and b or c"
"(a and b) and c"
- x:JUMP_IF_FALSE y y:JUMP_IF_FALSE z --> x:JUMP_IF_FALSE z
- x:JUMP_IF_FALSE y y:JUMP_IF_TRUE z --> x:JUMP_IF_FALSE y+3
+ x:POP_OR_JUMP y y:POP_OR_JUMP z --> x:POP_OR_JUMP z
+ x:POP_OR_JUMP y y:JUMP_OR_POP z --> x:POP_JUMP_IF_FALSE y+3
where y+3 is the instruction following the second test.
*/
- case JUMP_IF_FALSE:
- case JUMP_IF_TRUE:
+ case JUMP_IF_FALSE_OR_POP:
+ case JUMP_IF_TRUE_OR_POP:
tgt = GETJUMPTGT(codestr, i);
j = codestr[tgt];
- if (j == JUMP_IF_FALSE || j == JUMP_IF_TRUE) {
- if (j == opcode) {
- tgttgt = GETJUMPTGT(codestr, tgt) - i - 3;
+ if (CONDITIONAL_JUMP(j)) {
+ /* NOTE: all possible jumps here are
+ absolute! */
+ if (JUMPS_ON_TRUE(j) == JUMPS_ON_TRUE(opcode)) {
+ /* The second jump will be
+ taken iff the first is. */
+ tgttgt = GETJUMPTGT(codestr, tgt);
+ /* The current opcode inherits
+ its target's stack behaviour */
+ codestr[i] = j;
SETARG(codestr, i, tgttgt);
+ goto reoptimize_current;
} else {
- tgt -= i;
- SETARG(codestr, i, tgt);
+ /* The second jump is not taken
+ if the first is (so jump past
+ it), and all conditional
+ jumps pop their argument when
+ they're not taken (so change
+ the first jump to pop its
+ argument when it's taken). */
+ if (JUMPS_ON_TRUE(opcode))
+ codestr[i] = POP_JUMP_IF_TRUE;
+ else
+ codestr[i] = POP_JUMP_IF_FALSE;
+ SETARG(codestr, i, (tgt + 3));
+ goto reoptimize_current;
}
- break;
}
- /* Intentional fallthrough */
+ /* Intentional fallthrough */
/* Replace jumps to unconditional jumps */
+ case POP_JUMP_IF_FALSE:
+ case POP_JUMP_IF_TRUE:
case FOR_ITER:
case JUMP_FORWARD:
case JUMP_ABSOLUTE:
@@ -621,14 +633,16 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
case JUMP_ABSOLUTE:
case CONTINUE_LOOP:
+ case POP_JUMP_IF_FALSE:
+ case POP_JUMP_IF_TRUE:
+ case JUMP_IF_FALSE_OR_POP:
+ case JUMP_IF_TRUE_OR_POP:
j = addrmap[GETARG(codestr, i)];
SETARG(codestr, i, j);
break;
case FOR_ITER:
case JUMP_FORWARD:
- case JUMP_IF_FALSE:
- case JUMP_IF_TRUE:
case SETUP_LOOP:
case SETUP_EXCEPT:
case SETUP_FINALLY: