diff options
author | Jeffrey Yasskin <jyasskin@gmail.com> | 2009-02-25 02:25:04 (GMT) |
---|---|---|
committer | Jeffrey Yasskin <jyasskin@gmail.com> | 2009-02-25 02:25:04 (GMT) |
commit | 9de7ec7868554e92700e4bda6c5f9d8fcad634dc (patch) | |
tree | ce9c4c821286769b6057d94ea20e337ea726f60f /Python/peephole.c | |
parent | 0a68b01d649168f096fddd3cd8d433f2b8b6bb9e (diff) | |
download | cpython-9de7ec7868554e92700e4bda6c5f9d8fcad634dc.zip cpython-9de7ec7868554e92700e4bda6c5f9d8fcad634dc.tar.gz cpython-9de7ec7868554e92700e4bda6c5f9d8fcad634dc.tar.bz2 |
http://bugs.python.org/issue4715
This patch by Antoine Pitrou optimizes the bytecode for conditional branches by
merging the following "POP_TOP" instruction into the conditional jump. For
example, the list comprehension "[x for x in l if not x]" produced the
following bytecode:
1 0 BUILD_LIST 0
3 LOAD_FAST 0 (.0)
>> 6 FOR_ITER 23 (to 32)
9 STORE_FAST 1 (x)
12 LOAD_FAST 1 (x)
15 JUMP_IF_TRUE 10 (to 28)
18 POP_TOP
19 LOAD_FAST 1 (x)
22 LIST_APPEND 2
25 JUMP_ABSOLUTE 6
>> 28 POP_TOP
29 JUMP_ABSOLUTE 6
>> 32 RETURN_VALUE
but after the patch it produces the following bytecode:
1 0 BUILD_LIST 0
3 LOAD_FAST 0 (.0)
>> 6 FOR_ITER 18 (to 27)
9 STORE_FAST 1 (x)
12 LOAD_FAST 1 (x)
15 POP_JUMP_IF_TRUE 6
18 LOAD_FAST 1 (x)
21 LIST_APPEND 2
24 JUMP_ABSOLUTE 6
>> 27 RETURN_VALUE
Notice that not only the code is shorter, but the conditional jump
(POP_JUMP_IF_TRUE) jumps right to the start of the loop instead of going through
the JUMP_ABSOLUTE at the end. "continue" statements are helped
similarly.
Furthermore, the old jump opcodes (JUMP_IF_FALSE, JUMP_IF_TRUE) have been
replaced by two new opcodes:
- JUMP_IF_TRUE_OR_POP, which jumps if true and pops otherwise
- JUMP_IF_FALSE_OR_POP, which jumps if false and pops otherwise
Diffstat (limited to 'Python/peephole.c')
-rw-r--r-- | Python/peephole.c | 102 |
1 files changed, 58 insertions, 44 deletions
diff --git a/Python/peephole.c b/Python/peephole.c index d31c89b..c413fc8 100644 --- a/Python/peephole.c +++ b/Python/peephole.c @@ -13,7 +13,12 @@ #define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1])) #define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD) -#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP) +#define CONDITIONAL_JUMP(op) (op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \ + || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP) +#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP \ + || op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \ + || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP) +#define JUMPS_ON_TRUE(op) (op==POP_JUMP_IF_TRUE || op==JUMP_IF_TRUE_OR_POP) #define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3)) #define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255 #define CODESIZE(op) (HAS_ARG(op) ? 3 : 1) @@ -237,8 +242,10 @@ markblocks(unsigned char *code, Py_ssize_t len) switch (opcode) { case FOR_ITER: case JUMP_FORWARD: - case JUMP_IF_FALSE: - case JUMP_IF_TRUE: + case JUMP_IF_FALSE_OR_POP: + case JUMP_IF_TRUE_OR_POP: + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: case JUMP_ABSOLUTE: case CONTINUE_LOOP: case SETUP_LOOP: @@ -363,29 +370,24 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, assert(PyList_Check(consts)); for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) { + reoptimize_current: opcode = codestr[i]; lastlc = cumlc; cumlc = 0; switch (opcode) { - - /* Replace UNARY_NOT JUMP_IF_FALSE POP_TOP with - with JUMP_IF_TRUE POP_TOP */ + /* Replace UNARY_NOT POP_JUMP_IF_FALSE + with POP_JUMP_IF_TRUE */ case UNARY_NOT: - if (codestr[i+1] != JUMP_IF_FALSE || - codestr[i+4] != POP_TOP || - !ISBASICBLOCK(blocks,i,5)) - continue; - tgt = GETJUMPTGT(codestr, (i+1)); - if (codestr[tgt] != POP_TOP) + if (codestr[i+1] != POP_JUMP_IF_FALSE + || !ISBASICBLOCK(blocks,i,4)) continue; - j = GETARG(codestr, i+1) + 1; - codestr[i] = JUMP_IF_TRUE; + j = GETARG(codestr, i+1); + codestr[i] = POP_JUMP_IF_TRUE; SETARG(codestr, i, j); - codestr[i+3] = POP_TOP; - codestr[i+4] = NOP; - break; + codestr[i+3] = NOP; + goto reoptimize_current; /* not a is b --> a is not b not a in b --> a not in b @@ -417,29 +419,19 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, break; /* Skip over LOAD_CONST trueconst - JUMP_IF_FALSE xx POP_TOP */ + POP_JUMP_IF_FALSE xx. This improves + "while 1" performance. */ case LOAD_CONST: cumlc = lastlc + 1; j = GETARG(codestr, i); - if (codestr[i+3] != JUMP_IF_FALSE || - codestr[i+6] != POP_TOP || + if (codestr[i+3] != POP_JUMP_IF_FALSE || !ISBASICBLOCK(blocks,i,7) || !PyObject_IsTrue(PyList_GET_ITEM(consts, j))) continue; - memset(codestr+i, NOP, 7); + memset(codestr+i, NOP, 6); cumlc = 0; break; - /* Replace POP_TOP JUMP_FORWARD 1 POP_TOP - with NOP NOP NOP NOP POP_TOP. */ - case POP_TOP: - if (UNCONDITIONAL_JUMP(codestr[i+1]) && - GETJUMPTGT(codestr, i+1) == i+5 && - codestr[i+4] == POP_TOP && - ISBASICBLOCK(blocks,i,4)) - memset(codestr+i, NOP, 4); - break; - /* Try to fold tuples of constants (includes a case for lists which are only used for "in" and "not in" tests). Skip over BUILD_SEQN 1 UNPACK_SEQN 1. @@ -524,27 +516,47 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, "if a or b:" "a and b or c" "(a and b) and c" - x:JUMP_IF_FALSE y y:JUMP_IF_FALSE z --> x:JUMP_IF_FALSE z - x:JUMP_IF_FALSE y y:JUMP_IF_TRUE z --> x:JUMP_IF_FALSE y+3 + x:POP_OR_JUMP y y:POP_OR_JUMP z --> x:POP_OR_JUMP z + x:POP_OR_JUMP y y:JUMP_OR_POP z --> x:POP_JUMP_IF_FALSE y+3 where y+3 is the instruction following the second test. */ - case JUMP_IF_FALSE: - case JUMP_IF_TRUE: + case JUMP_IF_FALSE_OR_POP: + case JUMP_IF_TRUE_OR_POP: tgt = GETJUMPTGT(codestr, i); j = codestr[tgt]; - if (j == JUMP_IF_FALSE || j == JUMP_IF_TRUE) { - if (j == opcode) { - tgttgt = GETJUMPTGT(codestr, tgt) - i - 3; + if (CONDITIONAL_JUMP(j)) { + /* NOTE: all possible jumps here are + absolute! */ + if (JUMPS_ON_TRUE(j) == JUMPS_ON_TRUE(opcode)) { + /* The second jump will be + taken iff the first is. */ + tgttgt = GETJUMPTGT(codestr, tgt); + /* The current opcode inherits + its target's stack behaviour */ + codestr[i] = j; SETARG(codestr, i, tgttgt); + goto reoptimize_current; } else { - tgt -= i; - SETARG(codestr, i, tgt); + /* The second jump is not taken + if the first is (so jump past + it), and all conditional + jumps pop their argument when + they're not taken (so change + the first jump to pop its + argument when it's taken). */ + if (JUMPS_ON_TRUE(opcode)) + codestr[i] = POP_JUMP_IF_TRUE; + else + codestr[i] = POP_JUMP_IF_FALSE; + SETARG(codestr, i, (tgt + 3)); + goto reoptimize_current; } - break; } - /* Intentional fallthrough */ + /* Intentional fallthrough */ /* Replace jumps to unconditional jumps */ + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: case FOR_ITER: case JUMP_FORWARD: case JUMP_ABSOLUTE: @@ -621,14 +633,16 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, case JUMP_ABSOLUTE: case CONTINUE_LOOP: + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: + case JUMP_IF_FALSE_OR_POP: + case JUMP_IF_TRUE_OR_POP: j = addrmap[GETARG(codestr, i)]; SETARG(codestr, i, j); break; case FOR_ITER: case JUMP_FORWARD: - case JUMP_IF_FALSE: - case JUMP_IF_TRUE: case SETUP_LOOP: case SETUP_EXCEPT: case SETUP_FINALLY: |