author     Raymond Hettinger <python@rcn.com>  2003-04-22 06:49:11 (GMT)
committer  Raymond Hettinger <python@rcn.com>  2003-04-22 06:49:11 (GMT)
commit     060641d51160f6bf49a049bb677f8412b5a19de3 (patch)
tree       1ad7d568e797a6387b7f0792f5286f901b8f4f32
parent     0c83348d5c110a6ca706219019e97d5cefe2fddb (diff)
Improved the bytecode optimizer.
* Can now test for basic blocks.
* Optimize inverted comparisons.
* Optimize UNARY_NOT followed by a conditional jump.
* Added a new opcode, NOP, to keep code size constant.
* Applied NOP to previous transformations where appropriate.
Note, the NOP would not be necessary if other functions were
added to re-target jump addresses and update the co_lnotab mapping.
That would yield slightly faster and cleaner bytecode at the
expense of optimizer simplicity and of keeping it decoupled
from the line-numbering structure.
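[Editor's note] The inverted-comparison transformation works because of how CPython's
comparison-operator table is laid out.  Below is a minimal standalone C sketch, not
part of the patch: the table mirrors dis.cmp_op in Python 2.3, where the invertible
operators sit in adjacent pairs (in=6, not in=7, is=8, is not=9), so XOR-ing the
COMPARE_OP argument with 1 swaps each pair.  That is what the new COMPARE_OP case
in compile.c (in the diff below) does with SETARG(codestr, i, (j^1)).

/* Standalone demo of the oparg^1 trick; cmp_op mirrors Python 2.3's table. */
#include <stdio.h>

static const char *cmp_op[] = {
	"<", "<=", "==", "!=", ">", ">=",	/* 0..5: left alone */
	"in", "not in", "is", "is not"		/* 6..9: j^1 flips each pair */
};

int main(void)
{
	int j;
	for (j = 6; j <= 9; j++)
		printf("not (a %s b)  ==>  a %s b\n", cmp_op[j], cmp_op[j ^ 1]);
	return 0;
}

Running this prints the same four rewrites listed in the patch's comment block
(e.g. "not (a in b)  ==>  a not in b").  The following UNARY_NOT is then overwritten
with the new NOP opcode, which is why the basic-block guard matters; see the sketch
after the diff.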
-rw-r--r--  Include/opcode.h |  2
-rw-r--r--  Lib/opcode.py    |  2
-rw-r--r--  Misc/NEWS        |  7
-rw-r--r--  Python/ceval.c   |  3
-rw-r--r--  Python/compile.c | 92
5 files changed, 97 insertions, 9 deletions
diff --git a/Include/opcode.h b/Include/opcode.h
index 2f3dd04..9f7d263 100644
--- a/Include/opcode.h
+++ b/Include/opcode.h
@@ -14,6 +14,8 @@ extern "C" {
 #define DUP_TOP		4
 #define ROT_FOUR	5
 
+#define NOP		9
+
 #define UNARY_POSITIVE	10
 #define UNARY_NEGATIVE	11
 #define UNARY_NOT	12
diff --git a/Lib/opcode.py b/Lib/opcode.py
index cfde5f8..15f92a7 100644
--- a/Lib/opcode.py
+++ b/Lib/opcode.py
@@ -49,6 +49,8 @@ def_op('ROT_THREE', 3)
 def_op('DUP_TOP', 4)
 def_op('ROT_FOUR', 5)
 
+def_op('NOP', 9)
+
 def_op('UNARY_POSITIVE', 10)
 def_op('UNARY_NEGATIVE', 11)
 def_op('UNARY_NOT', 12)
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -294,6 +294,13 @@ Core and builtins
   value, but according to PEP 237 it really needs to be 1 now.  This
   will be backported to Python 2.2.3 a well.  (SF #660455)
 
+- Added several bytecode optimizations.  Provides speed-ups to
+  inverted in/is tests, inverted jumps, while 1 loops, and jumps to
+  unconditional jumps.
+
+- Added a new opcode, NOP, which is used in some of the bytecode
+  transformations.
+
 - int(s, base) sometimes sign-folds hex and oct constants; it only
   does this when base is 0 and s.strip() starts with a '0'.  When
   the sign is actually folded, as in int("0xffffffff", 0) on a 32-bit
diff --git a/Python/ceval.c b/Python/ceval.c
index 3ea1bdc..7f8f654 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -873,6 +873,9 @@ eval_frame(PyFrameObject *f)
 
 		/* case STOP_CODE: this is an error! */
 
+		case NOP:
+			goto fast_next_opcode;
+
 		case LOAD_FAST:
 			x = GETLOCAL(oparg);
 			if (x != NULL) {
diff --git a/Python/compile.c b/Python/compile.c
index 57f0edb..4afd0eb 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -328,6 +328,43 @@ intern_strings(PyObject *tuple)
 #define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP)
 #define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3))
 #define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255
+#define CODESIZE(op)  (HAS_ARG(op) ? 3 : 1)
+#define ISBASICBLOCK(blocks, start, bytes) (blocks[start]==blocks[start+bytes-1])
+
+static unsigned int *
+markblocks(unsigned char *code, int len)
+{
+	unsigned int *blocks = PyMem_Malloc(len*sizeof(int));
+	int i,j, opcode, oldblock, newblock, blockcnt = 0;
+
+	if (blocks == NULL)
+		return NULL;
+	memset(blocks, 0, len*sizeof(int));
+	for (i=0 ; i<len ; i+=CODESIZE(opcode)) {
+		opcode = code[i];
+		switch (opcode) {
+			case FOR_ITER:
+			case JUMP_FORWARD:
+			case JUMP_IF_FALSE:
+			case JUMP_IF_TRUE:
+			case JUMP_ABSOLUTE:
+			case CONTINUE_LOOP:
+			case SETUP_LOOP:
+			case SETUP_EXCEPT:
+			case SETUP_FINALLY:
+				j = GETJUMPTGT(code, i);
+				oldblock = blocks[j];
+				newblock = ++blockcnt;
+				for (; j<len ; j++) {
+					if (blocks[j] != (unsigned)oldblock)
+						break;
+					blocks[j] = newblock;
+				}
+				break;
+		}
+	}
+	return blocks;
+}
 
 static PyObject *
 optimize_code(PyObject *code, PyObject* consts)
@@ -335,18 +372,24 @@ optimize_code(PyObject *code, PyObject* consts)
 	int i, j, codelen;
 	int tgt, tgttgt, opcode;
 	unsigned char *codestr;
+	unsigned int *blocks;
 
 	/* Make a modifiable copy of the code string */
 	if (!PyString_Check(code))
 		goto exitUnchanged;
 	codelen = PyString_Size(code);
 	codestr = PyMem_Malloc(codelen);
-	if (codestr == NULL) 
+	if (codestr == NULL)
 		goto exitUnchanged;
 	codestr = memcpy(codestr, PyString_AS_STRING(code), codelen);
+	blocks = markblocks(codestr, codelen);
+	if (blocks == NULL) {
+		PyMem_Free(codestr);
+		goto exitUnchanged;
+	}
 	assert(PyTuple_Check(consts));
 
-	for (i=0 ; i<codelen-7 ; i += HAS_ARG(codestr[i]) ? 3 : 1) {
+	for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
 		opcode = codestr[i];
 
 		switch (opcode) {
@@ -363,8 +406,8 @@ optimize_code(PyObject *code, PyObject* consts)
 			SETARG(codestr, i, 4);
 			break;
 
-		/* Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2 JMP+2.
-		   Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2 JMP+1. 
+		/* Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2 JMP+2 NOP NOP.
+		   Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2 JMP+1 NOP.
 		   Note, these opcodes occur together only in assignment
 		   statements.  Accordingly, the unpack opcode is never
 		   a jump target. */
@@ -377,8 +420,8 @@ optimize_code(PyObject *code, PyObject* consts)
 				codestr[i] = ROT_TWO;
 				codestr[i+1] = JUMP_FORWARD;
 				SETARG(codestr, i+1, 2);
-				codestr[i+4] = DUP_TOP;  /* Filler codes used as NOPs */
-				codestr[i+5] = POP_TOP;
+				codestr[i+4] = NOP;
+				codestr[i+5] = NOP;
 				continue;
 			}
 			if (GETARG(codestr, i) == 3 && \
@@ -386,11 +429,41 @@ optimize_code(PyObject *code, PyObject* consts)
 				codestr[i] = ROT_THREE;
 				codestr[i+1] = ROT_TWO;
 				codestr[i+2] = JUMP_FORWARD;
-				SETARG(codestr, i+2, 1);
-				codestr[i+5] = DUP_TOP; 
+				SETARG(codestr, i+2, 1);
+				codestr[i+5] = NOP;
 			}
 			break;
 
+		/* Simplify inverted tests.
+		   Must verify that sequence is a basic block because the jump
+		   can itself be a jump target.  Also, must verify that *both*
+		   jump alternatives go to a POP_TOP.  Otherwise, the code will
+		   expect the stack value to have been inverted. */
+		case UNARY_NOT:
+			if (codestr[i+1] != JUMP_IF_FALSE || \
+			    codestr[i+4] != POP_TOP || \
+			    !ISBASICBLOCK(blocks,i,5))
+				continue;
+			tgt = GETJUMPTGT(codestr, (i+1));
+			if (codestr[tgt] != POP_TOP)
+				continue;
+			codestr[i] = NOP;
+			codestr[i+1] = JUMP_IF_TRUE;
+			break;
+
+		/* not a is b -->  a is not b
+		   not a in b -->  a not in b
+		   not a is not b -->  a is b
+		   not a not in b -->  a in b */
+		case COMPARE_OP:
+			j = GETARG(codestr, i);
+			if (codestr[i+3] != UNARY_NOT || j < 6 || \
+			    j > 9 || !ISBASICBLOCK(blocks,i,4))
+				continue;
+			SETARG(codestr, i, (j^1));
+			codestr[i+3] = NOP;
+			break;
+
 		/* Replace jumps to unconditional jumps */
 		case FOR_ITER:
 		case JUMP_FORWARD:
@@ -402,7 +475,7 @@ optimize_code(PyObject *code, PyObject* consts)
 		case SETUP_EXCEPT:
 		case SETUP_FINALLY:
 			tgt = GETJUMPTGT(codestr, i);
-			if (!UNCONDITIONAL_JUMP(codestr[tgt])) 
+			if (!UNCONDITIONAL_JUMP(codestr[tgt]))
 				continue;
 			tgttgt = GETJUMPTGT(codestr, tgt);
 			if (opcode == JUMP_FORWARD) /* JMP_ABS can go backwards */
@@ -422,6 +495,7 @@ optimize_code(PyObject *code, PyObject* consts)
 	}
 	code = PyString_FromStringAndSize(codestr, codelen);
 	PyMem_Free(codestr);
+	PyMem_Free(blocks);
 	return code;
 
 exitUnchanged:
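[Editor's note] To make the ISBASICBLOCK guard concrete, here is a standalone toy
model of markblocks().  The opcode numbers (TOY_NOP, TOY_JUMP), the 1-or-3-byte
encoding, and the caller-supplied blocks buffer are assumptions for the demo, not
CPython's real values; the renumbering logic follows the patch: every jump target
starts a new block number, so a span is a single basic block exactly when its first
and last bytes carry the same number.

/* Toy model of markblocks()/ISBASICBLOCK; opcode values are assumed. */
#include <stdio.h>
#include <string.h>

#define TOY_NOP		0	/* 1 byte, no argument */
#define TOY_JUMP	1	/* 3 bytes: opcode, arg-lo, arg-hi (absolute target) */

#define HAS_ARG(op)	((op) == TOY_JUMP)
#define CODESIZE(op)	(HAS_ARG(op) ? 3 : 1)
#define GETARG(a, i)	(((a)[(i)+2] << 8) | (a)[(i)+1])
/* Same test as the patch: one block iff no jump lands inside the span */
#define ISBASICBLOCK(b, start, bytes)	((b)[start] == (b)[(start)+(bytes)-1])

static void
markblocks(const unsigned char *code, int len, unsigned int *blocks)
{
	int i, j, oldblock, blockcnt = 0;

	memset(blocks, 0, len * sizeof(unsigned int));
	for (i = 0; i < len; i += CODESIZE(code[i])) {
		if (!HAS_ARG(code[i]))
			continue;
		/* Renumber from the jump target forward until the old
		   numbering changes, exactly as in the patch. */
		j = GETARG(code, i);
		oldblock = (int)blocks[j];
		blockcnt++;
		for (; j < len && blocks[j] == (unsigned int)oldblock; j++)
			blocks[j] = (unsigned int)blockcnt;
	}
}

int main(void)
{
	/* Byte 1 is the target of the jump at byte 2 */
	unsigned char code[] = { TOY_NOP, TOY_NOP, TOY_JUMP, 1, 0, TOY_NOP };
	unsigned int blocks[sizeof(code)];

	markblocks(code, (int)sizeof(code), blocks);
	printf("bytes 0-1 one block? %d\n", ISBASICBLOCK(blocks, 0, 2));	/* 0 */
	printf("bytes 2-5 one block? %d\n", ISBASICBLOCK(blocks, 2, 4));	/* 1 */
	return 0;
}

This is why the UNARY_NOT rewrite insists on ISBASICBLOCK(blocks,i,5): as the
patch's own comment notes, the jump can itself be a jump target, so control could
reach the JUMP_IF_FALSE without the NOT having executed, and flipping it to
JUMP_IF_TRUE would then change behavior.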