author     Raymond Hettinger <python@rcn.com>    2003-04-22 06:49:11 (GMT)
committer  Raymond Hettinger <python@rcn.com>    2003-04-22 06:49:11 (GMT)
commit     060641d51160f6bf49a049bb677f8412b5a19de3 (patch)
tree       1ad7d568e797a6387b7f0792f5286f901b8f4f32
parent     0c83348d5c110a6ca706219019e97d5cefe2fddb (diff)
Improved the bytecode optimizer.

* Can now test for basic blocks.
* Optimize inverted comparisons.
* Optimize unary_not followed by a conditional jump.
* Added a new opcode, NOP, to keep code size constant.
* Applied NOP to previous transformations where appropriate.

Note, the NOP would not be necessary if other functions were added to re-target jump addresses and update the co_lnotab mapping. That would yield slightly faster and cleaner bytecode at the expense of optimizer simplicity and of keeping it decoupled from the line-numbering structure.
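
For a feel of the payoff, here is roughly what the new COMPARE_OP rule does to "not a in b". The disassembly is a sketch based on the transformation in Python/compile.c below, not captured output, and offsets are omitted:

    >>> import dis
    >>> def f(a, b):
    ...     return not a in b

    before (sketch)                  after (sketch)
        LOAD_FAST    0 (a)               LOAD_FAST    0 (a)
        LOAD_FAST    1 (b)               LOAD_FAST    1 (b)
        COMPARE_OP   6 (in)              COMPARE_OP   7 (not in)
        UNARY_NOT                        NOP
        RETURN_VALUE                     RETURN_VALUE

The inverted test is answered directly by the comparison, and the dead UNARY_NOT byte is padded with the new NOP so that no jump targets move.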
-rw-r--r--  Include/opcode.h   2
-rw-r--r--  Lib/opcode.py      2
-rw-r--r--  Misc/NEWS          7
-rw-r--r--  Python/ceval.c     3
-rw-r--r--  Python/compile.c  92
5 files changed, 97 insertions(+), 9 deletions(-)
diff --git a/Include/opcode.h b/Include/opcode.h
index 2f3dd04..9f7d263 100644
--- a/Include/opcode.h
+++ b/Include/opcode.h
@@ -14,6 +14,8 @@ extern "C" {
#define ROT_THREE 3
#define DUP_TOP 4
#define ROT_FOUR 5
+#define NOP 9
+
#define UNARY_POSITIVE 10
#define UNARY_NEGATIVE 11
#define UNARY_NOT 12
diff --git a/Lib/opcode.py b/Lib/opcode.py
index cfde5f8..15f92a7 100644
--- a/Lib/opcode.py
+++ b/Lib/opcode.py
@@ -49,6 +49,8 @@ def_op('ROT_THREE', 3)
def_op('DUP_TOP', 4)
def_op('ROT_FOUR', 5)
+def_op('NOP', 9)
+
def_op('UNARY_POSITIVE', 10)
def_op('UNARY_NEGATIVE', 11)
def_op('UNARY_NOT', 12)
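
Both hunks register the same opcode value, 9, which fills a hole between ROT_FOUR (5) and UNARY_POSITIVE (10) and stays below HAVE_ARGUMENT, so NOP takes no operand. A quick interactive check on an interpreter built with this patch (illustrative):

    >>> import opcode
    >>> opcode.opmap['NOP']
    9
    >>> opcode.opname[9]
    'NOP'

Since def_op() records the name in both opcode.opmap and opcode.opname, dis will show the new instruction by name.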
diff --git a/Misc/NEWS b/Misc/NEWS
index 05f9ac6..2897793 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -294,6 +294,13 @@ Core and builtins
value, but according to PEP 237 it really needs to be 1 now. This
will be backported to Python 2.2.3 as well. (SF #660455)
+- Added several bytecode optimizations. Provides speed-ups to
+ inverted in/is tests, inverted jumps, while 1 loops, and jumps to
+ unconditional jumps.
+
+- Added a new opcode, NOP, which is used in some of the bytecode
+ transformations.
+
- int(s, base) sometimes sign-folds hex and oct constants; it only
does this when base is 0 and s.strip() starts with a '0'. When the
sign is actually folded, as in int("0xffffffff", 0) on a 32-bit
diff --git a/Python/ceval.c b/Python/ceval.c
index 3ea1bdc..7f8f654 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -873,6 +873,9 @@ eval_frame(PyFrameObject *f)
/* case STOP_CODE: this is an error! */
+ case NOP:
+ goto fast_next_opcode;
+
case LOAD_FAST:
x = GETLOCAL(oparg);
if (x != NULL) {
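
In the eval loop a NOP costs a single dispatch: the case jumps straight back to fetch the next opcode. Where the NOPs come from is shown in the compile.c hunks below; for example, the tuple-swap rewrite now pads with NOP. A sketch of the optimized bytecode for a two-element swap (illustrative, not captured output; offsets omitted):

    >>> def swap(a, b):
    ...     a, b = b, a

        LOAD_FAST      1 (b)
        LOAD_FAST      0 (a)
        ROT_TWO                    (was BUILD_TUPLE 2)
        JUMP_FORWARD   2           (was UNPACK_SEQUENCE 2)
        NOP                        (filler keeps code size constant)
        NOP
        STORE_FAST     0 (a)
        STORE_FAST     1 (b)

Before this patch the two filler bytes were a DUP_TOP/POP_TOP pair that the JUMP_FORWARD had to skip; NOP makes the padding explicit.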
diff --git a/Python/compile.c b/Python/compile.c
index 57f0edb..4afd0eb 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -328,6 +328,43 @@ intern_strings(PyObject *tuple)
#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP)
#define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3))
#define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255
+#define CODESIZE(op) (HAS_ARG(op) ? 3 : 1)
+#define ISBASICBLOCK(blocks, start, bytes) (blocks[start]==blocks[start+bytes-1])
+
+static unsigned int *
+markblocks(unsigned char *code, int len)
+{
+ unsigned int *blocks = PyMem_Malloc(len*sizeof(int));
+ int i,j, opcode, oldblock, newblock, blockcnt = 0;
+
+ if (blocks == NULL)
+ return NULL;
+ memset(blocks, 0, len*sizeof(int));
+ for (i=0 ; i<len ; i+=CODESIZE(opcode)) {
+ opcode = code[i];
+ switch (opcode) {
+ case FOR_ITER:
+ case JUMP_FORWARD:
+ case JUMP_IF_FALSE:
+ case JUMP_IF_TRUE:
+ case JUMP_ABSOLUTE:
+ case CONTINUE_LOOP:
+ case SETUP_LOOP:
+ case SETUP_EXCEPT:
+ case SETUP_FINALLY:
+ j = GETJUMPTGT(code, i);
+ oldblock = blocks[j];
+ newblock = ++blockcnt;
+ for (; j<len ; j++) {
+ if (blocks[j] != (unsigned)oldblock)
+ break;
+ blocks[j] = newblock;
+ }
+ break;
+ }
+ }
+ return blocks;
+}
static PyObject *
optimize_code(PyObject *code, PyObject* consts)
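
markblocks() assigns every byte of the code string a basic-block number: each jump target starts a new block, and ISBASICBLOCK then just compares the numbers at both ends of a span. Here is a rough Python transcription of the same algorithm (illustrative only, not part of the patch; the name mark_blocks and the byte-list input are inventions here, and opcode.hasjrel + opcode.hasjabs happens to list the same nine jump opcodes as the C switch):

    import opcode as _op

    _JUMPS = _op.hasjrel + _op.hasjabs    # the nine jump opcodes in the switch above

    def mark_blocks(code):
        # 'code' is a list of byte values, e.g. map(ord, f.func_code.co_code).
        blocks = [0] * len(code)          # a block number for every byte
        blockcnt = 0
        i = 0
        while i < len(code):
            op = code[i]
            if op in _JUMPS:
                arg = code[i+1] + (code[i+2] << 8)
                if op in _op.hasjabs:     # absolute target
                    j = arg
                else:                     # relative to the next instruction
                    j = i + 3 + arg
                # Renumber the run of bytes starting at the target:
                # every jump target begins a new basic block.
                oldblock = blocks[j]
                blockcnt = blockcnt + 1
                while j < len(code) and blocks[j] == oldblock:
                    blocks[j] = blockcnt
                    j = j + 1
            if op >= _op.HAVE_ARGUMENT:   # 3-byte instruction
                i = i + 3
            else:                         # 1-byte instruction
                i = i + 1
        return blocks

A span [start, start+bytes) is then a basic block exactly when blocks[start] == blocks[start+bytes-1], i.e. no block boundary falls strictly inside it.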
@@ -335,18 +372,24 @@ optimize_code(PyObject *code, PyObject* consts)
int i, j, codelen;
int tgt, tgttgt, opcode;
unsigned char *codestr;
+ unsigned int *blocks;
/* Make a modifiable copy of the code string */
if (!PyString_Check(code))
goto exitUnchanged;
codelen = PyString_Size(code);
codestr = PyMem_Malloc(codelen);
- if (codestr == NULL)
+ if (codestr == NULL)
goto exitUnchanged;
codestr = memcpy(codestr, PyString_AS_STRING(code), codelen);
+ blocks = markblocks(codestr, codelen);
+ if (blocks == NULL) {
+ PyMem_Free(codestr);
+ goto exitUnchanged;
+ }
assert(PyTuple_Check(consts));
- for (i=0 ; i<codelen-7 ; i += HAS_ARG(codestr[i]) ? 3 : 1) {
+ for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
opcode = codestr[i];
switch (opcode) {
@@ -363,8 +406,8 @@ optimize_code(PyObject *code, PyObject* consts)
SETARG(codestr, i, 4);
break;
- /* Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2 JMP+2.
- Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2 JMP+1.
+ /* Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2 JMP+2 NOP NOP.
+ Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2 JMP+1 NOP.
Note, these opcodes occur together only in assignment
statements. Accordingly, the unpack opcode is never
a jump target. */
@@ -377,8 +420,8 @@ optimize_code(PyObject *code, PyObject* consts)
codestr[i] = ROT_TWO;
codestr[i+1] = JUMP_FORWARD;
SETARG(codestr, i+1, 2);
- codestr[i+4] = DUP_TOP; /* Filler codes used as NOPs */
- codestr[i+5] = POP_TOP;
+ codestr[i+4] = NOP;
+ codestr[i+5] = NOP;
continue;
}
if (GETARG(codestr, i) == 3 && \
@@ -386,11 +429,41 @@ optimize_code(PyObject *code, PyObject* consts)
codestr[i] = ROT_THREE;
codestr[i+1] = ROT_TWO;
codestr[i+2] = JUMP_FORWARD;
- SETARG(codestr, i+2, 1);
- codestr[i+5] = DUP_TOP;
+ SETARG(codestr, i+2, 1);
+ codestr[i+5] = NOP;
}
break;
+ /* Simplify inverted tests.
+ Must verify that sequence is a basic block because the jump
+ can itself be a jump target. Also, must verify that *both*
+ jump alternatives go to a POP_TOP. Otherwise, the code will
+ expect the stack value to have been inverted. */
+ case UNARY_NOT:
+ if (codestr[i+1] != JUMP_IF_FALSE || \
+ codestr[i+4] != POP_TOP || \
+ !ISBASICBLOCK(blocks,i,5))
+ continue;
+ tgt = GETJUMPTGT(codestr, (i+1));
+ if (codestr[tgt] != POP_TOP)
+ continue;
+ codestr[i] = NOP;
+ codestr[i+1] = JUMP_IF_TRUE;
+ break;
+
+ /* not a is b --> a is not b
+ not a in b --> a not in b
+ not a is not b --> a is b
+ not a not in b --> a in b */
+ case COMPARE_OP:
+ j = GETARG(codestr, i);
+ if (codestr[i+3] != UNARY_NOT || j < 6 || \
+ j > 9 || !ISBASICBLOCK(blocks,i,4))
+ continue;
+ SETARG(codestr, i, (j^1));
+ codestr[i+3] = NOP;
+ break;
+
/* Replace jumps to unconditional jumps */
case FOR_ITER:
case JUMP_FORWARD:
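
The j^1 trick in the COMPARE_OP case above works because the four affected entries of opcode.cmp_op sit in pairs whose indices differ only in the low bit. Illustrative, from a 2.3-era interpreter:

    >>> from opcode import cmp_op
    >>> cmp_op[6:10]
    ('in', 'not in', 'is', 'is not')
    >>> for j in 6, 7, 8, 9:
    ...     print cmp_op[j], '<-->', cmp_op[j ^ 1]
    in <--> not in
    not in <--> in
    is <--> is not
    is not <--> is

So SETARG(codestr, i, (j^1)) flips the comparison in place, and the following UNARY_NOT byte is overwritten with a NOP, with no other bytes moving.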
@@ -402,7 +475,7 @@ optimize_code(PyObject *code, PyObject* consts)
case SETUP_EXCEPT:
case SETUP_FINALLY:
tgt = GETJUMPTGT(codestr, i);
- if (!UNCONDITIONAL_JUMP(codestr[tgt]))
+ if (!UNCONDITIONAL_JUMP(codestr[tgt]))
continue;
tgttgt = GETJUMPTGT(codestr, tgt);
if (opcode == JUMP_FORWARD) /* JMP_ABS can go backwards */
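
This hunk is a whitespace fix only; the jump-threading rule itself predates the patch. Schematically, a jump that lands on an unconditional jump is re-aimed at the final destination (addresses made up for illustration):

    before threading                     after threading
    ----------------                     ---------------
        JUMP_IF_FALSE  to 30                 JUMP_IF_FALSE  to 44
        ...                                  ...
    30: JUMP_FORWARD   to 44             30: JUMP_FORWARD   to 44
    44: ...                              44: ...

Per the comment above, a threaded JUMP_FORWARD is treated as JUMP_ABSOLUTE, since the final target of an absolute jump may lie backwards.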
@@ -422,6 +495,7 @@ optimize_code(PyObject *code, PyObject* consts)
}
code = PyString_FromStringAndSize(codestr, codelen);
PyMem_Free(codestr);
+ PyMem_Free(blocks);
return code;
exitUnchanged: