From 17b880a5d6b64f1e7d26ec8229e8b41cdf740690 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Fri, 11 Mar 2011 17:27:02 +0100 Subject: Issue #11244: The peephole optimizer is now able to constant-fold arbitrarily complex expressions. This also fixes a 3.2 regression where operations involving negative numbers were not constant-folded. --- Lib/test/test_peepholer.py | 31 ++++++++- Misc/NEWS | 4 ++ Python/peephole.c | 167 ++++++++++++++++++++++++++++++++------------- 3 files changed, 153 insertions(+), 49 deletions(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 531b425..a9eb23f 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -8,8 +8,10 @@ def disassemble(func): f = StringIO() tmp = sys.stdout sys.stdout = f - dis.dis(func) - sys.stdout = tmp + try: + dis.dis(func) + finally: + sys.stdout = tmp result = f.getvalue() f.close() return result @@ -99,6 +101,12 @@ class TestTranforms(unittest.TestCase): self.assertIn(elem, asm) self.assertNotIn('BUILD_TUPLE', asm) + # Long tuples should be folded too. + asm = dis_single(repr(tuple(range(10000)))) + # One LOAD_CONST for the tuple, one for the None return value + self.assertEqual(asm.count('LOAD_CONST'), 2) + self.assertNotIn('BUILD_TUPLE', asm) + # Bug 1053819: Tuple of constants misidentified when presented with: # . . . opcode_with_arg 100 unary_opcode BUILD_TUPLE 1 . . . # The following would segfault upon compilation @@ -267,6 +275,25 @@ class TestTranforms(unittest.TestCase): asm = disassemble(f) self.assertNotIn('BINARY_ADD', asm) + def test_constant_folding(self): + # Issue #11244: aggressive constant folding. + exprs = [ + "3 * -5", + "-3 * 5", + "2 * (3 * 4)", + "(2 * 3) * 4", + "(-1, 2, 3)", + "(1, -2, 3)", + "(1, 2, -3)", + "(1, 2, -3) * 6", + "lambda x: x in {(3 * -5) + (-1 - 6), (1, -2, 3) * 2, None}", + ] + for e in exprs: + asm = dis_single(e) + self.assertNotIn('UNARY_', asm, e) + self.assertNotIn('BINARY_', asm, e) + self.assertNotIn('BUILD_', asm, e) + def test_main(verbose=None): import sys diff --git a/Misc/NEWS b/Misc/NEWS index dc4f63e..9844d07 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,10 @@ What's New in Python 3.3 Alpha 1? Core and Builtins ----------------- +- Issue #11244: The peephole optimizer is now able to constant-fold + arbitrarily complex expressions. This also fixes a 3.2 regression where + operations involving negative numbers were not constant-folded. + - Issue #11450: Don't truncate hg version info in Py_GetBuildInfo() when there are many tags (e.g. when using mq). Patch by Nadeem Vawda. diff --git a/Python/peephole.c b/Python/peephole.c index f972e16..ab96ce9 100644 --- a/Python/peephole.c +++ b/Python/peephole.c @@ -23,6 +23,64 @@ #define ISBASICBLOCK(blocks, start, bytes) \ (blocks[start]==blocks[start+bytes-1]) + +#define CONST_STACK_CREATE() { \ + const_stack_size = 256; \ + const_stack = PyMem_New(PyObject *, const_stack_size); \ + load_const_stack = PyMem_New(Py_ssize_t, const_stack_size); \ + if (!const_stack || !load_const_stack) { \ + PyErr_NoMemory(); \ + goto exitError; \ + } \ + } + +#define CONST_STACK_DELETE() do { \ + if (const_stack) \ + PyMem_Free(const_stack); \ + if (load_const_stack) \ + PyMem_Free(load_const_stack); \ + } while(0) + +#define CONST_STACK_LEN() (const_stack_top + 1) + +#define CONST_STACK_PUSH_OP(i) do { \ + PyObject *_x; \ + assert(codestr[i] == LOAD_CONST); \ + assert(PyList_GET_SIZE(consts) > GETARG(codestr, i)); \ + _x = PyList_GET_ITEM(consts, GETARG(codestr, i)); \ + if (++const_stack_top >= const_stack_size) { \ + const_stack_size *= 2; \ + PyMem_Resize(const_stack, PyObject *, const_stack_size); \ + PyMem_Resize(load_const_stack, Py_ssize_t, const_stack_size); \ + if (!const_stack || !load_const_stack) { \ + PyErr_NoMemory(); \ + goto exitError; \ + } \ + } \ + load_const_stack[const_stack_top] = i; \ + const_stack[const_stack_top] = _x; \ + in_consts = 1; \ + } while(0) + +#define CONST_STACK_RESET() do { \ + const_stack_top = -1; \ + } while(0) + +#define CONST_STACK_TOP(x) \ + const_stack[const_stack_top] + +#define CONST_STACK_LASTN(i) \ + &const_stack[const_stack_top - i + 1] + +#define CONST_STACK_POP(i) do { \ + assert(const_stack_top + 1 >= i); \ + const_stack_top -= i; \ + } while(0) + +#define CONST_STACK_OP_LASTN(i) \ + ((const_stack_top >= i - 1) ? load_const_stack[const_stack_top - i + 1] : -1) + + /* Replace LOAD_CONST c1. LOAD_CONST c2 ... LOAD_CONST cn BUILD_TUPLE n with LOAD_CONST (c1, c2, ... cn). The consts table must still be in list form so that the @@ -33,17 +91,14 @@ test; for BUILD_SET it assembles a frozenset rather than a tuple. */ static int -tuple_of_constants(unsigned char *codestr, Py_ssize_t n, PyObject *consts) +tuple_of_constants(unsigned char *codestr, Py_ssize_t n, + PyObject *consts, PyObject **objs) { PyObject *newconst, *constant; - Py_ssize_t i, arg, len_consts; + Py_ssize_t i, len_consts; /* Pre-conditions */ assert(PyList_CheckExact(consts)); - assert(codestr[n*3] == BUILD_TUPLE || codestr[n*3] == BUILD_LIST || codestr[n*3] == BUILD_SET); - assert(GETARG(codestr, (n*3)) == n); - for (i=0 ; i= 0 && - j <= lastlc && + if (j == 0) + break; + h = CONST_STACK_OP_LASTN(j); + assert((h >= 0 || CONST_STACK_LEN() < j)); + if (h >= 0 && j > 0 && j <= CONST_STACK_LEN() && ((opcode == BUILD_TUPLE && - ISBASICBLOCK(blocks, h, 3*(j+1))) || + ISBASICBLOCK(blocks, h, i-h+3)) || ((opcode == BUILD_LIST || opcode == BUILD_SET) && codestr[i+3]==COMPARE_OP && - ISBASICBLOCK(blocks, h, 3*(j+2)) && + ISBASICBLOCK(blocks, h, i-h+6) && (GETARG(codestr,i+3)==6 || GETARG(codestr,i+3)==7))) && - tuple_of_constants(&codestr[h], j, consts)) { + tuple_of_constants(&codestr[i], j, consts, CONST_STACK_LASTN(j))) { assert(codestr[i] == LOAD_CONST); - cumlc = 1; + memset(&codestr[h], NOP, i - h); + CONST_STACK_POP(j); + CONST_STACK_PUSH_OP(i); break; } if (codestr[i+3] != UNPACK_SEQUENCE || @@ -482,10 +542,12 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, } else if (j == 2) { codestr[i] = ROT_TWO; memset(codestr+i+1, NOP, 5); + CONST_STACK_RESET(); } else if (j == 3) { codestr[i] = ROT_THREE; codestr[i+1] = ROT_TWO; memset(codestr+i+2, NOP, 4); + CONST_STACK_RESET(); } break; @@ -504,12 +566,18 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, case BINARY_AND: case BINARY_XOR: case BINARY_OR: - if (lastlc >= 2 && - ISBASICBLOCK(blocks, i-6, 7) && - fold_binops_on_constants(&codestr[i-6], consts)) { + /* NOTE: LOAD_CONST is saved at `i-2` since it has an arg + while BINOP hasn't */ + h = CONST_STACK_OP_LASTN(2); + assert((h >= 0 || CONST_STACK_LEN() < 2)); + if (h >= 0 && + ISBASICBLOCK(blocks, h, i-h+1) && + fold_binops_on_constants(&codestr[i], consts, CONST_STACK_LASTN(2))) { i -= 2; + memset(&codestr[h], NOP, i - h); assert(codestr[i] == LOAD_CONST); - cumlc = 1; + CONST_STACK_POP(2); + CONST_STACK_PUSH_OP(i); } break; @@ -518,12 +586,15 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, case UNARY_NEGATIVE: case UNARY_INVERT: case UNARY_POSITIVE: - if (lastlc >= 1 && - ISBASICBLOCK(blocks, i-3, 4) && - fold_unaryops_on_constants(&codestr[i-3], consts)) { + h = CONST_STACK_OP_LASTN(1); + assert((h >= 0 || CONST_STACK_LEN() < 1)); + if (h >= 0 && + ISBASICBLOCK(blocks, h, i-h+1) && + fold_unaryops_on_constants(&codestr[i-3], consts, CONST_STACK_TOP())) { i -= 2; assert(codestr[i] == LOAD_CONST); - cumlc = 1; + CONST_STACK_POP(1); + CONST_STACK_PUSH_OP(i); } break; @@ -680,6 +751,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, assert(h + nops == codelen); code = PyBytes_FromStringAndSize((char *)codestr, h); + CONST_STACK_DELETE(); PyMem_Free(addrmap); PyMem_Free(codestr); PyMem_Free(blocks); @@ -689,6 +761,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, code = NULL; exitUnchanged: + CONST_STACK_DELETE(); if (blocks != NULL) PyMem_Free(blocks); if (addrmap != NULL) -- cgit v0.12