diff options
-rw-r--r-- | Lib/test/test_peepholer.py | 13
-rw-r--r-- | Misc/NEWS | 4
-rw-r--r-- | Python/peephole.c | 25
3 files changed, 18 insertions, 24 deletions
diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 53719d3..e268ae2 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -196,21 +196,22 @@ class TestTranforms(unittest.TestCase): self.assertIn('(1000)', asm) def test_binary_subscr_on_unicode(self): - # valid code get optimized + # unicode strings don't get optimized asm = dis_single('"foo"[0]') - self.assertIn("('f')", asm) - self.assertNotIn('BINARY_SUBSCR', asm) + self.assertNotIn("('f')", asm) + self.assertIn('BINARY_SUBSCR', asm) asm = dis_single('"\u0061\uffff"[1]') - self.assertIn("('\\uffff')", asm) - self.assertNotIn('BINARY_SUBSCR', asm) + self.assertNotIn("('\\uffff')", asm) + self.assertIn('BINARY_SUBSCR', asm) - # invalid code doesn't get optimized # out of range asm = dis_single('"fuu"[10]') self.assertIn('BINARY_SUBSCR', asm) # non-BMP char (see #5057) asm = dis_single('"\U00012345"[0]') self.assertIn('BINARY_SUBSCR', asm) + asm = dis_single('"\U00012345abcdef"[3]') + self.assertIn('BINARY_SUBSCR', asm) def test_folding_of_unaryops_on_constants(self): diff --git a/Misc/NEWS b/Misc/NEWS --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,10 @@ What's New in Python 3.2.4 Core and Builtins ----------------- +- Issue #5057: the peepholer no longer optimizes subscription on unicode + literals (e.g. u'foo'[0]) in order to produce compatible pyc files between + narrow and wide builds. + - Issue #16402: When slicing a range, fix shadowing of exceptions from __index__. diff --git a/Python/peephole.c b/Python/peephole.c index 359eda8..7ae599b 100644 --- a/Python/peephole.c +++ b/Python/peephole.c @@ -132,25 +132,14 @@ fold_binops_on_constants(unsigned char *codestr, PyObject *consts) newconst = PyNumber_Subtract(v, w); break; case BINARY_SUBSCR: - newconst = PyObject_GetItem(v, w); /* #5057: if v is unicode, there might be differences between - wide and narrow builds in cases like '\U00012345'[0]. - Wide builds will return a non-BMP char, whereas narrow builds - will return a surrogate.
In both the cases skip the - optimization in order to produce compatible pycs. - */ - if (newconst != NULL && - PyUnicode_Check(v) && PyUnicode_Check(newconst)) { - Py_UNICODE ch = PyUnicode_AS_UNICODE(newconst)[0]; -#ifdef Py_UNICODE_WIDE - if (ch > 0xFFFF) { -#else - if (ch >= 0xD800 && ch <= 0xDFFF) { -#endif - Py_DECREF(newconst); - return 0; - } - } + wide and narrow builds in cases like '\U00012345'[0] or + '\U00012345abcdef'[3], so it's better to skip the optimization + in order to produce compatible pycs. + */ + if (PyUnicode_Check(v)) + return 0; + newconst = PyObject_GetItem(v, w); break; case BINARY_LSHIFT: newconst = PyNumber_Lshift(v, w); |