From c18cc0edff5269d715f9aa06ca31a4f19ff87b30 Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Mon, 5 Nov 2012 00:03:21 +0200 Subject: #5057: the peepholer no longer optimizes subscription on unicode literals (e.g. u"foo"[0]) in order to produce compatible pyc files between narrow and wide builds. --- Lib/test/test_peepholer.py | 13 +++++++------ Misc/NEWS | 4 ++++ Python/peephole.c | 27 +++++++-------------------- 3 files changed, 18 insertions(+), 26 deletions(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 3e8b7ae..7e05f49 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -138,21 +138,22 @@ class TestTranforms(unittest.TestCase): self.assertIn('(1000)', asm) def test_binary_subscr_on_unicode(self): - # valid code get optimized + # unicode strings don't get optimized asm = dis_single('u"foo"[0]') - self.assertIn("(u'f')", asm) - self.assertNotIn('BINARY_SUBSCR', asm) + self.assertNotIn("(u'f')", asm) + self.assertIn('BINARY_SUBSCR', asm) asm = dis_single('u"\u0061\uffff"[1]') - self.assertIn("(u'\\uffff')", asm) - self.assertNotIn('BINARY_SUBSCR', asm) + self.assertNotIn("(u'\\uffff')", asm) + self.assertIn('BINARY_SUBSCR', asm) - # invalid code doesn't get optimized # out of range asm = dis_single('u"fuu"[10]') self.assertIn('BINARY_SUBSCR', asm) # non-BMP char (see #5057) asm = dis_single('u"\U00012345"[0]') self.assertIn('BINARY_SUBSCR', asm) + asm = dis_single('u"\U00012345abcdef"[3]') + self.assertIn('BINARY_SUBSCR', asm) def test_folding_of_unaryops_on_constants(self): diff --git a/Misc/NEWS b/Misc/NEWS index 9716c6d..bd73d0a 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -9,6 +9,10 @@ What's New in Python 2.7.4 Core and Builtins ----------------- +- Issue #5057: the peepholer no longer optimizes subscription on unicode + literals (e.g. u'foo'[0]) in order to produce compatible pyc files between + narrow and wide builds. + - Issue #8401: assigning an int to a bytearray slice (e.g. 
b[3:4] = 5) now raises an error. diff --git a/Python/peephole.c b/Python/peephole.c index ae84efa..fb6cd03 100644 --- a/Python/peephole.c +++ b/Python/peephole.c @@ -128,27 +128,14 @@ fold_binops_on_constants(unsigned char *codestr, PyObject *consts) newconst = PyNumber_Subtract(v, w); break; case BINARY_SUBSCR: - newconst = PyObject_GetItem(v, w); /* #5057: if v is unicode, there might be differences between - wide and narrow builds in cases like u'\U00012345'[0]. - Wide builds will return a non-BMP char, whereas narrow builds - will return a surrogate. In both the cases skip the - optimization in order to produce compatible pycs. - */ -#ifdef Py_USING_UNICODE - if (newconst != NULL && - PyUnicode_Check(v) && PyUnicode_Check(newconst)) { - Py_UNICODE ch = PyUnicode_AS_UNICODE(newconst)[0]; -#ifdef Py_UNICODE_WIDE - if (ch > 0xFFFF) { -#else - if (ch >= 0xD800 && ch <= 0xDFFF) { -#endif - Py_DECREF(newconst); - return 0; - } - } -#endif + wide and narrow builds in cases like '\U00012345'[0] or + '\U00012345abcdef'[3], so it's better to skip the optimization + in order to produce compatible pycs. + */ + if (PyUnicode_Check(v)) + return 0; + newconst = PyObject_GetItem(v, w); break; case BINARY_LSHIFT: newconst = PyNumber_Lshift(v, w); -- cgit v0.12