From 6c5f5210be9f68252b72d7b5e8dc2bde20d90c9d Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Mon, 5 Nov 2012 00:06:32 +0200 Subject: #5057: the peepholer no longer optimizes subscription on unicode literals (e.g. u"foo"[0]) in order to produce compatible pyc files between narrow and wide builds. --- Lib/test/test_peepholer.py | 13 +++++++------ Misc/NEWS | 4 ++++ Python/peephole.c | 25 +++++++------------------ 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 53719d3..e268ae2 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -196,21 +196,22 @@ class TestTranforms(unittest.TestCase): self.assertIn('(1000)', asm) def test_binary_subscr_on_unicode(self): - # valid code get optimized + # unicode strings don't get optimized asm = dis_single('"foo"[0]') - self.assertIn("('f')", asm) - self.assertNotIn('BINARY_SUBSCR', asm) + self.assertNotIn("('f')", asm) + self.assertIn('BINARY_SUBSCR', asm) asm = dis_single('"\u0061\uffff"[1]') - self.assertIn("('\\uffff')", asm) - self.assertNotIn('BINARY_SUBSCR', asm) + self.assertNotIn("('\\uffff')", asm) + self.assertIn('BINARY_SUBSCR', asm) - # invalid code doesn't get optimized # out of range asm = dis_single('"fuu"[10]') self.assertIn('BINARY_SUBSCR', asm) # non-BMP char (see #5057) asm = dis_single('"\U00012345"[0]') self.assertIn('BINARY_SUBSCR', asm) + asm = dis_single('"\U00012345abcdef"[3]') + self.assertIn('BINARY_SUBSCR', asm) def test_folding_of_unaryops_on_constants(self): diff --git a/Misc/NEWS b/Misc/NEWS index 8d6035e..bb1d6f7 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,10 @@ What's New in Python 3.2.4 Core and Builtins ----------------- +- Issue #5057: the peepholer no longer optimizes subscription on unicode + literals (e.g. u'foo'[0]) in order to produce compatible pyc files between + narrow and wide builds. + - Issue #16402: When slicing a range, fix shadowing of exceptions from __index__. diff --git a/Python/peephole.c b/Python/peephole.c index 359eda8..7ae599b 100644 --- a/Python/peephole.c +++ b/Python/peephole.c @@ -132,25 +132,14 @@ fold_binops_on_constants(unsigned char *codestr, PyObject *consts) newconst = PyNumber_Subtract(v, w); break; case BINARY_SUBSCR: - newconst = PyObject_GetItem(v, w); /* #5057: if v is unicode, there might be differences between - wide and narrow builds in cases like '\U00012345'[0]. - Wide builds will return a non-BMP char, whereas narrow builds - will return a surrogate. In both the cases skip the - optimization in order to produce compatible pycs. - */ - if (newconst != NULL && - PyUnicode_Check(v) && PyUnicode_Check(newconst)) { - Py_UNICODE ch = PyUnicode_AS_UNICODE(newconst)[0]; -#ifdef Py_UNICODE_WIDE - if (ch > 0xFFFF) { -#else - if (ch >= 0xD800 && ch <= 0xDFFF) { -#endif - Py_DECREF(newconst); - return 0; - } - } + wide and narrow builds in cases like '\U00012345'[0] or + '\U00012345abcdef'[3], so it's better to skip the optimization + in order to produce compatible pycs. + */ + if (PyUnicode_Check(v)) + return 0; + newconst = PyObject_GetItem(v, w); break; case BINARY_LSHIFT: newconst = PyNumber_Lshift(v, w); -- cgit v0.12