Merged revisions 75931 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r75931 | benjamin.peterson | 2009-10-28 20:49:07 -0500 (Wed, 28 Oct 2009) | 5 lines

  do a backport of r75928

  The added test does not fail without the patch, but we still fix the issue
  of surrogates being used in wide builds where they should not be.
........
author: Benjamin Peterson <benjamin@python.org> 2009-10-29 02:02:47 (GMT)
committer: Benjamin Peterson <benjamin@python.org> 2009-10-29 02:02:47 (GMT)
commit: c717aec77271f6ed3bb6c2b9f87559fd39e4f642 (patch)
tree: ebb7992dfe4f675cd357edba62983d332ffe846a
parent: e7228d3e01d03e915ee42fceea914e1c7273b1e2 (diff)
download: cpython-c717aec77271f6ed3bb6c2b9f87559fd39e4f642.zip
cpython-c717aec77271f6ed3bb6c2b9f87559fd39e4f642.tar.gz
cpython-c717aec77271f6ed3bb6c2b9f87559fd39e4f642.tar.bz2
2 files changed, 49 insertions, 39 deletions
diff --git a/Lib/test/test_pep263.py b/Lib/test/test_pep263.py
index a3a9ade..e4faa9f 100644
--- a/Lib/test/test_pep263.py
+++ b/Lib/test/test_pep263.py
@@ -1,30 +1,37 @@
-#! -*- coding: koi8-r -*-
-
-import unittest
-from test import test_support
-
-class PEP263Test(unittest.TestCase):
-
-    def test_pep263(self):
-        self.assertEqual(
-            u"Питон".encode("utf-8"),
-            '\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd'
-        )
-        self.assertEqual(
-            u"\П".encode("utf-8"),
-            '\\\xd0\x9f'
-        )
-
-    def test_compilestring(self):
-        # see #1882
-        c = compile("\n# coding: utf-8\nu = u'\xc3\xb3'\n", "dummy", "exec")
-        d = {}
-        exec c in d
-        self.assertEqual(d['u'], u'\xf3')
-
-
-def test_main():
-    test_support.run_unittest(PEP263Test)
-
-if __name__=="__main__":
-    test_main()
+# -*- coding: koi8-r -*-
+
+import unittest
+from test import test_support
+
+class PEP263Test(unittest.TestCase):
+
+    def test_pep263(self):
+        self.assertEqual(
+            u"Питон".encode("utf-8"),
+            '\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd'
+        )
+        self.assertEqual(
+            u"\П".encode("utf-8"),
+            '\\\xd0\x9f'
+        )
+
+    def test_compilestring(self):
+        # see #1882
+        c = compile("\n# coding: utf-8\nu = u'\xc3\xb3'\n", "dummy", "exec")
+        d = {}
+        exec c in d
+        self.assertEqual(d['u'], u'\xf3')
+
+
+    def test_issue3297(self):
+        c = compile("a, b = '\U0001010F', '\\U0001010F'", "dummy", "exec")
+        d = {}
+        exec(c, d)
+        self.assertEqual(d['a'], d['b'])
+        self.assertEqual(len(d['a']), len(d['b']))
+
+def test_main():
+    test_support.run_unittest(PEP263Test)
+
+if __name__=="__main__":
+    test_main()
diff --git a/Python/ast.c b/Python/ast.c
index a3fdd89..b89e29c 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -3248,10 +3248,11 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
                 u = NULL;
         } else {
                 /* check for integer overflow */
-                if (len > PY_SIZE_MAX / 4)
+                if (len > PY_SIZE_MAX / 6)
                         return NULL;
-                /* "\XX" may become "\u005c\uHHLL" (12 bytes) */
-                u = PyString_FromStringAndSize((char *)NULL, len * 4);
+		/* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
+		   "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
+                u = PyString_FromStringAndSize((char *)NULL, len * 6);
                 if (u == NULL)
                         return NULL;
                 p = buf = PyString_AsString(u);
@@ -3268,19 +3269,21 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
                                 PyObject *w;
                                 char *r;
                                 Py_ssize_t rn, i;
-                                w = decode_utf8(c, &s, end, "utf-16-be");
+                                w = decode_utf8(c, &s, end, "utf-32-be");
                                 if (w == NULL) {
                                         Py_DECREF(u);
                                         return NULL;
                                 }
                                 r = PyString_AsString(w);
                                 rn = PyString_Size(w);
-                                assert(rn % 2 == 0);
-                                for (i = 0; i < rn; i += 2) {
-                                        sprintf(p, "\\u%02x%02x",
+                                assert(rn % 4 == 0);
+                                for (i = 0; i < rn; i += 4) {
+                                        sprintf(p, "\\U%02x%02x%02x%02x",
                                                 r[i + 0] & 0xFF,
-                                                r[i + 1] & 0xFF);
-                                        p += 6;
+                                                r[i + 1] & 0xFF,
+						r[i + 2] & 0xFF,
+						r[i + 3] & 0xFF);
+                                        p += 10;
                                 }
                                 Py_DECREF(w);
                         } else {
author	Benjamin Peterson <benjamin@python.org>	2009-10-29 02:02:47 (GMT)
committer	Benjamin Peterson <benjamin@python.org>	2009-10-29 02:02:47 (GMT)
commit	c717aec77271f6ed3bb6c2b9f87559fd39e4f642 (patch)
tree	ebb7992dfe4f675cd357edba62983d332ffe846a
parent	e7228d3e01d03e915ee42fceea914e1c7273b1e2 (diff)
download	cpython-c717aec77271f6ed3bb6c2b9f87559fd39e4f642.zip cpython-c717aec77271f6ed3bb6c2b9f87559fd39e4f642.tar.gz cpython-c717aec77271f6ed3bb6c2b9f87559fd39e4f642.tar.bz2