Issue #19279: UTF-7 decoder no longer produces illegal strings.

author: Serhiy Storchaka <storchaka@gmail.com> 2013-10-19 17:39:28 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2013-10-19 17:39:28 (GMT)
commit: 55e092f545d3829e94eaf3d6aaaf048c82451e18 (patch)
tree: 6e7404db19ef4f111befb5cbff67c9403969233f
parent: f19a6ef2c9e87f7a5429b2fcf0705265bdeb8b34 (diff)
parent: 35804e4c63ae0a61adb71ced8ea6ddcf68908d41 (diff)
download: cpython-55e092f545d3829e94eaf3d6aaaf048c82451e18.zip
cpython-55e092f545d3829e94eaf3d6aaaf048c82451e18.tar.gz
cpython-55e092f545d3829e94eaf3d6aaaf048c82451e18.tar.bz2
3 files changed, 34 insertions, 0 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 99d928d..5cef4da 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -820,6 +820,36 @@ class UTF7Test(ReadTest, unittest.TestCase):
             ]
         )
 
+    def test_errors(self):
+        tests = [
+            (b'a\xffb', 'a\ufffdb'),
+            (b'a+IK', 'a\ufffd'),
+            (b'a+IK-b', 'a\ufffdb'),
+            (b'a+IK,b', 'a\ufffdb'),
+            (b'a+IKx', 'a\u20ac\ufffd'),
+            (b'a+IKx-b', 'a\u20ac\ufffdb'),
+            (b'a+IKwgr', 'a\u20ac\ufffd'),
+            (b'a+IKwgr-b', 'a\u20ac\ufffdb'),
+            (b'a+IKwgr,', 'a\u20ac\ufffd'),
+            (b'a+IKwgr,-b', 'a\u20ac\ufffd-b'),
+            (b'a+IKwgrB', 'a\u20ac\u20ac\ufffd'),
+            (b'a+IKwgrB-b', 'a\u20ac\u20ac\ufffdb'),
+            (b'a+/,+IKw-b', 'a\ufffd\u20acb'),
+            (b'a+//,+IKw-b', 'a\ufffd\u20acb'),
+            (b'a+///,+IKw-b', 'a\uffff\ufffd\u20acb'),
+            (b'a+////,+IKw-b', 'a\uffff\ufffd\u20acb'),
+        ]
+        for raw, expected in tests:
+            with self.subTest(raw=raw):
+                self.assertRaises(UnicodeDecodeError, codecs.utf_7_decode,
+                                raw, 'strict', True)
+                self.assertEqual(raw.decode('utf-7', 'replace'), expected)
+
+    def test_nonbmp(self):
+        self.assertEqual('\U000104A0'.encode(self.encoding), b'+2AHcoA-')
+        self.assertEqual('\ud801\udca0'.encode(self.encoding), b'+2AHcoA-')
+        self.assertEqual(b'+2AHcoA-'.decode(self.encoding), '\U000104A0')
+
 class UTF16ExTest(unittest.TestCase):
 
     def test_errors(self):
diff --git a/Misc/NEWS b/Misc/NEWS
index 3f071ea..78efa31 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@ Projected release date: 2013-10-20
 Core and Builtins
 -----------------
 
+- Issue #19279: UTF-7 decoder no longer produces illegal strings.
+
 - Issue #16612: Add "Argument Clinic", a compile-time preprocessor for
   C files to generate argument parsing code.  (See PEP 436.)
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 5df4df6..b9e8e1e 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4341,6 +4341,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
                     Py_UCS4 outCh = (Py_UCS4)(base64buffer >> (base64bits-16));
                     base64bits -= 16;
                     base64buffer &= (1 << base64bits) - 1; /* clear high bits */
+                    assert(outCh <= 0xffff);
                     if (surrogate) {
                         /* expecting a second surrogate */
                         if (Py_UNICODE_IS_LOW_SURROGATE(outCh)) {
@@ -4408,6 +4409,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
                 inShift = 1;
                 shiftOutStart = writer.pos;
                 base64bits = 0;
+                base64buffer = 0;
             }
         }
         else if (DECODE_DIRECT(ch)) { /* character decodes as itself */
author	Serhiy Storchaka <storchaka@gmail.com>	2013-10-19 17:39:28 (GMT)
committer	Serhiy Storchaka <storchaka@gmail.com>	2013-10-19 17:39:28 (GMT)
commit	55e092f545d3829e94eaf3d6aaaf048c82451e18 (patch)
tree	6e7404db19ef4f111befb5cbff67c9403969233f
parent	f19a6ef2c9e87f7a5429b2fcf0705265bdeb8b34 (diff)
parent	35804e4c63ae0a61adb71ced8ea6ddcf68908d41 (diff)
download	cpython-55e092f545d3829e94eaf3d6aaaf048c82451e18.zip cpython-55e092f545d3829e94eaf3d6aaaf048c82451e18.tar.gz cpython-55e092f545d3829e94eaf3d6aaaf048c82451e18.tar.bz2