Issue #20538: UTF-7 incremental decoder produced inconsistent string when

input was truncated in BASE64 section.
author: Serhiy Storchaka <storchaka@gmail.com> 2014-02-08 12:06:33 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2014-02-08 12:06:33 (GMT)
commit: 6cbf151032d693e13bb880aa3781dd06b7662199 (patch)
tree: 625f4868170708231d01d09b20d51e82730f997d
parent: 73afe2a972e30a3e0f87401be2fa38c67e2cb964 (diff)
parent: 016a3f33a533bdec6977639bfa83b7f93f6c8c88 (diff)
download: cpython-6cbf151032d693e13bb880aa3781dd06b7662199.zip
cpython-6cbf151032d693e13bb880aa3781dd06b7662199.tar.gz
cpython-6cbf151032d693e13bb880aa3781dd06b7662199.tar.bz2
3 files changed, 45 insertions, 9 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 8a6e0f9..570539f 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -124,8 +124,6 @@ class ReadTest(MixInCheckStateHandling):
             "".join(codecs.iterdecode([bytes([c]) for c in encoded], self.encoding))
         )
 
-    # Temporary skip, see http://bugs.python.org/issue20542
-    @unittest.skip
     def test_readline(self):
         def getreader(input):
             stream = io.BytesIO(input.encode(self.encoding))
@@ -899,13 +897,40 @@ class UTF7Test(ReadTest, unittest.TestCase):
 
     def test_partial(self):
         self.check_partial(
-            "a+-b",
+            'a+-b\x00c\x80d\u0100e\U00010000f',
             [
-                "a",
-                "a",
-                "a+",
-                "a+-",
-                "a+-b",
+                'a',
+                'a',
+                'a+',
+                'a+-',
+                'a+-b',
+                'a+-b',
+                'a+-b',
+                'a+-b',
+                'a+-b',
+                'a+-b\x00',
+                'a+-b\x00c',
+                'a+-b\x00c',
+                'a+-b\x00c',
+                'a+-b\x00c',
+                'a+-b\x00c',
+                'a+-b\x00c\x80',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d\u0100',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e\U00010000',
+                'a+-b\x00c\x80d\u0100e\U00010000f',
             ]
         )
 
diff --git a/Misc/NEWS b/Misc/NEWS
index c0da6aa..998d87c 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ Release date: 2014-02-09
 Core and Builtins
 -----------------
 
+- Issue #20538: UTF-7 incremental decoder produced inconsistant string when
+  input was truncated in BASE64 section.
+
 - Issue #20404: io.TextIOWrapper (and hence the open() builtin) now uses the
   internal codec marking system added for issue #19619 to throw LookupError
   for known non-text encodings at stream construction time. The existing
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 02359e5..7a1aa16 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4459,8 +4459,16 @@ utf7Error:
     /* return state */
     if (consumed) {
         if (inShift) {
-            writer.pos = shiftOutStart; /* back off output */
             *consumed = startinpos;
+            if (writer.pos != shiftOutStart && writer.maxchar > 127) {
+                PyObject *result = PyUnicode_FromKindAndData(
+                        writer.kind, writer.data, shiftOutStart);
+                Py_XDECREF(errorHandler);
+                Py_XDECREF(exc);
+                _PyUnicodeWriter_Dealloc(&writer);
+                return result;
+            }
+            writer.pos = shiftOutStart; /* back off output */
         }
         else {
             *consumed = s-starts;
author	Serhiy Storchaka <storchaka@gmail.com>	2014-02-08 12:06:33 (GMT)
committer	Serhiy Storchaka <storchaka@gmail.com>	2014-02-08 12:06:33 (GMT)
commit	6cbf151032d693e13bb880aa3781dd06b7662199 (patch)
tree	625f4868170708231d01d09b20d51e82730f997d
parent	73afe2a972e30a3e0f87401be2fa38c67e2cb964 (diff)
parent	016a3f33a533bdec6977639bfa83b7f93f6c8c88 (diff)
download	cpython-6cbf151032d693e13bb880aa3781dd06b7662199.zip cpython-6cbf151032d693e13bb880aa3781dd06b7662199.tar.gz cpython-6cbf151032d693e13bb880aa3781dd06b7662199.tar.bz2