diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2014-02-08 12:06:33 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2014-02-08 12:06:33 (GMT) |
commit | 6cbf151032d693e13bb880aa3781dd06b7662199 (patch) | |
tree | 625f4868170708231d01d09b20d51e82730f997d | |
parent | 73afe2a972e30a3e0f87401be2fa38c67e2cb964 (diff) | |
parent | 016a3f33a533bdec6977639bfa83b7f93f6c8c88 (diff) | |
download | cpython-6cbf151032d693e13bb880aa3781dd06b7662199.zip cpython-6cbf151032d693e13bb880aa3781dd06b7662199.tar.gz cpython-6cbf151032d693e13bb880aa3781dd06b7662199.tar.bz2 |
Issue #20538: The UTF-7 incremental decoder produced an inconsistent string
when the input was truncated in a BASE64 section.
-rw-r--r-- | Lib/test/test_codecs.py | 41 | ||||
-rw-r--r-- | Misc/NEWS | 3 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 10 |
3 files changed, 45 insertions, 9 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 8a6e0f9..570539f 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -124,8 +124,6 @@ class ReadTest(MixInCheckStateHandling): "".join(codecs.iterdecode([bytes([c]) for c in encoded], self.encoding)) ) - # Temporary skip, see http://bugs.python.org/issue20542 - @unittest.skip def test_readline(self): def getreader(input): stream = io.BytesIO(input.encode(self.encoding)) @@ -899,13 +897,40 @@ class UTF7Test(ReadTest, unittest.TestCase): def test_partial(self): self.check_partial( - "a+-b", + 'a+-b\x00c\x80d\u0100e\U00010000f', [ - "a", - "a", - "a+", - "a+-", - "a+-b", + 'a', + 'a', + 'a+', + 'a+-', + 'a+-b', + 'a+-b', + 'a+-b', + 'a+-b', + 'a+-b', + 'a+-b\x00', + 'a+-b\x00c', + 'a+-b\x00c', + 'a+-b\x00c', + 'a+-b\x00c', + 'a+-b\x00c', + 'a+-b\x00c\x80', + 'a+-b\x00c\x80d', + 'a+-b\x00c\x80d', + 'a+-b\x00c\x80d', + 'a+-b\x00c\x80d', + 'a+-b\x00c\x80d', + 'a+-b\x00c\x80d\u0100', + 'a+-b\x00c\x80d\u0100e', + 'a+-b\x00c\x80d\u0100e', + 'a+-b\x00c\x80d\u0100e', + 'a+-b\x00c\x80d\u0100e', + 'a+-b\x00c\x80d\u0100e', + 'a+-b\x00c\x80d\u0100e', + 'a+-b\x00c\x80d\u0100e', + 'a+-b\x00c\x80d\u0100e', + 'a+-b\x00c\x80d\u0100e\U00010000', + 'a+-b\x00c\x80d\u0100e\U00010000f', ] ) @@ -10,6 +10,9 @@ Release date: 2014-02-09 Core and Builtins ----------------- +- Issue #20538: UTF-7 incremental decoder produced inconsistant string when + input was truncated in BASE64 section. + - Issue #20404: io.TextIOWrapper (and hence the open() builtin) now uses the internal codec marking system added for issue #19619 to throw LookupError for known non-text encodings at stream construction time. 
The existing diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 02359e5..7a1aa16 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4459,8 +4459,16 @@ utf7Error: /* return state */ if (consumed) { if (inShift) { - writer.pos = shiftOutStart; /* back off output */ *consumed = startinpos; + if (writer.pos != shiftOutStart && writer.maxchar > 127) { + PyObject *result = PyUnicode_FromKindAndData( + writer.kind, writer.data, shiftOutStart); + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + _PyUnicodeWriter_Dealloc(&writer); + return result; + } + writer.pos = shiftOutStart; /* back off output */ } else { *consumed = s-starts; |