summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2014-02-08 12:06:33 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2014-02-08 12:06:33 (GMT)
commit: 6cbf151032d693e13bb880aa3781dd06b7662199 (patch)
tree: 625f4868170708231d01d09b20d51e82730f997d
parent: 73afe2a972e30a3e0f87401be2fa38c67e2cb964 (diff)
parent: 016a3f33a533bdec6977639bfa83b7f93f6c8c88 (diff)
download: cpython-6cbf151032d693e13bb880aa3781dd06b7662199.zip
cpython-6cbf151032d693e13bb880aa3781dd06b7662199.tar.gz
cpython-6cbf151032d693e13bb880aa3781dd06b7662199.tar.bz2
Issue #20538: The UTF-7 incremental decoder produced an inconsistent string
when the input was truncated in a BASE64 section.
-rw-r--r--  Lib/test/test_codecs.py  41
-rw-r--r--  Misc/NEWS  3
-rw-r--r--  Objects/unicodeobject.c  10
3 files changed, 45 insertions, 9 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 8a6e0f9..570539f 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -124,8 +124,6 @@ class ReadTest(MixInCheckStateHandling):
"".join(codecs.iterdecode([bytes([c]) for c in encoded], self.encoding))
)
- # Temporary skip, see http://bugs.python.org/issue20542
- @unittest.skip
def test_readline(self):
def getreader(input):
stream = io.BytesIO(input.encode(self.encoding))
@@ -899,13 +897,40 @@ class UTF7Test(ReadTest, unittest.TestCase):
def test_partial(self):
self.check_partial(
- "a+-b",
+ 'a+-b\x00c\x80d\u0100e\U00010000f',
[
- "a",
- "a",
- "a+",
- "a+-",
- "a+-b",
+ 'a',
+ 'a',
+ 'a+',
+ 'a+-',
+ 'a+-b',
+ 'a+-b',
+ 'a+-b',
+ 'a+-b',
+ 'a+-b',
+ 'a+-b\x00',
+ 'a+-b\x00c',
+ 'a+-b\x00c',
+ 'a+-b\x00c',
+ 'a+-b\x00c',
+ 'a+-b\x00c',
+ 'a+-b\x00c\x80',
+ 'a+-b\x00c\x80d',
+ 'a+-b\x00c\x80d',
+ 'a+-b\x00c\x80d',
+ 'a+-b\x00c\x80d',
+ 'a+-b\x00c\x80d',
+ 'a+-b\x00c\x80d\u0100',
+ 'a+-b\x00c\x80d\u0100e',
+ 'a+-b\x00c\x80d\u0100e',
+ 'a+-b\x00c\x80d\u0100e',
+ 'a+-b\x00c\x80d\u0100e',
+ 'a+-b\x00c\x80d\u0100e',
+ 'a+-b\x00c\x80d\u0100e',
+ 'a+-b\x00c\x80d\u0100e',
+ 'a+-b\x00c\x80d\u0100e',
+ 'a+-b\x00c\x80d\u0100e\U00010000',
+ 'a+-b\x00c\x80d\u0100e\U00010000f',
]
)
diff --git a/Misc/NEWS b/Misc/NEWS
index c0da6aa..998d87c 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ Release date: 2014-02-09
Core and Builtins
-----------------
+- Issue #20538: UTF-7 incremental decoder produced inconsistent string when
+ input was truncated in BASE64 section.
+
- Issue #20404: io.TextIOWrapper (and hence the open() builtin) now uses the
internal codec marking system added for issue #19619 to throw LookupError
for known non-text encodings at stream construction time. The existing
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 02359e5..7a1aa16 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4459,8 +4459,16 @@ utf7Error:
/* return state */
if (consumed) {
if (inShift) {
- writer.pos = shiftOutStart; /* back off output */
*consumed = startinpos;
+ if (writer.pos != shiftOutStart && writer.maxchar > 127) {
+ PyObject *result = PyUnicode_FromKindAndData(
+ writer.kind, writer.data, shiftOutStart);
+ Py_XDECREF(errorHandler);
+ Py_XDECREF(exc);
+ _PyUnicodeWriter_Dealloc(&writer);
+ return result;
+ }
+ writer.pos = shiftOutStart; /* back off output */
}
else {
*consumed = s-starts;