From 016a3f33a533bdec6977639bfa83b7f93f6c8c88 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 8 Feb 2014 14:01:29 +0200 Subject: Issue #20538: UTF-7 incremental decoder produced inconsistent string when input was truncated in BASE64 section. --- Lib/test/test_codecs.py | 39 +++++++++++++++++++++++++++++++++------ Misc/NEWS | 3 +++ Objects/unicodeobject.c | 10 +++++++++- 3 files changed, 45 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 2c6dce7..cb618ec 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -852,13 +852,40 @@ class UTF7Test(ReadTest, unittest.TestCase): def test_partial(self): self.check_partial( - "a+-b", + 'a+-b\x00c\x80d\u0100e\U00010000f', [ - "a", - "a", - "a+", - "a+-", - "a+-b", + 'a', + 'a', + 'a+', + 'a+-', + 'a+-b', + 'a+-b', + 'a+-b', + 'a+-b', + 'a+-b', + 'a+-b\x00', + 'a+-b\x00c', + 'a+-b\x00c', + 'a+-b\x00c', + 'a+-b\x00c', + 'a+-b\x00c', + 'a+-b\x00c\x80', + 'a+-b\x00c\x80d', + 'a+-b\x00c\x80d', + 'a+-b\x00c\x80d', + 'a+-b\x00c\x80d', + 'a+-b\x00c\x80d', + 'a+-b\x00c\x80d\u0100', + 'a+-b\x00c\x80d\u0100e', + 'a+-b\x00c\x80d\u0100e', + 'a+-b\x00c\x80d\u0100e', + 'a+-b\x00c\x80d\u0100e', + 'a+-b\x00c\x80d\u0100e', + 'a+-b\x00c\x80d\u0100e', + 'a+-b\x00c\x80d\u0100e', + 'a+-b\x00c\x80d\u0100e', + 'a+-b\x00c\x80d\u0100e\U00010000', + 'a+-b\x00c\x80d\u0100e\U00010000f', ] ) diff --git a/Misc/NEWS b/Misc/NEWS index e663bfe..adce2c1 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ What's New in Python 3.3.4 release candidate 1? Core and Builtins ----------------- +- Issue #20538: UTF-7 incremental decoder produced inconsistent string when + input was truncated in BASE64 section. + - Issue #17825: Cursor "^" is correctly positioned for SyntaxError and IndentationError. 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a149177..beafaa4 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4474,8 +4474,16 @@ utf7Error: /* return state */ if (consumed) { if (inShift) { - outpos = shiftOutStart; /* back off output */ *consumed = startinpos; + if (outpos != shiftOutStart && + PyUnicode_MAX_CHAR_VALUE(unicode) > 127) { + PyObject *result = PyUnicode_FromKindAndData( + PyUnicode_KIND(unicode), PyUnicode_DATA(unicode), + shiftOutStart); + Py_DECREF(unicode); + unicode = result; + } + outpos = shiftOutStart; /* back off output */ } else { *consumed = s-starts; -- cgit v0.12