summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2014-02-08 12:01:29 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2014-02-08 12:01:29 (GMT)
commit: 016a3f33a533bdec6977639bfa83b7f93f6c8c88 (patch)
tree: d0f6562afddc85754ba5bcb0637a74d21b86fad0
parent: 1e949890f618867b7eabc1c08873611e960f5d03 (diff)
downloadcpython-016a3f33a533bdec6977639bfa83b7f93f6c8c88.zip
cpython-016a3f33a533bdec6977639bfa83b7f93f6c8c88.tar.gz
cpython-016a3f33a533bdec6977639bfa83b7f93f6c8c88.tar.bz2
Issue #20538: UTF-7 incremental decoder produced inconsistent string when
input was truncated in BASE64 section.
-rw-r--r-- Lib/test/test_codecs.py 39
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Objects/unicodeobject.c 10
3 files changed, 45 insertions, 7 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 2c6dce7..cb618ec 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -852,13 +852,40 @@ class UTF7Test(ReadTest, unittest.TestCase):
def test_partial(self):
self.check_partial(
- "a+-b",
+ 'a+-b\x00c\x80d\u0100e\U00010000f',
[
- "a",
- "a",
- "a+",
- "a+-",
- "a+-b",
+ 'a',
+ 'a',
+ 'a+',
+ 'a+-',
+ 'a+-b',
+ 'a+-b',
+ 'a+-b',
+ 'a+-b',
+ 'a+-b',
+ 'a+-b\x00',
+ 'a+-b\x00c',
+ 'a+-b\x00c',
+ 'a+-b\x00c',
+ 'a+-b\x00c',
+ 'a+-b\x00c',
+ 'a+-b\x00c\x80',
+ 'a+-b\x00c\x80d',
+ 'a+-b\x00c\x80d',
+ 'a+-b\x00c\x80d',
+ 'a+-b\x00c\x80d',
+ 'a+-b\x00c\x80d',
+ 'a+-b\x00c\x80d\u0100',
+ 'a+-b\x00c\x80d\u0100e',
+ 'a+-b\x00c\x80d\u0100e',
+ 'a+-b\x00c\x80d\u0100e',
+ 'a+-b\x00c\x80d\u0100e',
+ 'a+-b\x00c\x80d\u0100e',
+ 'a+-b\x00c\x80d\u0100e',
+ 'a+-b\x00c\x80d\u0100e',
+ 'a+-b\x00c\x80d\u0100e',
+ 'a+-b\x00c\x80d\u0100e\U00010000',
+ 'a+-b\x00c\x80d\u0100e\U00010000f',
]
)
diff --git a/Misc/NEWS b/Misc/NEWS
index e663bfe..adce2c1 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.3.4 release candidate 1?
Core and Builtins
-----------------
+- Issue #20538: UTF-7 incremental decoder produced inconsistent string when
+ input was truncated in BASE64 section.
+
- Issue #17825: Cursor "^" is correctly positioned for SyntaxError and
IndentationError.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index a149177..beafaa4 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4474,8 +4474,16 @@ utf7Error:
/* return state */
if (consumed) {
if (inShift) {
- outpos = shiftOutStart; /* back off output */
*consumed = startinpos;
+ if (outpos != shiftOutStart &&
+ PyUnicode_MAX_CHAR_VALUE(unicode) > 127) {
+ PyObject *result = PyUnicode_FromKindAndData(
+ PyUnicode_KIND(unicode), PyUnicode_DATA(unicode),
+ shiftOutStart);
+ Py_DECREF(unicode);
+ unicode = result;
+ }
+ outpos = shiftOutStart; /* back off output */
}
else {
*consumed = s-starts;