summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Zackery Spytz <zspytz@gmail.com> 2018-08-19 04:43:38 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2018-08-19 04:43:38 (GMT)
commit: e349bf23584eef20e0d1e1b2989d9b1430f15507 (patch)
tree: d698b962c27f07d0e6f1baf4fbe13fee145c60dc
parent: d3d3171da895d8cb880f23fae6be778f0ac23be7 (diff)
download: cpython-e349bf23584eef20e0d1e1b2989d9b1430f15507.zip
cpython-e349bf23584eef20e0d1e1b2989d9b1430f15507.tar.gz
cpython-e349bf23584eef20e0d1e1b2989d9b1430f15507.tar.bz2
bpo-22602: Raise an exception in the UTF-7 decoder for ill-formed sequences starting with "+". (GH-8741)
The UTF-7 decoder now raises UnicodeDecodeError for ill-formed sequences starting with "+" (as specified in RFC 2152).
-rw-r--r-- Lib/test/test_codecs.py 1
-rw-r--r-- Lib/test/test_unicode.py 4
-rw-r--r-- Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst 3
-rw-r--r-- Objects/unicodeobject.c 5
4 files changed, 13 insertions, 0 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index a59a5e2..86d0dde 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1020,6 +1020,7 @@ class UTF7Test(ReadTest, unittest.TestCase):
(b'a+////,+IKw-b', 'a\uffff\ufffd\u20acb'),
(b'a+IKw-b\xff', 'a\u20acb\ufffd'),
(b'a+IKw\xffb', 'a\u20ac\ufffdb'),
+ (b'a+@b', 'a\ufffdb'),
]
for raw, expected in tests:
with self.subTest(raw=raw):
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 3cc018c..fb7bb2d 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1630,6 +1630,10 @@ class UnicodeTest(string_tests.CommonTest,
for c in set_o:
self.assertEqual(c.encode('ascii').decode('utf7'), c)
+ with self.assertRaisesRegex(UnicodeDecodeError,
+ 'ill-formed sequence'):
+ b'+@'.decode('utf-7')
+
def test_codecs_utf8(self):
self.assertEqual(''.encode('utf-8'), b'')
self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
diff --git a/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst b/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst
new file mode 100644
index 0000000..5b113e3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst
@@ -0,0 +1,3 @@
+The UTF-7 decoder now raises :exc:`UnicodeDecodeError` for ill-formed
+sequences starting with "+" (as specified in RFC 2152). Patch by Zackery
+Spytz.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 04fd6d0..0460d18 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4479,6 +4479,11 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
if (_PyUnicodeWriter_WriteCharInline(&writer, '+') < 0)
goto onError;
}
+ else if (s < e && !IS_BASE64(*s)) {
+ s++;
+ errmsg = "ill-formed sequence";
+ goto utf7Error;
+ }
else { /* begin base64-encoded section */
inShift = 1;
surrogate = 0;