diff options
author | Zackery Spytz <zspytz@gmail.com> | 2018-08-19 04:43:38 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2018-08-19 04:43:38 (GMT) |
commit | e349bf23584eef20e0d1e1b2989d9b1430f15507 (patch) | |
tree | d698b962c27f07d0e6f1baf4fbe13fee145c60dc | |
parent | d3d3171da895d8cb880f23fae6be778f0ac23be7 (diff) | |
download | cpython-e349bf23584eef20e0d1e1b2989d9b1430f15507.zip cpython-e349bf23584eef20e0d1e1b2989d9b1430f15507.tar.gz cpython-e349bf23584eef20e0d1e1b2989d9b1430f15507.tar.bz2 |
bpo-22602: Raise an exception in the UTF-7 decoder for ill-formed sequences starting with "+". (GH-8741)
The UTF-7 decoder now raises UnicodeDecodeError for ill-formed
sequences starting with "+" (as specified in RFC 2152).
-rw-r--r-- | Lib/test/test_codecs.py | 1 |
-rw-r--r-- | Lib/test/test_unicode.py | 4 |
-rw-r--r-- | Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst | 3 |
-rw-r--r-- | Objects/unicodeobject.c | 5 |
4 files changed, 13 insertions, 0 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index a59a5e2..86d0dde 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1020,6 +1020,7 @@ class UTF7Test(ReadTest, unittest.TestCase):
             (b'a+////,+IKw-b', 'a\uffff\ufffd\u20acb'),
             (b'a+IKw-b\xff', 'a\u20acb\ufffd'),
             (b'a+IKw\xffb', 'a\u20ac\ufffdb'),
+            (b'a+@b', 'a\ufffdb'),
         ]
         for raw, expected in tests:
             with self.subTest(raw=raw):
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 3cc018c..fb7bb2d 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1630,6 +1630,10 @@ class UnicodeTest(string_tests.CommonTest,
         for c in set_o:
             self.assertEqual(c.encode('ascii').decode('utf7'), c)
 
+        with self.assertRaisesRegex(UnicodeDecodeError,
+                                    'ill-formed sequence'):
+            b'+@'.decode('utf-7')
+
     def test_codecs_utf8(self):
         self.assertEqual(''.encode('utf-8'), b'')
         self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
diff --git a/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst b/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst
new file mode 100644
index 0000000..5b113e3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst
@@ -0,0 +1,3 @@
+The UTF-7 decoder now raises :exc:`UnicodeDecodeError` for ill-formed
+sequences starting with "+" (as specified in RFC 2152). Patch by Zackery
+Spytz.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 04fd6d0..0460d18 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4479,6 +4479,11 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
                 if (_PyUnicodeWriter_WriteCharInline(&writer, '+') < 0)
                     goto onError;
             }
+            else if (s < e && !IS_BASE64(*s)) {
+                s++;
+                errmsg = "ill-formed sequence";
+                goto utf7Error;
+            }
             else { /* begin base64-encoded section */
                 inShift = 1;
                 surrogate = 0;