summary | refs | log | tree | commit | diff | stats
path: root/Lib/test/test_codecs.py
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2015-10-02 10:14:53 (GMT)
committer Serhiy Storchaka <storchaka@gmail.com> 2015-10-02 10:14:53 (GMT)
commit e12f63218603f3e15592df7fba5a484f9ff5c004 (patch)
tree 2b07611f51da28e88360f9afc0e34b3290c6f2f5 /Lib/test/test_codecs.py
parent a87633e59635a659aa7eb733094129c47187f8a3 (diff)
downloadcpython-e12f63218603f3e15592df7fba5a484f9ff5c004.zip
cpython-e12f63218603f3e15592df7fba5a484f9ff5c004.tar.gz
cpython-e12f63218603f3e15592df7fba5a484f9ff5c004.tar.bz2
Issue #24848: Fixed bugs in UTF-7 decoding of malformed data:
1. Non-ASCII bytes were accepted after a shift sequence. 2. A low surrogate could be emitted in case of an error in the high surrogate.
Diffstat (limited to 'Lib/test/test_codecs.py')
-rw-r--r-- Lib/test/test_codecs.py 59
1 files changed, 59 insertions, 0 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index cf48637..9ae0ed0 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -642,6 +642,32 @@ class UTF8Test(ReadTest):
class UTF7Test(ReadTest):
encoding = "utf-7"
+ def test_ascii(self):
+ # Set D (directly encoded characters)
+ set_d = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+ 'abcdefghijklmnopqrstuvwxyz'
+ '0123456789'
+ '\'(),-./:?')
+ self.assertEqual(set_d.encode(self.encoding), set_d)
+ self.assertEqual(set_d.decode(self.encoding), set_d)
+ # Set O (optional direct characters)
+ set_o = ' !"#$%&*;<=>@[]^_`{|}'
+ self.assertEqual(set_o.encode(self.encoding), set_o)
+ self.assertEqual(set_o.decode(self.encoding), set_o)
+ # +
+ self.assertEqual(u'a+b'.encode(self.encoding), 'a+-b')
+ self.assertEqual('a+-b'.decode(self.encoding), u'a+b')
+ # White spaces
+ ws = ' \t\n\r'
+ self.assertEqual(ws.encode(self.encoding), ws)
+ self.assertEqual(ws.decode(self.encoding), ws)
+ # Other ASCII characters
+ other_ascii = ''.join(sorted(set(chr(i) for i in range(0x80)) -
+ set(set_d + set_o + '+' + ws)))
+ self.assertEqual(other_ascii.encode(self.encoding),
+ '+AAAAAQACAAMABAAFAAYABwAIAAsADAAOAA8AEAARABIAEwAU'
+ 'ABUAFgAXABgAGQAaABsAHAAdAB4AHwBcAH4Afw-')
+
def test_partial(self):
self.check_partial(
u"a+-b",
@@ -656,7 +682,9 @@ class UTF7Test(ReadTest):
def test_errors(self):
tests = [
+ ('\xffb', u'\ufffdb'),
('a\xffb', u'a\ufffdb'),
+ ('a\xff\xffb', u'a\ufffd\ufffdb'),
('a+IK', u'a\ufffd'),
('a+IK-b', u'a\ufffdb'),
('a+IK,b', u'a\ufffdb'),
@@ -672,6 +700,8 @@ class UTF7Test(ReadTest):
('a+//,+IKw-b', u'a\ufffd\u20acb'),
('a+///,+IKw-b', u'a\uffff\ufffd\u20acb'),
('a+////,+IKw-b', u'a\uffff\ufffd\u20acb'),
+ ('a+IKw-b\xff', u'a\u20acb\ufffd'),
+ ('a+IKw\xffb', u'a\u20ac\ufffdb'),
]
for raw, expected in tests:
self.assertRaises(UnicodeDecodeError, codecs.utf_7_decode,
@@ -682,6 +712,35 @@ class UTF7Test(ReadTest):
self.assertEqual(u'\U000104A0'.encode(self.encoding), '+2AHcoA-')
self.assertEqual(u'\ud801\udca0'.encode(self.encoding), '+2AHcoA-')
self.assertEqual('+2AHcoA-'.decode(self.encoding), u'\U000104A0')
+ self.assertEqual('+2AHcoA'.decode(self.encoding), u'\U000104A0')
+ self.assertEqual(u'\u20ac\U000104A0'.encode(self.encoding), '+IKzYAdyg-')
+ self.assertEqual('+IKzYAdyg-'.decode(self.encoding), u'\u20ac\U000104A0')
+ self.assertEqual('+IKzYAdyg'.decode(self.encoding), u'\u20ac\U000104A0')
+ self.assertEqual(u'\u20ac\u20ac\U000104A0'.encode(self.encoding),
+ '+IKwgrNgB3KA-')
+ self.assertEqual('+IKwgrNgB3KA-'.decode(self.encoding),
+ u'\u20ac\u20ac\U000104A0')
+ self.assertEqual('+IKwgrNgB3KA'.decode(self.encoding),
+ u'\u20ac\u20ac\U000104A0')
+
+ def test_lone_surrogates(self):
+ tests = [
+ ('a+2AE-b', u'a\ud801b'),
+ ('a+2AE\xffb', u'a\ufffdb'),
+ ('a+2AE', u'a\ufffd'),
+ ('a+2AEA-b', u'a\ufffdb'),
+ ('a+2AH-b', u'a\ufffdb'),
+ ('a+IKzYAQ-b', u'a\u20ac\ud801b'),
+ ('a+IKzYAQ\xffb', u'a\u20ac\ufffdb'),
+ ('a+IKzYAQA-b', u'a\u20ac\ufffdb'),
+ ('a+IKzYAd-b', u'a\u20ac\ufffdb'),
+ ('a+IKwgrNgB-b', u'a\u20ac\u20ac\ud801b'),
+ ('a+IKwgrNgB\xffb', u'a\u20ac\u20ac\ufffdb'),
+ ('a+IKwgrNgB', u'a\u20ac\u20ac\ufffd'),
+ ('a+IKwgrNgBA-b', u'a\u20ac\u20ac\ufffdb'),
+ ]
+ for raw, expected in tests:
+ self.assertEqual(raw.decode('utf-7', 'replace'), expected)
class UTF16ExTest(unittest.TestCase):