diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2009-05-02 18:52:14 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2009-05-02 18:52:14 (GMT) |
commit | db12d454e6176e9c933babe3ce40b225307c6305 (patch) | |
tree | 28b09c64e9dfd797da58a98725bfb93b4dae7077 /Lib | |
parent | 02953d244fdb2fe99853d2fe5db905df53c6596f (diff) | |
download | cpython-db12d454e6176e9c933babe3ce40b225307c6305.zip cpython-db12d454e6176e9c933babe3ce40b225307c6305.tar.gz cpython-db12d454e6176e9c933babe3ce40b225307c6305.tar.bz2 |
Issue #3672: Reject surrogates in utf-8 codec; add surrogates error
handler.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_bytes.py | 4 | ||||
-rw-r--r-- | Lib/test/test_codecs.py | 15 | ||||
-rw-r--r-- | Lib/test/test_unicode.py | 6 | ||||
-rw-r--r-- | Lib/test/test_unicodedata.py | 3 |
4 files changed, 20 insertions, 8 deletions
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
index a3ea40a..992f3d2 100644
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -169,13 +169,13 @@ class BaseBytesTest(unittest.TestCase):
                     self.assertEqual(b[start:stop:step], self.type2test(L[start:stop:step]))
 
     def test_encoding(self):
-        sample = "Hello world\n\u1234\u5678\u9abc\udef0"
+        sample = "Hello world\n\u1234\u5678\u9abc"
         for enc in ("utf8", "utf16"):
             b = self.type2test(sample, enc)
             self.assertEqual(b, self.type2test(sample.encode(enc)))
         self.assertRaises(UnicodeEncodeError, self.type2test, sample, "latin1")
         b = self.type2test(sample, "latin1", "ignore")
-        self.assertEqual(b, self.type2test(sample[:-4], "utf-8"))
+        self.assertEqual(b, self.type2test(sample[:-3], "utf-8"))
 
     def test_decode(self):
         sample = "Hello world\n\u1234\u5678\u9abc\def0\def0"
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 1730dbe..6706507 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -541,6 +541,17 @@ class UTF8Test(ReadTest):
         self.check_state_handling_decode(self.encoding,
                                          u, u.encode(self.encoding))
 
+    def test_lone_surrogates(self):
+        self.assertRaises(UnicodeEncodeError, "\ud800".encode, "utf-8")
+        self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "utf-8")
+
+    def test_surrogates_handler(self):
+        self.assertEquals("abc\ud800def".encode("utf-8", "surrogates"),
+                          b"abc\xed\xa0\x80def")
+        self.assertEquals(b"abc\xed\xa0\x80def".decode("utf-8", "surrogates"),
+                          "abc\ud800def")
+        self.assertTrue(codecs.lookup_error("surrogates"))
+
 class UTF7Test(ReadTest):
     encoding = "utf-7"
 
@@ -1023,12 +1034,12 @@ class NameprepTest(unittest.TestCase):
                 # Skipped
                 continue
             # The Unicode strings are given in UTF-8
-            orig = str(orig, "utf-8")
+            orig = str(orig, "utf-8", "surrogates")
             if prepped is None:
                 # Input contains prohibited characters
                 self.assertRaises(UnicodeError, nameprep, orig)
             else:
-                prepped = str(prepped, "utf-8")
+                prepped = str(prepped, "utf-8", "surrogates")
                 try:
                     self.assertEquals(nameprep(orig), prepped)
                 except Exception as e:
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 1fddc06..220a8eb 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -886,10 +886,10 @@ class UnicodeTest(
         self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
         self.assertEqual('\ud800\udc02'.encode('utf-8'), b'\xf0\x90\x80\x82')
         self.assertEqual('\ud84d\udc56'.encode('utf-8'), b'\xf0\xa3\x91\x96')
-        self.assertEqual('\ud800'.encode('utf-8'), b'\xed\xa0\x80')
-        self.assertEqual('\udc00'.encode('utf-8'), b'\xed\xb0\x80')
+        self.assertEqual('\ud800'.encode('utf-8', 'surrogates'), b'\xed\xa0\x80')
+        self.assertEqual('\udc00'.encode('utf-8', 'surrogates'), b'\xed\xb0\x80')
         self.assertEqual(
-            ('\ud800\udc02'*1000).encode('utf-8'),
+            ('\ud800\udc02'*1000).encode('utf-8', 'surrogates'),
             b'\xf0\x90\x80\x82'*1000
         )
         self.assertEqual(
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index aed8eaa..b84aaaf 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -13,6 +13,7 @@ import subprocess
 import test.support
 
 encoding = 'utf-8'
+errors = 'surrogates'
 
 
 ### Run tests
@@ -61,7 +62,7 @@ class UnicodeMethodsTest(unittest.TestCase):
                 (char + 'ABC').title(),
 
                 ]
-            h.update(''.join(data).encode(encoding))
+            h.update(''.join(data).encode(encoding, errors))
         result = h.hexdigest()
         self.assertEqual(result, self.expectedchecksum)
 