diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2012-09-23 17:55:21 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2012-09-23 17:55:21 (GMT) |
commit | 6f80f5d4446f06d15274ad519cae6929a3565cc0 (patch) | |
tree | 652d58b6404e41887d2acbe1fe538d3eb05267a1 /Lib/test/test_codecs.py | |
parent | 20b8d992b008672d52a84c8d35992033ccfc9d84 (diff) | |
download | cpython-6f80f5d4446f06d15274ad519cae6929a3565cc0.zip cpython-6f80f5d4446f06d15274ad519cae6929a3565cc0.tar.gz cpython-6f80f5d4446f06d15274ad519cae6929a3565cc0.tar.bz2 |
Issue #15379: Fix passing of non-BMP characters as integers for the charmap decoder (already working as unicode strings).
Patch by Serhiy Storchaka.
Diffstat (limited to 'Lib/test/test_codecs.py')
-rw-r--r-- | Lib/test/test_codecs.py | 105 |
1 files changed, 105 insertions, 0 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 3426a4d..f342d88 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1546,6 +1546,10 @@ class CharmapTest(unittest.TestCase):
             ("abc", 3)
         )
 
+        self.assertRaises(UnicodeDecodeError,
+            codecs.charmap_decode, b"\x00\x01\x02", "strict", "ab"
+        )
+
         self.assertEqual(
             codecs.charmap_decode(b"\x00\x01\x02", "replace", "ab"),
             ("ab\ufffd", 3)
@@ -1572,6 +1576,107 @@ class CharmapTest(unittest.TestCase):
             ("", len(allbytes))
         )
 
+    def test_decode_with_int2str_map(self):
+        self.assertEqual(
+            codecs.charmap_decode(b"\x00\x01\x02", "strict",
+                                  {0: 'a', 1: 'b', 2: 'c'}),
+            ("abc", 3)
+        )
+
+        self.assertEqual(
+            codecs.charmap_decode(b"\x00\x01\x02", "strict",
+                                  {0: 'Aa', 1: 'Bb', 2: 'Cc'}),
+            ("AaBbCc", 3)
+        )
+
+        self.assertEqual(
+            codecs.charmap_decode(b"\x00\x01\x02", "strict",
+                                  {0: '\U0010FFFF', 1: 'b', 2: 'c'}),
+            ("\U0010FFFFbc", 3)
+        )
+
+        self.assertEqual(
+            codecs.charmap_decode(b"\x00\x01\x02", "strict",
+                                  {0: 'a', 1: 'b', 2: ''}),
+            ("ab", 3)
+        )
+
+        self.assertRaises(UnicodeDecodeError,
+            codecs.charmap_decode, b"\x00\x01\x02", "strict",
+                                   {0: 'a', 1: 'b'}
+        )
+
+        self.assertEqual(
+            codecs.charmap_decode(b"\x00\x01\x02", "replace",
+                                  {0: 'a', 1: 'b'}),
+            ("ab\ufffd", 3)
+        )
+
+        self.assertEqual(
+            codecs.charmap_decode(b"\x00\x01\x02", "replace",
+                                  {0: 'a', 1: 'b', 2: None}),
+            ("ab\ufffd", 3)
+        )
+
+        self.assertEqual(
+            codecs.charmap_decode(b"\x00\x01\x02", "ignore",
+                                  {0: 'a', 1: 'b'}),
+            ("ab", 3)
+        )
+
+        self.assertEqual(
+            codecs.charmap_decode(b"\x00\x01\x02", "ignore",
+                                  {0: 'a', 1: 'b', 2: None}),
+            ("ab", 3)
+        )
+
+        allbytes = bytes(range(256))
+        self.assertEqual(
+            codecs.charmap_decode(allbytes, "ignore", {}),
+            ("", len(allbytes))
+        )
+
+    def test_decode_with_int2int_map(self):
+        a = ord('a')
+        b = ord('b')
+        c = ord('c')
+
+        self.assertEqual(
+            codecs.charmap_decode(b"\x00\x01\x02", "strict",
+                                  {0: a, 1: b, 2: c}),
+            ("abc", 3)
+        )
+
+        # Issue #15379
+        self.assertEqual(
+            codecs.charmap_decode(b"\x00\x01\x02", "strict",
+                                  {0: 0x10FFFF, 1: b, 2: c}),
+            ("\U0010FFFFbc", 3)
+        )
+
+        self.assertRaises(TypeError,
+            codecs.charmap_decode, b"\x00\x01\x02", "strict",
+                                   {0: 0x110000, 1: b, 2: c}
+        )
+
+        self.assertRaises(UnicodeDecodeError,
+            codecs.charmap_decode, b"\x00\x01\x02", "strict",
+                                   {0: a, 1: b},
+        )
+
+        self.assertEqual(
+            codecs.charmap_decode(b"\x00\x01\x02", "replace",
+                                  {0: a, 1: b}),
+            ("ab\ufffd", 3)
+        )
+
+        self.assertEqual(
+            codecs.charmap_decode(b"\x00\x01\x02", "ignore",
+                                  {0: a, 1: b}),
+            ("ab", 3)
+        )
+
+
 class WithStmtTest(unittest.TestCase):
     def test_encodedfile(self):
         f = io.BytesIO(b"\xc3\xbc")