diff options
Diffstat (limited to 'Lib/test/test_codecs.py')
-rw-r--r-- | Lib/test/test_codecs.py | 520 |
1 files changed, 486 insertions, 34 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index e412a64..43886fc 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1,8 +1,14 @@ from test import support import unittest import codecs +import locale import sys, _testcapi, io +def coding_checker(self, coder): + def check(input, expect): + self.assertEqual(coder(input), (expect, len(input))) + return check + class Queue(object): """ queue: write bytes at one end, read bytes from the other end @@ -29,7 +35,7 @@ class MixInCheckStateHandling: d = codecs.getincrementaldecoder(encoding)() part1 = d.decode(s[:i]) state = d.getstate() - self.assertTrue(isinstance(state[1], int)) + self.assertIsInstance(state[1], int) # Check that the condition stated in the documentation for # IncrementalDecoder.getstate() holds if not state[1]: @@ -72,7 +78,6 @@ class ReadTest(unittest.TestCase, MixInCheckStateHandling): # check that there's nothing left in the buffers self.assertEqual(r.read(), "") self.assertEqual(r.bytebuffer, b"") - self.assertEqual(r.charbuffer, "") # do the check again, this time using a incremental decoder d = codecs.getincrementaldecoder(self.encoding)() @@ -313,7 +318,7 @@ class UTF32Test(ReadTest): def test_partial(self): self.check_partial( - "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", [ "", # first byte of BOM read "", # second byte of BOM read @@ -335,6 +340,10 @@ class UTF32Test(ReadTest): "\x00\xff\u0100", "\x00\xff\u0100", "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", ] ) @@ -369,7 +378,7 @@ class UTF32LETest(ReadTest): def test_partial(self): self.check_partial( - "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", [ "", "", @@ -387,6 +396,10 @@ class UTF32LETest(ReadTest): "\x00\xff\u0100", "\x00\xff\u0100", "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", ] ) @@ -409,7 +422,7 @@ class UTF32BETest(ReadTest): def test_partial(self): self.check_partial( - "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", [ "", "", @@ -427,6 +440,10 @@ class UTF32BETest(ReadTest): "\x00\xff\u0100", "\x00\xff\u0100", "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", ] ) @@ -477,7 +494,7 @@ class UTF16Test(ReadTest): def test_partial(self): self.check_partial( - "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", [ "", # first byte of BOM read "", # second byte of BOM read => byteorder known @@ -489,6 +506,10 @@ class UTF16Test(ReadTest): "\x00\xff\u0100", "\x00\xff\u0100", "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", ] ) @@ -526,7 +547,7 @@ class UTF16LETest(ReadTest): def test_partial(self): self.check_partial( - "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", [ "", "\x00", @@ -536,19 +557,40 @@ class UTF16LETest(ReadTest): "\x00\xff\u0100", "\x00\xff\u0100", "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", ] ) def test_errors(self): - self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode, - b"\xff", "strict", True) + tests = [ + (b'\xff', '\ufffd'), + (b'A\x00Z', 'A\ufffd'), + (b'A\x00B\x00C\x00D\x00Z', 'ABCD\ufffd'), + (b'\x00\xd8', '\ufffd'), + (b'\x00\xd8A', '\ufffd'), + (b'\x00\xd8A\x00', '\ufffdA'), + (b'\x00\xdcA\x00', '\ufffdA'), + ] + for raw, expected in tests: + self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode, + raw, 'strict', True) + self.assertEqual(raw.decode('utf-16le', 'replace'), expected) + + def test_nonbmp(self): + self.assertEqual("\U00010203".encode(self.encoding), + b'\x00\xd8\x03\xde') + self.assertEqual(b'\x00\xd8\x03\xde'.decode(self.encoding), + "\U00010203") class UTF16BETest(ReadTest): encoding = "utf-16-be" def test_partial(self): self.check_partial( - "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", [ "", "\x00", @@ -558,19 +600,40 @@ class UTF16BETest(ReadTest): "\x00\xff\u0100", "\x00\xff\u0100", "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", ] ) def test_errors(self): - self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode, - b"\xff", "strict", True) + tests = [ + (b'\xff', '\ufffd'), + (b'\x00A\xff', 'A\ufffd'), + (b'\x00A\x00B\x00C\x00DZ', 'ABCD\ufffd'), + (b'\xd8\x00', '\ufffd'), + (b'\xd8\x00\xdc', '\ufffd'), + (b'\xd8\x00\x00A', '\ufffdA'), + (b'\xdc\x00\x00A', '\ufffdA'), + ] + for raw, expected in tests: + self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode, + raw, 'strict', True) + self.assertEqual(raw.decode('utf-16be', 'replace'), expected) + + def test_nonbmp(self): + self.assertEqual("\U00010203".encode(self.encoding), + b'\xd8\x00\xde\x03') + self.assertEqual(b'\xd8\x00\xde\x03'.decode(self.encoding), + "\U00010203") class UTF8Test(ReadTest): encoding = "utf-8" def test_partial(self): self.check_partial( - "\x00\xff\u07ff\u0800\uffff", + "\x00\xff\u07ff\u0800\uffff\U00010000", [ "\x00", "\x00", @@ -583,6 +646,10 @@ class UTF8Test(ReadTest): "\x00\xff\u07ff\u0800", "\x00\xff\u07ff\u0800", "\x00\xff\u07ff\u0800\uffff", + "\x00\xff\u07ff\u0800\uffff", + "\x00\xff\u07ff\u0800\uffff", + "\x00\xff\u07ff\u0800\uffff", + "\x00\xff\u07ff\u0800\uffff\U00010000", ] ) @@ -611,6 +678,10 @@ class UTF8Test(ReadTest): self.assertEqual(b"abc\xed\xa0\x80def".decode("utf-8", "surrogatepass"), "abc\ud800def") self.assertTrue(codecs.lookup_error("surrogatepass")) + with self.assertRaises(UnicodeDecodeError): + b"abc\xed\xa0".decode("utf-8", "surrogatepass") + with self.assertRaises(UnicodeDecodeError): + b"abc\xed\xa0z".decode("utf-8", "surrogatepass") class UTF7Test(ReadTest): encoding = "utf-7" @@ -651,24 +722,12 @@ class ReadBufferTest(unittest.TestCase): self.assertRaises(TypeError, codecs.readbuffer_encode) self.assertRaises(TypeError, codecs.readbuffer_encode, 42) -class CharBufferTest(unittest.TestCase): - - def test_string(self): - self.assertEqual(codecs.charbuffer_encode(b"spam"), (b"spam", 4)) - - def test_empty(self): - self.assertEqual(codecs.charbuffer_encode(b""), (b"", 0)) - - def test_bad_args(self): - self.assertRaises(TypeError, codecs.charbuffer_encode) - self.assertRaises(TypeError, codecs.charbuffer_encode, 42) - class UTF8SigTest(ReadTest): encoding = "utf-8-sig" def test_partial(self): self.check_partial( - "\ufeff\x00\xff\u07ff\u0800\uffff", + "\ufeff\x00\xff\u07ff\u0800\uffff\U00010000", [ "", "", @@ -687,6 +746,10 @@ class UTF8SigTest(ReadTest): "\ufeff\x00\xff\u07ff\u0800", "\ufeff\x00\xff\u07ff\u0800", "\ufeff\x00\xff\u07ff\u0800\uffff", + "\ufeff\x00\xff\u07ff\u0800\uffff", + "\ufeff\x00\xff\u07ff\u0800\uffff", + "\ufeff\x00\xff\u07ff\u0800\uffff", + "\ufeff\x00\xff\u07ff\u0800\uffff\U00010000", ] ) @@ -745,7 +808,55 @@ class UTF8SigTest(ReadTest): class EscapeDecodeTest(unittest.TestCase): def test_empty(self): - self.assertEqual(codecs.escape_decode(""), ("", 0)) + self.assertEqual(codecs.escape_decode(b""), (b"", 0)) + + def test_raw(self): + decode = codecs.escape_decode + for b in range(256): + b = bytes([b]) + if b != b'\\': + self.assertEqual(decode(b + b'0'), (b + b'0', 2)) + + def test_escape(self): + decode = codecs.escape_decode + check = coding_checker(self, decode) + check(b"[\\\n]", b"[]") + check(br'[\"]', b'["]') + check(br"[\']", b"[']") + check(br"[\\]", br"[\]") + check(br"[\a]", b"[\x07]") + check(br"[\b]", b"[\x08]") + check(br"[\t]", b"[\x09]") + check(br"[\n]", b"[\x0a]") + check(br"[\v]", b"[\x0b]") + check(br"[\f]", b"[\x0c]") + check(br"[\r]", b"[\x0d]") + check(br"[\7]", b"[\x07]") + check(br"[\8]", br"[\8]") + check(br"[\78]", b"[\x078]") + check(br"[\41]", b"[!]") + check(br"[\418]", b"[!8]") + check(br"[\101]", b"[A]") + check(br"[\1010]", b"[A0]") + check(br"[\501]", b"[A]") + check(br"[\x41]", b"[A]") + check(br"[\X41]", br"[\X41]") + check(br"[\x410]", b"[A0]") + for b in range(256): + if b not in b'\n"\'\\abtnvfr01234567x': + b = bytes([b]) + check(b'\\' + b, b'\\' + b) + + def test_errors(self): + decode = codecs.escape_decode + self.assertRaises(ValueError, decode, br"\x") + self.assertRaises(ValueError, decode, br"[\x]") + self.assertEqual(decode(br"[\x]\x", "ignore"), (b"[]", 6)) + self.assertEqual(decode(br"[\x]\x", "replace"), (b"[?]?", 6)) + self.assertRaises(ValueError, decode, br"\x0") + self.assertRaises(ValueError, decode, br"[\x0]") + self.assertEqual(decode(br"[\x0]\x0", "ignore"), (b"[]", 8)) + self.assertEqual(decode(br"[\x0]\x0", "replace"), (b"[?]?", 8)) class RecodingTest(unittest.TestCase): def test_recoding(self): @@ -1231,6 +1342,19 @@ class CodecsModuleTest(unittest.TestCase): self.assertRaises(TypeError, codecs.getwriter) self.assertRaises(LookupError, codecs.getwriter, "__spam__") + def test_lookup_issue1813(self): + # Issue #1813: under Turkish locales, lookup of some codecs failed + # because 'I' is lowercased as "ı" (dotless i) + oldlocale = locale.setlocale(locale.LC_CTYPE) + self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale) + try: + locale.setlocale(locale.LC_CTYPE, 'tr_TR') + except locale.Error: + # Unsupported locale on this system + self.skipTest('test needs Turkish locale') + c = codecs.lookup('ASCII') + self.assertEqual(c.name, 'ascii') + class StreamReaderTest(unittest.TestCase): def setUp(self): @@ -1274,6 +1398,7 @@ all_unicode_encodings = [ "cp424", "cp437", "cp500", + "cp720", "cp737", "cp775", "cp850", @@ -1281,6 +1406,7 @@ all_unicode_encodings = [ "cp855", "cp856", "cp857", + "cp858", "cp860", "cp861", "cp862", @@ -1369,11 +1495,6 @@ broken_incremental_coders = broken_unicode_with_streams + [ "idna", ] -# The following encodings only support "strict" mode -only_strict_mode = [ - "idna", -] - class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling): def test_basics(self): s = "abc123" # all codecs should be able to encode these @@ -1448,7 +1569,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling): result = "".join(codecs.iterdecode(codecs.iterencode("", encoding), encoding)) self.assertEqual(result, "") - if encoding not in only_strict_mode: + if encoding not in ("idna", "mbcs"): # check incremental decoder/encoder with errors argument try: encoder = codecs.getincrementalencoder(encoding)("ignore") @@ -1514,6 +1635,14 @@ class CharmapTest(unittest.TestCase): ("abc", 3) ) + self.assertRaises(UnicodeDecodeError, + codecs.charmap_decode, b"\x00\x01\x02", "strict", "ab" + ) + + self.assertRaises(UnicodeDecodeError, + codecs.charmap_decode, b"\x00\x01\x02", "strict", "ab\ufffe" + ) + self.assertEqual( codecs.charmap_decode(b"\x00\x01\x02", "replace", "ab"), ("ab\ufffd", 3) @@ -1540,6 +1669,149 @@ class CharmapTest(unittest.TestCase): ("", len(allbytes)) ) + def test_decode_with_int2str_map(self): + self.assertEqual( + codecs.charmap_decode(b"\x00\x01\x02", "strict", + {0: 'a', 1: 'b', 2: 'c'}), + ("abc", 3) + ) + + self.assertEqual( + codecs.charmap_decode(b"\x00\x01\x02", "strict", + {0: 'Aa', 1: 'Bb', 2: 'Cc'}), + ("AaBbCc", 3) + ) + + self.assertEqual( + codecs.charmap_decode(b"\x00\x01\x02", "strict", + {0: '\U0010FFFF', 1: 'b', 2: 'c'}), + ("\U0010FFFFbc", 3) + ) + + self.assertEqual( + codecs.charmap_decode(b"\x00\x01\x02", "strict", + {0: 'a', 1: 'b', 2: ''}), + ("ab", 3) + ) + + self.assertRaises(UnicodeDecodeError, + codecs.charmap_decode, b"\x00\x01\x02", "strict", + {0: 'a', 1: 'b'} + ) + + self.assertRaises(UnicodeDecodeError, + codecs.charmap_decode, b"\x00\x01\x02", "strict", + {0: 'a', 1: 'b', 2: None} + ) + + # Issue #14850 + self.assertRaises(UnicodeDecodeError, + codecs.charmap_decode, b"\x00\x01\x02", "strict", + {0: 'a', 1: 'b', 2: '\ufffe'} + ) + + self.assertEqual( + codecs.charmap_decode(b"\x00\x01\x02", "replace", + {0: 'a', 1: 'b'}), + ("ab\ufffd", 3) + ) + + self.assertEqual( + codecs.charmap_decode(b"\x00\x01\x02", "replace", + {0: 'a', 1: 'b', 2: None}), + ("ab\ufffd", 3) + ) + + # Issue #14850 + self.assertEqual( + codecs.charmap_decode(b"\x00\x01\x02", "replace", + {0: 'a', 1: 'b', 2: '\ufffe'}), + ("ab\ufffd", 3) + ) + + self.assertEqual( + codecs.charmap_decode(b"\x00\x01\x02", "ignore", + {0: 'a', 1: 'b'}), + ("ab", 3) + ) + + self.assertEqual( + codecs.charmap_decode(b"\x00\x01\x02", "ignore", + {0: 'a', 1: 'b', 2: None}), + ("ab", 3) + ) + + # Issue #14850 + self.assertEqual( + codecs.charmap_decode(b"\x00\x01\x02", "ignore", + {0: 'a', 1: 'b', 2: '\ufffe'}), + ("ab", 3) + ) + + allbytes = bytes(range(256)) + self.assertEqual( + codecs.charmap_decode(allbytes, "ignore", {}), + ("", len(allbytes)) + ) + + def test_decode_with_int2int_map(self): + a = ord('a') + b = ord('b') + c = ord('c') + + self.assertEqual( + codecs.charmap_decode(b"\x00\x01\x02", "strict", + {0: a, 1: b, 2: c}), + ("abc", 3) + ) + + # Issue #15379 + self.assertEqual( + codecs.charmap_decode(b"\x00\x01\x02", "strict", + {0: 0x10FFFF, 1: b, 2: c}), + ("\U0010FFFFbc", 3) + ) + + self.assertRaises(TypeError, + codecs.charmap_decode, b"\x00\x01\x02", "strict", + {0: 0x110000, 1: b, 2: c} + ) + + self.assertRaises(UnicodeDecodeError, + codecs.charmap_decode, b"\x00\x01\x02", "strict", + {0: a, 1: b}, + ) + + self.assertRaises(UnicodeDecodeError, + codecs.charmap_decode, b"\x00\x01\x02", "strict", + {0: a, 1: b, 2: 0xFFFE}, + ) + + self.assertEqual( + codecs.charmap_decode(b"\x00\x01\x02", "replace", + {0: a, 1: b}), + ("ab\ufffd", 3) + ) + + self.assertEqual( + codecs.charmap_decode(b"\x00\x01\x02", "replace", + {0: a, 1: b, 2: 0xFFFE}), + ("ab\ufffd", 3) + ) + + self.assertEqual( + codecs.charmap_decode(b"\x00\x01\x02", "ignore", + {0: a, 1: b}), + ("ab", 3) + ) + + self.assertEqual( + codecs.charmap_decode(b"\x00\x01\x02", "ignore", + {0: a, 1: b, 2: 0xFFFE}), + ("ab", 3) + ) + + class WithStmtTest(unittest.TestCase): def test_encodedfile(self): f = io.BytesIO(b"\xc3\xbc") @@ -1583,6 +1855,135 @@ class TypesTest(unittest.TestCase): self.assertEqual(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6)) self.assertEqual(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6)) + +class UnicodeEscapeTest(unittest.TestCase): + def test_empty(self): + self.assertEqual(codecs.unicode_escape_encode(""), (b"", 0)) + self.assertEqual(codecs.unicode_escape_decode(b""), ("", 0)) + + def test_raw_encode(self): + encode = codecs.unicode_escape_encode + for b in range(32, 127): + if b != b'\\'[0]: + self.assertEqual(encode(chr(b)), (bytes([b]), 1)) + + def test_raw_decode(self): + decode = codecs.unicode_escape_decode + for b in range(256): + if b != b'\\'[0]: + self.assertEqual(decode(bytes([b]) + b'0'), (chr(b) + '0', 2)) + + def test_escape_encode(self): + encode = codecs.unicode_escape_encode + check = coding_checker(self, encode) + check('\t', br'\t') + check('\n', br'\n') + check('\r', br'\r') + check('\\', br'\\') + for b in range(32): + if chr(b) not in '\t\n\r': + check(chr(b), ('\\x%02x' % b).encode()) + for b in range(127, 256): + check(chr(b), ('\\x%02x' % b).encode()) + check('\u20ac', br'\u20ac') + check('\U0001d120', br'\U0001d120') + + def test_escape_decode(self): + decode = codecs.unicode_escape_decode + check = coding_checker(self, decode) + check(b"[\\\n]", "[]") + check(br'[\"]', '["]') + check(br"[\']", "[']") + check(br"[\\]", r"[\]") + check(br"[\a]", "[\x07]") + check(br"[\b]", "[\x08]") + check(br"[\t]", "[\x09]") + check(br"[\n]", "[\x0a]") + check(br"[\v]", "[\x0b]") + check(br"[\f]", "[\x0c]") + check(br"[\r]", "[\x0d]") + check(br"[\7]", "[\x07]") + check(br"[\8]", r"[\8]") + check(br"[\78]", "[\x078]") + check(br"[\41]", "[!]") + check(br"[\418]", "[!8]") + check(br"[\101]", "[A]") + check(br"[\1010]", "[A0]") + check(br"[\x41]", "[A]") + check(br"[\x410]", "[A0]") + check(br"\u20ac", "\u20ac") + check(br"\U0001d120", "\U0001d120") + for b in range(256): + if b not in b'\n"\'\\abtnvfr01234567xuUN': + check(b'\\' + bytes([b]), '\\' + chr(b)) + + def test_decode_errors(self): + decode = codecs.unicode_escape_decode + for c, d in (b'x', 2), (b'u', 4), (b'U', 4): + for i in range(d): + self.assertRaises(UnicodeDecodeError, decode, + b"\\" + c + b"0"*i) + self.assertRaises(UnicodeDecodeError, decode, + b"[\\" + c + b"0"*i + b"]") + data = b"[\\" + c + b"0"*i + b"]\\" + c + b"0"*i + self.assertEqual(decode(data, "ignore"), ("[]", len(data))) + self.assertEqual(decode(data, "replace"), + ("[\ufffd]\ufffd", len(data))) + self.assertRaises(UnicodeDecodeError, decode, br"\U00110000") + self.assertEqual(decode(br"\U00110000", "ignore"), ("", 10)) + self.assertEqual(decode(br"\U00110000", "replace"), ("\ufffd", 10)) + + +class RawUnicodeEscapeTest(unittest.TestCase): + def test_empty(self): + self.assertEqual(codecs.raw_unicode_escape_encode(""), (b"", 0)) + self.assertEqual(codecs.raw_unicode_escape_decode(b""), ("", 0)) + + def test_raw_encode(self): + encode = codecs.raw_unicode_escape_encode + for b in range(256): + self.assertEqual(encode(chr(b)), (bytes([b]), 1)) + + def test_raw_decode(self): + decode = codecs.raw_unicode_escape_decode + for b in range(256): + self.assertEqual(decode(bytes([b]) + b'0'), (chr(b) + '0', 2)) + + def test_escape_encode(self): + encode = codecs.raw_unicode_escape_encode + check = coding_checker(self, encode) + for b in range(256): + if b not in b'uU': + check('\\' + chr(b), b'\\' + bytes([b])) + check('\u20ac', br'\u20ac') + check('\U0001d120', br'\U0001d120') + + def test_escape_decode(self): + decode = codecs.raw_unicode_escape_decode + check = coding_checker(self, decode) + for b in range(256): + if b not in b'uU': + check(b'\\' + bytes([b]), '\\' + chr(b)) + check(br"\u20ac", "\u20ac") + check(br"\U0001d120", "\U0001d120") + + def test_decode_errors(self): + decode = codecs.raw_unicode_escape_decode + for c, d in (b'u', 4), (b'U', 4): + for i in range(d): + self.assertRaises(UnicodeDecodeError, decode, + b"\\" + c + b"0"*i) + self.assertRaises(UnicodeDecodeError, decode, + b"[\\" + c + b"0"*i + b"]") + data = b"[\\" + c + b"0"*i + b"]\\" + c + b"0"*i + self.assertEqual(decode(data, "ignore"), ("[]", len(data))) + self.assertEqual(decode(data, "replace"), + ("[\ufffd]\ufffd", len(data))) + self.assertRaises(UnicodeDecodeError, decode, br"\U00110000") + self.assertEqual(decode(br"\U00110000", "ignore"), ("", 10)) + self.assertEqual(decode(br"\U00110000", "replace"), ("\ufffd", 10)) + + class SurrogateEscapeTest(unittest.TestCase): def test_utf8(self): @@ -1674,6 +2075,54 @@ class BomTest(unittest.TestCase): self.assertEqual(f.read(), data * 2) +bytes_transform_encodings = [ + "base64_codec", + "uu_codec", + "quopri_codec", + "hex_codec", +] +try: + import zlib +except ImportError: + pass +else: + bytes_transform_encodings.append("zlib_codec") +try: + import bz2 +except ImportError: + pass +else: + bytes_transform_encodings.append("bz2_codec") + +class TransformCodecTest(unittest.TestCase): + + def test_basics(self): + binput = bytes(range(256)) + for encoding in bytes_transform_encodings: + # generic codecs interface + (o, size) = codecs.getencoder(encoding)(binput) + self.assertEqual(size, len(binput)) + (i, size) = codecs.getdecoder(encoding)(o) + self.assertEqual(size, len(o)) + self.assertEqual(i, binput) + + def test_read(self): + for encoding in bytes_transform_encodings: + sin = codecs.encode(b"\x80", encoding) + reader = codecs.getreader(encoding)(io.BytesIO(sin)) + sout = reader.read() + self.assertEqual(sout, b"\x80") + + def test_readline(self): + for encoding in bytes_transform_encodings: + if encoding in ['uu_codec', 'zlib_codec']: + continue + sin = codecs.encode(b"\x80", encoding) + reader = codecs.getreader(encoding)(io.BytesIO(sin)) + sout = reader.readline() + self.assertEqual(sout, b"\x80") + + def test_main(): support.run_unittest( UTF32Test, @@ -1684,10 +2133,10 @@ def test_main(): UTF16BETest, UTF8Test, UTF8SigTest, + EscapeDecodeTest, UTF7Test, UTF16ExTest, ReadBufferTest, - CharBufferTest, RecodingTest, PunycodeTest, UnicodeInternalTest, @@ -1700,8 +2149,11 @@ def test_main(): CharmapTest, WithStmtTest, TypesTest, + UnicodeEscapeTest, + RawUnicodeEscapeTest, SurrogateEscapeTest, BomTest, + TransformCodecTest, ) |