diff options
Diffstat (limited to 'Lib/test/test_codecs.py')
| -rw-r--r-- | Lib/test/test_codecs.py | 98 | 
1 files changed, 90 insertions, 8 deletions
| diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 8b78c24..a4a6f95 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -349,6 +349,8 @@ class ReadTest(MixInCheckStateHandling):          self.assertRaises(UnicodeEncodeError, "\ud800".encode, self.encoding)          self.assertEqual("[\uDC80]".encode(self.encoding, "backslashreplace"),                           "[\\udc80]".encode(self.encoding)) +        self.assertEqual("[\uDC80]".encode(self.encoding, "namereplace"), +                         "[\\udc80]".encode(self.encoding))          self.assertEqual("[\uDC80]".encode(self.encoding, "xmlcharrefreplace"),                           "[&#56448;]".encode(self.encoding))          self.assertEqual("[\uDC80]".encode(self.encoding, "ignore"), @@ -376,6 +378,10 @@ class ReadTest(MixInCheckStateHandling):                               before + after)              self.assertEqual(test_sequence.decode(self.encoding, "replace"),                               before + self.ill_formed_sequence_replace + after) +            backslashreplace = ''.join('\\x%02x' % b +                                       for b in self.ill_formed_sequence) +            self.assertEqual(test_sequence.decode(self.encoding, "backslashreplace"), +                             before + backslashreplace + after)  class UTF32Test(ReadTest, unittest.TestCase):      encoding = "utf-32" @@ -808,6 +814,7 @@ class CP65001Test(ReadTest, unittest.TestCase):                  ('\udc80', 'ignore', b''),                  ('\udc80', 'replace', b'?'),                  ('\udc80', 'backslashreplace', b'\\udc80'), +                ('\udc80', 'namereplace', b'\\udc80'),                  ('\udc80', 'surrogatepass', b'\xed\xb2\x80'),              ))          else: @@ -869,6 +876,8 @@ class CP65001Test(ReadTest, unittest.TestCase):          self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "cp65001")          self.assertEqual("[\uDC80]".encode("cp65001", "backslashreplace"),        
                   b'[\\udc80]') +        self.assertEqual("[\uDC80]".encode("cp65001", "namereplace"), +                         b'[\\udc80]')          self.assertEqual("[\uDC80]".encode("cp65001", "xmlcharrefreplace"),                           b'[&#56448;]')          self.assertEqual("[\uDC80]".encode("cp65001", "surrogateescape"), @@ -890,10 +899,6 @@ class CP65001Test(ReadTest, unittest.TestCase):                           "\U00010fff\uD800")          self.assertTrue(codecs.lookup_error("surrogatepass")) -    def test_readline(self): -        self.skipTest("issue #20571: code page 65001 codec does not " -                      "support partial decoder yet") -  class UTF7Test(ReadTest, unittest.TestCase):      encoding = "utf-7" @@ -1081,6 +1086,7 @@ class UTF8SigTest(UTF8Test, unittest.TestCase):  class EscapeDecodeTest(unittest.TestCase):      def test_empty(self):          self.assertEqual(codecs.escape_decode(b""), (b"", 0)) +        self.assertEqual(codecs.escape_decode(bytearray()), (b"", 0))      def test_raw(self):          decode = codecs.escape_decode @@ -1299,14 +1305,19 @@ class UnicodeInternalTest(unittest.TestCase):                                    "unicode_internal")          if sys.byteorder == "little":              invalid = b"\x00\x00\x11\x00" +            invalid_backslashreplace = r"\x00\x00\x11\x00"          else:              invalid = b"\x00\x11\x00\x00" +            invalid_backslashreplace = r"\x00\x11\x00\x00"          with support.check_warnings():              self.assertRaises(UnicodeDecodeError,                                invalid.decode, "unicode_internal")          with support.check_warnings():              self.assertEqual(invalid.decode("unicode_internal", "replace"),                               '\ufffd') +        with support.check_warnings(): +            self.assertEqual(invalid.decode("unicode_internal", "backslashreplace"), +                             invalid_backslashreplace)      @unittest.skipUnless(SIZEOF_WCHAR_T == 
4, 'specific to 32-bit wchar_t')      def test_decode_error_attributes(self): @@ -1612,6 +1623,12 @@ class CodecsModuleTest(unittest.TestCase):          self.assertEqual(codecs.decode(b'abc'), 'abc')          self.assertRaises(UnicodeDecodeError, codecs.decode, b'\xff', 'ascii') +        # test keywords +        self.assertEqual(codecs.decode(obj=b'\xe4\xf6\xfc', encoding='latin-1'), +                         '\xe4\xf6\xfc') +        self.assertEqual(codecs.decode(b'[\xff]', 'ascii', errors='ignore'), +                         '[]') +      def test_encode(self):          self.assertEqual(codecs.encode('\xe4\xf6\xfc', 'latin-1'),                           b'\xe4\xf6\xfc') @@ -1620,6 +1637,12 @@ class CodecsModuleTest(unittest.TestCase):          self.assertEqual(codecs.encode('abc'), b'abc')          self.assertRaises(UnicodeEncodeError, codecs.encode, '\xffff', 'ascii') +        # test keywords +        self.assertEqual(codecs.encode(obj='\xe4\xf6\xfc', encoding='latin-1'), +                         b'\xe4\xf6\xfc') +        self.assertEqual(codecs.encode('[\xff]', 'ascii', errors='ignore'), +                         b'[]') +      def test_register(self):          self.assertRaises(TypeError, codecs.register)          self.assertRaises(TypeError, codecs.register, 42) @@ -1668,6 +1691,7 @@ class CodecsModuleTest(unittest.TestCase):              "register_error", "lookup_error",              "strict_errors", "replace_errors", "ignore_errors",              "xmlcharrefreplace_errors", "backslashreplace_errors", +            "namereplace_errors",              "open", "EncodedFile",              "iterencode", "iterdecode",              "BOM", "BOM_BE", "BOM_LE", @@ -1798,7 +1822,9 @@ all_unicode_encodings = [      "iso8859_9",      "johab",      "koi8_r", +    "koi8_t",      "koi8_u", +    "kz1048",      "latin_1",      "mac_cyrillic",      "mac_greek", @@ -2029,6 +2055,16 @@ class CharmapTest(unittest.TestCase):          )          self.assertEqual( +            
codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace", "ab"), +            ("ab\\x02", 3) +        ) + +        self.assertEqual( +            codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace", "ab\ufffe"), +            ("ab\\x02", 3) +        ) + +        self.assertEqual(              codecs.charmap_decode(b"\x00\x01\x02", "ignore", "ab"),              ("ab", 3)          ) @@ -2105,6 +2141,25 @@ class CharmapTest(unittest.TestCase):          )          self.assertEqual( +            codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace", +                                  {0: 'a', 1: 'b'}), +            ("ab\\x02", 3) +        ) + +        self.assertEqual( +            codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace", +                                  {0: 'a', 1: 'b', 2: None}), +            ("ab\\x02", 3) +        ) + +        # Issue #14850 +        self.assertEqual( +            codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace", +                                  {0: 'a', 1: 'b', 2: '\ufffe'}), +            ("ab\\x02", 3) +        ) + +        self.assertEqual(              codecs.charmap_decode(b"\x00\x01\x02", "ignore",                                    {0: 'a', 1: 'b'}),              ("ab", 3) @@ -2181,6 +2236,18 @@ class CharmapTest(unittest.TestCase):          )          self.assertEqual( +            codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace", +                                  {0: a, 1: b}), +            ("ab\\x02", 3) +        ) + +        self.assertEqual( +            codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace", +                                  {0: a, 1: b, 2: 0xFFFE}), +            ("ab\\x02", 3) +        ) + +        self.assertEqual(              codecs.charmap_decode(b"\x00\x01\x02", "ignore",                                    {0: a, 1: b}),              ("ab", 3) @@ -2239,9 +2306,13 @@ class TypesTest(unittest.TestCase):          self.assertRaises(UnicodeDecodeError, 
codecs.unicode_escape_decode, br"\U00110000")          self.assertEqual(codecs.unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10)) +        self.assertEqual(codecs.unicode_escape_decode(r"\U00110000", "backslashreplace"), +                         (r"\x5c\x55\x30\x30\x31\x31\x30\x30\x30\x30", 10))          self.assertRaises(UnicodeDecodeError, codecs.raw_unicode_escape_decode, br"\U00110000")          self.assertEqual(codecs.raw_unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10)) +        self.assertEqual(codecs.raw_unicode_escape_decode(r"\U00110000", "backslashreplace"), +                         (r"\x5c\x55\x30\x30\x31\x31\x30\x30\x30\x30", 10))  class UnicodeEscapeTest(unittest.TestCase): @@ -2826,15 +2897,15 @@ class CodePageTest(unittest.TestCase):          self.assertRaisesRegex(UnicodeEncodeError, 'cp932',              codecs.code_page_encode, 932, '\xff')          self.assertRaisesRegex(UnicodeDecodeError, 'cp932', -            codecs.code_page_decode, 932, b'\x81\x00') +            codecs.code_page_decode, 932, b'\x81\x00', 'strict', True)          self.assertRaisesRegex(UnicodeDecodeError, 'CP_UTF8', -            codecs.code_page_decode, self.CP_UTF8, b'\xff') +            codecs.code_page_decode, self.CP_UTF8, b'\xff', 'strict', True)      def check_decode(self, cp, tests):          for raw, errors, expected in tests:              if expected is not None:                  try: -                    decoded = codecs.code_page_decode(cp, raw, errors) +                    decoded = codecs.code_page_decode(cp, raw, errors, True)                  except UnicodeDecodeError as err:                      self.fail('Unable to decode %a from "cp%s" with '                                'errors=%r: %s' % (raw, cp, errors, err)) @@ -2846,7 +2917,7 @@ class CodePageTest(unittest.TestCase):                  self.assertLessEqual(decoded[1], len(raw))              else:                  self.assertRaises(UnicodeDecodeError, -                    
                    codecs.code_page_decode, cp, raw, errors, True)      def check_encode(self, cp, tests):          for text, errors, expected in tests: @@ -2874,7 +2945,12 @@ class CodePageTest(unittest.TestCase):              ('[\xff]', 'replace', b'[y]'),              ('[\u20ac]', 'replace', b'[?]'),              ('[\xff]', 'backslashreplace', b'[\\xff]'), +            ('[\xff]', 'namereplace', +             b'[\\N{LATIN SMALL LETTER Y WITH DIAERESIS}]'),              ('[\xff]', 'xmlcharrefreplace', b'[&#255;]'), +            ('\udcff', 'strict', None), +            ('[\udcff]', 'surrogateescape', b'[\xff]'), +            ('[\udcff]', 'surrogatepass', None),          ))          self.check_decode(932, (              (b'abc', 'strict', 'abc'), @@ -2883,10 +2959,13 @@ class CodePageTest(unittest.TestCase):              (b'[\xff]', 'strict', None),              (b'[\xff]', 'ignore', '[]'),              (b'[\xff]', 'replace', '[\ufffd]'), +            (b'[\xff]', 'backslashreplace', '[\\xff]'),              (b'[\xff]', 'surrogateescape', '[\udcff]'), +            (b'[\xff]', 'surrogatepass', None),              (b'\x81\x00abc', 'strict', None),              (b'\x81\x00abc', 'ignore', '\x00abc'),              (b'\x81\x00abc', 'replace', '\ufffd\x00abc'), +            (b'\x81\x00abc', 'backslashreplace', '\\x81\x00abc'),          ))      def test_cp1252(self): @@ -2894,9 +2973,12 @@ class CodePageTest(unittest.TestCase):              ('abc', 'strict', b'abc'),              ('\xe9\u20ac', 'strict',  b'\xe9\x80'),              ('\xff', 'strict', b'\xff'), +            # test error handlers              ('\u0141', 'strict', None),              ('\u0141', 'ignore', b''),              ('\u0141', 'replace', b'L'), +            ('\udc98', 'surrogateescape', b'\x98'), +            ('\udc98', 'surrogatepass', None),          ))          self.check_decode(1252, (              (b'abc', 'strict', 'abc'), | 
