diff options
author | Walter Dörwald <walter@livinglogic.de> | 2005-08-30 10:23:14 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2005-08-30 10:23:14 (GMT) |
commit | a47d1c08d0911f2f49d92b8c6035593a672af436 (patch) | |
tree | b89cf4f689e9037da807a5e2509d87715d64057f /Lib/test/test_codeccallbacks.py | |
parent | 523c9f0709d5e7af4d45817b92cf5ce01609269c (diff) | |
download | cpython-a47d1c08d0911f2f49d92b8c6035593a672af436.zip cpython-a47d1c08d0911f2f49d92b8c6035593a672af436.tar.gz cpython-a47d1c08d0911f2f49d92b8c6035593a672af436.tar.bz2 |
SF bug #1251300: On UCS-4 builds the "unicode-internal" codec will now complain
about illegal code points. The codec now supports PEP 293 style error handlers.
(This is a variant of the Nik Haldimann's patch that detects truncated data)
Diffstat (limited to 'Lib/test/test_codeccallbacks.py')
-rw-r--r-- | Lib/test/test_codeccallbacks.py | 34 |
1 files changed, 32 insertions, 2 deletions
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index 8f0d590..f8e59cd 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -111,7 +111,7 @@ class CodecCallbackTest(unittest.TestCase): sout += "\\U%08x" % sys.maxunicode self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout) - def test_relaxedutf8(self): + def test_decoderelaxedutf8(self): # This is the test for a decoding callback handler, # that relaxes the UTF-8 minimal encoding restriction. # A null byte that is encoded as "\xc0\x80" will be @@ -158,6 +158,35 @@ class CodecCallbackTest(unittest.TestCase): charmap[ord("?")] = u"XYZ" self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap) + def test_decodeunicodeinternal(self): + self.assertRaises( + UnicodeDecodeError, + "\x00\x00\x00\x00\x00".decode, + "unicode-internal", + ) + if sys.maxunicode > 0xffff: + def handler_unicodeinternal(exc): + if not isinstance(exc, UnicodeDecodeError): + raise TypeError("don't know how to handle %r" % exc) + return (u"\x01", 1) + + self.assertEqual( + "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"), + u"\u0000" + ) + + self.assertEqual( + "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"), + u"\u0000\ufffd" + ) + + codecs.register_error("test.hui", handler_unicodeinternal) + + self.assertEqual( + "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"), + u"\u0000\u0001\u0000" + ) + def test_callbacks(self): def handler1(exc): if not isinstance(exc, UnicodeEncodeError) \ @@ -503,7 +532,8 @@ class CodecCallbackTest(unittest.TestCase): for (enc, bytes) in ( ("ascii", "\xff"), ("utf-8", "\xff"), - ("utf-7", "+x-") + ("utf-7", "+x-"), + ("unicode-internal", "\x00"), ): self.assertRaises( TypeError, |