summaryrefslogtreecommitdiffstats
path: root/Lib/test/test_codeccallbacks.py
diff options
context:
space:
mode:
authorWalter Dörwald <walter@livinglogic.de>2005-08-30 10:23:14 (GMT)
committerWalter Dörwald <walter@livinglogic.de>2005-08-30 10:23:14 (GMT)
commita47d1c08d0911f2f49d92b8c6035593a672af436 (patch)
treeb89cf4f689e9037da807a5e2509d87715d64057f /Lib/test/test_codeccallbacks.py
parent523c9f0709d5e7af4d45817b92cf5ce01609269c (diff)
downloadcpython-a47d1c08d0911f2f49d92b8c6035593a672af436.zip
cpython-a47d1c08d0911f2f49d92b8c6035593a672af436.tar.gz
cpython-a47d1c08d0911f2f49d92b8c6035593a672af436.tar.bz2
SF bug #1251300: On UCS-4 builds the "unicode-internal" codec will now complain
about illegal code points. The codec now supports PEP 293 style error handlers. (This is a variant of Nik Haldimann's patch that detects truncated data.)
Diffstat (limited to 'Lib/test/test_codeccallbacks.py')
-rw-r--r--Lib/test/test_codeccallbacks.py34
1 files changed, 32 insertions, 2 deletions
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
index 8f0d590..f8e59cd 100644
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -111,7 +111,7 @@ class CodecCallbackTest(unittest.TestCase):
sout += "\\U%08x" % sys.maxunicode
self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
- def test_relaxedutf8(self):
+ def test_decoderelaxedutf8(self):
# This is the test for a decoding callback handler,
# that relaxes the UTF-8 minimal encoding restriction.
# A null byte that is encoded as "\xc0\x80" will be
@@ -158,6 +158,35 @@ class CodecCallbackTest(unittest.TestCase):
charmap[ord("?")] = u"XYZ"
self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
+ def test_decodeunicodeinternal(self):
+ self.assertRaises(
+ UnicodeDecodeError,
+ "\x00\x00\x00\x00\x00".decode,
+ "unicode-internal",
+ )
+ if sys.maxunicode > 0xffff:
+ def handler_unicodeinternal(exc):
+ if not isinstance(exc, UnicodeDecodeError):
+ raise TypeError("don't know how to handle %r" % exc)
+ return (u"\x01", 1)
+
+ self.assertEqual(
+ "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
+ u"\u0000"
+ )
+
+ self.assertEqual(
+ "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
+ u"\u0000\ufffd"
+ )
+
+ codecs.register_error("test.hui", handler_unicodeinternal)
+
+ self.assertEqual(
+ "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
+ u"\u0000\u0001\u0000"
+ )
+
def test_callbacks(self):
def handler1(exc):
if not isinstance(exc, UnicodeEncodeError) \
@@ -503,7 +532,8 @@ class CodecCallbackTest(unittest.TestCase):
for (enc, bytes) in (
("ascii", "\xff"),
("utf-8", "\xff"),
- ("utf-7", "+x-")
+ ("utf-7", "+x-"),
+ ("unicode-internal", "\x00"),
):
self.assertRaises(
TypeError,