diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2011-12-09 19:49:49 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2011-12-09 19:49:49 (GMT) |
commit | e3b47152a481313081621b46381384d18d0419e8 (patch) | |
tree | fe32c783377a494715e0b1ebf188548f9bc09adc /Lib | |
parent | db6238964d534a160f2b2d8b2b61e19a3d3dee47 (diff) | |
download | cpython-e3b47152a481313081621b46381384d18d0419e8.zip cpython-e3b47152a481313081621b46381384d18d0419e8.tar.gz cpython-e3b47152a481313081621b46381384d18d0419e8.tar.bz2 |
Write tests for invalid characters (U+00110000)
Test the following functions:
* codecs.raw_unicode_escape_decode()
* PyUnicode_FromWideChar()
* PyUnicode_FromUnicode()
* "unicode_internal" and "unicode_escape" decoders
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_codecs.py | 16 |
1 file changed, 16 insertions, 0 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index e885a5a..5daaa19 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1034,6 +1034,16 @@ class UnicodeInternalTest(unittest.TestCase): 'deprecated', DeprecationWarning)): self.assertRaises(UnicodeDecodeError, internal.decode, "unicode_internal") + if sys.byteorder == "little": + invalid = b"\x00\x00\x11\x00" + else: + invalid = b"\x00\x11\x00\x00" + with support.check_warnings(): + self.assertRaises(UnicodeDecodeError, + invalid.decode, "unicode_internal") + with support.check_warnings(): + self.assertEqual(invalid.decode("unicode_internal", "replace"), + '\ufffd') @unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t') def test_decode_error_attributes(self): @@ -1729,6 +1739,12 @@ class TypesTest(unittest.TestCase): self.assertEqual(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6)) self.assertEqual(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6)) + self.assertRaises(UnicodeDecodeError, codecs.unicode_escape_decode, br"\U00110000") + self.assertEqual(codecs.unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10)) + + self.assertRaises(UnicodeDecodeError, codecs.raw_unicode_escape_decode, br"\U00110000") + self.assertEqual(codecs.raw_unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10)) + class SurrogateEscapeTest(unittest.TestCase): def test_utf8(self): |