From e3b47152a481313081621b46381384d18d0419e8 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 9 Dec 2011 20:49:49 +0100 Subject: Write tests for invalid characters (U+00110000) Test the following functions: * codecs.raw_unicode_escape_decode() * PyUnicode_FromWideChar() * PyUnicode_FromUnicode() * "unicode_internal" and "unicode_escape" decoders --- Lib/test/test_codecs.py | 16 ++++++++++++++++ Modules/_testcapimodule.c | 18 ++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index e885a5a..5daaa19 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1034,6 +1034,16 @@ class UnicodeInternalTest(unittest.TestCase): 'deprecated', DeprecationWarning)): self.assertRaises(UnicodeDecodeError, internal.decode, "unicode_internal") + if sys.byteorder == "little": + invalid = b"\x00\x00\x11\x00" + else: + invalid = b"\x00\x11\x00\x00" + with support.check_warnings(): + self.assertRaises(UnicodeDecodeError, + invalid.decode, "unicode_internal") + with support.check_warnings(): + self.assertEqual(invalid.decode("unicode_internal", "replace"), + '\ufffd') @unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t') def test_decode_error_attributes(self): @@ -1729,6 +1739,12 @@ class TypesTest(unittest.TestCase): self.assertEqual(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6)) self.assertEqual(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6)) + self.assertRaises(UnicodeDecodeError, codecs.unicode_escape_decode, br"\U00110000") + self.assertEqual(codecs.unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10)) + + self.assertRaises(UnicodeDecodeError, codecs.raw_unicode_escape_decode, br"\U00110000") + self.assertEqual(codecs.raw_unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10)) + class SurrogateEscapeTest(unittest.TestCase): def test_utf8(self): diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 962f10b..a9bb5be 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1409,6 +1409,7 @@ test_widechar(PyObject *self) #if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4) const wchar_t wtext[2] = {(wchar_t)0x10ABCDu}; size_t wtextlen = 1; + const wchar_t invalid[1] = {(wchar_t)0x110000u}; #else const wchar_t wtext[3] = {(wchar_t)0xDBEAu, (wchar_t)0xDFCDu}; size_t wtextlen = 2; @@ -1444,6 +1445,23 @@ test_widechar(PyObject *self) Py_DECREF(wide); Py_DECREF(utf8); + +#if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4) + wide = PyUnicode_FromWideChar(invalid, 1); + if (wide == NULL) + PyErr_Clear(); + else + return raiseTestError("test_widechar", + "PyUnicode_FromWideChar(L\"\\U00110000\", 1) didn't fail"); + + wide = PyUnicode_FromUnicode(invalid, 1); + if (wide == NULL) + PyErr_Clear(); + else + return raiseTestError("test_widechar", + "PyUnicode_FromUnicode(L\"\\U00110000\", 1) didn't fail"); +#endif + Py_RETURN_NONE; } -- cgit v0.12