diff options
Diffstat (limited to 'Lib/test/test_capi/test_unicode.py')
| -rw-r--r-- | Lib/test/test_capi/test_unicode.py | 150 |
1 files changed, 149 insertions, 1 deletions
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index a69f817..36106b0 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -16,6 +16,10 @@ try: import _testinternalcapi except ImportError: _testinternalcapi = None +try: + import ctypes +except ImportError: + ctypes = None NULL = None @@ -352,13 +356,13 @@ class CAPITest(unittest.TestCase): self.assertRaises(TypeError, fromobject, []) # CRASHES fromobject(NULL) + @unittest.skipIf(ctypes is None, 'need ctypes') def test_from_format(self): """Test PyUnicode_FromFormat()""" # Length modifiers "j" and "t" are not tested here because ctypes does # not expose types for intmax_t and ptrdiff_t. # _testlimitedcapi.test_string_from_format() has a wider coverage of all # formats. - import_helper.import_module('ctypes') from ctypes import ( c_char_p, pythonapi, py_object, sizeof, @@ -1676,5 +1680,149 @@ class CAPITest(unittest.TestCase): self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1)) +class PyUnicodeWriterTest(unittest.TestCase): + def create_writer(self, size): + return _testcapi.PyUnicodeWriter(size) + + def test_basic(self): + writer = self.create_writer(100) + + # test PyUnicodeWriter_WriteUTF8() + writer.write_utf8(b'var', -1) + + # test PyUnicodeWriter_WriteChar() + writer.write_char('=') + + # test PyUnicodeWriter_WriteSubstring() + writer.write_substring("[long]", 1, 5); + + # test PyUnicodeWriter_WriteStr() + writer.write_str(" value ") + + # test PyUnicodeWriter_WriteRepr() + writer.write_repr("repr") + + self.assertEqual(writer.finish(), + "var=long value 'repr'") + + def test_utf8(self): + writer = self.create_writer(0) + writer.write_utf8(b"ascii", -1) + writer.write_char('-') + writer.write_utf8(b"latin1=\xC3\xA9", -1) + writer.write_char('-') + writer.write_utf8(b"euro=\xE2\x82\xAC", -1) + writer.write_char('.') + self.assertEqual(writer.finish(), + "ascii-latin1=\xE9-euro=\u20AC.") + + def test_invalid_utf8(self): + writer = self.create_writer(0) + with self.assertRaises(UnicodeDecodeError): + writer.write_utf8(b"invalid=\xFF", -1) + + def test_recover_utf8_error(self): + # test recovering from PyUnicodeWriter_WriteUTF8() error + writer = self.create_writer(0) + writer.write_utf8(b"value=", -1) + + # write fails with an invalid string + with self.assertRaises(UnicodeDecodeError): + writer.write_utf8(b"invalid\xFF", -1) + + # retry write with a valid string + writer.write_utf8(b"valid", -1) + + self.assertEqual(writer.finish(), + "value=valid") + + def test_decode_utf8(self): + # test PyUnicodeWriter_DecodeUTF8Stateful() + writer = self.create_writer(0) + writer.decodeutf8stateful(b"ign\xFFore", -1, b"ignore") + writer.write_char('-') + writer.decodeutf8stateful(b"replace\xFF", -1, b"replace") + writer.write_char('-') + + # incomplete trailing UTF-8 sequence + writer.decodeutf8stateful(b"incomplete\xC3", -1, b"replace") + + self.assertEqual(writer.finish(), + "ignore-replace\uFFFD-incomplete\uFFFD") + + def test_decode_utf8_consumed(self): + # test PyUnicodeWriter_DecodeUTF8Stateful() with consumed + writer = self.create_writer(0) + + # valid string + consumed = writer.decodeutf8stateful(b"text", -1, b"strict", True) + self.assertEqual(consumed, 4) + writer.write_char('-') + + # non-ASCII + consumed = writer.decodeutf8stateful(b"\xC3\xA9-\xE2\x82\xAC", 6, b"strict", True) + self.assertEqual(consumed, 6) + writer.write_char('-') + + # invalid UTF-8 (consumed is 0 on error) + with self.assertRaises(UnicodeDecodeError): + writer.decodeutf8stateful(b"invalid\xFF", -1, b"strict", True) + + # ignore error handler + consumed = writer.decodeutf8stateful(b"more\xFF", -1, b"ignore", True) + self.assertEqual(consumed, 5) + writer.write_char('-') + + # incomplete trailing UTF-8 sequence + consumed = writer.decodeutf8stateful(b"incomplete\xC3", -1, b"ignore", True) + self.assertEqual(consumed, 10) + + self.assertEqual(writer.finish(), "text-\xE9-\u20AC-more-incomplete") + + def test_widechar(self): + writer = self.create_writer(0) + writer.write_widechar("latin1=\xE9") + writer.write_widechar("-") + writer.write_widechar("euro=\u20AC") + writer.write_char('.') + self.assertEqual(writer.finish(), "latin1=\xE9-euro=\u20AC.") + + +@unittest.skipIf(ctypes is None, 'need ctypes') +class PyUnicodeWriterFormatTest(unittest.TestCase): + def create_writer(self, size): + return _testcapi.PyUnicodeWriter(size) + + def writer_format(self, writer, *args): + from ctypes import c_char_p, pythonapi, c_int, c_void_p + _PyUnicodeWriter_Format = getattr(pythonapi, "PyUnicodeWriter_Format") + _PyUnicodeWriter_Format.argtypes = (c_void_p, c_char_p,) + _PyUnicodeWriter_Format.restype = c_int + + if _PyUnicodeWriter_Format(writer.get_pointer(), *args) < 0: + raise ValueError("PyUnicodeWriter_Format failed") + + def test_format(self): + from ctypes import c_int + writer = self.create_writer(0) + self.writer_format(writer, b'%s %i', b'abc', c_int(123)) + writer.write_char('.') + self.assertEqual(writer.finish(), 'abc 123.') + + def test_recover_error(self): + # test recovering from PyUnicodeWriter_Format() error + writer = self.create_writer(0) + self.writer_format(writer, b"%s ", b"Hello") + + # PyUnicodeWriter_Format() fails with an invalid format string + with self.assertRaises(ValueError): + self.writer_format(writer, b"%s\xff", b"World") + + # Retry PyUnicodeWriter_Format() with a valid format string + self.writer_format(writer, b"%s.", b"World") + + self.assertEqual(writer.finish(), 'Hello World.') + + if __name__ == "__main__": unittest.main() |
