summaryrefslogtreecommitdiffstats
path: root/Lib/test
diff options
context:
space:
mode:
authorBénédikt Tran <10796600+picnixz@users.noreply.github.com>2024-09-29 15:22:39 (GMT)
committerGitHub <noreply@github.com>2024-09-29 15:22:39 (GMT)
commit6d0d26eb8ca1e68ffad3364dea7edb1e4063ae07 (patch)
tree65bf8130d000b41f62009fc37d6e45aabcef1605 /Lib/test
parent4d8e7c40a0d37a84da1e0c130e7d585eb39438ad (diff)
downloadcpython-6d0d26eb8ca1e68ffad3364dea7edb1e4063ae07.zip
cpython-6d0d26eb8ca1e68ffad3364dea7edb1e4063ae07.tar.gz
cpython-6d0d26eb8ca1e68ffad3364dea7edb1e4063ae07.tar.bz2
gh-111495: Add tests for `PyCodec_*` C API (#123343)
Diffstat (limited to 'Lib/test')
-rw-r--r--Lib/test/test_capi/test_codecs.py297
1 files changed, 295 insertions, 2 deletions
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index bd521a5..85491a8 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -1,13 +1,20 @@
-import unittest
+import codecs
+import contextlib
+import io
+import re
import sys
+import unittest
+import unittest.mock as mock
+import _testcapi
from test.support import import_helper
_testlimitedcapi = import_helper.import_module('_testlimitedcapi')
NULL = None
+BAD_ARGUMENT = re.escape('bad argument type for built-in operation')
-class CAPITest(unittest.TestCase):
+class CAPIUnicodeTest(unittest.TestCase):
# TODO: Test the following functions:
#
# PyUnicode_BuildEncodingMap
@@ -516,5 +523,291 @@ class CAPITest(unittest.TestCase):
# CRASHES asrawunicodeescapestring(NULL)
+class CAPICodecs(unittest.TestCase):
+
+ def setUp(self):
+ # Encoding names are normalized internally by converting them
+ # to lowercase and their hyphens are replaced by underscores.
+ self.encoding_name = 'test.test_capi.test_codecs.codec_reversed'
+ # Make sure that our custom codec is not already registered (that
+ # way we know whether we correctly unregistered the custom codec
+ # after a test or not).
+ self.assertRaises(LookupError, codecs.lookup, self.encoding_name)
+ # create the search function without registering yet
+ self._create_custom_codec()
+
+ def _create_custom_codec(self):
+ def codec_encoder(m, errors='strict'):
+ return (type(m)().join(reversed(m)), len(m))
+
+ def codec_decoder(c, errors='strict'):
+ return (type(c)().join(reversed(c)), len(c))
+
+ class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codec_encoder(input)
+
+ class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codec_decoder(input)
+
+ class StreamReader(codecs.StreamReader):
+ def encode(self, input, errors='strict'):
+ return codec_encoder(input, errors=errors)
+
+ def decode(self, input, errors='strict'):
+ return codec_decoder(input, errors=errors)
+
+ class StreamWriter(codecs.StreamWriter):
+ def encode(self, input, errors='strict'):
+ return codec_encoder(input, errors=errors)
+
+ def decode(self, input, errors='strict'):
+ return codec_decoder(input, errors=errors)
+
+ info = codecs.CodecInfo(
+ encode=codec_encoder,
+ decode=codec_decoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ name=self.encoding_name
+ )
+
+ def search_function(encoding):
+ if encoding == self.encoding_name:
+ return info
+ return None
+
+ self.codec_info = info
+ self.search_function = search_function
+
+ @contextlib.contextmanager
+ def use_custom_encoder(self):
+ self.assertRaises(LookupError, codecs.lookup, self.encoding_name)
+ codecs.register(self.search_function)
+ yield
+ codecs.unregister(self.search_function)
+ self.assertRaises(LookupError, codecs.lookup, self.encoding_name)
+
+ def test_codec_register(self):
+ search_function, encoding = self.search_function, self.encoding_name
+ # register the search function using the C API
+ self.assertIsNone(_testcapi.codec_register(search_function))
+ # in case the test failed before cleaning up
+ self.addCleanup(codecs.unregister, self.search_function)
+ self.assertIs(codecs.lookup(encoding), search_function(encoding))
+ self.assertEqual(codecs.encode('123', encoding=encoding), '321')
+ # unregister the search function using the regular API
+ codecs.unregister(search_function)
+ self.assertRaises(LookupError, codecs.lookup, encoding)
+
+ def test_codec_unregister(self):
+ search_function, encoding = self.search_function, self.encoding_name
+ self.assertRaises(LookupError, codecs.lookup, encoding)
+ # register the search function using the regular API
+ codecs.register(search_function)
+ # in case the test failed before cleaning up
+ self.addCleanup(codecs.unregister, self.search_function)
+ self.assertIsNotNone(codecs.lookup(encoding))
+ # unregister the search function using the C API
+ self.assertIsNone(_testcapi.codec_unregister(search_function))
+ self.assertRaises(LookupError, codecs.lookup, encoding)
+
+ def test_codec_known_encoding(self):
+ self.assertRaises(LookupError, codecs.lookup, 'unknown-codec')
+ self.assertFalse(_testcapi.codec_known_encoding('unknown-codec'))
+ self.assertFalse(_testcapi.codec_known_encoding('unknown_codec'))
+ self.assertFalse(_testcapi.codec_known_encoding('UNKNOWN-codec'))
+
+ encoding_name = self.encoding_name
+ self.assertRaises(LookupError, codecs.lookup, encoding_name)
+
+ codecs.register(self.search_function)
+ self.addCleanup(codecs.unregister, self.search_function)
+
+ for name in [
+ encoding_name,
+ encoding_name.upper(),
+ encoding_name.replace('_', '-'),
+ ]:
+ with self.subTest(name):
+ self.assertTrue(_testcapi.codec_known_encoding(name))
+
+ def test_codec_encode(self):
+ encode = _testcapi.codec_encode
+ self.assertEqual(encode('a', 'utf-8', NULL), b'a')
+ self.assertEqual(encode('a', 'utf-8', 'strict'), b'a')
+ self.assertEqual(encode('[é]', 'ascii', 'ignore'), b'[]')
+
+ self.assertRaises(TypeError, encode, NULL, 'ascii', 'strict')
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ encode('a', NULL, 'strict')
+
+ def test_codec_decode(self):
+ decode = _testcapi.codec_decode
+
+ s = 'a\xa1\u4f60\U0001f600'
+ b = s.encode()
+
+ self.assertEqual(decode(b, 'utf-8', 'strict'), s)
+ self.assertEqual(decode(b, 'utf-8', NULL), s)
+ self.assertEqual(decode(b, 'latin1', 'strict'), b.decode('latin1'))
+ self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', 'strict')
+ self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', NULL)
+ self.assertEqual(decode(b, 'ascii', 'replace'), 'a' + '\ufffd'*9)
+
+ # _codecs.decode() only reports an unknown error handling name when
+ # the corresponding error handling function is used; this difers
+ # from PyUnicode_Decode() which checks that both the encoding and
+ # the error handling name are recognized before even attempting to
+ # call the decoder.
+ self.assertEqual(decode(b'', 'utf-8', 'unknown-error-handler'), '')
+ self.assertEqual(decode(b'a', 'utf-8', 'unknown-error-handler'), 'a')
+
+ self.assertRaises(TypeError, decode, NULL, 'ascii', 'strict')
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ decode(b, NULL, 'strict')
+
+ def test_codec_encoder(self):
+ codec_encoder = _testcapi.codec_encoder
+
+ with self.use_custom_encoder():
+ encoder = codec_encoder(self.encoding_name)
+ self.assertIs(encoder, self.codec_info.encode)
+
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ codec_encoder(NULL)
+
+ def test_codec_decoder(self):
+ codec_decoder = _testcapi.codec_decoder
+
+ with self.use_custom_encoder():
+ decoder = codec_decoder(self.encoding_name)
+ self.assertIs(decoder, self.codec_info.decode)
+
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ codec_decoder(NULL)
+
+ def test_codec_incremental_encoder(self):
+ codec_incremental_encoder = _testcapi.codec_incremental_encoder
+
+ with self.use_custom_encoder():
+ encoding = self.encoding_name
+
+ for errors in ['strict', NULL]:
+ with self.subTest(errors):
+ encoder = codec_incremental_encoder(encoding, errors)
+ self.assertIsInstance(encoder, self.codec_info.incrementalencoder)
+
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ codec_incremental_encoder(NULL, 'strict')
+
+ def test_codec_incremental_decoder(self):
+ codec_incremental_decoder = _testcapi.codec_incremental_decoder
+
+ with self.use_custom_encoder():
+ encoding = self.encoding_name
+
+ for errors in ['strict', NULL]:
+ with self.subTest(errors):
+ decoder = codec_incremental_decoder(encoding, errors)
+ self.assertIsInstance(decoder, self.codec_info.incrementaldecoder)
+
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ codec_incremental_decoder(NULL, 'strict')
+
+ def test_codec_stream_reader(self):
+ codec_stream_reader = _testcapi.codec_stream_reader
+
+ with self.use_custom_encoder():
+ encoding, stream = self.encoding_name, io.StringIO()
+ for errors in ['strict', NULL]:
+ with self.subTest(errors):
+ writer = codec_stream_reader(encoding, stream, errors)
+ self.assertIsInstance(writer, self.codec_info.streamreader)
+
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ codec_stream_reader(NULL, stream, 'strict')
+
+ def test_codec_stream_writer(self):
+ codec_stream_writer = _testcapi.codec_stream_writer
+
+ with self.use_custom_encoder():
+ encoding, stream = self.encoding_name, io.StringIO()
+ for errors in ['strict', NULL]:
+ with self.subTest(errors):
+ writer = codec_stream_writer(encoding, stream, errors)
+ self.assertIsInstance(writer, self.codec_info.streamwriter)
+
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ codec_stream_writer(NULL, stream, 'strict')
+
+
+class CAPICodecErrors(unittest.TestCase):
+
+ def test_codec_register_error(self):
+ # for cleaning up between tests
+ from _codecs import _unregister_error as _codecs_unregister_error
+
+ self.assertRaises(LookupError, _testcapi.codec_lookup_error, 'custom')
+
+ def custom_error_handler(exc):
+ raise exc
+
+ error_handler = mock.Mock(wraps=custom_error_handler)
+ _testcapi.codec_register_error('custom', error_handler)
+ self.addCleanup(_codecs_unregister_error, 'custom')
+
+ self.assertRaises(UnicodeEncodeError, codecs.encode,
+ '\xff', 'ascii', errors='custom')
+ error_handler.assert_called_once()
+ error_handler.reset_mock()
+
+ self.assertRaises(UnicodeDecodeError, codecs.decode,
+ b'\xff', 'ascii', errors='custom')
+ error_handler.assert_called_once()
+
+ # _codecs._unregister_error directly delegates to the internal C
+ # function so a Python-level function test is sufficient (it is
+ # tested in test_codeccallbacks).
+
+ def test_codec_lookup_error(self):
+ codec_lookup_error = _testcapi.codec_lookup_error
+ self.assertIs(codec_lookup_error(NULL), codecs.strict_errors)
+ self.assertIs(codec_lookup_error('strict'), codecs.strict_errors)
+ self.assertIs(codec_lookup_error('ignore'), codecs.ignore_errors)
+ self.assertIs(codec_lookup_error('replace'), codecs.replace_errors)
+ self.assertIs(codec_lookup_error('xmlcharrefreplace'), codecs.xmlcharrefreplace_errors)
+ self.assertIs(codec_lookup_error('namereplace'), codecs.namereplace_errors)
+ self.assertRaises(LookupError, codec_lookup_error, 'unknown')
+
+ def test_codec_error_handlers(self):
+ exceptions = [
+ # A UnicodeError with an empty message currently crashes:
+ # See: https://github.com/python/cpython/issues/123378
+ # UnicodeEncodeError('bad', '', 0, 1, 'reason'),
+ UnicodeEncodeError('bad', 'x', 0, 1, 'reason'),
+ UnicodeEncodeError('bad', 'xyz123', 0, 1, 'reason'),
+ UnicodeEncodeError('bad', 'xyz123', 1, 4, 'reason'),
+ ]
+
+ strict_handler = _testcapi.codec_strict_errors
+ for exc in exceptions:
+ with self.subTest(handler=strict_handler, exc=exc):
+ self.assertRaises(UnicodeEncodeError, strict_handler, exc)
+
+ for handler in [
+ _testcapi.codec_ignore_errors,
+ _testcapi.codec_replace_errors,
+ _testcapi.codec_xmlcharrefreplace_errors,
+ _testlimitedcapi.codec_namereplace_errors,
+ ]:
+ for exc in exceptions:
+ with self.subTest(handler=handler, exc=exc):
+ self.assertIsInstance(handler(exc), tuple)
+
+
if __name__ == "__main__":
unittest.main()