summary | refs | log | tree | commit | diff | stats
path: root/Lib/test/test_capi
diff options
context:
space:
mode:
author: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> 2024-11-01 13:28:18 (GMT)
committer: GitHub <noreply@github.com> 2024-11-01 13:28:18 (GMT)
commit: 32e07fd377f81cbeb8c108fc791a3e7d631319b6 (patch)
tree: d38e0c82ceb413f8010d6c239d4faab023de31c0 /Lib/test/test_capi
parent: 6c67446a6e73ab0e9a26e4360412cbd2f5550e66 (diff)
download: cpython-32e07fd377f81cbeb8c108fc791a3e7d631319b6.zip
cpython-32e07fd377f81cbeb8c108fc791a3e7d631319b6.tar.gz
cpython-32e07fd377f81cbeb8c108fc791a3e7d631319b6.tar.bz2
gh-111495: improve test coverage of codecs C API (GH-126030)
For now, skip some crashers (tracked in gh-123378).
Diffstat (limited to 'Lib/test/test_capi')
-rw-r--r-- Lib/test/test_capi/test_codecs.py 138
1 file changed, 115 insertions, 23 deletions
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index 85491a8..a557e35 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -747,6 +747,49 @@ class CAPICodecs(unittest.TestCase):
class CAPICodecErrors(unittest.TestCase):
+ @classmethod
+ def _generate_exception_args(cls):
+ for objlen in range(5):
+ maxind = 2 * max(2, objlen)
+ for start in range(-maxind, maxind + 1):
+ for end in range(-maxind, maxind + 1):
+ yield objlen, start, end
+
+ @classmethod
+ def generate_encode_errors(cls):
+ return tuple(
+ UnicodeEncodeError('utf-8', '0' * objlen, start, end, 'why')
+ for objlen, start, end in cls._generate_exception_args()
+ )
+
+ @classmethod
+ def generate_decode_errors(cls):
+ return tuple(
+ UnicodeDecodeError('utf-8', b'0' * objlen, start, end, 'why')
+ for objlen, start, end in cls._generate_exception_args()
+ )
+
+ @classmethod
+ def generate_translate_errors(cls):
+ return tuple(
+ UnicodeTranslateError('0' * objlen, start, end, 'why')
+ for objlen, start, end in cls._generate_exception_args()
+ )
+
+ @classmethod
+ def setUpClass(cls):
+ cls.unicode_encode_errors = cls.generate_encode_errors()
+ cls.unicode_decode_errors = cls.generate_decode_errors()
+ cls.unicode_translate_errors = cls.generate_translate_errors()
+ cls.all_unicode_errors = (
+ cls.unicode_encode_errors
+ + cls.unicode_decode_errors
+ + cls.unicode_translate_errors
+ )
+ cls.bad_unicode_errors = (
+ ValueError(),
+ )
+
def test_codec_register_error(self):
# for cleaning up between tests
from _codecs import _unregister_error as _codecs_unregister_error
@@ -780,33 +823,82 @@ class CAPICodecErrors(unittest.TestCase):
self.assertIs(codec_lookup_error('ignore'), codecs.ignore_errors)
self.assertIs(codec_lookup_error('replace'), codecs.replace_errors)
self.assertIs(codec_lookup_error('xmlcharrefreplace'), codecs.xmlcharrefreplace_errors)
+ self.assertIs(codec_lookup_error('backslashreplace'), codecs.backslashreplace_errors)
self.assertIs(codec_lookup_error('namereplace'), codecs.namereplace_errors)
self.assertRaises(LookupError, codec_lookup_error, 'unknown')
- def test_codec_error_handlers(self):
- exceptions = [
- # A UnicodeError with an empty message currently crashes:
- # See: https://github.com/python/cpython/issues/123378
- # UnicodeEncodeError('bad', '', 0, 1, 'reason'),
- UnicodeEncodeError('bad', 'x', 0, 1, 'reason'),
- UnicodeEncodeError('bad', 'xyz123', 0, 1, 'reason'),
- UnicodeEncodeError('bad', 'xyz123', 1, 4, 'reason'),
- ]
-
- strict_handler = _testcapi.codec_strict_errors
+ def test_codec_strict_errors_handler(self):
+ handler = _testcapi.codec_strict_errors
+ for exc in self.all_unicode_errors + self.bad_unicode_errors:
+ with self.subTest(handler=handler, exc=exc):
+ self.assertRaises(type(exc), handler, exc)
+
+ def test_codec_ignore_errors_handler(self):
+ handler = _testcapi.codec_ignore_errors
+ self.do_test_codec_errors_handler(handler, self.all_unicode_errors)
+
+ def test_codec_replace_errors_handler(self):
+ handler = _testcapi.codec_replace_errors
+ self.do_test_codec_errors_handler(handler, self.all_unicode_errors)
+
+ def test_codec_xmlcharrefreplace_errors_handler(self):
+ handler = _testcapi.codec_xmlcharrefreplace_errors
+ self.do_test_codec_errors_handler(handler, self.unicode_encode_errors)
+
+ def test_codec_backslashreplace_errors_handler(self):
+ handler = _testcapi.codec_backslashreplace_errors
+ self.do_test_codec_errors_handler(handler, self.all_unicode_errors)
+
+ def test_codec_namereplace_errors_handler(self):
+ handler = _testlimitedcapi.codec_namereplace_errors
+ self.do_test_codec_errors_handler(handler, self.unicode_encode_errors)
+
+ def do_test_codec_errors_handler(self, handler, exceptions):
+ at_least_one = False
for exc in exceptions:
- with self.subTest(handler=strict_handler, exc=exc):
- self.assertRaises(UnicodeEncodeError, strict_handler, exc)
-
- for handler in [
- _testcapi.codec_ignore_errors,
- _testcapi.codec_replace_errors,
- _testcapi.codec_xmlcharrefreplace_errors,
- _testlimitedcapi.codec_namereplace_errors,
- ]:
- for exc in exceptions:
- with self.subTest(handler=handler, exc=exc):
- self.assertIsInstance(handler(exc), tuple)
+ # See https://github.com/python/cpython/issues/123378 and related
+ # discussion and issues for details.
+ if self._exception_may_crash(exc):
+ continue
+
+ at_least_one = True
+ with self.subTest(handler=handler, exc=exc):
+ # test that the handler does not crash
+ self.assertIsInstance(handler(exc), tuple)
+
+ if exceptions:
+ self.assertTrue(at_least_one, "all exceptions are crashing")
+
+ for bad_exc in (
+ self.bad_unicode_errors
+ + tuple(e for e in self.all_unicode_errors if e not in exceptions)
+ ):
+ with self.subTest('bad type', handler=handler, exc=bad_exc):
+ self.assertRaises(TypeError, handler, bad_exc)
+
+ @classmethod
+ def _exception_may_crash(cls, exc):
+ """Indicate whether a Unicode exception might currently crash
+ the interpreter when used by a built-in codecs error handler.
+
+ Until gh-123378 is fixed, we skip the tests for these exceptions.
+
+ This should only be used by "do_test_codec_errors_handler".
+ """
+ message, start, end = exc.object, exc.start, exc.end
+ match exc:
+ case UnicodeEncodeError():
+ return end < start or (end - start) >= len(message)
+ case UnicodeDecodeError():
+ # The case "end - start >= len(message)" does not crash.
+ return end < start
+ case UnicodeTranslateError():
+ # Test "end <= start" because PyCodec_ReplaceErrors checks
+ # the Unicode kind of a 0-length string which by convention
+ # is PyUnicode_1BYTE_KIND and not PyUnicode_2BYTE_KIND as
+ # the handler currently expects.
+ return end <= start or (end - start) >= len(message)
+ return False
if __name__ == "__main__":