diff options
author | Bénédikt Tran <10796600+picnixz@users.noreply.github.com> | 2024-11-01 13:28:18 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-01 13:28:18 (GMT) |
commit | 32e07fd377f81cbeb8c108fc791a3e7d631319b6 (patch) | |
tree | d38e0c82ceb413f8010d6c239d4faab023de31c0 /Lib/test/test_capi | |
parent | 6c67446a6e73ab0e9a26e4360412cbd2f5550e66 (diff) | |
download | cpython-32e07fd377f81cbeb8c108fc791a3e7d631319b6.zip cpython-32e07fd377f81cbeb8c108fc791a3e7d631319b6.tar.gz cpython-32e07fd377f81cbeb8c108fc791a3e7d631319b6.tar.bz2 |
gh-111495: improve test coverage of codecs C API (GH-126030)
For now, skip some crashers (tracked in gh-123378).
Diffstat (limited to 'Lib/test/test_capi')
-rw-r--r-- | Lib/test/test_capi/test_codecs.py | 138 |
1 files changed, 115 insertions, 23 deletions
```diff
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index 85491a8..a557e35 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -747,6 +747,49 @@ class CAPICodecs(unittest.TestCase):
 
 class CAPICodecErrors(unittest.TestCase):
 
+    @classmethod
+    def _generate_exception_args(cls):
+        for objlen in range(5):
+            maxind = 2 * max(2, objlen)
+            for start in range(-maxind, maxind + 1):
+                for end in range(-maxind, maxind + 1):
+                    yield objlen, start, end
+
+    @classmethod
+    def generate_encode_errors(cls):
+        return tuple(
+            UnicodeEncodeError('utf-8', '0' * objlen, start, end, 'why')
+            for objlen, start, end in cls._generate_exception_args()
+        )
+
+    @classmethod
+    def generate_decode_errors(cls):
+        return tuple(
+            UnicodeDecodeError('utf-8', b'0' * objlen, start, end, 'why')
+            for objlen, start, end in cls._generate_exception_args()
+        )
+
+    @classmethod
+    def generate_translate_errors(cls):
+        return tuple(
+            UnicodeTranslateError('0' * objlen, start, end, 'why')
+            for objlen, start, end in cls._generate_exception_args()
+        )
+
+    @classmethod
+    def setUpClass(cls):
+        cls.unicode_encode_errors = cls.generate_encode_errors()
+        cls.unicode_decode_errors = cls.generate_decode_errors()
+        cls.unicode_translate_errors = cls.generate_translate_errors()
+        cls.all_unicode_errors = (
+            cls.unicode_encode_errors
+            + cls.unicode_decode_errors
+            + cls.unicode_translate_errors
+        )
+        cls.bad_unicode_errors = (
+            ValueError(),
+        )
+
     def test_codec_register_error(self):
         # for cleaning up between tests
         from _codecs import _unregister_error as _codecs_unregister_error
@@ -780,33 +823,82 @@ class CAPICodecErrors(unittest.TestCase):
         self.assertIs(codec_lookup_error('ignore'), codecs.ignore_errors)
         self.assertIs(codec_lookup_error('replace'), codecs.replace_errors)
         self.assertIs(codec_lookup_error('xmlcharrefreplace'), codecs.xmlcharrefreplace_errors)
+        self.assertIs(codec_lookup_error('backslashreplace'), codecs.backslashreplace_errors)
         self.assertIs(codec_lookup_error('namereplace'), codecs.namereplace_errors)
         self.assertRaises(LookupError, codec_lookup_error, 'unknown')
 
-    def test_codec_error_handlers(self):
-        exceptions = [
-            # A UnicodeError with an empty message currently crashes:
-            # See: https://github.com/python/cpython/issues/123378
-            # UnicodeEncodeError('bad', '', 0, 1, 'reason'),
-            UnicodeEncodeError('bad', 'x', 0, 1, 'reason'),
-            UnicodeEncodeError('bad', 'xyz123', 0, 1, 'reason'),
-            UnicodeEncodeError('bad', 'xyz123', 1, 4, 'reason'),
-        ]
-
-        strict_handler = _testcapi.codec_strict_errors
+    def test_codec_strict_errors_handler(self):
+        handler = _testcapi.codec_strict_errors
+        for exc in self.all_unicode_errors + self.bad_unicode_errors:
+            with self.subTest(handler=handler, exc=exc):
+                self.assertRaises(type(exc), handler, exc)
+
+    def test_codec_ignore_errors_handler(self):
+        handler = _testcapi.codec_ignore_errors
+        self.do_test_codec_errors_handler(handler, self.all_unicode_errors)
+
+    def test_codec_replace_errors_handler(self):
+        handler = _testcapi.codec_replace_errors
+        self.do_test_codec_errors_handler(handler, self.all_unicode_errors)
+
+    def test_codec_xmlcharrefreplace_errors_handler(self):
+        handler = _testcapi.codec_xmlcharrefreplace_errors
+        self.do_test_codec_errors_handler(handler, self.unicode_encode_errors)
+
+    def test_codec_backslashreplace_errors_handler(self):
+        handler = _testcapi.codec_backslashreplace_errors
+        self.do_test_codec_errors_handler(handler, self.all_unicode_errors)
+
+    def test_codec_namereplace_errors_handler(self):
+        handler = _testlimitedcapi.codec_namereplace_errors
+        self.do_test_codec_errors_handler(handler, self.unicode_encode_errors)
+
+    def do_test_codec_errors_handler(self, handler, exceptions):
+        at_least_one = False
         for exc in exceptions:
-            with self.subTest(handler=strict_handler, exc=exc):
-                self.assertRaises(UnicodeEncodeError, strict_handler, exc)
-
-        for handler in [
-            _testcapi.codec_ignore_errors,
-            _testcapi.codec_replace_errors,
-            _testcapi.codec_xmlcharrefreplace_errors,
-            _testlimitedcapi.codec_namereplace_errors,
-        ]:
-            for exc in exceptions:
-                with self.subTest(handler=handler, exc=exc):
-                    self.assertIsInstance(handler(exc), tuple)
+            # See https://github.com/python/cpython/issues/123378 and related
+            # discussion and issues for details.
+            if self._exception_may_crash(exc):
+                continue
+
+            at_least_one = True
+            with self.subTest(handler=handler, exc=exc):
+                # test that the handler does not crash
+                self.assertIsInstance(handler(exc), tuple)
+
+        if exceptions:
+            self.assertTrue(at_least_one, "all exceptions are crashing")
+
+        for bad_exc in (
+            self.bad_unicode_errors
+            + tuple(e for e in self.all_unicode_errors if e not in exceptions)
+        ):
+            with self.subTest('bad type', handler=handler, exc=bad_exc):
+                self.assertRaises(TypeError, handler, bad_exc)
+
+    @classmethod
+    def _exception_may_crash(cls, exc):
+        """Indicate whether a Unicode exception might currently crash
+        the interpreter when used by a built-in codecs error handler.
+
+        Until gh-123378 is fixed, we skip the tests for these exceptions.
+
+        This should only be used by "do_test_codec_errors_handler".
+        """
+        message, start, end = exc.object, exc.start, exc.end
+        match exc:
+            case UnicodeEncodeError():
+                return end < start or (end - start) >= len(message)
+            case UnicodeDecodeError():
+                # The case "end - start >= len(message)" does not crash.
+                return end < start
+            case UnicodeTranslateError():
+                # Test "end <= start" because PyCodec_ReplaceErrors checks
+                # the Unicode kind of a 0-length string which by convention
+                # is PyUnicode_1BYTE_KIND and not PyUnicode_2BYTE_KIND as
+                # the handler currently expects.
+                return end <= start or (end - start) >= len(message)
+        return False
 
 
 if __name__ == "__main__":
```