diff options
author | Xiang Zhang <angwerzx@126.com> | 2018-01-31 12:48:05 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-01-31 12:48:05 (GMT) |
commit | 2c7fd46e11333ef5e5cce34212f7d087694f3658 (patch) | |
tree | 0497c3b1fa32112a475fe3b7da5390b59205f7fd /Lib/test/test_codeccallbacks.py | |
parent | 84521047e413d7d1150aaa1c333580b683b3f4b1 (diff) | |
download | cpython-2c7fd46e11333ef5e5cce34212f7d087694f3658.zip cpython-2c7fd46e11333ef5e5cce34212f7d087694f3658.tar.gz cpython-2c7fd46e11333ef5e5cce34212f7d087694f3658.tar.bz2 |
bpo-32583: Fix possible crashing in builtin Unicode decoders (#5325)
When a custom decode error handler is used, the builtin decoders can
write out of bounds and then crash.
Diffstat (limited to 'Lib/test/test_codeccallbacks.py')
-rw-r--r-- | Lib/test/test_codeccallbacks.py | 52 |
1 files changed, 52 insertions, 0 deletions
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index 0c066e6..e2e7463 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -1044,6 +1044,58 @@ class CodecCallbackTest(unittest.TestCase): for (encoding, data) in baddata: self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242") + # issue32583 + def test_crashing_decode_handler(self): + # better generating one more character to fill the extra space slot + # so in debug build it can steadily fail + def forward_shorter_than_end(exc): + if isinstance(exc, UnicodeDecodeError): + # size one character, 0 < forward < exc.end + return ('\ufffd', exc.start+1) + else: + raise TypeError("don't know how to handle %r" % exc) + codecs.register_error( + "test.forward_shorter_than_end", forward_shorter_than_end) + + self.assertEqual( + b'\xd8\xd8\xd8\xd8\xd8\x00\x00\x00'.decode( + 'utf-16-le', 'test.forward_shorter_than_end'), + '\ufffd\ufffd\ufffd\ufffd\xd8\x00' + ) + self.assertEqual( + b'\xd8\xd8\xd8\xd8\x00\xd8\x00\x00'.decode( + 'utf-16-be', 'test.forward_shorter_than_end'), + '\ufffd\ufffd\ufffd\ufffd\xd8\x00' + ) + self.assertEqual( + b'\x11\x11\x11\x11\x11\x00\x00\x00\x00\x00\x00'.decode( + 'utf-32-le', 'test.forward_shorter_than_end'), + '\ufffd\ufffd\ufffd\u1111\x00' + ) + self.assertEqual( + b'\x11\x11\x11\x00\x00\x11\x11\x00\x00\x00\x00'.decode( + 'utf-32-be', 'test.forward_shorter_than_end'), + '\ufffd\ufffd\ufffd\u1111\x00' + ) + + def replace_with_long(exc): + if isinstance(exc, UnicodeDecodeError): + exc.object = b"\x00" * 8 + return ('\ufffd', exc.start) + else: + raise TypeError("don't know how to handle %r" % exc) + codecs.register_error("test.replace_with_long", replace_with_long) + + self.assertEqual( + b'\x00'.decode('utf-16', 'test.replace_with_long'), + '\ufffd\x00\x00\x00\x00' + ) + self.assertEqual( + b'\x00'.decode('utf-32', 'test.replace_with_long'), + '\ufffd\x00\x00' + ) + + def test_fake_error_class(self): handlers = [ 
codecs.strict_errors, |