summary | refs | log | tree | commit | diff | stats
path: root/Lib/test/test_codeccallbacks.py
diff options
context:
space:
mode:
author: Xiang Zhang <angwerzx@126.com> 2018-01-31 12:48:05 (GMT)
committer: GitHub <noreply@github.com> 2018-01-31 12:48:05 (GMT)
commit: 2c7fd46e11333ef5e5cce34212f7d087694f3658 (patch)
tree: 0497c3b1fa32112a475fe3b7da5390b59205f7fd /Lib/test/test_codeccallbacks.py
parent: 84521047e413d7d1150aaa1c333580b683b3f4b1 (diff)
download: cpython-2c7fd46e11333ef5e5cce34212f7d087694f3658.zip
download: cpython-2c7fd46e11333ef5e5cce34212f7d087694f3658.tar.gz
download: cpython-2c7fd46e11333ef5e5cce34212f7d087694f3658.tar.bz2
bpo-32583: Fix possible crashing in builtin Unicode decoders (#5325)
When using customized decode error handlers, it is possible for builtin decoders to write out-of-bounds and then crash.
Diffstat (limited to 'Lib/test/test_codeccallbacks.py')
-rw-r--r-- Lib/test/test_codeccallbacks.py 52
1 files changed, 52 insertions, 0 deletions
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
index 0c066e6..e2e7463 100644
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -1044,6 +1044,58 @@ class CodecCallbackTest(unittest.TestCase):
for (encoding, data) in baddata:
self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
+ # issue32583: custom decode error handlers must not crash the builtin decoders
+ def test_crashing_decode_handler(self):
+ # It is better for the handler to generate one more character so that the
+ # extra space slot gets filled; that way a debug build fails steadily.
+ def forward_shorter_than_end(exc):
+ if isinstance(exc, UnicodeDecodeError):
+ # replacement is one character long; resume at exc.start+1, so 0 < forward < exc.end
+ return ('\ufffd', exc.start+1)
+ else:
+ raise TypeError("don't know how to handle %r" % exc)
+ codecs.register_error(
+ "test.forward_shorter_than_end", forward_shorter_than_end)
+
+ self.assertEqual(
+ b'\xd8\xd8\xd8\xd8\xd8\x00\x00\x00'.decode(
+ 'utf-16-le', 'test.forward_shorter_than_end'),
+ '\ufffd\ufffd\ufffd\ufffd\xd8\x00'  # four replacements, then U+00D8 and NUL
+ )
+ self.assertEqual(
+ b'\xd8\xd8\xd8\xd8\x00\xd8\x00\x00'.decode(
+ 'utf-16-be', 'test.forward_shorter_than_end'),
+ '\ufffd\ufffd\ufffd\ufffd\xd8\x00'  # same expected text as the little-endian case
+ )
+ self.assertEqual(
+ b'\x11\x11\x11\x11\x11\x00\x00\x00\x00\x00\x00'.decode(
+ 'utf-32-le', 'test.forward_shorter_than_end'),
+ '\ufffd\ufffd\ufffd\u1111\x00'  # three replacements, then U+1111 and NUL
+ )
+ self.assertEqual(
+ b'\x11\x11\x11\x00\x00\x11\x11\x00\x00\x00\x00'.decode(
+ 'utf-32-be', 'test.forward_shorter_than_end'),
+ '\ufffd\ufffd\ufffd\u1111\x00'  # same expected text as the little-endian case
+ )
+
+ def replace_with_long(exc):  # handler that swaps the object being decoded
+ if isinstance(exc, UnicodeDecodeError):
+ exc.object = b"\x00" * 8  # eight bytes, longer than the one-byte inputs decoded below
+ return ('\ufffd', exc.start)  # one replacement character; resume at the error start
+ else:
+ raise TypeError("don't know how to handle %r" % exc)
+ codecs.register_error("test.replace_with_long", replace_with_long)
+
+ self.assertEqual(
+ b'\x00'.decode('utf-16', 'test.replace_with_long'),
+ '\ufffd\x00\x00\x00\x00'  # one replacement followed by four NULs
+ )
+ self.assertEqual(
+ b'\x00'.decode('utf-32', 'test.replace_with_long'),
+ '\ufffd\x00\x00'  # one replacement followed by two NULs
+ )
+
+
def test_fake_error_class(self):
handlers = [
codecs.strict_errors,