bpo-32583: Fix possible crashing in builtin Unicode decoders (#5325)

When a custom decode error handler is used, the builtin decoders may write out of bounds and then crash.
author: Xiang Zhang <angwerzx@126.com> 2018-01-31 12:48:05 (GMT)
committer: GitHub <noreply@github.com> 2018-01-31 12:48:05 (GMT)
commit: 2c7fd46e11333ef5e5cce34212f7d087694f3658 (patch)
tree: 0497c3b1fa32112a475fe3b7da5390b59205f7fd /Lib/test/test_codeccallbacks.py
parent: 84521047e413d7d1150aaa1c333580b683b3f4b1 (diff)
download: cpython-2c7fd46e11333ef5e5cce34212f7d087694f3658.zip
cpython-2c7fd46e11333ef5e5cce34212f7d087694f3658.tar.gz
cpython-2c7fd46e11333ef5e5cce34212f7d087694f3658.tar.bz2
1 files changed, 52 insertions, 0 deletions
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
index 0c066e6..e2e7463 100644
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -1044,6 +1044,58 @@ class CodecCallbackTest(unittest.TestCase):
             for (encoding, data) in baddata:
                 self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
 
+    # issue32583
+    def test_crashing_decode_handler(self):
+        # It is better to generate one more character to fill the extra space
+        # slot, so that the test fails reliably in debug builds.
+        def forward_shorter_than_end(exc):
+            if isinstance(exc, UnicodeDecodeError):
+                # size one character, 0 < forward < exc.end
+                return ('\ufffd', exc.start+1)
+            else:
+                raise TypeError("don't know how to handle %r" % exc)
+        codecs.register_error(
+            "test.forward_shorter_than_end", forward_shorter_than_end)
+
+        self.assertEqual(
+            b'\xd8\xd8\xd8\xd8\xd8\x00\x00\x00'.decode(
+                'utf-16-le', 'test.forward_shorter_than_end'),
+            '\ufffd\ufffd\ufffd\ufffd\xd8\x00'
+        )
+        self.assertEqual(
+            b'\xd8\xd8\xd8\xd8\x00\xd8\x00\x00'.decode(
+                'utf-16-be', 'test.forward_shorter_than_end'),
+            '\ufffd\ufffd\ufffd\ufffd\xd8\x00'
+        )
+        self.assertEqual(
+            b'\x11\x11\x11\x11\x11\x00\x00\x00\x00\x00\x00'.decode(
+                'utf-32-le', 'test.forward_shorter_than_end'),
+            '\ufffd\ufffd\ufffd\u1111\x00'
+        )
+        self.assertEqual(
+            b'\x11\x11\x11\x00\x00\x11\x11\x00\x00\x00\x00'.decode(
+                'utf-32-be', 'test.forward_shorter_than_end'),
+            '\ufffd\ufffd\ufffd\u1111\x00'
+        )
+
+        def replace_with_long(exc):
+            if isinstance(exc, UnicodeDecodeError):
+                exc.object = b"\x00" * 8
+                return ('\ufffd', exc.start)
+            else:
+                raise TypeError("don't know how to handle %r" % exc)
+        codecs.register_error("test.replace_with_long", replace_with_long)
+
+        self.assertEqual(
+            b'\x00'.decode('utf-16', 'test.replace_with_long'),
+            '\ufffd\x00\x00\x00\x00'
+        )
+        self.assertEqual(
+            b'\x00'.decode('utf-32', 'test.replace_with_long'),
+            '\ufffd\x00\x00'
+        )
+
+
     def test_fake_error_class(self):
         handlers = [
             codecs.strict_errors,
author	Xiang Zhang <angwerzx@126.com>	2018-01-31 12:48:05 (GMT)
committer	GitHub <noreply@github.com>	2018-01-31 12:48:05 (GMT)
commit	2c7fd46e11333ef5e5cce34212f7d087694f3658 (patch)
tree	0497c3b1fa32112a475fe3b7da5390b59205f7fd /Lib/test/test_codeccallbacks.py
parent	84521047e413d7d1150aaa1c333580b683b3f4b1 (diff)
download	cpython-2c7fd46e11333ef5e5cce34212f7d087694f3658.zip cpython-2c7fd46e11333ef5e5cce34212f7d087694f3658.tar.gz cpython-2c7fd46e11333ef5e5cce34212f7d087694f3658.tar.bz2