Issue #19619: Blacklist non-text codecs in method API

str.encode, bytes.decode and bytearray.decode now use an internal API to raise LookupError for known non-text encodings, rather than attempting the encoding or decoding operation and then raising a TypeError for an unexpected output type. The latter mechanism remains in place for third-party non-text encodings. Backported changeset d68df99d7a57.
author: Serhiy Storchaka <storchaka@gmail.com> 2014-02-24 12:43:03 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2014-02-24 12:43:03 (GMT)
commit: 94ee389308ec9e0e07b3f7a944d5179aba540c5e (patch)
tree: 80bc231aff27723119beacbcfa2654b90f793060 /Lib/test
parent: 20f8728bf0cce877c1908b15ddc59e2d1011ad0f (diff)
download: cpython-94ee389308ec9e0e07b3f7a944d5179aba540c5e.zip cpython-94ee389308ec9e0e07b3f7a944d5179aba540c5e.tar.gz cpython-94ee389308ec9e0e07b3f7a944d5179aba540c5e.tar.bz2
1 file changed, 42 insertions, 0 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 1a199f7..a8b3da0 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -4,6 +4,7 @@ import locale
 import sys
 import unittest
 import warnings
+import encodings
 
 from test import support
 
@@ -2408,6 +2409,47 @@ class TransformCodecTest(unittest.TestCase):
             sout = reader.readline()
             self.assertEqual(sout, b"\x80")
 
+    def test_text_to_binary_blacklists_binary_transforms(self):
+        # Check binary -> binary codecs give a good error for str input
+        bad_input = "bad input type"
+        for encoding in bytes_transform_encodings:
+            fmt = (r"{!r} is not a text encoding; "
+                   r"use codecs.encode\(\) to handle arbitrary codecs")
+            msg = fmt.format(encoding)
+            with self.assertRaisesRegex(LookupError, msg) as failure:
+                bad_input.encode(encoding)
+            self.assertIsNone(failure.exception.__cause__)
+
+    def test_text_to_binary_blacklists_text_transforms(self):
+        # Check str.encode gives a good error message for str -> str codecs
+        msg = (r"^'rot_13' is not a text encoding; "
+               r"use codecs.encode\(\) to handle arbitrary codecs")
+        with self.assertRaisesRegex(LookupError, msg):
+            "just an example message".encode("rot_13")
+
+    def test_binary_to_text_blacklists_binary_transforms(self):
+        # Check bytes.decode and bytearray.decode give a good error
+        # message for binary -> binary codecs
+        data = b"encode first to ensure we meet any format restrictions"
+        for encoding in bytes_transform_encodings:
+            encoded_data = codecs.encode(data, encoding)
+            fmt = (r"{!r} is not a text encoding; "
+                   r"use codecs.decode\(\) to handle arbitrary codecs")
+            msg = fmt.format(encoding)
+            with self.assertRaisesRegex(LookupError, msg):
+                encoded_data.decode(encoding)
+            with self.assertRaisesRegex(LookupError, msg):
+                bytearray(encoded_data).decode(encoding)
+
+    def test_binary_to_text_blacklists_text_transforms(self):
+        # Check str -> str codec gives a good error for binary input
+        for bad_input in (b"immutable", bytearray(b"mutable")):
+            msg = (r"^'rot_13' is not a text encoding; "
+                   r"use codecs.decode\(\) to handle arbitrary codecs")
+            with self.assertRaisesRegex(LookupError, msg) as failure:
+                bad_input.decode("rot_13")
+            self.assertIsNone(failure.exception.__cause__)
+
 
 @unittest.skipUnless(sys.platform == 'win32',
                      'code pages are specific to Windows')
author	Serhiy Storchaka <storchaka@gmail.com>	2014-02-24 12:43:03 (GMT)
committer	Serhiy Storchaka <storchaka@gmail.com>	2014-02-24 12:43:03 (GMT)
commit	94ee389308ec9e0e07b3f7a944d5179aba540c5e (patch)
tree	80bc231aff27723119beacbcfa2654b90f793060 /Lib/test
parent	20f8728bf0cce877c1908b15ddc59e2d1011ad0f (diff)
download	cpython-94ee389308ec9e0e07b3f7a944d5179aba540c5e.zip cpython-94ee389308ec9e0e07b3f7a944d5179aba540c5e.tar.gz cpython-94ee389308ec9e0e07b3f7a944d5179aba540c5e.tar.bz2