diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2014-02-24 12:43:03 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2014-02-24 12:43:03 (GMT) |
commit | 94ee389308ec9e0e07b3f7a944d5179aba540c5e (patch) | |
tree | 80bc231aff27723119beacbcfa2654b90f793060 /Lib | |
parent | 20f8728bf0cce877c1908b15ddc59e2d1011ad0f (diff) | |
download | cpython-94ee389308ec9e0e07b3f7a944d5179aba540c5e.zip cpython-94ee389308ec9e0e07b3f7a944d5179aba540c5e.tar.gz cpython-94ee389308ec9e0e07b3f7a944d5179aba540c5e.tar.bz2 |
Issue #19619: Blacklist non-text codecs in method API
str.encode, bytes.decode and bytearray.decode now use an
internal API to throw LookupError for known non-text encodings,
rather than attempting the encoding or decoding operation and
then throwing a TypeError for an unexpected output type.
The latter mechanism remains in place for third party non-text
encodings.
Backported changeset d68df99d7a57.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/codecs.py | 14 | ||||
-rw-r--r-- | Lib/encodings/base64_codec.py | 1 | ||||
-rw-r--r-- | Lib/encodings/bz2_codec.py | 1 | ||||
-rw-r--r-- | Lib/encodings/hex_codec.py | 1 | ||||
-rw-r--r-- | Lib/encodings/quopri_codec.py | 1 | ||||
-rwxr-xr-x | Lib/encodings/rot_13.py | 1 | ||||
-rw-r--r-- | Lib/encodings/uu_codec.py | 1 | ||||
-rw-r--r-- | Lib/encodings/zlib_codec.py | 1 | ||||
-rw-r--r-- | Lib/test/test_codecs.py | 42 |
9 files changed, 62 insertions, 1 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py index 01ae0f3..c2065da 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -73,9 +73,19 @@ BOM64_BE = BOM_UTF32_BE ### Codec base classes (defining the API) class CodecInfo(tuple): + """Codec details when looking up the codec registry""" + + # Private API to allow Python 3.4 to blacklist the known non-Unicode + # codecs in the standard library. A more general mechanism to + # reliably distinguish test encodings from other codecs will hopefully + # be defined for Python 3.5 + # + # See http://bugs.python.org/issue19619 + _is_text_encoding = True # Assume codecs are text encodings by default def __new__(cls, encode, decode, streamreader=None, streamwriter=None, - incrementalencoder=None, incrementaldecoder=None, name=None): + incrementalencoder=None, incrementaldecoder=None, name=None, + *, _is_text_encoding=None): self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter)) self.name = name self.encode = encode @@ -84,6 +94,8 @@ class CodecInfo(tuple): self.incrementaldecoder = incrementaldecoder self.streamwriter = streamwriter self.streamreader = streamreader + if _is_text_encoding is not None: + self._is_text_encoding = _is_text_encoding return self def __repr__(self): diff --git a/Lib/encodings/base64_codec.py b/Lib/encodings/base64_codec.py index 321a961..881d1ba 100644 --- a/Lib/encodings/base64_codec.py +++ b/Lib/encodings/base64_codec.py @@ -52,4 +52,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader, + _is_text_encoding=False, ) diff --git a/Lib/encodings/bz2_codec.py b/Lib/encodings/bz2_codec.py index e65d226..fd9495e 100644 --- a/Lib/encodings/bz2_codec.py +++ b/Lib/encodings/bz2_codec.py @@ -74,4 +74,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader, + _is_text_encoding=False, ) diff --git a/Lib/encodings/hex_codec.py b/Lib/encodings/hex_codec.py index e003fc3..f2ed0a7 100644 --- a/Lib/encodings/hex_codec.py +++ b/Lib/encodings/hex_codec.py @@ -52,4 +52,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader, + _is_text_encoding=False, ) diff --git a/Lib/encodings/quopri_codec.py b/Lib/encodings/quopri_codec.py index 9243fc4..70f7083 100644 --- a/Lib/encodings/quopri_codec.py +++ b/Lib/encodings/quopri_codec.py @@ -53,4 +53,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader, + _is_text_encoding=False, ) diff --git a/Lib/encodings/rot_13.py b/Lib/encodings/rot_13.py index 3140c14..fff9153 100755 --- a/Lib/encodings/rot_13.py +++ b/Lib/encodings/rot_13.py @@ -43,6 +43,7 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader, + _is_text_encoding=False, ) ### Map diff --git a/Lib/encodings/uu_codec.py b/Lib/encodings/uu_codec.py index 69c6f17..e3269e4 100644 --- a/Lib/encodings/uu_codec.py +++ b/Lib/encodings/uu_codec.py @@ -96,4 +96,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_text_encoding=False, ) diff --git a/Lib/encodings/zlib_codec.py b/Lib/encodings/zlib_codec.py index e0b9cda..4c81ca1 100644 --- a/Lib/encodings/zlib_codec.py +++ b/Lib/encodings/zlib_codec.py @@ -74,4 +74,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, + _is_text_encoding=False, ) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 1a199f7..a8b3da0 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -4,6 +4,7 @@ import locale import sys import unittest import warnings +import encodings from test import support @@ -2408,6 +2409,47 @@ class TransformCodecTest(unittest.TestCase): sout = reader.readline() self.assertEqual(sout, b"\x80") + def test_text_to_binary_blacklists_binary_transforms(self): + # Check binary -> binary codecs give a good error for str input + bad_input = "bad input type" + for encoding in bytes_transform_encodings: + fmt = (r"{!r} is not a text encoding; " + r"use codecs.encode\(\) to handle arbitrary codecs") + msg = fmt.format(encoding) + with self.assertRaisesRegex(LookupError, msg) as failure: + bad_input.encode(encoding) + self.assertIsNone(failure.exception.__cause__) + + def test_text_to_binary_blacklists_text_transforms(self): + # Check str.encode gives a good error message for str -> str codecs + msg = (r"^'rot_13' is not a text encoding; " + r"use codecs.encode\(\) to handle arbitrary codecs") + with self.assertRaisesRegex(LookupError, msg): + "just an example message".encode("rot_13") + + def test_binary_to_text_blacklists_binary_transforms(self): + # Check bytes.decode and bytearray.decode give a good error + # message for binary -> binary codecs + data = b"encode first to ensure we meet any format restrictions" + for encoding in bytes_transform_encodings: + encoded_data = codecs.encode(data, encoding) + fmt = (r"{!r} is not a text encoding; " + r"use codecs.decode\(\) to handle arbitrary codecs") + msg = fmt.format(encoding) + with self.assertRaisesRegex(LookupError, msg): + encoded_data.decode(encoding) + with self.assertRaisesRegex(LookupError, msg): + bytearray(encoded_data).decode(encoding) + + def test_binary_to_text_blacklists_text_transforms(self): + # Check str -> str codec gives a good error for binary input + for bad_input in (b"immutable", bytearray(b"mutable")): + msg = (r"^'rot_13' is not a text encoding; " + r"use codecs.decode\(\) to handle arbitrary codecs") + with self.assertRaisesRegex(LookupError, msg) as failure: + bad_input.decode("rot_13") + self.assertIsNone(failure.exception.__cause__) + @unittest.skipUnless(sys.platform == 'win32', 'code pages are specific to Windows') |