summary refs log tree commit diff stats
path: root/Lib
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2014-02-24 12:43:03 (GMT)
committer Serhiy Storchaka <storchaka@gmail.com> 2014-02-24 12:43:03 (GMT)
commit 94ee389308ec9e0e07b3f7a944d5179aba540c5e (patch)
tree 80bc231aff27723119beacbcfa2654b90f793060 /Lib
parent 20f8728bf0cce877c1908b15ddc59e2d1011ad0f (diff)
download cpython-94ee389308ec9e0e07b3f7a944d5179aba540c5e.zip
cpython-94ee389308ec9e0e07b3f7a944d5179aba540c5e.tar.gz
cpython-94ee389308ec9e0e07b3f7a944d5179aba540c5e.tar.bz2
Issue #19619: Blacklist non-text codecs in method API
str.encode, bytes.decode and bytearray.decode now use an internal API to throw LookupError for known non-text encodings, rather than attempting the encoding or decoding operation and then throwing a TypeError for an unexpected output type. The latter mechanism remains in place for third party non-text encodings. Backported changeset d68df99d7a57.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/codecs.py 14
-rw-r--r-- Lib/encodings/base64_codec.py 1
-rw-r--r-- Lib/encodings/bz2_codec.py 1
-rw-r--r-- Lib/encodings/hex_codec.py 1
-rw-r--r-- Lib/encodings/quopri_codec.py 1
-rwxr-xr-x Lib/encodings/rot_13.py 1
-rw-r--r-- Lib/encodings/uu_codec.py 1
-rw-r--r-- Lib/encodings/zlib_codec.py 1
-rw-r--r-- Lib/test/test_codecs.py 42
9 files changed, 62 insertions, 1 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py
index 01ae0f3..c2065da 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -73,9 +73,19 @@ BOM64_BE = BOM_UTF32_BE
### Codec base classes (defining the API)
class CodecInfo(tuple):
+ """Codec details when looking up the codec registry"""
+
+ # Private API to allow Python 3.4 to blacklist the known non-Unicode
+ # codecs in the standard library. A more general mechanism to
+ # reliably distinguish text encodings from other codecs will hopefully
+ # be defined for Python 3.5
+ #
+ # See http://bugs.python.org/issue19619
+ _is_text_encoding = True # Assume codecs are text encodings by default
def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
- incrementalencoder=None, incrementaldecoder=None, name=None):
+ incrementalencoder=None, incrementaldecoder=None, name=None,
+ *, _is_text_encoding=None):
self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
self.name = name
self.encode = encode
@@ -84,6 +94,8 @@ class CodecInfo(tuple):
self.incrementaldecoder = incrementaldecoder
self.streamwriter = streamwriter
self.streamreader = streamreader
+ if _is_text_encoding is not None:
+ self._is_text_encoding = _is_text_encoding
return self
def __repr__(self):
diff --git a/Lib/encodings/base64_codec.py b/Lib/encodings/base64_codec.py
index 321a961..881d1ba 100644
--- a/Lib/encodings/base64_codec.py
+++ b/Lib/encodings/base64_codec.py
@@ -52,4 +52,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
+ _is_text_encoding=False,
)
diff --git a/Lib/encodings/bz2_codec.py b/Lib/encodings/bz2_codec.py
index e65d226..fd9495e 100644
--- a/Lib/encodings/bz2_codec.py
+++ b/Lib/encodings/bz2_codec.py
@@ -74,4 +74,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
+ _is_text_encoding=False,
)
diff --git a/Lib/encodings/hex_codec.py b/Lib/encodings/hex_codec.py
index e003fc3..f2ed0a7 100644
--- a/Lib/encodings/hex_codec.py
+++ b/Lib/encodings/hex_codec.py
@@ -52,4 +52,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
+ _is_text_encoding=False,
)
diff --git a/Lib/encodings/quopri_codec.py b/Lib/encodings/quopri_codec.py
index 9243fc4..70f7083 100644
--- a/Lib/encodings/quopri_codec.py
+++ b/Lib/encodings/quopri_codec.py
@@ -53,4 +53,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
+ _is_text_encoding=False,
)
diff --git a/Lib/encodings/rot_13.py b/Lib/encodings/rot_13.py
index 3140c14..fff9153 100755
--- a/Lib/encodings/rot_13.py
+++ b/Lib/encodings/rot_13.py
@@ -43,6 +43,7 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
+ _is_text_encoding=False,
)
### Map
diff --git a/Lib/encodings/uu_codec.py b/Lib/encodings/uu_codec.py
index 69c6f17..e3269e4 100644
--- a/Lib/encodings/uu_codec.py
+++ b/Lib/encodings/uu_codec.py
@@ -96,4 +96,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_text_encoding=False,
)
diff --git a/Lib/encodings/zlib_codec.py b/Lib/encodings/zlib_codec.py
index e0b9cda..4c81ca1 100644
--- a/Lib/encodings/zlib_codec.py
+++ b/Lib/encodings/zlib_codec.py
@@ -74,4 +74,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
+ _is_text_encoding=False,
)
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 1a199f7..a8b3da0 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -4,6 +4,7 @@ import locale
import sys
import unittest
import warnings
+import encodings
from test import support
@@ -2408,6 +2409,47 @@ class TransformCodecTest(unittest.TestCase):
sout = reader.readline()
self.assertEqual(sout, b"\x80")
+ def test_text_to_binary_blacklists_binary_transforms(self):
+ # Check binary -> binary codecs give a good error for str input
+ bad_input = "bad input type"
+ for encoding in bytes_transform_encodings:
+ fmt = (r"{!r} is not a text encoding; "
+ r"use codecs.encode\(\) to handle arbitrary codecs")
+ msg = fmt.format(encoding)
+ with self.assertRaisesRegex(LookupError, msg) as failure:
+ bad_input.encode(encoding)
+ self.assertIsNone(failure.exception.__cause__)
+
+ def test_text_to_binary_blacklists_text_transforms(self):
+ # Check str.encode gives a good error message for str -> str codecs
+ msg = (r"^'rot_13' is not a text encoding; "
+ r"use codecs.encode\(\) to handle arbitrary codecs")
+ with self.assertRaisesRegex(LookupError, msg):
+ "just an example message".encode("rot_13")
+
+ def test_binary_to_text_blacklists_binary_transforms(self):
+ # Check bytes.decode and bytearray.decode give a good error
+ # message for binary -> binary codecs
+ data = b"encode first to ensure we meet any format restrictions"
+ for encoding in bytes_transform_encodings:
+ encoded_data = codecs.encode(data, encoding)
+ fmt = (r"{!r} is not a text encoding; "
+ r"use codecs.decode\(\) to handle arbitrary codecs")
+ msg = fmt.format(encoding)
+ with self.assertRaisesRegex(LookupError, msg):
+ encoded_data.decode(encoding)
+ with self.assertRaisesRegex(LookupError, msg):
+ bytearray(encoded_data).decode(encoding)
+
+ def test_binary_to_text_blacklists_text_transforms(self):
+ # Check str -> str codec gives a good error for binary input
+ for bad_input in (b"immutable", bytearray(b"mutable")):
+ msg = (r"^'rot_13' is not a text encoding; "
+ r"use codecs.decode\(\) to handle arbitrary codecs")
+ with self.assertRaisesRegex(LookupError, msg) as failure:
+ bad_input.decode("rot_13")
+ self.assertIsNone(failure.exception.__cause__)
+
@unittest.skipUnless(sys.platform == 'win32',
'code pages are specific to Windows')