diff options
author | Nick Coghlan <ncoghlan@gmail.com> | 2013-10-02 14:43:22 (GMT) |
---|---|---|
committer | Nick Coghlan <ncoghlan@gmail.com> | 2013-10-02 14:43:22 (GMT) |
commit | fdf239a855c82bc20df157815de947867aa2648e (patch) | |
tree | 05995514fc9cbb3283c5bdd2586982baebc3286b | |
parent | 73c6ee00805729919f98d4f2dbe27e16c54b4db2 (diff) | |
download | cpython-fdf239a855c82bc20df157815de947867aa2648e.zip cpython-fdf239a855c82bc20df157815de947867aa2648e.tar.gz cpython-fdf239a855c82bc20df157815de947867aa2648e.tar.bz2 |
Close #17839: support bytes-like objects in base64 module
This mostly affected the encodebytes and decodebytes functions
(which are used by base64_codec)
Also added a test to ensure all bytes-bytes codecs can handle
memoryview input and tests for handling of multidimensional
and non-bytes format input in the modern base64 API.
-rw-r--r-- | Doc/library/base64.rst | 4 | ||||
-rw-r--r-- | Doc/library/codecs.rst | 65 | ||||
-rwxr-xr-x | Lib/base64.py | 40 | ||||
-rw-r--r-- | Lib/test/test_base64.py | 110 | ||||
-rw-r--r-- | Lib/test/test_codecs.py | 18 | ||||
-rw-r--r-- | Misc/NEWS | 4 |
6 files changed, 172 insertions, 69 deletions
diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 3b23e79..de87441 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -27,6 +27,10 @@ byte strings, but only using the Base64 standard alphabet. ASCII-only Unicode strings are now accepted by the decoding functions of the modern interface. +.. versionchanged:: 3.4 + Any :term:`bytes-like object`\ s are now accepted by all + encoding and decoding functions in this module. + The modern interface provides: .. function:: b64encode(s, altchars=None) diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index fcef948..48c3b24 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -1208,36 +1208,41 @@ mappings. .. tabularcolumns:: |l|L|L| -+----------------------+---------------------------+------------------------------+ -| Codec | Purpose | Encoder/decoder | -+======================+===========================+==============================+ -| base64_codec [#b64]_ | Convert operand to MIME | :meth:`base64.b64encode`, | -| | base64 (the result always | :meth:`base64.b64decode` | -| | includes a trailing | | -| | ``'\n'``) | | -+----------------------+---------------------------+------------------------------+ -| bz2_codec | Compress the operand | :meth:`bz2.compress`, | -| | using bz2 | :meth:`bz2.decompress` | -+----------------------+---------------------------+------------------------------+ -| hex_codec | Convert operand to | :meth:`base64.b16encode`, | -| | hexadecimal | :meth:`base64.b16decode` | -| | representation, with two | | -| | digits per byte | | -+----------------------+---------------------------+------------------------------+ -| quopri_codec | Convert operand to MIME | :meth:`quopri.encodestring`, | -| | quoted printable | :meth:`quopri.decodestring` | -+----------------------+---------------------------+------------------------------+ -| uu_codec | Convert the operand using | :meth:`uu.encode`, | -| | uuencode | :meth:`uu.decode` | 
-+----------------------+---------------------------+------------------------------+ -| zlib_codec | Compress the operand | :meth:`zlib.compress`, | -| | using gzip | :meth:`zlib.decompress` | -+----------------------+---------------------------+------------------------------+ - -.. [#b64] Rather than accepting any :term:`bytes-like object`, - ``'base64_codec'`` accepts only :class:`bytes` and :class:`bytearray` for - encoding and only :class:`bytes`, :class:`bytearray`, and ASCII-only - instances of :class:`str` for decoding ++----------------------+------------------------------+------------------------------+ +| Codec | Purpose | Encoder / decoder | ++======================+==============================+==============================+ +| base64_codec [#b64]_ | Convert operand to MIME | :meth:`base64.b64encode` / | +| | base64 (the result always | :meth:`base64.b64decode` | +| | includes a trailing | | +| | ``'\n'``) | | +| | | | +| | .. versionchanged:: 3.4 | | +| | accepts any | | +| | :term:`bytes-like object` | | +| | as input for encoding and | | +| | decoding | | ++----------------------+------------------------------+------------------------------+ +| bz2_codec | Compress the operand | :meth:`bz2.compress` / | +| | using bz2 | :meth:`bz2.decompress` | ++----------------------+------------------------------+------------------------------+ +| hex_codec | Convert operand to | :meth:`base64.b16encode` / | +| | hexadecimal | :meth:`base64.b16decode` | +| | representation, with two | | +| | digits per byte | | ++----------------------+------------------------------+------------------------------+ +| quopri_codec | Convert operand to MIME | :meth:`quopri.encodestring` /| +| | quoted printable | :meth:`quopri.decodestring` | ++----------------------+------------------------------+------------------------------+ +| uu_codec | Convert the operand using | :meth:`uu.encode` / | +| | uuencode | :meth:`uu.decode` | 
++----------------------+------------------------------+------------------------------+ +| zlib_codec | Compress the operand | :meth:`zlib.compress` / | +| | using gzip | :meth:`zlib.decompress` | ++----------------------+------------------------------+------------------------------+ + +.. [#b64] In addition to :term:`bytes-like objects <bytes-like object>`, + ``'base64_codec'`` also accepts ASCII-only instances of :class:`str` for + decoding The following codecs provide :class:`str` to :class:`str` mappings. diff --git a/Lib/base64.py b/Lib/base64.py index 9c15752..0a93f2e 100755 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -35,11 +35,13 @@ def _bytes_from_decode_data(s): return s.encode('ascii') except UnicodeEncodeError: raise ValueError('string argument should contain only ASCII characters') - elif isinstance(s, bytes_types): + if isinstance(s, bytes_types): return s - else: - raise TypeError("argument should be bytes or ASCII string, not %s" % s.__class__.__name__) - + try: + return memoryview(s).tobytes() + except TypeError: + raise TypeError("argument should be a bytes-like object or ASCII " + "string, not %r" % s.__class__.__name__) from None # Base64 encoding/decoding uses binascii @@ -54,14 +56,9 @@ def b64encode(s, altchars=None): The encoded byte string is returned. """ - if not isinstance(s, bytes_types): - raise TypeError("expected bytes, not %s" % s.__class__.__name__) # Strip off the trailing newline encoded = binascii.b2a_base64(s)[:-1] if altchars is not None: - if not isinstance(altchars, bytes_types): - raise TypeError("expected bytes, not %s" - % altchars.__class__.__name__) assert len(altchars) == 2, repr(altchars) return encoded.translate(bytes.maketrans(b'+/', altchars)) return encoded @@ -149,7 +146,7 @@ def b32encode(s): s is the byte string to encode. The encoded byte string is returned. 
""" if not isinstance(s, bytes_types): - raise TypeError("expected bytes, not %s" % s.__class__.__name__) + s = memoryview(s).tobytes() leftover = len(s) % 5 # Pad the last quantum with zero bits if necessary if leftover: @@ -250,8 +247,6 @@ def b16encode(s): s is the byte string to encode. The encoded byte string is returned. """ - if not isinstance(s, bytes_types): - raise TypeError("expected bytes, not %s" % s.__class__.__name__) return binascii.hexlify(s).upper() @@ -306,12 +301,26 @@ def decode(input, output): s = binascii.a2b_base64(line) output.write(s) +def _input_type_check(s): + try: + m = memoryview(s) + except TypeError as err: + msg = "expected bytes-like object, not %s" % s.__class__.__name__ + raise TypeError(msg) from err + if m.format not in ('c', 'b', 'B'): + msg = ("expected single byte elements, not %r from %s" % + (m.format, s.__class__.__name__)) + raise TypeError(msg) + if m.ndim != 1: + msg = ("expected 1-D data, not %d-D data from %s" % + (m.ndim, s.__class__.__name__)) + raise TypeError(msg) + def encodebytes(s): """Encode a bytestring into a bytestring containing multiple lines of base-64 data.""" - if not isinstance(s, bytes_types): - raise TypeError("expected bytes, not %s" % s.__class__.__name__) + _input_type_check(s) pieces = [] for i in range(0, len(s), MAXBINSIZE): chunk = s[i : i + MAXBINSIZE] @@ -328,8 +337,7 @@ def encodestring(s): def decodebytes(s): """Decode a bytestring of base-64 data into a bytestring.""" - if not isinstance(s, bytes_types): - raise TypeError("expected bytes, not %s" % s.__class__.__name__) + _input_type_check(s) return binascii.a2b_base64(s) def decodestring(s): diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 13695de..54f392d 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -5,10 +5,21 @@ import binascii import os import sys import subprocess - +import struct +from array import array class LegacyBase64TestCase(unittest.TestCase): + + # Legacy API is not as 
permissive as the modern API + def check_type_errors(self, f): + self.assertRaises(TypeError, f, "") + self.assertRaises(TypeError, f, []) + multidimensional = memoryview(b"1234").cast('B', (2, 2)) + self.assertRaises(TypeError, f, multidimensional) + int_data = memoryview(b"1234").cast('I') + self.assertRaises(TypeError, f, int_data) + def test_encodebytes(self): eq = self.assertEqual eq(base64.encodebytes(b"www.python.org"), b"d3d3LnB5dGhvbi5vcmc=\n") @@ -24,7 +35,9 @@ class LegacyBase64TestCase(unittest.TestCase): b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==\n") # Non-bytes eq(base64.encodebytes(bytearray(b'abc')), b'YWJj\n') - self.assertRaises(TypeError, base64.encodebytes, "") + eq(base64.encodebytes(memoryview(b'abc')), b'YWJj\n') + eq(base64.encodebytes(array('B', b'abc')), b'YWJj\n') + self.check_type_errors(base64.encodebytes) def test_decodebytes(self): eq = self.assertEqual @@ -41,7 +54,9 @@ class LegacyBase64TestCase(unittest.TestCase): eq(base64.decodebytes(b''), b'') # Non-bytes eq(base64.decodebytes(bytearray(b'YWJj\n')), b'abc') - self.assertRaises(TypeError, base64.decodebytes, "") + eq(base64.decodebytes(memoryview(b'YWJj\n')), b'abc') + eq(base64.decodebytes(array('B', b'YWJj\n')), b'abc') + self.check_type_errors(base64.decodebytes) def test_encode(self): eq = self.assertEqual @@ -73,6 +88,38 @@ class LegacyBase64TestCase(unittest.TestCase): class BaseXYTestCase(unittest.TestCase): + + # Modern API completely ignores exported dimension and format data and + # treats any buffer as a stream of bytes + def check_encode_type_errors(self, f): + self.assertRaises(TypeError, f, "") + self.assertRaises(TypeError, f, []) + + def check_decode_type_errors(self, f): + self.assertRaises(TypeError, f, []) + + def check_other_types(self, f, bytes_data, expected): + eq = self.assertEqual + eq(f(bytearray(bytes_data)), expected) + eq(f(memoryview(bytes_data)), expected) + eq(f(array('B', bytes_data)), expected) + self.check_nonbyte_element_format(base64.b64encode, 
bytes_data) + self.check_multidimensional(base64.b64encode, bytes_data) + + def check_multidimensional(self, f, data): + padding = b"\x00" if len(data) % 2 else b"" + bytes_data = data + padding # Make sure cast works + shape = (len(bytes_data) // 2, 2) + multidimensional = memoryview(bytes_data).cast('B', shape) + self.assertEqual(f(multidimensional), f(bytes_data)) + + def check_nonbyte_element_format(self, f, data): + padding = b"\x00" * ((4 - len(data)) % 4) + bytes_data = data + padding # Make sure cast works + int_data = memoryview(bytes_data).cast('I') + self.assertEqual(f(int_data), f(bytes_data)) + + def test_b64encode(self): eq = self.assertEqual # Test default alphabet @@ -90,13 +137,16 @@ class BaseXYTestCase(unittest.TestCase): b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==") # Test with arbitrary alternative characters eq(base64.b64encode(b'\xd3V\xbeo\xf7\x1d', altchars=b'*$'), b'01a*b$cd') - # Non-bytes - eq(base64.b64encode(bytearray(b'abcd')), b'YWJjZA==') eq(base64.b64encode(b'\xd3V\xbeo\xf7\x1d', altchars=bytearray(b'*$')), b'01a*b$cd') - # Check if passing a str object raises an error - self.assertRaises(TypeError, base64.b64encode, "") - self.assertRaises(TypeError, base64.b64encode, b"", altchars="") + eq(base64.b64encode(b'\xd3V\xbeo\xf7\x1d', altchars=memoryview(b'*$')), + b'01a*b$cd') + eq(base64.b64encode(b'\xd3V\xbeo\xf7\x1d', altchars=array('B', b'*$')), + b'01a*b$cd') + # Non-bytes + self.check_other_types(base64.b64encode, b'abcd', b'YWJjZA==') + self.check_encode_type_errors(base64.b64encode) + self.assertRaises(TypeError, base64.b64encode, b"", altchars="*$") # Test standard alphabet eq(base64.standard_b64encode(b"www.python.org"), b"d3d3LnB5dGhvbi5vcmc=") eq(base64.standard_b64encode(b"a"), b"YQ==") @@ -110,15 +160,15 @@ class BaseXYTestCase(unittest.TestCase): b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT" b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==") # Non-bytes - eq(base64.standard_b64encode(bytearray(b'abcd')), b'YWJjZA==') - # Check if passing a str 
object raises an error - self.assertRaises(TypeError, base64.standard_b64encode, "") + self.check_other_types(base64.standard_b64encode, + b'abcd', b'YWJjZA==') + self.check_encode_type_errors(base64.standard_b64encode) # Test with 'URL safe' alternative characters eq(base64.urlsafe_b64encode(b'\xd3V\xbeo\xf7\x1d'), b'01a-b_cd') # Non-bytes - eq(base64.urlsafe_b64encode(bytearray(b'\xd3V\xbeo\xf7\x1d')), b'01a-b_cd') - # Check if passing a str object raises an error - self.assertRaises(TypeError, base64.urlsafe_b64encode, "") + self.check_other_types(base64.urlsafe_b64encode, + b'\xd3V\xbeo\xf7\x1d', b'01a-b_cd') + self.check_encode_type_errors(base64.urlsafe_b64encode) def test_b64decode(self): eq = self.assertEqual @@ -141,7 +191,8 @@ class BaseXYTestCase(unittest.TestCase): eq(base64.b64decode(data), res) eq(base64.b64decode(data.decode('ascii')), res) # Non-bytes - eq(base64.b64decode(bytearray(b"YWJj")), b"abc") + self.check_other_types(base64.b64decode, b"YWJj", b"abc") + self.check_decode_type_errors(base64.b64decode) # Test with arbitrary alternative characters tests_altchars = {(b'01a*b$cd', b'*$'): b'\xd3V\xbeo\xf7\x1d', @@ -160,7 +211,8 @@ class BaseXYTestCase(unittest.TestCase): eq(base64.standard_b64decode(data), res) eq(base64.standard_b64decode(data.decode('ascii')), res) # Non-bytes - eq(base64.standard_b64decode(bytearray(b"YWJj")), b"abc") + self.check_other_types(base64.standard_b64decode, b"YWJj", b"abc") + self.check_decode_type_errors(base64.standard_b64decode) # Test with 'URL safe' alternative characters tests_urlsafe = {b'01a-b_cd': b'\xd3V\xbeo\xf7\x1d', @@ -170,7 +222,9 @@ class BaseXYTestCase(unittest.TestCase): eq(base64.urlsafe_b64decode(data), res) eq(base64.urlsafe_b64decode(data.decode('ascii')), res) # Non-bytes - eq(base64.urlsafe_b64decode(bytearray(b'01a-b_cd')), b'\xd3V\xbeo\xf7\x1d') + self.check_other_types(base64.urlsafe_b64decode, b'01a-b_cd', + b'\xd3V\xbeo\xf7\x1d') + 
self.check_decode_type_errors(base64.urlsafe_b64decode) def test_b64decode_padding_error(self): self.assertRaises(binascii.Error, base64.b64decode, b'abc') @@ -205,8 +259,8 @@ class BaseXYTestCase(unittest.TestCase): eq(base64.b32encode(b'abcd'), b'MFRGGZA=') eq(base64.b32encode(b'abcde'), b'MFRGGZDF') # Non-bytes - eq(base64.b32encode(bytearray(b'abcd')), b'MFRGGZA=') - self.assertRaises(TypeError, base64.b32encode, "") + self.check_other_types(base64.b32encode, b'abcd', b'MFRGGZA=') + self.check_encode_type_errors(base64.b32encode) def test_b32decode(self): eq = self.assertEqual @@ -222,7 +276,8 @@ class BaseXYTestCase(unittest.TestCase): eq(base64.b32decode(data), res) eq(base64.b32decode(data.decode('ascii')), res) # Non-bytes - eq(base64.b32decode(bytearray(b'MFRGG===')), b'abc') + self.check_other_types(base64.b32decode, b'MFRGG===', b"abc") + self.check_decode_type_errors(base64.b32decode) def test_b32decode_casefold(self): eq = self.assertEqual @@ -277,8 +332,9 @@ class BaseXYTestCase(unittest.TestCase): eq(base64.b16encode(b'\x01\x02\xab\xcd\xef'), b'0102ABCDEF') eq(base64.b16encode(b'\x00'), b'00') # Non-bytes - eq(base64.b16encode(bytearray(b'\x01\x02\xab\xcd\xef')), b'0102ABCDEF') - self.assertRaises(TypeError, base64.b16encode, "") + self.check_other_types(base64.b16encode, b'\x01\x02\xab\xcd\xef', + b'0102ABCDEF') + self.check_encode_type_errors(base64.b16encode) def test_b16decode(self): eq = self.assertEqual @@ -293,7 +349,15 @@ class BaseXYTestCase(unittest.TestCase): eq(base64.b16decode(b'0102abcdef', True), b'\x01\x02\xab\xcd\xef') eq(base64.b16decode('0102abcdef', True), b'\x01\x02\xab\xcd\xef') # Non-bytes - eq(base64.b16decode(bytearray(b"0102ABCDEF")), b'\x01\x02\xab\xcd\xef') + self.check_other_types(base64.b16decode, b"0102ABCDEF", + b'\x01\x02\xab\xcd\xef') + self.check_decode_type_errors(base64.b16decode) + eq(base64.b16decode(bytearray(b"0102abcdef"), True), + b'\x01\x02\xab\xcd\xef') + eq(base64.b16decode(memoryview(b"0102abcdef"), 
True), + b'\x01\x02\xab\xcd\xef') + eq(base64.b16decode(array('B', b"0102abcdef"), True), + b'\x01\x02\xab\xcd\xef') def test_decode_nonascii_str(self): decode_funcs = (base64.b64decode, diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 2f3cf4d..99d928d 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -2285,6 +2285,24 @@ class TransformCodecTest(unittest.TestCase): sout = reader.readline() self.assertEqual(sout, b"\x80") + def test_buffer_api_usage(self): + # We check all the transform codecs accept memoryview input + # for encoding and decoding + # and also that they roundtrip correctly + original = b"12345\x80" + for encoding in bytes_transform_encodings: + data = original + view = memoryview(data) + data = codecs.encode(data, encoding) + view_encoded = codecs.encode(view, encoding) + self.assertEqual(view_encoded, data) + view = memoryview(data) + data = codecs.decode(data, encoding) + self.assertEqual(data, original) + view_decoded = codecs.decode(view, encoding) + self.assertEqual(view_decoded, data) + + @unittest.skipUnless(sys.platform == 'win32', 'code pages are specific to Windows') @@ -20,6 +20,10 @@ Core and Builtins Library ------- +- Issue #17839: base64.decodebytes and base64.encodebytes now accept any + object that exports a 1 dimensional array of bytes (this means the same + is now also true for base64_codec) + - Issue #19132: The pprint module now supports compact mode. - Issue #19137: The pprint module now correctly formats instances of set and |