summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Doc/library/base64.rst 29
-rwxr-xr-x Lib/base64.py 122
-rw-r--r-- Lib/test/test_base64.py 20
3 files changed, 92 insertions, 79 deletions
diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst
index 3f0161f..6e88a76 100644
--- a/Doc/library/base64.rst
+++ b/Doc/library/base64.rst
@@ -24,8 +24,8 @@ POST request. The encoding algorithm is not the same as the
There are two interfaces provided by this module. The modern interface
supports encoding :term:`bytes-like objects <bytes-like object>` to ASCII
:class:`bytes`, and decoding :term:`bytes-like objects <bytes-like object>` or
-strings containing ASCII to :class:`bytes`. All three :rfc:`3548` defined
-alphabets (normal, URL-safe, and filesystem-safe) are supported.
+strings containing ASCII to :class:`bytes`. Both base-64 alphabets
+defined in :rfc:`3548` (normal, and URL- and filesystem-safe) are supported.
The legacy interface does not support decoding from strings, but it does
provide functions for encoding and decoding to and from :term:`file objects
@@ -69,9 +69,10 @@ The modern interface provides:
A :exc:`binascii.Error` exception is raised
if *s* is incorrectly padded.
- If *validate* is ``False`` (the default), non-base64-alphabet characters are
+ If *validate* is ``False`` (the default), characters that are neither
+ in the normal base-64 alphabet nor the alternative alphabet are
discarded prior to the padding check. If *validate* is ``True``,
- non-base64-alphabet characters in the input result in a
+ these non-alphabet characters in the input result in a
:exc:`binascii.Error`.
@@ -89,7 +90,8 @@ The modern interface provides:
.. function:: urlsafe_b64encode(s)
- Encode :term:`bytes-like object` *s* using a URL-safe alphabet, which
+ Encode :term:`bytes-like object` *s* using the
+ URL- and filesystem-safe alphabet, which
substitutes ``-`` instead of ``+`` and ``_`` instead of ``/`` in the
standard Base64 alphabet, and return the encoded :class:`bytes`. The result
can still contain ``=``.
@@ -97,7 +99,8 @@ The modern interface provides:
.. function:: urlsafe_b64decode(s)
- Decode :term:`bytes-like object` or ASCII string *s* using a URL-safe
+ Decode :term:`bytes-like object` or ASCII string *s*
+ using the URL- and filesystem-safe
alphabet, which substitutes ``-`` instead of ``+`` and ``_`` instead of
``/`` in the standard Base64 alphabet, and return the decoded
:class:`bytes`.
@@ -145,14 +148,14 @@ The modern interface provides:
lowercase alphabet is acceptable as input. For security purposes, the default
is ``False``.
- A :exc:`TypeError` is raised if *s* is
+ A :exc:`binascii.Error` is raised if *s* is
incorrectly padded or if there are non-alphabet characters present in the
input.
-.. function:: a85encode(s, *, foldspaces=False, wrapcol=0, pad=False, adobe=False)
+.. function:: a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False)
- Encode the :term:`bytes-like object` *s* using Ascii85 and return the
+ Encode the :term:`bytes-like object` *b* using Ascii85 and return the
encoded :class:`bytes`.
*foldspaces* is an optional flag that uses the special short sequence 'y'
@@ -172,9 +175,9 @@ The modern interface provides:
.. versionadded:: 3.4
-.. function:: a85decode(s, *, foldspaces=False, adobe=False, ignorechars=b' \\t\\n\\r\\v')
+.. function:: a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \\t\\n\\r\\v')
- Decode the Ascii85 encoded :term:`bytes-like object` or ASCII string *s* and
+ Decode the Ascii85 encoded :term:`bytes-like object` or ASCII string *b* and
return the decoded :class:`bytes`.
*foldspaces* is a flag that specifies whether the 'y' short sequence
@@ -192,9 +195,9 @@ The modern interface provides:
.. versionadded:: 3.4
-.. function:: b85encode(s, pad=False)
+.. function:: b85encode(b, pad=False)
- Encode the :term:`bytes-like object` *s* using base85 (as used in e.g.
+ Encode the :term:`bytes-like object` *b* using base85 (as used in e.g.
git-style binary diffs) and return the encoded :class:`bytes`.
If *pad* is true, the input is padded with ``b'\0'`` so its length is a
diff --git a/Lib/base64.py b/Lib/base64.py
index 640f787..e2c597b 100755
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -12,7 +12,7 @@ import binascii
__all__ = [
- # Legacy interface exports traditional RFC 1521 Base64 encodings
+ # Legacy interface exports traditional RFC 2045 Base64 encodings
'encode', 'decode', 'encodebytes', 'decodebytes',
# Generalized interface for other encodings
'b64encode', 'b64decode', 'b32encode', 'b32decode',
@@ -49,14 +49,11 @@ def _bytes_from_decode_data(s):
# Base64 encoding/decoding uses binascii
def b64encode(s, altchars=None):
- """Encode a byte string using Base64.
+ """Encode the bytes-like object s using Base64 and return a bytes object.
- s is the byte string to encode. Optional altchars must be a byte
- string of length 2 which specifies an alternative alphabet for the
- '+' and '/' characters. This allows an application to
- e.g. generate url or filesystem safe Base64 strings.
-
- The encoded byte string is returned.
+ Optional altchars should be a byte string of length 2 which specifies an
+ alternative alphabet for the '+' and '/' characters. This allows an
+ application to e.g. generate url or filesystem safe Base64 strings.
"""
# Strip off the trailing newline
encoded = binascii.b2a_base64(s)[:-1]
@@ -67,18 +64,19 @@ def b64encode(s, altchars=None):
def b64decode(s, altchars=None, validate=False):
- """Decode a Base64 encoded byte string.
+ """Decode the Base64 encoded bytes-like object or ASCII string s.
- s is the byte string to decode. Optional altchars must be a
- string of length 2 which specifies the alternative alphabet used
- instead of the '+' and '/' characters.
+ Optional altchars must be a bytes-like object or ASCII string of length 2
+ which specifies the alternative alphabet used instead of the '+' and '/'
+ characters.
- The decoded string is returned. A binascii.Error is raised if s is
- incorrectly padded.
+ The result is returned as a bytes object. A binascii.Error is raised if
+ s is incorrectly padded.
- If validate is False (the default), non-base64-alphabet characters are
- discarded prior to the padding check. If validate is True,
- non-base64-alphabet characters in the input result in a binascii.Error.
+ If validate is False (the default), characters that are neither in the
+ normal base-64 alphabet nor the alternative alphabet are discarded prior
+ to the padding check. If validate is True, these non-alphabet characters
+ in the input result in a binascii.Error.
"""
s = _bytes_from_decode_data(s)
if altchars is not None:
@@ -91,19 +89,19 @@ def b64decode(s, altchars=None, validate=False):
def standard_b64encode(s):
- """Encode a byte string using the standard Base64 alphabet.
+ """Encode bytes-like object s using the standard Base64 alphabet.
- s is the byte string to encode. The encoded byte string is returned.
+ The result is returned as a bytes object.
"""
return b64encode(s)
def standard_b64decode(s):
- """Decode a byte string encoded with the standard Base64 alphabet.
+ """Decode bytes encoded with the standard Base64 alphabet.
- s is the byte string to decode. The decoded byte string is
- returned. binascii.Error is raised if the input is incorrectly
- padded or if there are non-alphabet characters present in the
- input.
+ Argument s is a bytes-like object or ASCII string to decode. The result
+ is returned as a bytes object. A binascii.Error is raised if the input
+ is incorrectly padded. Characters that are not in the standard alphabet
+ are discarded prior to the padding check.
"""
return b64decode(s)
@@ -112,21 +110,22 @@ _urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')
def urlsafe_b64encode(s):
- """Encode a byte string using a url-safe Base64 alphabet.
+ """Encode bytes using the URL- and filesystem-safe Base64 alphabet.
- s is the byte string to encode. The encoded byte string is
- returned. The alphabet uses '-' instead of '+' and '_' instead of
+ Argument s is a bytes-like object to encode. The result is returned as a
+ bytes object. The alphabet uses '-' instead of '+' and '_' instead of
'/'.
"""
return b64encode(s).translate(_urlsafe_encode_translation)
def urlsafe_b64decode(s):
- """Decode a byte string encoded with the standard Base64 alphabet.
+ """Decode bytes using the URL- and filesystem-safe Base64 alphabet.
- s is the byte string to decode. The decoded byte string is
- returned. binascii.Error is raised if the input is incorrectly
- padded or if there are non-alphabet characters present in the
- input.
+ Argument s is a bytes-like object or ASCII string to decode. The result
+ is returned as a bytes object. A binascii.Error is raised if the input
+ is incorrectly padded. Characters that are not in the URL-safe base-64
+ alphabet, and are not a plus '+' or slash '/', are discarded prior to the
+ padding check.
The alphabet uses '-' instead of '+' and '_' instead of '/'.
"""
@@ -142,9 +141,7 @@ _b32tab2 = None
_b32rev = None
def b32encode(s):
- """Encode a byte string using Base32.
-
- s is the byte string to encode. The encoded byte string is returned.
+ """Encode the bytes-like object s using Base32 and return a bytes object.
"""
global _b32tab2
# Delay the initialization of the table to not waste memory
@@ -182,11 +179,10 @@ def b32encode(s):
return bytes(encoded)
def b32decode(s, casefold=False, map01=None):
- """Decode a Base32 encoded byte string.
+ """Decode the Base32 encoded bytes-like object or ASCII string s.
- s is the byte string to decode. Optional casefold is a flag
- specifying whether a lowercase alphabet is acceptable as input.
- For security purposes, the default is False.
+ Optional casefold is a flag specifying whether a lowercase alphabet is
+ acceptable as input. For security purposes, the default is False.
RFC 3548 allows for optional mapping of the digit 0 (zero) to the
letter O (oh), and for optional mapping of the digit 1 (one) to
@@ -196,7 +192,7 @@ def b32decode(s, casefold=False, map01=None):
the letter O). For security purposes the default is None, so that
0 and 1 are not allowed in the input.
- The decoded byte string is returned. binascii.Error is raised if
+ The result is returned as a bytes object. A binascii.Error is raised if
the input is incorrectly padded or if there are non-alphabet
characters present in the input.
"""
@@ -257,23 +253,20 @@ def b32decode(s, casefold=False, map01=None):
# lowercase. The RFC also recommends against accepting input case
# insensitively.
def b16encode(s):
- """Encode a byte string using Base16.
-
- s is the byte string to encode. The encoded byte string is returned.
+ """Encode the bytes-like object s using Base16 and return a bytes object.
"""
return binascii.hexlify(s).upper()
def b16decode(s, casefold=False):
- """Decode a Base16 encoded byte string.
+ """Decode the Base16 encoded bytes-like object or ASCII string s.
- s is the byte string to decode. Optional casefold is a flag
- specifying whether a lowercase alphabet is acceptable as input.
- For security purposes, the default is False.
+ Optional casefold is a flag specifying whether a lowercase alphabet is
+ acceptable as input. For security purposes, the default is False.
- The decoded byte string is returned. binascii.Error is raised if
- s were incorrectly padded or if there are non-alphabet characters
- present in the string.
+ The result is returned as a bytes object. A binascii.Error is raised if
+ s is incorrectly padded or if there are non-alphabet characters present
+ in the input.
"""
s = _bytes_from_decode_data(s)
if casefold:
@@ -316,19 +309,17 @@ def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
return b''.join(chunks)
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
- """Encode a byte string using Ascii85.
-
- b is the byte string to encode. The encoded byte string is returned.
+ """Encode bytes-like object b using Ascii85 and return a bytes object.
foldspaces is an optional flag that uses the special short sequence 'y'
instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This
feature is not supported by the "standard" Adobe encoding.
- wrapcol controls whether the output should have newline ('\\n') characters
+ wrapcol controls whether the output should have newline (b'\\n') characters
added to it. If this is non-zero, each output line will be at most this
many characters long.
- pad controls whether the input string is padded to a multiple of 4 before
+ pad controls whether the input is padded to a multiple of 4 before
encoding. Note that the btoa implementation always pads.
adobe controls whether the encoded byte sequence is framed with <~ and ~>,
@@ -359,9 +350,7 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
return result
def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
- """Decode an Ascii85 encoded byte string.
-
- s is the byte string to decode.
+ """Decode the Ascii85 encoded bytes-like object or ASCII string b.
foldspaces is a flag that specifies whether the 'y' short sequence should be
accepted as shorthand for 4 consecutive spaces (ASCII 0x20). This feature is
@@ -373,6 +362,8 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
ignorechars should be a byte string containing characters to ignore from the
input. This should only contain whitespace characters, and by default
contains all whitespace characters in ASCII.
+
+ The result is returned as a bytes object.
"""
b = _bytes_from_decode_data(b)
if adobe:
@@ -432,10 +423,10 @@ _b85chars2 = None
_b85dec = None
def b85encode(b, pad=False):
- """Encode an ASCII-encoded byte array in base85 format.
+ """Encode bytes-like object b in base85 format and return a bytes object.
- If pad is true, the input is padded with "\\0" so its length is a multiple of
- 4 characters before encoding.
+ If pad is true, the input is padded with b'\\0' so its length is a multiple of
+ 4 bytes before encoding.
"""
global _b85chars, _b85chars2
# Delay the initialization of tables to not waste memory
@@ -446,7 +437,10 @@ def b85encode(b, pad=False):
return _85encode(b, _b85chars, _b85chars2, pad)
def b85decode(b):
- """Decode base85-encoded byte array"""
+ """Decode the base85-encoded bytes-like object or ASCII string b.
+
+ The result is returned as a bytes object.
+ """
global _b85dec
# Delay the initialization of tables to not waste memory
# if the function is never called
@@ -531,7 +525,7 @@ def _input_type_check(s):
def encodebytes(s):
- """Encode a bytestring into a bytestring containing multiple lines
+ """Encode a bytestring into a bytes object containing multiple lines
of base-64 data."""
_input_type_check(s)
pieces = []
@@ -549,7 +543,7 @@ def encodestring(s):
def decodebytes(s):
- """Decode a bytestring of base-64 data into a bytestring."""
+ """Decode a bytestring of base-64 data into a bytes object."""
_input_type_check(s)
return binascii.a2b_base64(s)
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py
index a0f548d..9b853a8 100644
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -243,14 +243,26 @@ class BaseXYTestCase(unittest.TestCase):
(b'@@', b''),
(b'!', b''),
(b'YWJj\nYWI=', b'abcab'))
+ funcs = (
+ base64.b64decode,
+ base64.standard_b64decode,
+ base64.urlsafe_b64decode,
+ )
for bstr, res in tests:
- self.assertEqual(base64.b64decode(bstr), res)
- self.assertEqual(base64.b64decode(bstr.decode('ascii')), res)
+ for func in funcs:
+ with self.subTest(bstr=bstr, func=func):
+ self.assertEqual(func(bstr), res)
+ self.assertEqual(func(bstr.decode('ascii')), res)
with self.assertRaises(binascii.Error):
base64.b64decode(bstr, validate=True)
with self.assertRaises(binascii.Error):
base64.b64decode(bstr.decode('ascii'), validate=True)
+ # Normal alphabet characters not discarded when alternative given
+ res = b'\xFB\xEF\xBE\xFF\xFF\xFF'
+ self.assertEqual(base64.b64decode(b'++[[//]]', b'[]'), res)
+ self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res)
+
def test_b32encode(self):
eq = self.assertEqual
eq(base64.b32encode(b''), b'')
@@ -360,6 +372,10 @@ class BaseXYTestCase(unittest.TestCase):
b'\x01\x02\xab\xcd\xef')
eq(base64.b16decode(array('B', b"0102abcdef"), True),
b'\x01\x02\xab\xcd\xef')
+ # Non-alphabet characters
+ self.assertRaises(binascii.Error, base64.b16decode, '0102AG')
+ # Incorrect "padding"
+ self.assertRaises(binascii.Error, base64.b16decode, '010')
def test_a85encode(self):
eq = self.assertEqual