diff options
author | Martin Panter <vadmium+py@gmail.com> | 2016-02-23 22:30:50 (GMT) |
---|---|---|
committer | Martin Panter <vadmium+py@gmail.com> | 2016-02-23 22:30:50 (GMT) |
commit | 39267c21cc8e5e7bc8facd1260eac212b9ebeefd (patch) | |
tree | 564cf5a6df7e0172901e1b5f0b8ca4368e10c67d | |
parent | 3058eb418a3374ddb9ae8b7f29f4ebd072837dd7 (diff) | |
download | cpython-39267c21cc8e5e7bc8facd1260eac212b9ebeefd.zip cpython-39267c21cc8e5e7bc8facd1260eac212b9ebeefd.tar.gz cpython-39267c21cc8e5e7bc8facd1260eac212b9ebeefd.tar.bz2 |
Issue #22088: Clarify base-64 alphabets and which characters are discarded
* There are only two base-64 alphabets defined by the RFCs, not three
* Due to the internal translation, plus (+) and slash (/) are never discarded
* standard_b64decode() and urlsafe_b64decode() discard non-alphabet characters as well
-rw-r--r-- | Doc/library/base64.rst | 15 | ||||
-rwxr-xr-x | Lib/base64.py | 29 | ||||
-rw-r--r-- | Lib/test/test_base64.py | 11 |
3 files changed, 36 insertions, 19 deletions
diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index e346efb..7d1a6e0 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -16,8 +16,8 @@ email, used as parts of URLs, or included as part of an HTTP POST request. The encoding algorithm is not the same as the :program:`uuencode` program. There are two interfaces provided by this module. The modern interface supports -encoding and decoding string objects using all three :rfc:`3548` defined -alphabets (normal, URL-safe, and filesystem-safe). The legacy +encoding and decoding string objects using both base-64 alphabets defined +in :rfc:`3548` (normal, and URL- and filesystem-safe). The legacy interface provides for encoding and decoding to and from file-like objects as well as strings, but only using the Base64 standard alphabet. @@ -26,7 +26,7 @@ The modern interface, which was introduced in Python 2.4, provides: .. function:: b64encode(s[, altchars]) - Encode a string use Base64. + Encode a string using Base64. *s* is the string to encode. Optional *altchars* must be a string of at least length 2 (additional characters are ignored) which specifies an alternative @@ -46,7 +46,8 @@ The modern interface, which was introduced in Python 2.4, provides: alphabet used instead of the ``+`` and ``/`` characters. The decoded string is returned. A :exc:`TypeError` is raised if *s* is - incorrectly padded. Non-base64-alphabet characters are + incorrectly padded. Characters that are neither + in the normal base-64 alphabet nor the alternative alphabet are discarded prior to the padding check. @@ -62,14 +63,16 @@ The modern interface, which was introduced in Python 2.4, provides: .. function:: urlsafe_b64encode(s) - Encode string *s* using a URL-safe alphabet, which substitutes ``-`` instead of + Encode string *s* using the URL- and filesystem-safe + alphabet, which substitutes ``-`` instead of ``+`` and ``_`` instead of ``/`` in the standard Base64 alphabet. The result can still contain ``=``. .. 
function:: urlsafe_b64decode(s) - Decode string *s* using a URL-safe alphabet, which substitutes ``-`` instead of + Decode string *s* using the URL- and filesystem-safe + alphabet, which substitutes ``-`` instead of ``+`` and ``_`` instead of ``/`` in the standard Base64 alphabet. diff --git a/Lib/base64.py b/Lib/base64.py index 844907f..38bc61e 100755 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -65,8 +65,9 @@ def b64decode(s, altchars=None): alternative alphabet used instead of the '+' and '/' characters. The decoded string is returned. A TypeError is raised if s is - incorrectly padded. Non-base64-alphabet characters are discarded prior - to the padding check. + incorrectly padded. Characters that are neither in the normal base-64 + alphabet nor the alternative alphabet are discarded prior to the padding + check. """ if altchars is not None: s = s.translate(string.maketrans(altchars[:2], '+/')) @@ -87,9 +88,10 @@ def standard_b64encode(s): def standard_b64decode(s): """Decode a string encoded with the standard Base64 alphabet. - s is the string to decode. The decoded string is returned. A TypeError - is raised if the string is incorrectly padded or if there are non-alphabet - characters present in the string. + Argument s is the string to decode. The decoded string is returned. A + TypeError is raised if the string is incorrectly padded. Characters that + are not in the standard alphabet are discarded prior to the padding + check. """ return b64decode(s) @@ -97,19 +99,20 @@ _urlsafe_encode_translation = string.maketrans(b'+/', b'-_') _urlsafe_decode_translation = string.maketrans(b'-_', b'+/') def urlsafe_b64encode(s): - """Encode a string using a url-safe Base64 alphabet. + """Encode a string using the URL- and filesystem-safe Base64 alphabet. - s is the string to encode. The encoded string is returned. The alphabet - uses '-' instead of '+' and '_' instead of '/'. + Argument s is the string to encode. The encoded string is returned. 
The + alphabet uses '-' instead of '+' and '_' instead of '/'. """ return b64encode(s).translate(_urlsafe_encode_translation) def urlsafe_b64decode(s): - """Decode a string encoded with the standard Base64 alphabet. + """Decode a string using the URL- and filesystem-safe Base64 alphabet. - s is the string to decode. The decoded string is returned. A TypeError - is raised if the string is incorrectly padded or if there are non-alphabet - characters present in the string. + Argument s is the string to decode. The decoded string is returned. A + TypeError is raised if the string is incorrectly padded. Characters that + are not in the URL-safe base-64 alphabet, and are not a plus '+' or slash + '/', are discarded prior to the padding check. The alphabet uses '-' instead of '+' and '_' instead of '/'. """ @@ -267,7 +270,7 @@ def b16decode(s, casefold=False): a lowercase alphabet is acceptable as input. For security purposes, the default is False. - The decoded string is returned. A TypeError is raised if s were + The decoded string is returned. A TypeError is raised if s is incorrectly padded or if there are non-alphabet characters present in the string. 
""" diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 5dd283b..6e67dc0 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -153,6 +153,13 @@ class BaseXYTestCase(unittest.TestCase): (b'YWJj\nYWI=', b'abcab')) for bstr, res in tests: self.assertEqual(base64.b64decode(bstr), res) + self.assertEqual(base64.standard_b64decode(bstr), res) + self.assertEqual(base64.urlsafe_b64decode(bstr), res) + + # Normal alphabet characters not discarded when alternative given + res = b'\xFB\xEF\xBE\xFF\xFF\xFF' + self.assertEqual(base64.b64decode(b'++[[//]]', b'[]'), res) + self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res) def test_b32encode(self): eq = self.assertEqual @@ -220,6 +227,10 @@ class BaseXYTestCase(unittest.TestCase): eq(base64.b16decode('0102abcdef', True), '\x01\x02\xab\xcd\xef') # Non-bytes eq(base64.b16decode(bytearray("0102ABCDEF")), '\x01\x02\xab\xcd\xef') + # Non-alphabet characters + self.assertRaises(TypeError, base64.b16decode, '0102AG') + # Incorrect "padding" + self.assertRaises(TypeError, base64.b16decode, '010') |