From c3f55be7dd012b7e92901627d0b31c21e983ccb4 Mon Sep 17 00:00:00 2001 From: Tal Einat Date: Tue, 12 Jun 2018 15:46:22 +0300 Subject: bpo-27397: Make email module properly handle invalid-length base64 strings (#7583) When attempting to base64-decode a payload of invalid length (1 mod 4), properly recognize and handle it. The given data will be returned as-is, i.e. not decoded, along with a new defect, InvalidBase64LengthDefect. --- Doc/library/email.errors.rst | 4 ++ Lib/email/_encoded_words.py | 48 ++++++++++++++-------- Lib/email/errors.py | 3 ++ Lib/test/test_email/test__encoded_words.py | 6 +++ Lib/test/test_email/test__header_value_parser.py | 9 ++++ Lib/test/test_email/test_defect_handling.py | 17 ++++++++ .../2018-06-10-09-43-54.bpo-27397.0_fFQR.rst | 1 + 7 files changed, 70 insertions(+), 18 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2018-06-10-09-43-54.bpo-27397.0_fFQR.rst diff --git a/Doc/library/email.errors.rst b/Doc/library/email.errors.rst index 5838767..511ad16 100644 --- a/Doc/library/email.errors.rst +++ b/Doc/library/email.errors.rst @@ -108,3 +108,7 @@ All defect classes are subclassed from :class:`email.errors.MessageDefect`. * :class:`InvalidBase64CharactersDefect` -- When decoding a block of base64 encoded bytes, characters outside the base64 alphabet were encountered. The characters are ignored, but the resulting decoded bytes may be invalid. + +* :class:`InvalidBase64LengthDefect` -- When decoding a block of base64 encoded + bytes, the number of non-padding base64 characters was invalid (1 more than + a multiple of 4). The encoded block was kept as-is. diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py index c40ffa9..295ae7e 100644 --- a/Lib/email/_encoded_words.py +++ b/Lib/email/_encoded_words.py @@ -98,30 +98,42 @@ def len_q(bstring): # def decode_b(encoded): - defects = [] + # First try encoding with validate=True, fixing the padding if needed. + # This will succeed only if encoded includes no invalid characters. pad_err = len(encoded) % 4 - if pad_err: - defects.append(errors.InvalidBase64PaddingDefect()) - padded_encoded = encoded + b'==='[:4-pad_err] - else: - padded_encoded = encoded + missing_padding = b'==='[:4-pad_err] if pad_err else b'' try: - return base64.b64decode(padded_encoded, validate=True), defects + return ( + base64.b64decode(encoded + missing_padding, validate=True), + [errors.InvalidBase64PaddingDefect()] if pad_err else [], + ) except binascii.Error: - # Since we had correct padding, this must an invalid char error. - defects = [errors.InvalidBase64CharactersDefect()] + # Since we had correct padding, this is likely an invalid char error. + # # The non-alphabet characters are ignored as far as padding - # goes, but we don't know how many there are. So we'll just - # try various padding lengths until something works. - for i in 0, 1, 2, 3: + # goes, but we don't know how many there are. So try without adding + # padding to see if it works. + try: + return ( + base64.b64decode(encoded, validate=False), + [errors.InvalidBase64CharactersDefect()], + ) + except binascii.Error: + # Add as much padding as could possibly be necessary (extra padding + # is ignored). try: - return base64.b64decode(encoded+b'='*i, validate=False), defects + return ( + base64.b64decode(encoded + b'==', validate=False), + [errors.InvalidBase64CharactersDefect(), + errors.InvalidBase64PaddingDefect()], + ) except binascii.Error: - if i==0: - defects.append(errors.InvalidBase64PaddingDefect()) - else: - # This should never happen. - raise AssertionError("unexpected binascii.Error") + # This only happens when the encoded string's length is 1 more + # than a multiple of 4, which is invalid. + # + # bpo-27397: Just return the encoded string since there's no + # way to decode. + return encoded, [errors.InvalidBase64LengthDefect()] def encode_b(bstring): return base64.b64encode(bstring).decode('ascii') diff --git a/Lib/email/errors.py b/Lib/email/errors.py index 791239f..d28a680 100644 --- a/Lib/email/errors.py +++ b/Lib/email/errors.py @@ -73,6 +73,9 @@ class InvalidBase64PaddingDefect(MessageDefect): class InvalidBase64CharactersDefect(MessageDefect): """base64 encoded sequence had characters not in base64 alphabet""" +class InvalidBase64LengthDefect(MessageDefect): + """base64 encoded sequence had invalid length (1 mod 4)""" + # These errors are specific to header parsing. class HeaderDefect(MessageDefect): diff --git a/Lib/test/test_email/test__encoded_words.py b/Lib/test/test_email/test__encoded_words.py index 900e1d0..5a59aeb 100644 --- a/Lib/test/test_email/test__encoded_words.py +++ b/Lib/test/test_email/test__encoded_words.py @@ -33,7 +33,10 @@ class TestDecodeB(TestEmailBase): self._test(b'Zm9v', b'foo') def test_missing_padding(self): + # 1 missing padding character self._test(b'dmk', b'vi', [errors.InvalidBase64PaddingDefect]) + # 2 missing padding characters + self._test(b'dg', b'v', [errors.InvalidBase64PaddingDefect]) def test_invalid_character(self): self._test(b'dm\x01k===', b'vi', [errors.InvalidBase64CharactersDefect]) @@ -42,6 +45,9 @@ class TestDecodeB(TestEmailBase): self._test(b'dm\x01k', b'vi', [errors.InvalidBase64CharactersDefect, errors.InvalidBase64PaddingDefect]) + def test_invalid_length(self): + self._test(b'abcde', b'abcde', [errors.InvalidBase64LengthDefect]) + class TestDecode(TestEmailBase): diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 5cdc4bc..5036de2 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -347,6 +347,15 @@ class TestParser(TestParserMixin, TestEmailBase): errors.InvalidBase64PaddingDefect], '') + def test_get_unstructured_invalid_base64_length(self): + # bpo-27397: Return the encoded string since there's no way to decode. + self._test_get_x(self._get_unst, + '=?utf-8?b?abcde?=', + 'abcde', + 'abcde', + [errors.InvalidBase64LengthDefect], + '') + def test_get_unstructured_no_whitespace_between_ews(self): self._test_get_x(self._get_unst, '=?utf-8?q?foo?==?utf-8?q?bar?=', diff --git a/Lib/test/test_email/test_defect_handling.py b/Lib/test/test_email/test_defect_handling.py index f36b907..781f657 100644 --- a/Lib/test/test_email/test_defect_handling.py +++ b/Lib/test/test_email/test_defect_handling.py @@ -254,6 +254,23 @@ class TestDefectsBase: self.assertDefectsEqual(self.get_defects(msg), [errors.InvalidBase64CharactersDefect]) + def test_invalid_length_of_base64_payload(self): + source = textwrap.dedent("""\ + Subject: test + MIME-Version: 1.0 + Content-Type: text/plain; charset="utf-8" + Content-Transfer-Encoding: base64 + + abcde + """) + msg = self._str_msg(source) + with self._raise_point(errors.InvalidBase64LengthDefect): + payload = msg.get_payload(decode=True) + if self.raise_expected: return + self.assertEqual(payload, b'abcde') + self.assertDefectsEqual(self.get_defects(msg), + [errors.InvalidBase64LengthDefect]) + def test_missing_ending_boundary(self): source = textwrap.dedent("""\ To: 1@harrydomain4.com diff --git a/Misc/NEWS.d/next/Library/2018-06-10-09-43-54.bpo-27397.0_fFQR.rst b/Misc/NEWS.d/next/Library/2018-06-10-09-43-54.bpo-27397.0_fFQR.rst new file mode 100644 index 0000000..1098172 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-06-10-09-43-54.bpo-27397.0_fFQR.rst @@ -0,0 +1 @@ +Make email module properly handle invalid-length base64 strings. -- cgit v0.12