diff options
author | Tal Einat <taleinat+github@gmail.com> | 2018-06-12 12:46:22 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-06-12 12:46:22 (GMT) |
commit | c3f55be7dd012b7e92901627d0b31c21e983ccb4 (patch) | |
tree | 86fb2e7e33615aee453d73f27063a4ec345e89fa /Lib/email | |
parent | 5a9820918077a65db90f24733edc8935c3e2130e (diff) | |
download | cpython-c3f55be7dd012b7e92901627d0b31c21e983ccb4.zip cpython-c3f55be7dd012b7e92901627d0b31c21e983ccb4.tar.gz cpython-c3f55be7dd012b7e92901627d0b31c21e983ccb4.tar.bz2 |
bpo-27397: Make email module properly handle invalid-length base64 strings (#7583)
When attempting to base64-decode a payload of invalid length (1 mod 4),
properly recognize and handle it. The given data will be returned as-is,
i.e. not decoded, along with a new defect, InvalidBase64LengthDefect.
Diffstat (limited to 'Lib/email')
-rw-r--r-- | Lib/email/_encoded_words.py | 48 | ||||
-rw-r--r-- | Lib/email/errors.py | 3 |
2 files changed, 33 insertions, 18 deletions
diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py index c40ffa9..295ae7e 100644 --- a/Lib/email/_encoded_words.py +++ b/Lib/email/_encoded_words.py @@ -98,30 +98,42 @@ def len_q(bstring): # def decode_b(encoded): - defects = [] + # First try encoding with validate=True, fixing the padding if needed. + # This will succeed only if encoded includes no invalid characters. pad_err = len(encoded) % 4 - if pad_err: - defects.append(errors.InvalidBase64PaddingDefect()) - padded_encoded = encoded + b'==='[:4-pad_err] - else: - padded_encoded = encoded + missing_padding = b'==='[:4-pad_err] if pad_err else b'' try: - return base64.b64decode(padded_encoded, validate=True), defects + return ( + base64.b64decode(encoded + missing_padding, validate=True), + [errors.InvalidBase64PaddingDefect()] if pad_err else [], + ) except binascii.Error: - # Since we had correct padding, this must an invalid char error. - defects = [errors.InvalidBase64CharactersDefect()] + # Since we had correct padding, this is likely an invalid char error. + # # The non-alphabet characters are ignored as far as padding - # goes, but we don't know how many there are. So we'll just - # try various padding lengths until something works. - for i in 0, 1, 2, 3: + # goes, but we don't know how many there are. So try without adding + # padding to see if it works. + try: + return ( + base64.b64decode(encoded, validate=False), + [errors.InvalidBase64CharactersDefect()], + ) + except binascii.Error: + # Add as much padding as could possibly be necessary (extra padding + # is ignored). try: - return base64.b64decode(encoded+b'='*i, validate=False), defects + return ( + base64.b64decode(encoded + b'==', validate=False), + [errors.InvalidBase64CharactersDefect(), + errors.InvalidBase64PaddingDefect()], + ) except binascii.Error: - if i==0: - defects.append(errors.InvalidBase64PaddingDefect()) - else: - # This should never happen. - raise AssertionError("unexpected binascii.Error") + # This only happens when the encoded string's length is 1 more + # than a multiple of 4, which is invalid. + # + # bpo-27397: Just return the encoded string since there's no + # way to decode. + return encoded, [errors.InvalidBase64LengthDefect()] def encode_b(bstring): return base64.b64encode(bstring).decode('ascii') diff --git a/Lib/email/errors.py b/Lib/email/errors.py index 791239f..d28a680 100644 --- a/Lib/email/errors.py +++ b/Lib/email/errors.py @@ -73,6 +73,9 @@ class InvalidBase64PaddingDefect(MessageDefect): class InvalidBase64CharactersDefect(MessageDefect): """base64 encoded sequence had characters not in base64 alphabet""" +class InvalidBase64LengthDefect(MessageDefect): + """base64 encoded sequence had invalid length (1 mod 4)""" + # These errors are specific to header parsing. class HeaderDefect(MessageDefect): |