summaryrefslogtreecommitdiffstats
path: root/Lib/email
diff options
context:
space:
mode:
author Tal Einat <taleinat+github@gmail.com> 2018-06-12 12:46:22 (GMT)
committer GitHub <noreply@github.com> 2018-06-12 12:46:22 (GMT)
commit c3f55be7dd012b7e92901627d0b31c21e983ccb4 (patch)
tree 86fb2e7e33615aee453d73f27063a4ec345e89fa /Lib/email
parent 5a9820918077a65db90f24733edc8935c3e2130e (diff)
download cpython-c3f55be7dd012b7e92901627d0b31c21e983ccb4.zip
cpython-c3f55be7dd012b7e92901627d0b31c21e983ccb4.tar.gz
cpython-c3f55be7dd012b7e92901627d0b31c21e983ccb4.tar.bz2
bpo-27397: Make email module properly handle invalid-length base64 strings (#7583)
When attempting to base64-decode a payload of invalid length (1 mod 4), properly recognize and handle it. The given data will be returned as-is, i.e. not decoded, along with a new defect, InvalidBase64LengthDefect.
Diffstat (limited to 'Lib/email')
-rw-r--r-- Lib/email/_encoded_words.py 48
-rw-r--r-- Lib/email/errors.py 3
2 files changed, 33 insertions, 18 deletions
diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py
index c40ffa9..295ae7e 100644
--- a/Lib/email/_encoded_words.py
+++ b/Lib/email/_encoded_words.py
@@ -98,30 +98,42 @@ def len_q(bstring):
#
def decode_b(encoded):
- defects = []
+ # First try decoding with validate=True, fixing the padding if needed.
+ # This will succeed only if encoded includes no invalid characters.
pad_err = len(encoded) % 4
- if pad_err:
- defects.append(errors.InvalidBase64PaddingDefect())
- padded_encoded = encoded + b'==='[:4-pad_err]
- else:
- padded_encoded = encoded
+ missing_padding = b'==='[:4-pad_err] if pad_err else b''
try:
- return base64.b64decode(padded_encoded, validate=True), defects
+ return (
+ base64.b64decode(encoded + missing_padding, validate=True),
+ [errors.InvalidBase64PaddingDefect()] if pad_err else [],
+ )
except binascii.Error:
- # Since we had correct padding, this must an invalid char error.
- defects = [errors.InvalidBase64CharactersDefect()]
+ # Since we had correct padding, this is likely an invalid char error.
+ #
# The non-alphabet characters are ignored as far as padding
- # goes, but we don't know how many there are. So we'll just
- # try various padding lengths until something works.
- for i in 0, 1, 2, 3:
+ # goes, but we don't know how many there are. So try without adding
+ # padding to see if it works.
+ try:
+ return (
+ base64.b64decode(encoded, validate=False),
+ [errors.InvalidBase64CharactersDefect()],
+ )
+ except binascii.Error:
+ # Add as much padding as could possibly be necessary (extra padding
+ # is ignored).
try:
- return base64.b64decode(encoded+b'='*i, validate=False), defects
+ return (
+ base64.b64decode(encoded + b'==', validate=False),
+ [errors.InvalidBase64CharactersDefect(),
+ errors.InvalidBase64PaddingDefect()],
+ )
except binascii.Error:
- if i==0:
- defects.append(errors.InvalidBase64PaddingDefect())
- else:
- # This should never happen.
- raise AssertionError("unexpected binascii.Error")
+ # This only happens when the encoded string's length is 1 more
+ # than a multiple of 4, which is invalid.
+ #
+ # bpo-27397: Just return the encoded string since there's no
+ # way to decode.
+ return encoded, [errors.InvalidBase64LengthDefect()]
def encode_b(bstring):
return base64.b64encode(bstring).decode('ascii')
diff --git a/Lib/email/errors.py b/Lib/email/errors.py
index 791239f..d28a680 100644
--- a/Lib/email/errors.py
+++ b/Lib/email/errors.py
@@ -73,6 +73,9 @@ class InvalidBase64PaddingDefect(MessageDefect):
class InvalidBase64CharactersDefect(MessageDefect):
"""base64 encoded sequence had characters not in base64 alphabet"""
+class InvalidBase64LengthDefect(MessageDefect):
+ """base64 encoded sequence had invalid length (1 mod 4)"""
+
# These errors are specific to header parsing.
class HeaderDefect(MessageDefect):