diff options
author | Thomas Dwyer <github@tomd.tel> | 2023-07-10 23:00:55 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-10 23:00:55 (GMT) |
commit | 18dfbd035775c15533d13a98e56b1d2bf5c65f00 (patch) | |
tree | 3da9c6fff33d3c8f5cb7b503e1915a005beaf068 /Lib/email | |
parent | 6782fc050281205734700a1c3e13b123961ed15b (diff) | |
download | cpython-18dfbd035775c15533d13a98e56b1d2bf5c65f00.zip cpython-18dfbd035775c15533d13a98e56b1d2bf5c65f00.tar.gz cpython-18dfbd035775c15533d13a98e56b1d2bf5c65f00.tar.bz2 |
gh-102988: Detect email address parsing errors and return empty tuple to indicate the parsing error (old API) (#105127)
Detect email address parsing errors and return empty tuple to indicate the parsing error (old API). This fixes or at least ameliorates CVE-2023-27043.
---------
Co-authored-by: Gregory P. Smith <greg@krypto.org>
Diffstat (limited to 'Lib/email')
-rw-r--r-- | Lib/email/utils.py | 63 |
1 file changed, 57 insertions, 6 deletions
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index 81da539..11ad75e 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -106,12 +106,54 @@ def formataddr(pair, charset='utf-8'):
     return address
 
 
+def _pre_parse_validation(email_header_fields):
+    accepted_values = []
+    for v in email_header_fields:
+        s = v.replace('\\(', '').replace('\\)', '')
+        if s.count('(') != s.count(')'):
+            v = "('', '')"
+        accepted_values.append(v)
+
+    return accepted_values
+
+
+def _post_parse_validation(parsed_email_header_tuples):
+    accepted_values = []
+    # The parser would have parsed a correctly formatted domain-literal
+    # The existence of an [ after parsing indicates a parsing failure
+    for v in parsed_email_header_tuples:
+        if '[' in v[1]:
+            v = ('', '')
+        accepted_values.append(v)
+
+    return accepted_values
+
 
 def getaddresses(fieldvalues):
-    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
-    all = COMMASPACE.join(str(v) for v in fieldvalues)
+    """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
+
+    When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
+    its place.
+
+    If the resulting list of parsed address is not the same as the number of
+    fieldvalues in the input list a parsing error has occurred. A list
+    containing a single empty 2-tuple [('', '')] is returned in its place.
+    This is done to avoid invalid output.
+    """
+    fieldvalues = [str(v) for v in fieldvalues]
+    fieldvalues = _pre_parse_validation(fieldvalues)
+    all = COMMASPACE.join(v for v in fieldvalues)
     a = _AddressList(all)
-    return a.addresslist
+    result = _post_parse_validation(a.addresslist)
+
+    n = 0
+    for v in fieldvalues:
+        n += v.count(',') + 1
+
+    if len(result) != n:
+        return [('', '')]
+
+    return result
 
 
 def _format_timetuple_and_zone(timetuple, zone):
@@ -212,9 +254,18 @@ def parseaddr(addr):
     Return a tuple of realname and email address, unless the parse fails, in
     which case return a 2-tuple of ('', '').
     """
-    addrs = _AddressList(addr).addresslist
-    if not addrs:
-        return '', ''
+    if isinstance(addr, list):
+        addr = addr[0]
+
+    if not isinstance(addr, str):
+        return ('', '')
+
+    addr = _pre_parse_validation([addr])[0]
+    addrs = _post_parse_validation(_AddressList(addr).addresslist)
+
+    if not addrs or len(addrs) > 1:
+        return ('', '')
+
     return addrs[0]