diff options
author | Gregory P. Smith <greg@krypto.org> | 2023-07-21 04:05:46 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-21 04:05:46 (GMT) |
commit | 656f62454bff35db8d630ca43c94bf6db44338ba (patch) | |
tree | cdf2a5e6a6c7fc34c886ec648ddaf94c9201d535 | |
parent | c1fd76e138c0acf4b90b08ded990d6521187fe63 (diff) | |
download | cpython-656f62454bff35db8d630ca43c94bf6db44338ba.zip cpython-656f62454bff35db8d630ca43c94bf6db44338ba.tar.gz cpython-656f62454bff35db8d630ca43c94bf6db44338ba.tar.bz2 |
[3.12] gh-106669: Revert "gh-102988: Detect email address parsing errors ... (GH-105127)" (GH-106733) (#106941)
This reverts commit 18dfbd035775c15533d13a98e56b1d2bf5c65f00.
Adds a regression test from the issue.
See https://github.com/python/cpython/issues/106669.
(cherry picked from commit a31dea1feb61793e48fa9aa5014f358352205c1d)
-rw-r--r-- | Doc/library/email.utils.rst | 26 | ||||
-rw-r--r-- | Doc/whatsnew/3.12.rst | 8 | ||||
-rw-r--r-- | Lib/email/utils.py | 63 | ||||
-rw-r--r-- | Lib/test/test_email/test_email.py | 96 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 4 |
5 files changed, 30 insertions, 167 deletions
diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst index a87a0bd..345b640 100644 --- a/Doc/library/email.utils.rst +++ b/Doc/library/email.utils.rst @@ -65,11 +65,6 @@ of the new API. *email address* parts. Returns a tuple of that information, unless the parse fails, in which case a 2-tuple of ``('', '')`` is returned. - .. versionchanged:: 3.12 - For security reasons, addresses that were ambiguous and could parse into - multiple different addresses now cause ``('', '')`` to be returned - instead of only one of the *potential* addresses. - .. function:: formataddr(pair, charset='utf-8') @@ -92,7 +87,7 @@ of the new API. This method returns a list of 2-tuples of the form returned by ``parseaddr()``. *fieldvalues* is a sequence of header field values as might be returned by :meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple - example that gets all the recipients of a message: + example that gets all the recipients of a message:: from email.utils import getaddresses @@ -102,25 +97,6 @@ of the new API. resent_ccs = msg.get_all('resent-cc', []) all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) - When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')`` - is returned in its place. Other errors in parsing the list of - addresses such as a fieldvalue seemingly parsing into multiple - addresses may result in a list containing a single empty 2-tuple - ``[('', '')]`` being returned rather than returning potentially - invalid output. - - Example malformed input parsing: - - .. doctest:: - - >>> from email.utils import getaddresses - >>> getaddresses(['alice@example.com <bob@example.com>', 'me@example.com']) - [('', '')] - - .. versionchanged:: 3.12 - The 2-tuple of ``('', '')`` in the returned values when parsing - fails were added as to address a security issue. - .. 
function:: parsedate(date) diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index d6d7b7d..068618f 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -570,14 +570,6 @@ dis :data:`~dis.hasarg` collection instead. (Contributed by Irit Katriel in :gh:`94216`.) -email ------ - -* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return - ``('', '')`` 2-tuples in more situations where invalid email addresses are - encountered instead of potentially inaccurate values. - (Contributed by Thomas Dwyer for :gh:`102988` to ameliorate CVE-2023-27043.) - fractions --------- diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 11ad75e..81da539 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -106,54 +106,12 @@ def formataddr(pair, charset='utf-8'): return address -def _pre_parse_validation(email_header_fields): - accepted_values = [] - for v in email_header_fields: - s = v.replace('\\(', '').replace('\\)', '') - if s.count('(') != s.count(')'): - v = "('', '')" - accepted_values.append(v) - - return accepted_values - - -def _post_parse_validation(parsed_email_header_tuples): - accepted_values = [] - # The parser would have parsed a correctly formatted domain-literal - # The existence of an [ after parsing indicates a parsing failure - for v in parsed_email_header_tuples: - if '[' in v[1]: - v = ('', '') - accepted_values.append(v) - - return accepted_values - def getaddresses(fieldvalues): - """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. - - When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in - its place. - - If the resulting list of parsed address is not the same as the number of - fieldvalues in the input list a parsing error has occurred. A list - containing a single empty 2-tuple [('', '')] is returned in its place. - This is done to avoid invalid output. 
- """ - fieldvalues = [str(v) for v in fieldvalues] - fieldvalues = _pre_parse_validation(fieldvalues) - all = COMMASPACE.join(v for v in fieldvalues) + """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" + all = COMMASPACE.join(str(v) for v in fieldvalues) a = _AddressList(all) - result = _post_parse_validation(a.addresslist) - - n = 0 - for v in fieldvalues: - n += v.count(',') + 1 - - if len(result) != n: - return [('', '')] - - return result + return a.addresslist def _format_timetuple_and_zone(timetuple, zone): @@ -254,18 +212,9 @@ def parseaddr(addr): Return a tuple of realname and email address, unless the parse fails, in which case return a 2-tuple of ('', ''). """ - if isinstance(addr, list): - addr = addr[0] - - if not isinstance(addr, str): - return ('', '') - - addr = _pre_parse_validation([addr])[0] - addrs = _post_parse_validation(_AddressList(addr).addresslist) - - if not addrs or len(addrs) > 1: - return ('', '') - + addrs = _AddressList(addr).addresslist + if not addrs: + return '', '' return addrs[0] diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 5238944..b4f3a24 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -3319,90 +3319,32 @@ Foo [('Al Person', 'aperson@dom.ain'), ('Bud Person', 'bperson@dom.ain')]) - def test_getaddresses_parsing_errors(self): - """Test for parsing errors from CVE-2023-27043""" - eq = self.assertEqual - eq(utils.getaddresses(['alice@example.org(<bob@example.com>']), - [('', '')]) - eq(utils.getaddresses(['alice@example.org)<bob@example.com>']), - [('', '')]) - eq(utils.getaddresses(['alice@example.org<<bob@example.com>']), - [('', '')]) - eq(utils.getaddresses(['alice@example.org><bob@example.com>']), - [('', '')]) - eq(utils.getaddresses(['alice@example.org@<bob@example.com>']), - [('', '')]) - eq(utils.getaddresses(['alice@example.org,<bob@example.com>']), - [('', 'alice@example.org'), ('', 'bob@example.com')]) - 
eq(utils.getaddresses(['alice@example.org;<bob@example.com>']), - [('', '')]) - eq(utils.getaddresses(['alice@example.org:<bob@example.com>']), - [('', '')]) - eq(utils.getaddresses(['alice@example.org.<bob@example.com>']), - [('', '')]) - eq(utils.getaddresses(['alice@example.org"<bob@example.com>']), - [('', '')]) - eq(utils.getaddresses(['alice@example.org[<bob@example.com>']), - [('', '')]) - eq(utils.getaddresses(['alice@example.org]<bob@example.com>']), - [('', '')]) - - def test_parseaddr_parsing_errors(self): - """Test for parsing errors from CVE-2023-27043""" - eq = self.assertEqual - eq(utils.parseaddr(['alice@example.org(<bob@example.com>']), - ('', '')) - eq(utils.parseaddr(['alice@example.org)<bob@example.com>']), - ('', '')) - eq(utils.parseaddr(['alice@example.org<<bob@example.com>']), - ('', '')) - eq(utils.parseaddr(['alice@example.org><bob@example.com>']), - ('', '')) - eq(utils.parseaddr(['alice@example.org@<bob@example.com>']), - ('', '')) - eq(utils.parseaddr(['alice@example.org,<bob@example.com>']), - ('', '')) - eq(utils.parseaddr(['alice@example.org;<bob@example.com>']), - ('', '')) - eq(utils.parseaddr(['alice@example.org:<bob@example.com>']), - ('', '')) - eq(utils.parseaddr(['alice@example.org.<bob@example.com>']), - ('', '')) - eq(utils.parseaddr(['alice@example.org"<bob@example.com>']), - ('', '')) - eq(utils.parseaddr(['alice@example.org[<bob@example.com>']), - ('', '')) - eq(utils.parseaddr(['alice@example.org]<bob@example.com>']), - ('', '')) + def test_getaddresses_comma_in_name(self): + """GH-106669 regression test.""" + self.assertEqual( + utils.getaddresses( + [ + '"Bud, Person" <bperson@dom.ain>', + 'aperson@dom.ain (Al Person)', + '"Mariusz Felisiak" <to@example.com>', + ] + ), + [ + ('Bud, Person', 'bperson@dom.ain'), + ('Al Person', 'aperson@dom.ain'), + ('Mariusz Felisiak', 'to@example.com'), + ], + ) def test_getaddresses_nasty(self): eq = self.assertEqual eq(utils.getaddresses(['foo: ;']), [('', '')]) - 
eq(utils.getaddresses(['[]*-- =~$']), [('', '')]) + eq(utils.getaddresses( + ['[]*-- =~$']), + [('', ''), ('', ''), ('', '*--')]) eq(utils.getaddresses( ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']), [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) - eq(utils.getaddresses( - [r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>']), - [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]) - eq(utils.getaddresses( - ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']), - [('', '')]) - eq(utils.getaddresses( - ['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']), - [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]) - eq(utils.getaddresses( - ['John Doe <jdoe@machine(comment). example>']), - [('John Doe (comment)', 'jdoe@machine.example')]) - eq(utils.getaddresses( - ['"Mary Smith: Personal Account" <smith@home.example>']), - [('Mary Smith: Personal Account', 'smith@home.example')]) - eq(utils.getaddresses( - ['Undisclosed recipients:;']), - [('', '')]) - eq(utils.getaddresses( - [r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>']), - [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]) def test_getaddresses_embedded_comment(self): """Test proper handling of a nested comment""" diff --git a/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst new file mode 100644 index 0000000..c67ec45 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst @@ -0,0 +1,4 @@ +Reverted the :mod:`email.utils` security improvement change released in +3.12beta4 that unintentionally caused :mod:`email.utils.getaddresses` to fail +to parse email addresses with a comma in the quoted name field. +See :gh:`106669`. |