diff options
author | Vinay Sajip <vinay_sajip@yahoo.co.uk> | 2013-07-12 20:18:49 (GMT) |
---|---|---|
committer | Vinay Sajip <vinay_sajip@yahoo.co.uk> | 2013-07-12 20:18:49 (GMT) |
commit | 4969d468cbb9b57dbb88267a8bb29e20d56a8b00 (patch) | |
tree | c94c1ea735bb2ba257745dea1577a2071463b7b8 | |
parent | ad644e011f1ac9066ff03cc9d8407cb7301f31ca (diff) | |
parent | a03a7c79ca1fa331abab8746696847dbd9f80520 (diff) | |
download | cpython-4969d468cbb9b57dbb88267a8bb29e20d56a8b00.zip cpython-4969d468cbb9b57dbb88267a8bb29e20d56a8b00.tar.gz cpython-4969d468cbb9b57dbb88267a8bb29e20d56a8b00.tar.bz2 |
Merged upstream changes.
-rw-r--r-- | Lib/email/_header_value_parser.py | 23 | ||||
-rw-r--r-- | Lib/test/test_email/test__header_value_parser.py | 26 | ||||
-rw-r--r-- | Lib/test/test_email/test_headerregistry.py | 24 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
4 files changed, 73 insertions, 3 deletions
```diff
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 32fc06e..0392379 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -1624,6 +1624,7 @@ def get_quoted_string(value):
 def get_atom(value):
     """atom = [CFWS] 1*atext [CFWS]
 
+    An atom could be an rfc2047 encoded word.
     """
     atom = Atom()
     if value and value[0] in CFWS_LEADER:
@@ -1632,7 +1633,15 @@ def get_atom(value):
     if value and value[0] in ATOM_ENDS:
         raise errors.HeaderParseError(
             "expected atom but found '{}'".format(value))
-    token, value = get_atext(value)
+    if value.startswith('=?'):
+        try:
+            token, value = get_encoded_word(value)
+        except errors.HeaderParseError:
+            # XXX: need to figure out how to register defects when
+            # appropriate here.
+            token, value = get_atext(value)
+    else:
+        token, value = get_atext(value)
     atom.append(token)
     if value and value[0] in CFWS_LEADER:
         token, value = get_cfws(value)
@@ -1661,12 +1670,22 @@ def get_dot_atom_text(value):
 def get_dot_atom(value):
     """ dot-atom = [CFWS] dot-atom-text [CFWS]
 
+    Any place we can have a dot atom, we could instead have an rfc2047 encoded
+    word.
     """
     dot_atom = DotAtom()
     if value[0] in CFWS_LEADER:
         token, value = get_cfws(value)
         dot_atom.append(token)
-    token, value = get_dot_atom_text(value)
+    if value.startswith('=?'):
+        try:
+            token, value = get_encoded_word(value)
+        except errors.HeaderParseError:
+            # XXX: need to figure out how to register defects when
+            # appropriate here.
+            token, value = get_dot_atom_text(value)
+    else:
+        token, value = get_dot_atom_text(value)
     dot_atom.append(token)
     if value and value[0] in CFWS_LEADER:
         token, value = get_cfws(value)
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index 8917447..646082b 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -808,9 +808,13 @@ class TestParser(TestParserMixin, TestEmailBase):
         self.assertEqual(atom[2].comments, ['bar'])
 
     def test_get_atom_atom_ends_at_noncfws(self):
-        atom = self._test_get_x(parser.get_atom,
+        self._test_get_x(parser.get_atom,
             'bob fred', 'bob ', 'bob ', [], 'fred')
 
+    def test_get_atom_rfc2047_atom(self):
+        self._test_get_x(parser.get_atom,
+            '=?utf-8?q?=20bob?=', ' bob', ' bob', [], '')
+
     # get_dot_atom_text
 
     def test_get_dot_atom_text(self):
@@ -885,6 +889,10 @@ class TestParser(TestParserMixin, TestEmailBase):
         with self.assertRaises(errors.HeaderParseError):
             parser.get_dot_atom(' (foo) bar.bang. foo')
 
+    def test_get_dot_atom_rfc2047_atom(self):
+        self._test_get_x(parser.get_dot_atom,
+            '=?utf-8?q?=20bob?=', ' bob', ' bob', [], '')
+
     # get_word (if this were black box we'd repeat all the qs/atom tests)
 
     def test_get_word_atom_yields_atom(self):
@@ -2156,6 +2164,22 @@ class TestParser(TestParserMixin, TestEmailBase):
         self.assertEqual(address[0].token_type,
                          'mailbox')
 
+    def test_get_address_rfc2047_display_name(self):
+        address = self._test_get_x(parser.get_address,
+            '=?utf-8?q?=C3=89ric?= <foo@example.com>',
+            'Éric <foo@example.com>',
+            'Éric <foo@example.com>',
+            [],
+            '')
+        self.assertEqual(address.token_type, 'address')
+        self.assertEqual(len(address.mailboxes), 1)
+        self.assertEqual(address.mailboxes,
+                         address.all_mailboxes)
+        self.assertEqual(address.mailboxes[0].display_name,
+                         'Éric')
+        self.assertEqual(address[0].token_type,
+                         'mailbox')
+
     def test_get_address_empty_group(self):
         address = self._test_get_x(parser.get_address,
             'Monty Python:;',
diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py
index 80f1c02..f754a32 100644
--- a/Lib/test/test_email/test_headerregistry.py
+++ b/Lib/test/test_email/test_headerregistry.py
@@ -158,6 +158,10 @@ class TestUnstructuredHeader(TestHeaderBase):
             '=?utf-8?q?=C3=89ric?=',
             'Éric'),
 
+        'rfc2047_quopri_with_regular_text': (
+            'The =?utf-8?q?=C3=89ric=2C?= Himself',
+            'The Éric, Himself'),
+
         }
 
 
@@ -1119,6 +1123,26 @@ class TestAddressHeader(TestHeaderBase):
              'example.com',
              None),
 
+        'rfc2047_atom_is_decoded':
+            ('=?utf-8?q?=C3=89ric?= <foo@example.com>',
+             [],
+             'Éric <foo@example.com>',
+             'Éric',
+             'foo@example.com',
+             'foo',
+             'example.com',
+             None),
+
+        'rfc2047_atom_in_phrase_is_decoded':
+            ('The =?utf-8?q?=C3=89ric=2C?= Himself <foo@example.com>',
+             [],
+             '"The Éric, Himself" <foo@example.com>',
+             'The Éric, Himself',
+             'foo@example.com',
+             'foo',
+             'example.com',
+             None),
+
         }
 
     # XXX: Need many more examples, and in particular some with names in
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -154,6 +154,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #18431: The new email header parser now decodes RFC2047 encoded words
+  in structured headers.
+
 - Issue #18044: The new email header parser was mis-parsing encoded words where
   an encoded character immediately followed the '?' that follows the CTE
   character, resulting in a decoding failure. They are now decoded correctly.
```