diff options
author | R David Murray <rdmurray@bitdance.com> | 2013-07-12 20:01:10 (GMT) |
---|---|---|
committer | R David Murray <rdmurray@bitdance.com> | 2013-07-12 20:01:10 (GMT) |
commit | 1f9d24a18d96a899dfcb0ce630cbbb78ec2cec7a (patch) | |
tree | c830141c0bd52bd4b1c8fb593ee127071caddb63 /Lib | |
parent | ae95b4f7a5344de372885fb25d8fadbb14c8fea4 (diff) | |
parent | 923512f327af6944bbdbc905d2372658a3977489 (diff) | |
download | cpython-1f9d24a18d96a899dfcb0ce630cbbb78ec2cec7a.zip cpython-1f9d24a18d96a899dfcb0ce630cbbb78ec2cec7a.tar.gz cpython-1f9d24a18d96a899dfcb0ce630cbbb78ec2cec7a.tar.bz2 |
Merge: #18431: Decode encoded words in atoms in new email parser.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/email/_header_value_parser.py | 23 | ||||
-rw-r--r-- | Lib/test/test_email/test__header_value_parser.py | 26 | ||||
-rw-r--r-- | Lib/test/test_email/test_headerregistry.py | 24 |
3 files changed, 70 insertions, 3 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 32fc06e..0392379 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1624,6 +1624,7 @@ def get_quoted_string(value): def get_atom(value): """atom = [CFWS] 1*atext [CFWS] + An atom could be an rfc2047 encoded word. """ atom = Atom() if value and value[0] in CFWS_LEADER: @@ -1632,7 +1633,15 @@ def get_atom(value): if value and value[0] in ATOM_ENDS: raise errors.HeaderParseError( "expected atom but found '{}'".format(value)) - token, value = get_atext(value) + if value.startswith('=?'): + try: + token, value = get_encoded_word(value) + except errors.HeaderParseError: + # XXX: need to figure out how to register defects when + # appropriate here. + token, value = get_atext(value) + else: + token, value = get_atext(value) atom.append(token) if value and value[0] in CFWS_LEADER: token, value = get_cfws(value) @@ -1661,12 +1670,22 @@ def get_dot_atom_text(value): def get_dot_atom(value): """ dot-atom = [CFWS] dot-atom-text [CFWS] + Any place we can have a dot atom, we could instead have an rfc2047 encoded + word. """ dot_atom = DotAtom() if value[0] in CFWS_LEADER: token, value = get_cfws(value) dot_atom.append(token) - token, value = get_dot_atom_text(value) + if value.startswith('=?'): + try: + token, value = get_encoded_word(value) + except errors.HeaderParseError: + # XXX: need to figure out how to register defects when + # appropriate here. + token, value = get_dot_atom_text(value) + else: + token, value = get_dot_atom_text(value) dot_atom.append(token) if value and value[0] in CFWS_LEADER: token, value = get_cfws(value) diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 8917447..646082b 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -808,9 +808,13 @@ class TestParser(TestParserMixin, TestEmailBase): self.assertEqual(atom[2].comments, ['bar']) def test_get_atom_atom_ends_at_noncfws(self): - atom = self._test_get_x(parser.get_atom, + self._test_get_x(parser.get_atom, 'bob fred', 'bob ', 'bob ', [], 'fred') + def test_get_atom_rfc2047_atom(self): + self._test_get_x(parser.get_atom, + '=?utf-8?q?=20bob?=', ' bob', ' bob', [], '') + # get_dot_atom_text def test_get_dot_atom_text(self): @@ -885,6 +889,10 @@ class TestParser(TestParserMixin, TestEmailBase): with self.assertRaises(errors.HeaderParseError): parser.get_dot_atom(' (foo) bar.bang. foo') + def test_get_dot_atom_rfc2047_atom(self): + self._test_get_x(parser.get_dot_atom, + '=?utf-8?q?=20bob?=', ' bob', ' bob', [], '') + # get_word (if this were black box we'd repeat all the qs/atom tests) def test_get_word_atom_yields_atom(self): @@ -2156,6 +2164,22 @@ class TestParser(TestParserMixin, TestEmailBase): self.assertEqual(address[0].token_type, 'mailbox') + def test_get_address_rfc2047_display_name(self): + address = self._test_get_x(parser.get_address, + '=?utf-8?q?=C3=89ric?= <foo@example.com>', + 'Éric <foo@example.com>', + 'Éric <foo@example.com>', + [], + '') + self.assertEqual(address.token_type, 'address') + self.assertEqual(len(address.mailboxes), 1) + self.assertEqual(address.mailboxes, + address.all_mailboxes) + self.assertEqual(address.mailboxes[0].display_name, + 'Éric') + self.assertEqual(address[0].token_type, + 'mailbox') + def test_get_address_empty_group(self): address = self._test_get_x(parser.get_address, 'Monty Python:;', diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py index 80f1c02..f754a32 100644 --- a/Lib/test/test_email/test_headerregistry.py +++ b/Lib/test/test_email/test_headerregistry.py @@ -158,6 +158,10 @@ class TestUnstructuredHeader(TestHeaderBase): '=?utf-8?q?=C3=89ric?=', 'Éric'), + 'rfc2047_quopri_with_regular_text': ( + 'The =?utf-8?q?=C3=89ric=2C?= Himself', + 'The Éric, Himself'), + } @@ -1119,6 +1123,26 @@ class TestAddressHeader(TestHeaderBase): 'example.com', None), + 'rfc2047_atom_is_decoded': + ('=?utf-8?q?=C3=89ric?= <foo@example.com>', + [], + 'Éric <foo@example.com>', + 'Éric', + 'foo@example.com', + 'foo', + 'example.com', + None), + + 'rfc2047_atom_in_phrase_is_decoded': + ('The =?utf-8?q?=C3=89ric=2C?= Himself <foo@example.com>', + [], + '"The Éric, Himself" <foo@example.com>', + 'The Éric, Himself', + 'foo@example.com', + 'foo', + 'example.com', + None), + } # XXX: Need many more examples, and in particular some with names in |