summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author R David Murray <rdmurray@bitdance.com> 2013-07-12 20:01:10 (GMT)
committer R David Murray <rdmurray@bitdance.com> 2013-07-12 20:01:10 (GMT)
commit 1f9d24a18d96a899dfcb0ce630cbbb78ec2cec7a (patch)
tree c830141c0bd52bd4b1c8fb593ee127071caddb63 /Lib
parent ae95b4f7a5344de372885fb25d8fadbb14c8fea4 (diff)
parent 923512f327af6944bbdbc905d2372658a3977489 (diff)
download cpython-1f9d24a18d96a899dfcb0ce630cbbb78ec2cec7a.zip
cpython-1f9d24a18d96a899dfcb0ce630cbbb78ec2cec7a.tar.gz
cpython-1f9d24a18d96a899dfcb0ce630cbbb78ec2cec7a.tar.bz2
Merge: #18431: Decode encoded words in atoms in new email parser.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/email/_header_value_parser.py 23
-rw-r--r-- Lib/test/test_email/test__header_value_parser.py 26
-rw-r--r-- Lib/test/test_email/test_headerregistry.py 24
3 files changed, 70 insertions, 3 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 32fc06e..0392379 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -1624,6 +1624,7 @@ def get_quoted_string(value):
def get_atom(value):
"""atom = [CFWS] 1*atext [CFWS]
+ An atom could be an rfc2047 encoded word.
"""
atom = Atom()
if value and value[0] in CFWS_LEADER:
@@ -1632,7 +1633,15 @@ def get_atom(value):
if value and value[0] in ATOM_ENDS:
raise errors.HeaderParseError(
"expected atom but found '{}'".format(value))
- token, value = get_atext(value)
+ if value.startswith('=?'):
+ try:
+ token, value = get_encoded_word(value)
+ except errors.HeaderParseError:
+ # XXX: need to figure out how to register defects when
+ # appropriate here.
+ token, value = get_atext(value)
+ else:
+ token, value = get_atext(value)
atom.append(token)
if value and value[0] in CFWS_LEADER:
token, value = get_cfws(value)
@@ -1661,12 +1670,22 @@ def get_dot_atom_text(value):
def get_dot_atom(value):
""" dot-atom = [CFWS] dot-atom-text [CFWS]
+ Any place we can have a dot atom, we could instead have an rfc2047 encoded
+ word.
"""
dot_atom = DotAtom()
if value[0] in CFWS_LEADER:
token, value = get_cfws(value)
dot_atom.append(token)
- token, value = get_dot_atom_text(value)
+ if value.startswith('=?'):
+ try:
+ token, value = get_encoded_word(value)
+ except errors.HeaderParseError:
+ # XXX: need to figure out how to register defects when
+ # appropriate here.
+ token, value = get_dot_atom_text(value)
+ else:
+ token, value = get_dot_atom_text(value)
dot_atom.append(token)
if value and value[0] in CFWS_LEADER:
token, value = get_cfws(value)
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index 8917447..646082b 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -808,9 +808,13 @@ class TestParser(TestParserMixin, TestEmailBase):
self.assertEqual(atom[2].comments, ['bar'])
def test_get_atom_atom_ends_at_noncfws(self):
- atom = self._test_get_x(parser.get_atom,
+ self._test_get_x(parser.get_atom,
'bob fred', 'bob ', 'bob ', [], 'fred')
+ def test_get_atom_rfc2047_atom(self):
+ self._test_get_x(parser.get_atom,
+ '=?utf-8?q?=20bob?=', ' bob', ' bob', [], '')
+
# get_dot_atom_text
def test_get_dot_atom_text(self):
@@ -885,6 +889,10 @@ class TestParser(TestParserMixin, TestEmailBase):
with self.assertRaises(errors.HeaderParseError):
parser.get_dot_atom(' (foo) bar.bang. foo')
+ def test_get_dot_atom_rfc2047_atom(self):
+ self._test_get_x(parser.get_dot_atom,
+ '=?utf-8?q?=20bob?=', ' bob', ' bob', [], '')
+
# get_word (if this were black box we'd repeat all the qs/atom tests)
def test_get_word_atom_yields_atom(self):
@@ -2156,6 +2164,22 @@ class TestParser(TestParserMixin, TestEmailBase):
self.assertEqual(address[0].token_type,
'mailbox')
+ def test_get_address_rfc2047_display_name(self):
+ address = self._test_get_x(parser.get_address,
+ '=?utf-8?q?=C3=89ric?= <foo@example.com>',
+ 'Éric <foo@example.com>',
+ 'Éric <foo@example.com>',
+ [],
+ '')
+ self.assertEqual(address.token_type, 'address')
+ self.assertEqual(len(address.mailboxes), 1)
+ self.assertEqual(address.mailboxes,
+ address.all_mailboxes)
+ self.assertEqual(address.mailboxes[0].display_name,
+ 'Éric')
+ self.assertEqual(address[0].token_type,
+ 'mailbox')
+
def test_get_address_empty_group(self):
address = self._test_get_x(parser.get_address,
'Monty Python:;',
diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py
index 80f1c02..f754a32 100644
--- a/Lib/test/test_email/test_headerregistry.py
+++ b/Lib/test/test_email/test_headerregistry.py
@@ -158,6 +158,10 @@ class TestUnstructuredHeader(TestHeaderBase):
'=?utf-8?q?=C3=89ric?=',
'Éric'),
+ 'rfc2047_quopri_with_regular_text': (
+ 'The =?utf-8?q?=C3=89ric=2C?= Himself',
+ 'The Éric, Himself'),
+
}
@@ -1119,6 +1123,26 @@ class TestAddressHeader(TestHeaderBase):
'example.com',
None),
+ 'rfc2047_atom_is_decoded':
+ ('=?utf-8?q?=C3=89ric?= <foo@example.com>',
+ [],
+ 'Éric <foo@example.com>',
+ 'Éric',
+ 'foo@example.com',
+ 'foo',
+ 'example.com',
+ None),
+
+ 'rfc2047_atom_in_phrase_is_decoded':
+ ('The =?utf-8?q?=C3=89ric=2C?= Himself <foo@example.com>',
+ [],
+ '"The Éric, Himself" <foo@example.com>',
+ 'The Éric, Himself',
+ 'foo@example.com',
+ 'foo',
+ 'example.com',
+ None),
+
}
# XXX: Need many more examples, and in particular some with names in