summaryrefslogtreecommitdiffstats
path: root/Lib/email
diff options
context:
space:
mode:
authorR David Murray <rdmurray@bitdance.com>2013-07-12 20:00:28 (GMT)
committerR David Murray <rdmurray@bitdance.com>2013-07-12 20:00:28 (GMT)
commit923512f327af6944bbdbc905d2372658a3977489 (patch)
treeb59a06066813e708621c90e3145753d3e6d5236a /Lib/email
parent65171b28e77f589a490335c8749a24151e1d8817 (diff)
downloadcpython-923512f327af6944bbdbc905d2372658a3977489.zip
cpython-923512f327af6944bbdbc905d2372658a3977489.tar.gz
cpython-923512f327af6944bbdbc905d2372658a3977489.tar.bz2
#18431: Decode encoded words in atoms in new email parser.
More remains to be done here to accept the RFC-invalid input that some mailers tolerate, but this change covers the RFC-valid places where encoded words can occur in structured headers.
Diffstat (limited to 'Lib/email')
-rw-r--r-- Lib/email/_header_value_parser.py | 23
1 files changed, 21 insertions, 2 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index a01d845..291437c 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -1627,6 +1627,7 @@ def get_quoted_string(value):
def get_atom(value):
"""atom = [CFWS] 1*atext [CFWS]
+ An atom could be an rfc2047 encoded word.
"""
atom = Atom()
if value and value[0] in CFWS_LEADER:
@@ -1635,7 +1636,15 @@ def get_atom(value):
if value and value[0] in ATOM_ENDS:
raise errors.HeaderParseError(
"expected atom but found '{}'".format(value))
- token, value = get_atext(value)
+ if value.startswith('=?'):
+ try:
+ token, value = get_encoded_word(value)
+ except errors.HeaderParseError:
+ # XXX: need to figure out how to register defects when
+ # appropriate here.
+ token, value = get_atext(value)
+ else:
+ token, value = get_atext(value)
atom.append(token)
if value and value[0] in CFWS_LEADER:
token, value = get_cfws(value)
@@ -1664,12 +1673,22 @@ def get_dot_atom_text(value):
def get_dot_atom(value):
""" dot-atom = [CFWS] dot-atom-text [CFWS]
+ Any place we can have a dot atom, we could instead have an rfc2047 encoded
+ word.
"""
dot_atom = DotAtom()
if value[0] in CFWS_LEADER:
token, value = get_cfws(value)
dot_atom.append(token)
- token, value = get_dot_atom_text(value)
+ if value.startswith('=?'):
+ try:
+ token, value = get_encoded_word(value)
+ except errors.HeaderParseError:
+ # XXX: need to figure out how to register defects when
+ # appropriate here.
+ token, value = get_dot_atom_text(value)
+ else:
+ token, value = get_dot_atom_text(value)
dot_atom.append(token)
if value and value[0] in CFWS_LEADER:
token, value = get_cfws(value)