diff options
author | R David Murray <rdmurray@bitdance.com> | 2013-07-12 20:00:28 (GMT) |
---|---|---|
committer | R David Murray <rdmurray@bitdance.com> | 2013-07-12 20:00:28 (GMT) |
commit | 923512f327af6944bbdbc905d2372658a3977489 (patch) | |
tree | b59a06066813e708621c90e3145753d3e6d5236a /Lib/email | |
parent | 65171b28e77f589a490335c8749a24151e1d8817 (diff) | |
download | cpython-923512f327af6944bbdbc905d2372658a3977489.zip cpython-923512f327af6944bbdbc905d2372658a3977489.tar.gz cpython-923512f327af6944bbdbc905d2372658a3977489.tar.bz2 |
#18431: Decode encoded words in atoms in new email parser.
There is more to be done here in terms of accepting RFC-invalid
input that some mailers nevertheless accept, but this covers the
RFC-valid places where encoded words can occur in structured headers.
Diffstat (limited to 'Lib/email')
-rw-r--r-- | Lib/email/_header_value_parser.py | 23 |
1 file changed, 21 insertions, 2 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index a01d845..291437c 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1627,6 +1627,7 @@ def get_quoted_string(value): def get_atom(value): """atom = [CFWS] 1*atext [CFWS] + An atom could be an rfc2047 encoded word. """ atom = Atom() if value and value[0] in CFWS_LEADER: @@ -1635,7 +1636,15 @@ def get_atom(value): if value and value[0] in ATOM_ENDS: raise errors.HeaderParseError( "expected atom but found '{}'".format(value)) - token, value = get_atext(value) + if value.startswith('=?'): + try: + token, value = get_encoded_word(value) + except errors.HeaderParseError: + # XXX: need to figure out how to register defects when + # appropriate here. + token, value = get_atext(value) + else: + token, value = get_atext(value) atom.append(token) if value and value[0] in CFWS_LEADER: token, value = get_cfws(value) @@ -1664,12 +1673,22 @@ def get_dot_atom_text(value): def get_dot_atom(value): """ dot-atom = [CFWS] dot-atom-text [CFWS] + Any place we can have a dot atom, we could instead have an rfc2047 encoded + word. """ dot_atom = DotAtom() if value[0] in CFWS_LEADER: token, value = get_cfws(value) dot_atom.append(token) - token, value = get_dot_atom_text(value) + if value.startswith('=?'): + try: + token, value = get_encoded_word(value) + except errors.HeaderParseError: + # XXX: need to figure out how to register defects when + # appropriate here. + token, value = get_dot_atom_text(value) + else: + token, value = get_dot_atom_text(value) dot_atom.append(token) if value and value[0] in CFWS_LEADER: token, value = get_cfws(value) |