summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorR David Murray <rdmurray@bitdance.com>2014-02-08 18:13:01 (GMT)
committerR David Murray <rdmurray@bitdance.com>2014-02-08 18:13:01 (GMT)
commit01e46ee7e28f5f14ede2b7078cfd277199751dc3 (patch)
treeab1808c55105351d4bf0a8e794e9d7eb70139d45
parentff9616bbf7ecbd55a7206dae28f06ef90f270566 (diff)
parent0400d33928e6b463db164836da670700f03edc5d (diff)
downloadcpython-01e46ee7e28f5f14ede2b7078cfd277199751dc3.zip
cpython-01e46ee7e28f5f14ede2b7078cfd277199751dc3.tar.gz
cpython-01e46ee7e28f5f14ede2b7078cfd277199751dc3.tar.bz2
Merge: #16983: Apply postel's law to encoded words inside quoted strings.
-rw-r--r--Lib/email/_header_value_parser.py7
-rw-r--r--Lib/test/test_email/test__header_value_parser.py9
-rw-r--r--Lib/test/test_email/test_headerregistry.py10
-rw-r--r--Misc/NEWS3
4 files changed, 29 insertions, 0 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 0392379..04ab84c 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -1556,6 +1556,13 @@ def get_bare_quoted_string(value):
while value and value[0] != '"':
if value[0] in WSP:
token, value = get_fws(value)
+ elif value[:2] == '=?':
+ try:
+ token, value = get_encoded_word(value)
+ bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
+ "encoded word inside quoted string"))
+ except errors.HeaderParseError:
+ token, value = get_qcontent(value)
else:
token, value = get_qcontent(value)
bare_quoted_string.append(token)
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index 646082b..32996ca 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -540,6 +540,15 @@ class TestParser(TestParserMixin, TestEmailBase):
self._test_get_x(parser.get_bare_quoted_string,
'""', '""', '', [], '')
+ # Issue 16983: apply postel's law to some bad encoding.
+ def test_encoded_word_inside_quotes(self):
+ self._test_get_x(parser.get_bare_quoted_string,
+ '"=?utf-8?Q?not_really_valid?="',
+ '"not really valid"',
+ 'not really valid',
+ [errors.InvalidHeaderDefect],
+ '')
+
# get_comment
def test_get_comment_only(self):
diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py
index f2df372..7d64a38 100644
--- a/Lib/test/test_email/test_headerregistry.py
+++ b/Lib/test/test_email/test_headerregistry.py
@@ -1155,6 +1155,16 @@ class TestAddressHeader(TestHeaderBase):
'example.com',
None),
+ 'rfc2047_atom_in_quoted_string_is_decoded':
+ ('"=?utf-8?q?=C3=89ric?=" <foo@example.com>',
+ [errors.InvalidHeaderDefect],
+ 'Éric <foo@example.com>',
+ 'Éric',
+ 'foo@example.com',
+ 'foo',
+ 'example.com',
+ None),
+
}
# XXX: Need many more examples, and in particular some with names in
diff --git a/Misc/NEWS b/Misc/NEWS
index 4df5a41..be6fee9 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -27,6 +27,9 @@ Core and Builtins
Library
-------
+- Issue #16983: the new email header parsing code will now decode encoded words
+ that are (incorrectly) surrounded by quotes, and register a defect.
+
- Issue #19772: email.generator no longer mutates the message object when
doing a down-transform from 8bit to 7bit CTEs.