diff options
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/email/_header_value_parser.py | 21 | ||||
-rw-r--r-- | Lib/test/test_email/test__header_value_parser.py | 24 | ||||
-rw-r--r-- | Lib/test/test_email/test_headerregistry.py | 3 |
3 files changed, 45 insertions, 3 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 34969ab..35d746a 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -96,6 +96,18 @@ EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%') def quote_string(value): return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' +# Match a RFC 2047 word, looks like =?utf-8?q?someword?= +rfc2047_matcher = re.compile(r''' + =\? # literal =? + [^?]* # charset + \? # literal ? + [qQbB] # literal 'q' or 'b', case insensitive + \? # literal ? + .*? # encoded word + \?= # literal ?= +''', re.VERBOSE | re.MULTILINE) + + # # TokenList and its subclasses # @@ -1052,6 +1064,10 @@ def get_encoded_word(value): _validate_xtext(vtext) ew.append(vtext) text = ''.join(remainder) + # Encoded words should be followed by a WS + if value and value[0] not in WSP: + ew.defects.append(errors.InvalidHeaderDefect( + "missing trailing whitespace after encoded-word")) return ew, value def get_unstructured(value): @@ -1104,6 +1120,11 @@ def get_unstructured(value): unstructured.append(token) continue tok, *remainder = _wsp_splitter(value, 1) + # Split in the middle of an atom if there is a rfc2047 encoded word + # which does not have WSP on both sides. The defect will be registered + # the next time through the loop. + if rfc2047_matcher.search(tok): + tok, *remainder = value.partition('=?') vtext = ValueTerminal(tok, 'vtext') _validate_xtext(vtext) unstructured.append(vtext) diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 12da3cf..649923f 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -118,7 +118,7 @@ class TestParser(TestParserMixin, TestEmailBase): '=?us-ascii?q?first?==?utf-8?q?second?=', 'first', 'first', - [], + [errors.InvalidHeaderDefect], '=?utf-8?q?second?=') def test_get_encoded_word_sets_extra_attributes(self): @@ -361,6 +361,25 @@ class TestParser(TestParserMixin, TestEmailBase): '=?utf-8?q?foo?==?utf-8?q?bar?=', 'foobar', 'foobar', + [errors.InvalidHeaderDefect, + errors.InvalidHeaderDefect], + '') + + def test_get_unstructured_ew_without_leading_whitespace(self): + self._test_get_x( + self._get_unst, + 'nowhitespace=?utf-8?q?somevalue?=', + 'nowhitespacesomevalue', + 'nowhitespacesomevalue', + [errors.InvalidHeaderDefect], + '') + + def test_get_unstructured_ew_without_trailing_whitespace(self): + self._test_get_x( + self._get_unst, + '=?utf-8?q?somevalue?=nowhitespace', + 'somevaluenowhitespace', + 'somevaluenowhitespace', [errors.InvalidHeaderDefect], '') @@ -546,7 +565,8 @@ class TestParser(TestParserMixin, TestEmailBase): '"=?utf-8?Q?not_really_valid?="', '"not really valid"', 'not really valid', - [errors.InvalidHeaderDefect], + [errors.InvalidHeaderDefect, + errors.InvalidHeaderDefect], '') # get_comment diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py index 7550546..5d9b357 100644 --- a/Lib/test/test_email/test_headerregistry.py +++ b/Lib/test/test_email/test_headerregistry.py @@ -1180,7 +1180,8 @@ class TestAddressHeader(TestHeaderBase): 'rfc2047_atom_in_quoted_string_is_decoded': ('"=?utf-8?q?=C3=89ric?=" <foo@example.com>', - [errors.InvalidHeaderDefect], + [errors.InvalidHeaderDefect, + errors.InvalidHeaderDefect], 'Éric <foo@example.com>', 'Éric', 'foo@example.com', |