diff options
-rw-r--r-- | Lib/email/_header_value_parser.py | 34 | ||||
-rw-r--r-- | Lib/test/test_email/test__header_value_parser.py | 109 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
3 files changed, 139 insertions, 7 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 1806cac..a9bdf44 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -71,6 +71,7 @@ import re import urllib # For urllib.parse.unquote from string import hexdigits from collections import OrderedDict +from operator import itemgetter from email import _encoded_words as _ew from email import errors from email import utils @@ -1098,15 +1099,34 @@ class MimeParameters(TokenList): params[name] = [] params[name].append((token.section_number, token)) for name, parts in params.items(): - parts = sorted(parts) - # XXX: there might be more recovery we could do here if, for - # example, this is really a case of a duplicate attribute name. + parts = sorted(parts, key=itemgetter(0)) + first_param = parts[0][1] + charset = first_param.charset + # Our arbitrary error recovery is to ignore duplicate parameters, + # to use appearance order if there are duplicate rfc 2231 parts, + # and to ignore gaps. This mimics the error recovery of get_param. + if not first_param.extended and len(parts) > 1: + if parts[1][0] == 0: + parts[1][1].defects.append(errors.InvalidHeaderDefect( + 'duplicate parameter name; duplicate(s) ignored')) + parts = parts[:1] + # Else assume the *0* was missing...note that this is different + # from get_param, but we registered a defect for this earlier. value_parts = [] - charset = parts[0][1].charset - for i, (section_number, param) in enumerate(parts): + i = 0 + for section_number, param in parts: if section_number != i: - param.defects.append(errors.InvalidHeaderDefect( - "inconsistent multipart parameter numbering")) + # We could get fancier here and look for a complete + # duplicate extended parameter and ignore the second one + # seen. But we're not doing that. The old code didn't. + if not param.extended: + param.defects.append(errors.InvalidHeaderDefect( + 'duplicate parameter name; duplicate ignored')) + continue + else: + param.defects.append(errors.InvalidHeaderDefect( + "inconsistent RFC2231 parameter numbering")) + i += 1 value = param.param_value if param.extended: try: diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 5404d19..d028f74 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2456,6 +2456,115 @@ class TestParser(TestParserMixin, TestEmailBase): ";foo", ";foo", ";foo", [errors.InvalidHeaderDefect]*3 ) + +@parameterize +class Test_parse_mime_parameters(TestParserMixin, TestEmailBase): + + def mime_parameters_as_value(self, + value, + tl_str, + tl_value, + params, + defects): + mime_parameters = self._test_parse_x(parser.parse_mime_parameters, + value, tl_str, tl_value, defects) + self.assertEqual(mime_parameters.token_type, 'mime-parameters') + self.assertEqual(list(mime_parameters.params), params) + + + mime_parameters_params = { + + 'simple': ( + 'filename="abc.py"', + ' filename="abc.py"', + 'filename=abc.py', + [('filename', 'abc.py')], + []), + + 'multiple_keys': ( + 'filename="abc.py"; xyz=abc', + ' filename="abc.py"; xyz="abc"', + 'filename=abc.py; xyz=abc', + [('filename', 'abc.py'), ('xyz', 'abc')], + []), + + 'split_value': ( + "filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66", + ' filename="201.tif"', + "filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66", + [('filename', '201.tif')], + []), + + # Note that it is undefined what we should do for error recovery when + # there are duplicate parameter names or duplicate parts in a split + # part. We choose to ignore all duplicate parameters after the first + # and to take duplicate or missing rfc 2231 parts in apperance order. + # This is backward compatible with get_param's behavior, but the + # decisions are arbitrary. + + 'duplicate_key': ( + 'filename=abc.gif; filename=def.tiff', + ' filename="abc.gif"', + "filename=abc.gif; filename=def.tiff", + [('filename', 'abc.gif')], + [errors.InvalidHeaderDefect]), + + 'duplicate_key_with_split_value': ( + "filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;" + " filename=abc.gif", + ' filename="201.tif"', + "filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;" + " filename=abc.gif", + [('filename', '201.tif')], + [errors.InvalidHeaderDefect]), + + 'duplicate_key_with_split_value_other_order': ( + "filename=abc.gif; " + " filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66", + ' filename="abc.gif"', + "filename=abc.gif;" + " filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66", + [('filename', 'abc.gif')], + [errors.InvalidHeaderDefect]), + + 'duplicate_in_split_value': ( + "filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;" + " filename*1*=abc.gif", + ' filename="201.tifabc.gif"', + "filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;" + " filename*1*=abc.gif", + [('filename', '201.tifabc.gif')], + [errors.InvalidHeaderDefect]), + + 'missing_split_value': ( + "filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;", + ' filename="201.tif"', + "filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;", + [('filename', '201.tif')], + [errors.InvalidHeaderDefect]), + + 'duplicate_and_missing_split_value': ( + "filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;" + " filename*3*=abc.gif", + ' filename="201.tifabc.gif"', + "filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;" + " filename*3*=abc.gif", + [('filename', '201.tifabc.gif')], + [errors.InvalidHeaderDefect]*2), + + # Here we depart from get_param and assume the *0* was missing. + 'duplicate_with_broken_split_value': ( + "filename=abc.gif; " + " filename*2*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66", + ' filename="abc.gif201.tif"', + "filename=abc.gif;" + " filename*2*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66", + [('filename', 'abc.gif201.tif')], + # Defects are apparent missing *0*, and two 'out of sequence'. + [errors.InvalidHeaderDefect]*3), + + } + @parameterize class Test_parse_mime_version(TestParserMixin, TestEmailBase): @@ -21,6 +21,9 @@ Core and Builtins Library ------- +- Issue #23745: The new email header parser now handles duplicate MIME + parameter names without error, similar to how get_param behaves. + - Issue #23792: Ignore KeyboardInterrupt when the pydoc pager is active. This mimics the behavior of the standard unix pagers, and prevents pipepager from shutting down while the pager itself is still running. |