1 files changed, 48 insertions, 31 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 0369e01..a9bdf44 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -70,7 +70,8 @@ XXX: provide complete list of token types.
 import re
 import urllib   # For urllib.parse.unquote
 from string import hexdigits
-from collections import namedtuple, OrderedDict
+from collections import OrderedDict
+from operator import itemgetter
 from email import _encoded_words as _ew
 from email import errors
 from email import utils
@@ -368,8 +369,7 @@ class TokenList(list):
                 yield (indent + '    !! invalid element in token '
                                         'list: {!r}'.format(token))
             else:
-                for line in token._pp(indent+'    '):
-                    yield line
+                yield from token._pp(indent+'    ')
         if self.defects:
             extra = ' Defects: {}'.format(self.defects)
         else:
@@ -1099,15 +1099,34 @@ class MimeParameters(TokenList):
                 params[name] = []
             params[name].append((token.section_number, token))
         for name, parts in params.items():
-            parts = sorted(parts)
-            # XXX: there might be more recovery we could do here if, for
-            # example, this is really a case of a duplicate attribute name.
+            parts = sorted(parts, key=itemgetter(0))
+            first_param = parts[0][1]
+            charset = first_param.charset
+            # Our arbitrary error recovery is to ignore duplicate parameters,
+            # to use appearance order if there are duplicate rfc 2231 parts,
+            # and to ignore gaps.  This mimics the error recovery of get_param.
+            if not first_param.extended and len(parts) > 1:
+                if parts[1][0] == 0:
+                    parts[1][1].defects.append(errors.InvalidHeaderDefect(
+                        'duplicate parameter name; duplicate(s) ignored'))
+                    parts = parts[:1]
+                # Else assume the *0* was missing...note that this is different
+                # from get_param, but we registered a defect for this earlier.
             value_parts = []
-            charset = parts[0][1].charset
-            for i, (section_number, param) in enumerate(parts):
+            i = 0
+            for section_number, param in parts:
                 if section_number != i:
-                    param.defects.append(errors.InvalidHeaderDefect(
-                        "inconsistent multipart parameter numbering"))
+                    # We could get fancier here and look for a complete
+                    # duplicate extended parameter and ignore the second one
+                    # seen.  But we're not doing that.  The old code didn't.
+                    if not param.extended:
+                        param.defects.append(errors.InvalidHeaderDefect(
+                            'duplicate parameter name; duplicate ignored'))
+                        continue
+                    else:
+                        param.defects.append(errors.InvalidHeaderDefect(
+                            "inconsistent RFC2231 parameter numbering"))
+                i += 1
                 value = param.param_value
                 if param.extended:
                     try:
@@ -1315,24 +1334,22 @@ RouteComponentMarker = ValueTerminal('@', 'route-component-marker')
 # Parser
 #
 
-"""Parse strings according to RFC822/2047/2822/5322 rules.
-
-This is a stateless parser.  Each get_XXX function accepts a string and
-returns either a Terminal or a TokenList representing the RFC object named
-by the method and a string containing the remaining unparsed characters
-from the input.  Thus a parser method consumes the next syntactic construct
-of a given type and returns a token representing the construct plus the
-unparsed remainder of the input string.
-
-For example, if the first element of a structured header is a 'phrase',
-then:
-
-    phrase, value = get_phrase(value)
-
-returns the complete phrase from the start of the string value, plus any
-characters left in the string after the phrase is removed.
-
-"""
+# Parse strings according to RFC822/2047/2822/5322 rules.
+#
+# This is a stateless parser.  Each get_XXX function accepts a string and
+# returns either a Terminal or a TokenList representing the RFC object named
+# by the method and a string containing the remaining unparsed characters
+# from the input.  Thus a parser method consumes the next syntactic construct
+# of a given type and returns a token representing the construct plus the
+# unparsed remainder of the input string.
+#
+# For example, if the first element of a structured header is a 'phrase',
+# then:
+#
+#     phrase, value = get_phrase(value)
+#
+# returns the complete phrase from the start of the string value, plus any
+# characters left in the string after the phrase is removed.
 
 _wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split
 _non_atom_end_matcher = re.compile(r"[^{}]+".format(
@@ -2900,7 +2917,7 @@ def parse_content_disposition_header(value):
     try:
         token, value = get_token(value)
     except errors.HeaderParseError:
-        ctype.defects.append(errors.InvalidHeaderDefect(
+        disp_header.defects.append(errors.InvalidHeaderDefect(
             "Expected content disposition but found {!r}".format(value)))
         _find_mime_parameters(disp_header, value)
         return disp_header
@@ -2931,8 +2948,8 @@ def parse_content_transfer_encoding_header(value):
     try:
         token, value = get_token(value)
     except errors.HeaderParseError:
-        ctype.defects.append(errors.InvalidHeaderDefect(
-            "Expected content trnasfer encoding but found {!r}".format(value)))
+        cte_header.defects.append(errors.InvalidHeaderDefect(
+            "Expected content transfer encoding but found {!r}".format(value)))
     else:
         cte_header.append(token)
         cte_header.cte = token.value.strip().lower()