author     R David Murray <rdmurray@bitdance.com>    2015-03-30 01:53:05 (GMT)
committer  R David Murray <rdmurray@bitdance.com>    2015-03-30 01:53:05 (GMT)
commit     7d0325d6c811d66d98955e9fbf9a71c50f7e770c (patch)
tree       c5d0fd4e964ea1e64cbc75f0da85d5de03e81c93 /Lib/email
parent     a3a100b594982f10911f2c9db6cf954bb8ff8f20 (diff)
#23745: handle duplicate MIME parameter names in new parser.
This mimics get_param's error handling for the most part. It is slightly better in some regards, since get_param can produce some really weird results for duplicate *0* parts. It departs from get_param slightly in that, if we have a mix of non-extended and extended pieces for the same parameter name, the new parser assumes they were all supposed to be extended and concatenates all the values, whereas get_param always picks the non-extended parameter value. All of these error-recovery choices are more or less arbitrary.
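
The effect is easiest to see through email.headerregistry, which uses the new parser. A minimal sketch, assuming an interpreter that includes this change; the header values are invented for illustration and the comments describe the expected recovery:

from email.headerregistry import HeaderRegistry

make_header = HeaderRegistry()

# Duplicate non-extended parameter name: the first value should win
# and a defect should be recorded on the header.
ctype = make_header('Content-Type',
                    'text/plain; charset="utf-8"; charset="latin-1"')
print(dict(ctype.params))               # expected: {'charset': 'utf-8'}
print([str(d) for d in ctype.defects])

# RFC 2231 sections are reassembled in section-number order.
cdisp = make_header('Content-Disposition',
                    'attachment; filename*0="long"; filename*1="name.txt"')
print(dict(cdisp.params))               # expected: {'filename': 'longname.txt'}
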
Diffstat (limited to 'Lib/email')
-rw-r--r--    Lib/email/_header_value_parser.py    34
1 file changed, 27 insertions(+), 7 deletions(-)
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 1806cac..a9bdf44 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -71,6 +71,7 @@ import re
import urllib # For urllib.parse.unquote
from string import hexdigits
from collections import OrderedDict
+from operator import itemgetter
from email import _encoded_words as _ew
from email import errors
from email import utils
@@ -1098,15 +1099,34 @@ class MimeParameters(TokenList):
params[name] = []
params[name].append((token.section_number, token))
for name, parts in params.items():
- parts = sorted(parts)
- # XXX: there might be more recovery we could do here if, for
- # example, this is really a case of a duplicate attribute name.
+ parts = sorted(parts, key=itemgetter(0))
+ first_param = parts[0][1]
+ charset = first_param.charset
+ # Our arbitrary error recovery is to ignore duplicate parameters,
+ # to use appearance order if there are duplicate rfc 2231 parts,
+ # and to ignore gaps. This mimics the error recovery of get_param.
+ if not first_param.extended and len(parts) > 1:
+ if parts[1][0] == 0:
+ parts[1][1].defects.append(errors.InvalidHeaderDefect(
+ 'duplicate parameter name; duplicate(s) ignored'))
+ parts = parts[:1]
+ # Else assume the *0* was missing...note that this is different
+ # from get_param, but we registered a defect for this earlier.
value_parts = []
- charset = parts[0][1].charset
- for i, (section_number, param) in enumerate(parts):
+ i = 0
+ for section_number, param in parts:
if section_number != i:
- param.defects.append(errors.InvalidHeaderDefect(
- "inconsistent multipart parameter numbering"))
+ # We could get fancier here and look for a complete
+ # duplicate extended parameter and ignore the second one
+ # seen. But we're not doing that. The old code didn't.
+ if not param.extended:
+ param.defects.append(errors.InvalidHeaderDefect(
+ 'duplicate parameter name; duplicate ignored'))
+ continue
+ else:
+ param.defects.append(errors.InvalidHeaderDefect(
+ "inconsistent RFC2231 parameter numbering"))
+ i += 1
value = param.param_value
if param.extended:
try:
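
For reference, the recovery rules the hunk implements can be sketched in isolation. This is a hypothetical standalone helper over (section_number, value) pairs, not the parser's actual token objects:

from operator import itemgetter

def reassemble(parts, first_extended):
    # parts: (section_number, value) tuples in appearance order.
    # first_extended: whether the lowest-numbered piece used RFC 2231
    # *N* syntax.  sorted() is stable, so duplicate section numbers
    # keep appearance order; gaps in the numbering are simply ignored.
    parts = sorted(parts, key=itemgetter(0))
    if not first_extended and len(parts) > 1 and parts[1][0] == 0:
        # Duplicate plain (non-extended) parameter: ignore the later value.
        parts = parts[:1]
    return ''.join(value for _, value in parts)

print(reassemble([(0, 'utf-8'), (0, 'latin-1')], first_extended=False))  # utf-8
print(reassemble([(0, 'long'), (2, 'name.txt')], first_extended=True))   # longname.txt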