author     R David Murray <rdmurray@bitdance.com>    2015-03-30 01:53:05 (GMT)
committer  R David Murray <rdmurray@bitdance.com>    2015-03-30 01:53:05 (GMT)
commit     7d0325d6c811d66d98955e9fbf9a71c50f7e770c (patch)
tree       c5d0fd4e964ea1e64cbc75f0da85d5de03e81c93 /Lib/email
parent     a3a100b594982f10911f2c9db6cf954bb8ff8f20 (diff)
#23745: handle duplicate MIME parameter names in new parser.
This mimics get_param's error handling for the most part. It is slightly better in some regards, since get_param can produce some really weird results for duplicate *0* parts. It departs from get_param slightly in that, if we have a mix of non-extended and extended pieces for the same parameter name, the new parser assumes they were all supposed to be extended and concatenates all the values, whereas get_param always picks the non-extended parameter value. All of these error-recovery choices are more or less arbitrary.
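
The effect is easiest to see through email.headerregistry, which uses the new parser. A minimal sketch, assuming an interpreter that includes this change; the header values are invented for illustration and the comments describe the expected recovery:

from email.headerregistry import HeaderRegistry

make_header = HeaderRegistry()

# Duplicate non-extended parameter name: the first value should win
# and a defect should be recorded on the header.
ctype = make_header('Content-Type',
                    'text/plain; charset="utf-8"; charset="latin-1"')
print(dict(ctype.params))               # expected: {'charset': 'utf-8'}
print([str(d) for d in ctype.defects])

# RFC 2231 sections are reassembled in section-number order.
cdisp = make_header('Content-Disposition',
                    'attachment; filename*0="long"; filename*1="name.txt"')
print(dict(cdisp.params))               # expected: {'filename': 'longname.txt'}
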
Diffstat (limited to 'Lib/email')
-rw-r--r--    Lib/email/_header_value_parser.py    34
1 file changed, 27 insertions(+), 7 deletions(-)
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 1806cac..a9bdf44 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -71,6 +71,7 @@ import re
import urllib # For urllib.parse.unquote
from string import hexdigits
from collections import OrderedDict
+from operator import itemgetter
from email import _encoded_words as _ew
from email import errors
from email import utils
@@ -1098,15 +1099,34 @@ class MimeParameters(TokenList):
params[name] = []
params[name].append((token.section_number, token))
for name, parts in params.items():
- parts = sorted(parts)
- # XXX: there might be more recovery we could do here if, for
- # example, this is really a case of a duplicate attribute name.
+ parts = sorted(parts, key=itemgetter(0))
+ first_param = parts[0][1]
+ charset = first_param.charset
+ # Our arbitrary error recovery is to ignore duplicate parameters,
+ # to use appearance order if there are duplicate rfc 2231 parts,
+ # and to ignore gaps. This mimics the error recovery of get_param.
+ if not first_param.extended and len(parts) > 1:
+ if parts[1][0] == 0:
+ parts[1][1].defects.append(errors.InvalidHeaderDefect(
+ 'duplicate parameter name; duplicate(s) ignored'))
+ parts = parts[:1]
+ # Else assume the *0* was missing...note that this is different
+ # from get_param, but we registered a defect for this earlier.
value_parts = []
- charset = parts[0][1].charset
- for i, (section_number, param) in enumerate(parts):
+ i = 0
+ for section_number, param in parts:
if section_number != i:
- param.defects.append(errors.InvalidHeaderDefect(
- "inconsistent multipart parameter numbering"))
+ # We could get fancier here and look for a complete
+ # duplicate extended parameter and ignore the second one
+ # seen. But we're not doing that. The old code didn't.
+ if not param.extended:
+ param.defects.append(errors.InvalidHeaderDefect(
+ 'duplicate parameter name; duplicate ignored'))
+ continue
+ else:
+ param.defects.append(errors.InvalidHeaderDefect(
+ "inconsistent RFC2231 parameter numbering"))
+ i += 1
value = param.param_value
if param.extended:
try:
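
For reference, the recovery rules the hunk implements can be sketched in isolation. This is a hypothetical standalone helper over (section_number, value) pairs, not the parser's actual token objects:

from operator import itemgetter

def reassemble(parts, first_extended):
    # parts: (section_number, value) tuples in appearance order.
    # first_extended: whether the lowest-numbered piece used RFC 2231
    # *N* syntax.  sorted() is stable, so duplicate section numbers
    # keep appearance order; gaps in the numbering are simply ignored.
    parts = sorted(parts, key=itemgetter(0))
    if not first_extended and len(parts) > 1 and parts[1][0] == 0:
        # Duplicate plain (non-extended) parameter: ignore the later value.
        parts = parts[:1]
    return ''.join(value for _, value in parts)

print(reassemble([(0, 'utf-8'), (0, 'latin-1')], first_extended=False))  # utf-8
print(reassemble([(0, 'long'), (2, 'name.txt')], first_extended=True))   # longname.txt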