summaryrefslogtreecommitdiffstats
path: root/Lib/email/_header_value_parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/email/_header_value_parser.py')
-rw-r--r--Lib/email/_header_value_parser.py79
1 files changed, 48 insertions, 31 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 0369e01..a9bdf44 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -70,7 +70,8 @@ XXX: provide complete list of token types.
import re
import urllib # For urllib.parse.unquote
from string import hexdigits
-from collections import namedtuple, OrderedDict
+from collections import OrderedDict
+from operator import itemgetter
from email import _encoded_words as _ew
from email import errors
from email import utils
@@ -368,8 +369,7 @@ class TokenList(list):
yield (indent + ' !! invalid element in token '
'list: {!r}'.format(token))
else:
- for line in token._pp(indent+' '):
- yield line
+ yield from token._pp(indent+' ')
if self.defects:
extra = ' Defects: {}'.format(self.defects)
else:
@@ -1099,15 +1099,34 @@ class MimeParameters(TokenList):
params[name] = []
params[name].append((token.section_number, token))
for name, parts in params.items():
- parts = sorted(parts)
- # XXX: there might be more recovery we could do here if, for
- # example, this is really a case of a duplicate attribute name.
+ parts = sorted(parts, key=itemgetter(0))
+ first_param = parts[0][1]
+ charset = first_param.charset
+ # Our arbitrary error recovery is to ignore duplicate parameters,
+ # to use appearance order if there are duplicate rfc 2231 parts,
+ # and to ignore gaps. This mimics the error recovery of get_param.
+ if not first_param.extended and len(parts) > 1:
+ if parts[1][0] == 0:
+ parts[1][1].defects.append(errors.InvalidHeaderDefect(
+ 'duplicate parameter name; duplicate(s) ignored'))
+ parts = parts[:1]
+ # Else assume the *0* was missing...note that this is different
+ # from get_param, but we registered a defect for this earlier.
value_parts = []
- charset = parts[0][1].charset
- for i, (section_number, param) in enumerate(parts):
+ i = 0
+ for section_number, param in parts:
if section_number != i:
- param.defects.append(errors.InvalidHeaderDefect(
- "inconsistent multipart parameter numbering"))
+ # We could get fancier here and look for a complete
+ # duplicate extended parameter and ignore the second one
+ # seen. But we're not doing that. The old code didn't.
+ if not param.extended:
+ param.defects.append(errors.InvalidHeaderDefect(
+ 'duplicate parameter name; duplicate ignored'))
+ continue
+ else:
+ param.defects.append(errors.InvalidHeaderDefect(
+ "inconsistent RFC2231 parameter numbering"))
+ i += 1
value = param.param_value
if param.extended:
try:
@@ -1315,24 +1334,22 @@ RouteComponentMarker = ValueTerminal('@', 'route-component-marker')
# Parser
#
-"""Parse strings according to RFC822/2047/2822/5322 rules.
-
-This is a stateless parser. Each get_XXX function accepts a string and
-returns either a Terminal or a TokenList representing the RFC object named
-by the method and a string containing the remaining unparsed characters
-from the input. Thus a parser method consumes the next syntactic construct
-of a given type and returns a token representing the construct plus the
-unparsed remainder of the input string.
-
-For example, if the first element of a structured header is a 'phrase',
-then:
-
- phrase, value = get_phrase(value)
-
-returns the complete phrase from the start of the string value, plus any
-characters left in the string after the phrase is removed.
-
-"""
+# Parse strings according to RFC822/2047/2822/5322 rules.
+#
+# This is a stateless parser. Each get_XXX function accepts a string and
+# returns either a Terminal or a TokenList representing the RFC object named
+# by the method and a string containing the remaining unparsed characters
+# from the input. Thus a parser method consumes the next syntactic construct
+# of a given type and returns a token representing the construct plus the
+# unparsed remainder of the input string.
+#
+# For example, if the first element of a structured header is a 'phrase',
+# then:
+#
+# phrase, value = get_phrase(value)
+#
+# returns the complete phrase from the start of the string value, plus any
+# characters left in the string after the phrase is removed.
_wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split
_non_atom_end_matcher = re.compile(r"[^{}]+".format(
@@ -2900,7 +2917,7 @@ def parse_content_disposition_header(value):
try:
token, value = get_token(value)
except errors.HeaderParseError:
- ctype.defects.append(errors.InvalidHeaderDefect(
+ disp_header.defects.append(errors.InvalidHeaderDefect(
"Expected content disposition but found {!r}".format(value)))
_find_mime_parameters(disp_header, value)
return disp_header
@@ -2931,8 +2948,8 @@ def parse_content_transfer_encoding_header(value):
try:
token, value = get_token(value)
except errors.HeaderParseError:
- ctype.defects.append(errors.InvalidHeaderDefect(
- "Expected content trnasfer encoding but found {!r}".format(value)))
+ cte_header.defects.append(errors.InvalidHeaderDefect(
+ "Expected content transfer encoding but found {!r}".format(value)))
else:
cte_header.append(token)
cte_header.cte = token.value.strip().lower()