summary | refs | log | tree | commit | diff | stats
path: root/Lib/email/_header_value_parser.py
diff options
context:
space:
mode:
author: Ashwin Ramaswami <aramaswamis@gmail.com> 2019-09-03 16:42:53 (GMT)
committer: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2019-09-03 16:42:53 (GMT)
commit: ea21389dda401457198fb214aa2c981a45ed9528 (patch)
tree: fa4232b8646ea424f1411e18fe22f64061bef9c5 /Lib/email/_header_value_parser.py
parent: 48058050cee5f6600150392100c162f223b4317f (diff)
download:
cpython-ea21389dda401457198fb214aa2c981a45ed9528.zip
cpython-ea21389dda401457198fb214aa2c981a45ed9528.tar.gz
cpython-ea21389dda401457198fb214aa2c981a45ed9528.tar.bz2
[3.7] bpo-37764: Fix infinite loop when parsing unstructured email headers. (GH-15239) (GH-15654)
Fixes a case in which email._header_value_parser.get_unstructured hangs in an infinite loop for some invalid headers. This covers the cases in which the header contains either:
- a case without trailing whitespace
- an invalid encoded word
This fix should also be backported to 3.7 and 3.8.
https://bugs.python.org/issue37764
(cherry picked from commit c5b242f87f31286ad38991bc3868cf4cfbf2b681)
Co-authored-by: Ashwin Ramaswami <aramaswamis@gmail.com>
Diffstat (limited to 'Lib/email/_header_value_parser.py')
-rw-r--r-- Lib/email/_header_value_parser.py 19
1 file changed, 16 insertions, 3 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 2306879..7cc9a46 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -933,6 +933,10 @@ class EWWhiteSpaceTerminal(WhiteSpaceTerminal):
return ''
+class _InvalidEwError(errors.HeaderParseError):
+ """Invalid encoded word found while parsing headers."""
+
+
# XXX these need to become classes and used as instances so
# that a program can't change them in a parse tree and screw
# up other parse trees. Maybe should have tests for that, too.
@@ -1037,7 +1041,10 @@ def get_encoded_word(value):
raise errors.HeaderParseError(
"expected encoded word but found {}".format(value))
remstr = ''.join(remainder)
- if len(remstr) > 1 and remstr[0] in hexdigits and remstr[1] in hexdigits:
+ if (len(remstr) > 1 and
+ remstr[0] in hexdigits and
+ remstr[1] in hexdigits and
+ tok.count('?') < 2):
# The ? after the CTE was followed by an encoded word escape (=XX).
rest, *remainder = remstr.split('?=', 1)
tok = tok + '?=' + rest
@@ -1049,7 +1056,7 @@ def get_encoded_word(value):
try:
text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
except ValueError:
- raise errors.HeaderParseError(
+ raise _InvalidEwError(
"encoded word format invalid: '{}'".format(ew.cte))
ew.charset = charset
ew.lang = lang
@@ -1099,9 +1106,12 @@ def get_unstructured(value):
token, value = get_fws(value)
unstructured.append(token)
continue
+ valid_ew = True
if value.startswith('=?'):
try:
token, value = get_encoded_word(value)
+ except _InvalidEwError:
+ valid_ew = False
except errors.HeaderParseError:
# XXX: Need to figure out how to register defects when
# appropriate here.
@@ -1123,7 +1133,10 @@ def get_unstructured(value):
# Split in the middle of an atom if there is a rfc2047 encoded word
# which does not have WSP on both sides. The defect will be registered
# the next time through the loop.
- if rfc2047_matcher.search(tok):
+ # This needs to only be performed when the encoded word is valid;
+ # otherwise, performing it on an invalid encoded word can cause
+ # the parser to go in an infinite loop.
+ if valid_ew and rfc2047_matcher.search(tok):
tok, *remainder = value.partition('=?')
vtext = ValueTerminal(tok, 'vtext')
_validate_xtext(vtext)