summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Paul Ganssle <pganssle@users.noreply.github.com> 2019-07-16 17:50:01 (GMT)
committer: Barry Warsaw <barry@python.org> 2019-07-16 17:50:01 (GMT)
commit: f69d5c61981ea97d251db515c7ff280fcc17182d (patch)
tree: 528eba8e080d37ec15085068f36fefd75ecbcb3a /Lib
parent: 1d8b04edfdc3030e645730492bfcc27b75718b96 (diff)
download: cpython-f69d5c61981ea97d251db515c7ff280fcc17182d.zip
cpython-f69d5c61981ea97d251db515c7ff280fcc17182d.tar.gz
cpython-f69d5c61981ea97d251db515c7ff280fcc17182d.tar.bz2
Fix infinite loop in email folding logic (GH-12732)
As far as I can tell, this infinite loop would be triggered if both of the following hold:
1. The value being folded contains a single word (no spaces) longer than max_line_length.
2. The max_line_length is shorter than the length of the encoding's name plus 9 characters.

bpo-36564: https://bugs.python.org/issue36564
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/email/_header_value_parser.py 17
-rw-r--r-- Lib/email/parser.py 1
-rw-r--r-- Lib/test/test_email/test_policy.py 20
3 files changed, 32 insertions, 6 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index aefc457..37dc764 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -2846,15 +2846,22 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset):
trailing_wsp = to_encode[-1]
to_encode = to_encode[:-1]
new_last_ew = len(lines[-1]) if last_ew is None else last_ew
+
+ encode_as = 'utf-8' if charset == 'us-ascii' else charset
+
+ # The RFC2047 chrome takes up 7 characters plus the length
+ # of the charset name.
+ chrome_len = len(encode_as) + 7
+
+ if (chrome_len + 1) >= maxlen:
+ raise errors.HeaderParseError(
+ "max_line_length is too small to fit an encoded word")
+
while to_encode:
remaining_space = maxlen - len(lines[-1])
- # The RFC2047 chrome takes up 7 characters plus the length
- # of the charset name.
- encode_as = 'utf-8' if charset == 'us-ascii' else charset
- text_space = remaining_space - len(encode_as) - 7
+ text_space = remaining_space - chrome_len
if text_space <= 0:
lines.append(' ')
- # XXX We'll get an infinite loop here if maxlen is <= 7
continue
to_encode_word = to_encode[:text_space]
diff --git a/Lib/email/parser.py b/Lib/email/parser.py
index 555b172..7db4da1 100644
--- a/Lib/email/parser.py
+++ b/Lib/email/parser.py
@@ -13,7 +13,6 @@ from email.feedparser import FeedParser, BytesFeedParser
from email._policybase import compat32
-
class Parser:
def __init__(self, _class=None, *, policy=compat32):
"""Parser of RFC 2822 and MIME email messages.
diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py
index 1e39aa0..e87c275 100644
--- a/Lib/test/test_email/test_policy.py
+++ b/Lib/test/test_email/test_policy.py
@@ -2,6 +2,7 @@ import io
import types
import textwrap
import unittest
+import email.errors
import email.policy
import email.parser
import email.generator
@@ -257,6 +258,25 @@ class PolicyAPITests(unittest.TestCase):
'Subject: \n' +
12 * ' =?utf-8?q?=C4=85?=\n')
+ def test_short_maxlen_error(self):
+ # RFC 2047 chrome takes up 7 characters, plus the length of the charset
+ # name, so folding should fail if maxlen is lower than the minimum
+ # required length for a line.
+
+ # Note: This is only triggered when there is a single word longer than
+ # max_line_length, hence the 1234567890 at the end of this whimsical
+ # subject. This is because when we encounter a word longer than
+ # max_line_length, it is broken down into encoded words to fit
+ # max_line_length. If the max_line_length isn't large enough to even
+ # contain the RFC 2047 chrome (`=?<charset>?q??=`), we fail.
+ subject = "Melt away the pounds with this one simple trick! 1234567890"
+
+ for maxlen in [3, 7, 9]:
+ with self.subTest(maxlen=maxlen):
+ policy = email.policy.default.clone(max_line_length=maxlen)
+ with self.assertRaises(email.errors.HeaderParseError):
+ policy.fold("Subject", subject)
+
# XXX: Need subclassing tests.
# For adding subclassed objects, make sure the usual rules apply (subclass
# wins), but that the order still works (right overrides left).