bpo-33529, email: Fix infinite loop in email header encoding (GH-12020)

author: Krzysztof Wojcik <wojcikk2903@users.noreply.github.com> 2019-05-14 16:55:23 (GMT)
committer: Victor Stinner <vstinner@redhat.com> 2019-05-14 16:55:23 (GMT)
commit: c1f5667be1e3ec5871560c677402c1252c6018a6 (patch)
tree: 199ab3f8a68ef855b41a086ce280faff1ca817bf
parent: 4d45a3b1107977baba9dce868e80d1d95bce4085 (diff)
download: cpython-c1f5667be1e3ec5871560c677402c1252c6018a6.zip
cpython-c1f5667be1e3ec5871560c677402c1252c6018a6.tar.gz
cpython-c1f5667be1e3ec5871560c677402c1252c6018a6.tar.bz2
4 files changed, 27 insertions, 14 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index bb26d5a..60d0d32 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -2723,16 +2723,19 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset):
             lines.append(' ')
             # XXX We'll get an infinite loop here if maxlen is <= 7
             continue
-        first_part = to_encode[:text_space]
-        ew = _ew.encode(first_part, charset=encode_as)
-        excess = len(ew) - remaining_space
-        if excess > 0:
-            # encode always chooses the shortest encoding, so this
-            # is guaranteed to fit at this point.
-            first_part = first_part[:-excess]
-            ew = _ew.encode(first_part)
-        lines[-1] += ew
-        to_encode = to_encode[len(first_part):]
+
+        to_encode_word = to_encode[:text_space]
+        encoded_word = _ew.encode(to_encode_word, charset=encode_as)
+        excess = len(encoded_word) - remaining_space
+        while excess > 0:
+            # Since the chunk to encode is guaranteed to fit into less than 100 characters,
+            # shrinking it by one at a time shouldn't take long.
+            to_encode_word = to_encode_word[:-1]
+            encoded_word = _ew.encode(to_encode_word, charset=encode_as)
+            excess = len(encoded_word) - remaining_space
+        lines[-1] += encoded_word
+        to_encode = to_encode[len(to_encode_word):]
+
         if to_encode:
             lines.append(' ')
             new_last_ew = len(lines[-1])
diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py
index 30ce0ba..d100709 100644
--- a/Lib/test/test_email/test_headerregistry.py
+++ b/Lib/test/test_email/test_headerregistry.py
@@ -1643,10 +1643,10 @@ class TestFolding(TestHeaderBase):
         self.assertEqual(
             h.fold(policy=policy.default),
             'X-Report-Abuse: =?utf-8?q?=3Chttps=3A//www=2Emailitapp=2E'
-                'com/report=5F?=\n'
-            ' =?utf-8?q?abuse=2Ephp=3Fmid=3Dxxx-xxx-xxxx'
-                'xxxxxxxxxxxxxxxxxxxx=3D=3D-xxx-?=\n'
-            ' =?utf-8?q?xx-xx=3E?=\n')
+                'com/report=5Fabuse?=\n'
+            ' =?utf-8?q?=2Ephp=3Fmid=3Dxxx-xxx-xxxx'
+                'xxxxxxxxxxxxxxxxxxxx=3D=3D-xxx-xx-xx?=\n'
+            ' =?utf-8?q?=3E?=\n')
 
 
 if __name__ == '__main__':
diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py
index 8fecb8a..c2c437e 100644
--- a/Lib/test/test_email/test_policy.py
+++ b/Lib/test/test_email/test_policy.py
@@ -237,6 +237,14 @@ class PolicyAPITests(unittest.TestCase):
                          email.policy.EmailPolicy.header_factory)
         self.assertEqual(newpolicy.__dict__, {'raise_on_defect': True})
 
+    def test_non_ascii_chars_do_not_cause_inf_loop(self):
+        policy = email.policy.default.clone(max_line_length=20)
+        actual = policy.fold('Subject', 'ą' * 12)
+        self.assertEqual(
+            actual,
+            'Subject: \n' +
+            12 * ' =?utf-8?q?=C4=85?=\n')
+
     # XXX: Need subclassing tests.
     # For adding subclassed objects, make sure the usual rules apply (subclass
     # wins), but that the order still works (right overrides left).
diff --git a/Misc/NEWS.d/next/Security/2019-02-24-18-48-16.bpo-33529.wpNNBD.rst b/Misc/NEWS.d/next/Security/2019-02-24-18-48-16.bpo-33529.wpNNBD.rst
new file mode 100644
index 0000000..84d16f5
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2019-02-24-18-48-16.bpo-33529.wpNNBD.rst
@@ -0,0 +1,2 @@
+Prevent fold function used in email header encoding from entering infinite
+loop when there are too many non-ASCII characters in a header.
author	Krzysztof Wojcik <wojcikk2903@users.noreply.github.com>	2019-05-14 16:55:23 (GMT)
committer	Victor Stinner <vstinner@redhat.com>	2019-05-14 16:55:23 (GMT)
commit	c1f5667be1e3ec5871560c677402c1252c6018a6 (patch)
tree	199ab3f8a68ef855b41a086ce280faff1ca817bf
parent	4d45a3b1107977baba9dce868e80d1d95bce4085 (diff)
download	cpython-c1f5667be1e3ec5871560c677402c1252c6018a6.zip cpython-c1f5667be1e3ec5871560c677402c1252c6018a6.tar.gz cpython-c1f5667be1e3ec5871560c677402c1252c6018a6.tar.bz2