[3.12] gh-92081: Fix for email.generator.Generator with whitespace between encoded words. (GH-92281) (#119246)

* Fix for email.generator.Generator with whitespace between encoded words. email.generator.Generator currently does not handle whitespace between encoded words correctly when the encoded words span multiple lines. The current generator will create an encoded word for each line. If the end of the line happens to correspond with the end real word in the plaintext, the generator will place an unencoded space at the start of the subsequent lines to represent the whitespace between the plaintext words. A compliant decoder will strip all the whitespace from between two encoded words which leads to missing spaces in the round-tripped output. The fix for this is to make sure that whitespace between two encoded words ends up inside of one or the other of the encoded words. This fix places the space inside of the second encoded word. A second problem happens with continuation lines. A continuation line that starts with whitespace and is followed by a non-encoded word is fine because the newline between such continuation lines is defined as condensing to a single space character. When the continuation line starts with whitespace followed by an encoded word, however, the RFCs specify that the word is run together with the encoded word on the previous line. This is because normal words are filded on syntactic breaks by encoded words are not. The solution to this is to add the whitespace to the start of the encoded word on the continuation line. Test cases are from GH-92081 * Rename a variable so it's not confused with the final variable. (cherry picked from commit a6fdb31b6714c9f3c65fefbb3fe388b2b139a75f) Co-authored-by: Toshio Kuratomi <a.badger@gmail.com>
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2024-05-20 20:10:49 (GMT)
committer: GitHub <noreply@github.com> 2024-05-20 20:10:49 (GMT)
commit: ffe9ba04778f852a14f2404b5fcf13cb3ba1bf45 (patch)
tree: 7ae25a6cf5f967e43f7bea8ae495abe179537fe1 /Lib/test
parent: 386e49259c517205045b98459bf8fdd3ef098e17 (diff)
download: cpython-ffe9ba04778f852a14f2404b5fcf13cb3ba1bf45.zip
cpython-ffe9ba04778f852a14f2404b5fcf13cb3ba1bf45.tar.gz
cpython-ffe9ba04778f852a14f2404b5fcf13cb3ba1bf45.tar.bz2
2 files changed, 37 insertions, 1 deletions
diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py
index 3ebcb68..bfff105 100644
--- a/Lib/test/test_email/test_generator.py
+++ b/Lib/test/test_email/test_generator.py
@@ -281,6 +281,41 @@ class TestBytesGenerator(TestGeneratorBase, TestEmailBase):
     ioclass = io.BytesIO
     typ = lambda self, x: x.encode('ascii')
 
+    def test_defaults_handle_spaces_between_encoded_words_when_folded(self):
+        source = ("Уведомление о принятии в работу обращения для"
+                  " подключения услуги")
+        expected = ('Subject: =?utf-8?b?0KPQstC10LTQvtC80LvQtdC90LjQtSDQviDQv9GA0LjQvdGP0YLQuNC4?=\n'
+                    ' =?utf-8?b?INCyINGA0LDQsdC+0YLRgyDQvtCx0YDQsNGJ0LXQvdC40Y8g0LTQu9GPINC/0L4=?=\n'
+                    ' =?utf-8?b?0LTQutC70Y7Rh9C10L3QuNGPINGD0YHQu9GD0LPQuA==?=\n\n').encode('ascii')
+        msg = EmailMessage()
+        msg['Subject'] = source
+        s = io.BytesIO()
+        g = BytesGenerator(s)
+        g.flatten(msg)
+        self.assertEqual(s.getvalue(), expected)
+
+    def test_defaults_handle_spaces_at_start_of_subject(self):
+        source = " Уведомление"
+        expected = b"Subject:  =?utf-8?b?0KPQstC10LTQvtC80LvQtdC90LjQtQ==?=\n\n"
+        msg = EmailMessage()
+        msg['Subject'] = source
+        s = io.BytesIO()
+        g = BytesGenerator(s)
+        g.flatten(msg)
+        self.assertEqual(s.getvalue(), expected)
+
+    def test_defaults_handle_spaces_at_start_of_continuation_line(self):
+        source = " ф ффффффффффффффффффф ф ф"
+        expected = (b"Subject:  "
+                    b"=?utf-8?b?0YQg0YTRhNGE0YTRhNGE0YTRhNGE0YTRhNGE0YTRhNGE0YTRhNGE0YQ=?=\n"
+                    b" =?utf-8?b?INGEINGE?=\n\n")
+        msg = EmailMessage()
+        msg['Subject'] = source
+        s = io.BytesIO()
+        g = BytesGenerator(s)
+        g.flatten(msg)
+        self.assertEqual(s.getvalue(), expected)
+
     def test_cte_type_7bit_handles_unknown_8bit(self):
         source = ("Subject: Maintenant je vous présente mon "
                  "collègue\n\n").encode('utf-8')
diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py
index bb7ca8d..5a608a0 100644
--- a/Lib/test/test_email/test_headerregistry.py
+++ b/Lib/test/test_email/test_headerregistry.py
@@ -7,6 +7,7 @@ from email.message import Message
 from test.test_email import TestEmailBase, parameterize
 from email import headerregistry
 from email.headerregistry import Address, Group
+from email.header import decode_header
 from test.support import ALWAYS_EQ
 
 
@@ -1648,7 +1649,7 @@ class TestFolding(TestHeaderBase):
                     'Lôrem ipsum dôlôr sit amet, cônsectetuer adipiscing. '
                     'Suspendisse pôtenti. Aliquam nibh. Suspendisse pôtenti.',
                     '=?utf-8?q?L=C3=B4rem_ipsum_d=C3=B4l=C3=B4r_sit_amet=2C_c'
-                    '=C3=B4nsectetuer?=\n =?utf-8?q?adipiscing=2E_Suspendisse'
+                    '=C3=B4nsectetuer?=\n =?utf-8?q?_adipiscing=2E_Suspendisse'
                     '_p=C3=B4tenti=2E_Aliquam_nibh=2E?=\n Suspendisse =?utf-8'
                     '?q?p=C3=B4tenti=2E?=',
                     ),
author	Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>	2024-05-20 20:10:49 (GMT)
committer	GitHub <noreply@github.com>	2024-05-20 20:10:49 (GMT)
commit	ffe9ba04778f852a14f2404b5fcf13cb3ba1bf45 (patch)
tree	7ae25a6cf5f967e43f7bea8ae495abe179537fe1 /Lib/test
parent	386e49259c517205045b98459bf8fdd3ef098e17 (diff)
download	cpython-ffe9ba04778f852a14f2404b5fcf13cb3ba1bf45.zip cpython-ffe9ba04778f852a14f2404b5fcf13cb3ba1bf45.tar.gz cpython-ffe9ba04778f852a14f2404b5fcf13cb3ba1bf45.tar.bz2