diff options
author | websurfer5 <49998481+websurfer5@users.noreply.github.com> | 2019-06-06 19:53:27 (GMT) |
---|---|---|
committer | Barry Warsaw <barry@python.org> | 2019-06-06 19:53:27 (GMT) |
commit | f6713e84afc5addcfa8477dbdf2c027787f711c0 (patch) | |
tree | 0102e1ffc13da4bf9c0b0acb39c6729cfae8a377 | |
parent | e0c0c7e8c9f8153a54b92e43aa3d09e69a9fd0c0 (diff) | |
download | cpython-f6713e84afc5addcfa8477dbdf2c027787f711c0.zip cpython-f6713e84afc5addcfa8477dbdf2c027787f711c0.tar.gz cpython-f6713e84afc5addcfa8477dbdf2c027787f711c0.tar.bz2 |
bpo-36520: Email header folded incorrectly (#13608)
* bpo-36520: reset the encoded word offset when starting a new
line during an email header folding operation
* 📜🤖 Added by blurb_it.
* bpo-36520: add an additional test case, and provide descriptive
comments for the test_folding_with_utf8_encoding_* tests
* bpo-36520: fix whitespace issue
* bpo-36520: changes per reviewer request -- remove extraneous
backslashes; add whitespace between terminating quotes and
line-continuation backslashes; use "bpo-" instead of
"issue #" in comments
-rw-r--r-- | Lib/email/_header_value_parser.py | 1 | ||||
-rw-r--r-- | Lib/test/test_email/test_message.py | 131 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst | 1 |
3 files changed, 133 insertions, 0 deletions
```diff
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 35d746a..bb5ff8d 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -2789,6 +2789,7 @@ def _refold_parse_tree(parse_tree, *, policy):
             newline = _steal_trailing_WSP_if_exists(lines)
             if newline or part.startswith_fws():
                 lines.append(newline + tstr)
+                last_ew = None
                 continue
         if not hasattr(part, 'encode'):
             # It's not a terminal, try folding the subparts.
diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py
index f3a57df..5dc46e1 100644
--- a/Lib/test/test_email/test_message.py
+++ b/Lib/test/test_email/test_message.py
@@ -784,6 +784,137 @@ class TestEmailMessage(TestEmailMessageBase, TestEmailBase):
         m['Subject'] = 'unicöde'
         self.assertEqual(str(m), 'Subject: unicöde\n\n')
 
+    def test_folding_with_utf8_encoding_1(self):
+        # bpo-36520
+        #
+        # Fold a line that contains UTF-8 words before
+        # and after the whitespace fold point, where the
+        # line length limit is reached within an ASCII
+        # word.
+
+        m = EmailMessage()
+        m['Subject'] = 'Hello Wörld! Hello Wörld! ' \
+                       'Hello Wörld! Hello Wörld!Hello Wörld!'
+        self.assertEqual(bytes(m),
+                         b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W'
+                         b'=C3=B6rld!_Hello_W=C3=B6rld!?=\n'
+                         b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
+
+
+    def test_folding_with_utf8_encoding_2(self):
+        # bpo-36520
+        #
+        # Fold a line that contains UTF-8 words before
+        # and after the whitespace fold point, where the
+        # line length limit is reached at the end of an
+        # encoded word.
+
+        m = EmailMessage()
+        m['Subject'] = 'Hello Wörld! Hello Wörld! ' \
+                       'Hello Wörlds123! Hello Wörld!Hello Wörld!'
+        self.assertEqual(bytes(m),
+                         b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W'
+                         b'=C3=B6rld!_Hello_W=C3=B6rlds123!?=\n'
+                         b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
+
+    def test_folding_with_utf8_encoding_3(self):
+        # bpo-36520
+        #
+        # Fold a line that contains UTF-8 words before
+        # and after the whitespace fold point, where the
+        # line length limit is reached at the end of the
+        # first word.
+
+        m = EmailMessage()
+        m['Subject'] = 'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123! ' \
+                       'Hello Wörld!Hello Wörld!'
+        self.assertEqual(bytes(m), \
+                         b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W'
+                         b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n'
+                         b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
+
+    def test_folding_with_utf8_encoding_4(self):
+        # bpo-36520
+        #
+        # Fold a line that contains UTF-8 words before
+        # and after the fold point, where the first
+        # word is UTF-8 and the fold point is within
+        # the word.
+
+        m = EmailMessage()
+        m['Subject'] = 'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123!-Hello' \
+                       ' Wörld!Hello Wörld!'
+        self.assertEqual(bytes(m),
+                         b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W'
+                         b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n'
+                         b' =?utf-8?q?-Hello_W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
+
+    def test_folding_with_utf8_encoding_5(self):
+        # bpo-36520
+        #
+        # Fold a line that contains a UTF-8 word after
+        # the fold point.
+
+        m = EmailMessage()
+        m['Subject'] = '123456789 123456789 123456789 123456789 123456789' \
+                       ' 123456789 123456789 Hello Wörld!'
+        self.assertEqual(bytes(m),
+                         b'Subject: 123456789 123456789 123456789 123456789'
+                         b' 123456789 123456789 123456789\n'
+                         b' Hello =?utf-8?q?W=C3=B6rld!?=\n\n')
+
+    def test_folding_with_utf8_encoding_6(self):
+        # bpo-36520
+        #
+        # Fold a line that contains a UTF-8 word before
+        # the fold point and ASCII words after
+
+        m = EmailMessage()
+        m['Subject'] = '123456789 123456789 123456789 123456789 Hello Wörld!' \
+                       ' 123456789 123456789 123456789 123456789 123456789' \
+                       ' 123456789'
+        self.assertEqual(bytes(m),
+                         b'Subject: 123456789 123456789 123456789 123456789'
+                         b' Hello =?utf-8?q?W=C3=B6rld!?=\n 123456789 '
+                         b'123456789 123456789 123456789 123456789 '
+                         b'123456789\n\n')
+
+    def test_folding_with_utf8_encoding_7(self):
+        # bpo-36520
+        #
+        # Fold a line twice that contains UTF-8 words before
+        # and after the first fold point, and ASCII words
+        # after the second fold point.
+
+        m = EmailMessage()
+        m['Subject'] = '123456789 123456789 Hello Wörld! Hello Wörld! ' \
+                       '123456789-123456789 123456789 Hello Wörld! 123456789' \
+                       ' 123456789'
+        self.assertEqual(bytes(m),
+                         b'Subject: 123456789 123456789 Hello =?utf-8?q?'
+                         b'W=C3=B6rld!_Hello_W=C3=B6rld!?=\n'
+                         b' 123456789-123456789 123456789 Hello '
+                         b'=?utf-8?q?W=C3=B6rld!?= 123456789\n 123456789\n\n')
+
+    def test_folding_with_utf8_encoding_8(self):
+        # bpo-36520
+        #
+        # Fold a line twice that contains UTF-8 words before
+        # the first fold point, and ASCII words after the
+        # first fold point, and UTF-8 words after the second
+        # fold point.
+
+        m = EmailMessage()
+        m['Subject'] = '123456789 123456789 Hello Wörld! Hello Wörld! ' \
+                       '123456789 123456789 123456789 123456789 123456789 ' \
+                       '123456789-123456789 123456789 Hello Wörld! 123456789' \
+                       ' 123456789'
+        self.assertEqual(bytes(m),
+                         b'Subject: 123456789 123456789 Hello '
+                         b'=?utf-8?q?W=C3=B6rld!_Hello_W=C3=B6rld!?=\n 123456789 '
+                         b'123456789 123456789 123456789 123456789 '
+                         b'123456789-123456789\n 123456789 Hello '
+                         b'=?utf-8?q?W=C3=B6rld!?= 123456789 123456789\n\n')
 
 class TestMIMEPart(TestEmailMessageBase, TestEmailBase):
     # Doing the full test run here may seem a bit redundant, since the two
diff --git a/Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst b/Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst
new file mode 100644
index 0000000..8171bfe
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst
@@ -0,0 +1 @@
+Lengthy email headers with UTF-8 characters are now properly encoded when they are folded. Patch by Jeffrey Kintscher.
```