summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: websurfer5 <49998481+websurfer5@users.noreply.github.com> 2019-06-06 19:53:27 (GMT)
committer: Barry Warsaw <barry@python.org> 2019-06-06 19:53:27 (GMT)
commit: f6713e84afc5addcfa8477dbdf2c027787f711c0 (patch)
tree: 0102e1ffc13da4bf9c0b0acb39c6729cfae8a377
parent: e0c0c7e8c9f8153a54b92e43aa3d09e69a9fd0c0 (diff)
download: cpython-f6713e84afc5addcfa8477dbdf2c027787f711c0.zip
cpython-f6713e84afc5addcfa8477dbdf2c027787f711c0.tar.gz
cpython-f6713e84afc5addcfa8477dbdf2c027787f711c0.tar.bz2
bpo-36520: Email header folded incorrectly (#13608)
* bpo-36520: reset the encoded word offset when starting a new line during an email header folding operation
* 📜🤖 Added by blurb_it.
* bpo-36520: add an additional test case, and provide descriptive comments for the test_folding_with_utf8_encoding_* tests
* bpo-36520: fix whitespace issue
* bpo-36520: changes per reviewer request -- remove extraneous backslashes; add whitespace between terminating quotes and line-continuation backslashes; use "bpo-" instead of "issue #" in comments
-rw-r--r-- Lib/email/_header_value_parser.py | 1
-rw-r--r-- Lib/test/test_email/test_message.py | 131
-rw-r--r-- Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst | 1
3 files changed, 133 insertions, 0 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 35d746a..bb5ff8d 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -2789,6 +2789,7 @@ def _refold_parse_tree(parse_tree, *, policy):
newline = _steal_trailing_WSP_if_exists(lines)
if newline or part.startswith_fws():
lines.append(newline + tstr)
+ last_ew = None
continue
if not hasattr(part, 'encode'):
# It's not a terminal, try folding the subparts.
diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py
index f3a57df..5dc46e1 100644
--- a/Lib/test/test_email/test_message.py
+++ b/Lib/test/test_email/test_message.py
@@ -784,6 +784,137 @@ class TestEmailMessage(TestEmailMessageBase, TestEmailBase):
m['Subject'] = 'unicöde'
self.assertEqual(str(m), 'Subject: unicöde\n\n')
+ def test_folding_with_utf8_encoding_1(self):
+ # bpo-36520: reset the encoded word offset when a fold starts a new line.
+ #
+ # Fold a line that contains UTF-8 words before
+ # and after the whitespace fold point, where the
+ # line length limit is reached within an ASCII
+ # word.
+
+ m = EmailMessage()
+ m['Subject'] = 'Hello Wörld! Hello Wörld! ' \
+ 'Hello Wörld! Hello Wörld!Hello Wörld!'
+ self.assertEqual(bytes(m),
+ b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W'
+ b'=C3=B6rld!_Hello_W=C3=B6rld!?=\n'
+ b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
+
+
+ def test_folding_with_utf8_encoding_2(self):
+ # bpo-36520: reset the encoded word offset when a fold starts a new line.
+ #
+ # Fold a line that contains UTF-8 words before
+ # and after the whitespace fold point, where the
+ # line length limit is reached at the end of an
+ # encoded word.
+
+ m = EmailMessage()
+ m['Subject'] = 'Hello Wörld! Hello Wörld! ' \
+ 'Hello Wörlds123! Hello Wörld!Hello Wörld!'
+ self.assertEqual(bytes(m),
+ b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W'
+ b'=C3=B6rld!_Hello_W=C3=B6rlds123!?=\n'
+ b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
+
+ def test_folding_with_utf8_encoding_3(self):
+ # bpo-36520: reset the encoded word offset when a fold starts a new line.
+ #
+ # Fold a line that contains UTF-8 words before
+ # and after the whitespace fold point, where the
+ # line length limit is reached at the end of the
+ # first word.
+
+ m = EmailMessage()
+ m['Subject'] = 'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123! ' \
+ 'Hello Wörld!Hello Wörld!'
+ self.assertEqual(bytes(m),
+ b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W'
+ b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n'
+ b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
+
+ def test_folding_with_utf8_encoding_4(self):
+ # bpo-36520: reset the encoded word offset when a fold starts a new line.
+ #
+ # Fold a line that contains UTF-8 words before
+ # and after the fold point, where the first
+ # word is UTF-8 and the fold point is within
+ # the word.
+
+ m = EmailMessage()
+ m['Subject'] = 'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123!-Hello' \
+ ' Wörld!Hello Wörld!'
+ self.assertEqual(bytes(m),
+ b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W'
+ b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n'
+ b' =?utf-8?q?-Hello_W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
+
+ def test_folding_with_utf8_encoding_5(self):
+ # bpo-36520: reset the encoded word offset when a fold starts a new line.
+ #
+ # Fold a line that contains a UTF-8 word after
+ # the fold point.
+
+ m = EmailMessage()
+ m['Subject'] = '123456789 123456789 123456789 123456789 123456789' \
+ ' 123456789 123456789 Hello Wörld!'
+ self.assertEqual(bytes(m),
+ b'Subject: 123456789 123456789 123456789 123456789'
+ b' 123456789 123456789 123456789\n'
+ b' Hello =?utf-8?q?W=C3=B6rld!?=\n\n')
+
+ def test_folding_with_utf8_encoding_6(self):
+ # bpo-36520: reset the encoded word offset when a fold starts a new line.
+ #
+ # Fold a line that contains a UTF-8 word before
+ # the fold point and ASCII words after it.
+
+ m = EmailMessage()
+ m['Subject'] = '123456789 123456789 123456789 123456789 Hello Wörld!' \
+ ' 123456789 123456789 123456789 123456789 123456789' \
+ ' 123456789'
+ self.assertEqual(bytes(m),
+ b'Subject: 123456789 123456789 123456789 123456789'
+ b' Hello =?utf-8?q?W=C3=B6rld!?=\n 123456789 '
+ b'123456789 123456789 123456789 123456789 '
+ b'123456789\n\n')
+
+ def test_folding_with_utf8_encoding_7(self):
+ # bpo-36520: reset the encoded word offset when a fold starts a new line.
+ #
+ # Fold a line twice that contains UTF-8 words before
+ # and after the first fold point, and ASCII words
+ # after the second fold point.
+
+ m = EmailMessage()
+ m['Subject'] = '123456789 123456789 Hello Wörld! Hello Wörld! ' \
+ '123456789-123456789 123456789 Hello Wörld! 123456789' \
+ ' 123456789'
+ self.assertEqual(bytes(m),
+ b'Subject: 123456789 123456789 Hello =?utf-8?q?'
+ b'W=C3=B6rld!_Hello_W=C3=B6rld!?=\n'
+ b' 123456789-123456789 123456789 Hello '
+ b'=?utf-8?q?W=C3=B6rld!?= 123456789\n 123456789\n\n')
+
+ def test_folding_with_utf8_encoding_8(self):
+ # bpo-36520: reset the encoded word offset when a fold starts a new line.
+ #
+ # Fold a line twice that contains UTF-8 words before
+ # the first fold point, and ASCII words after the
+ # first fold point, and UTF-8 words after the second
+ # fold point.
+
+ m = EmailMessage()
+ m['Subject'] = '123456789 123456789 Hello Wörld! Hello Wörld! ' \
+ '123456789 123456789 123456789 123456789 123456789 ' \
+ '123456789-123456789 123456789 Hello Wörld! 123456789' \
+ ' 123456789'
+ self.assertEqual(bytes(m),
+ b'Subject: 123456789 123456789 Hello '
+ b'=?utf-8?q?W=C3=B6rld!_Hello_W=C3=B6rld!?=\n 123456789 '
+ b'123456789 123456789 123456789 123456789 '
+ b'123456789-123456789\n 123456789 Hello '
+ b'=?utf-8?q?W=C3=B6rld!?= 123456789 123456789\n\n')
class TestMIMEPart(TestEmailMessageBase, TestEmailBase):
# Doing the full test run here may seem a bit redundant, since the two
diff --git a/Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst b/Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst
new file mode 100644
index 0000000..8171bfe
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst
@@ -0,0 +1 @@
+Lengthy email headers with UTF-8 characters are now properly encoded when they are folded. Patch by Jeffrey Kintscher.