summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>2020-05-18 00:57:42 (GMT)
committerGitHub <noreply@github.com>2020-05-18 00:57:42 (GMT)
commitc1f1ddf30a595c2bfa3c06e54fb03fa212cd28b5 (patch)
treeeaf147b7a9a36163917ece7bfbdefe50c9778d6c
parent7a3522d478d456b38ef5e647c21595904bea79df (diff)
downloadcpython-c1f1ddf30a595c2bfa3c06e54fb03fa212cd28b5.zip
cpython-c1f1ddf30a595c2bfa3c06e54fb03fa212cd28b5.tar.gz
cpython-c1f1ddf30a595c2bfa3c06e54fb03fa212cd28b5.tar.bz2
bpo-40597: email: Use CTE if lines are longer than max_line_length consistently (gh-20038) (gh-20084)
raw_data_manager (the default for EmailPolicy and EmailMessage) correctly wraps 'text' parts as long as the message contains characters outside the 7bit US-ASCII set: a base64 or quoted-printable Content-Transfer-Encoding is applied if the lines would be too long without it. It did not, however, do this for ASCII-only text, which could result in lines that were longer than policy.max_line_length or even the 998-character maximum line length set by RFC 5322. This changeset fixes the heuristic so that if lines are longer than policy.max_line_length, it will always apply a content-transfer-encoding so that the lines are wrapped correctly. (cherry picked from commit 6f2f475d5a2cd7675dce844f3af436ba919ef92b) Co-authored-by: Arkadiusz Hiler <arek.l1@gmail.com>
-rw-r--r--Lib/email/contentmanager.py14
-rw-r--r--Lib/test/test_email/test_contentmanager.py15
-rw-r--r--Misc/NEWS.d/next/Library/2020-05-11-19-17-23.bpo-40597.4SGfgm.rst1
3 files changed, 23 insertions, 7 deletions
diff --git a/Lib/email/contentmanager.py b/Lib/email/contentmanager.py
index b904ded..2b4b875 100644
--- a/Lib/email/contentmanager.py
+++ b/Lib/email/contentmanager.py
@@ -146,13 +146,13 @@ def _encode_text(string, charset, cte, policy):
def normal_body(lines): return b'\n'.join(lines) + b'\n'
if cte==None:
# Use heuristics to decide on the "best" encoding.
- try:
- return '7bit', normal_body(lines).decode('ascii')
- except UnicodeDecodeError:
- pass
- if (policy.cte_type == '8bit' and
- max(len(x) for x in lines) <= policy.max_line_length):
- return '8bit', normal_body(lines).decode('ascii', 'surrogateescape')
+ if max(len(x) for x in lines) <= policy.max_line_length:
+ try:
+ return '7bit', normal_body(lines).decode('ascii')
+ except UnicodeDecodeError:
+ pass
+ if policy.cte_type == '8bit':
+ return '8bit', normal_body(lines).decode('ascii', 'surrogateescape')
sniff = embedded_body(lines[:10])
sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'),
policy.max_line_length)
diff --git a/Lib/test/test_email/test_contentmanager.py b/Lib/test/test_email/test_contentmanager.py
index 169058e..64dca2d 100644
--- a/Lib/test/test_email/test_contentmanager.py
+++ b/Lib/test/test_email/test_contentmanager.py
@@ -329,6 +329,21 @@ class TestRawDataManager(TestEmailBase):
self.assertEqual(m.get_payload(decode=True).decode('utf-8'), content)
self.assertEqual(m.get_content(), content)
+ def test_set_text_plain_long_line_heuristics(self):
+ m = self._make_message()
+ content = ("Simple but long message that is over 78 characters"
+ " long to force transfer encoding.\n")
+ raw_data_manager.set_content(m, content)
+ self.assertEqual(str(m), textwrap.dedent("""\
+ Content-Type: text/plain; charset="utf-8"
+ Content-Transfer-Encoding: quoted-printable
+
+ Simple but long message that is over 78 characters long to =
+ force transfer encoding.
+ """))
+ self.assertEqual(m.get_payload(decode=True).decode('utf-8'), content)
+ self.assertEqual(m.get_content(), content)
+
def test_set_text_short_line_minimal_non_ascii_heuristics(self):
m = self._make_message()
content = "et là il est monté sur moi et il commence à m'éto.\n"
diff --git a/Misc/NEWS.d/next/Library/2020-05-11-19-17-23.bpo-40597.4SGfgm.rst b/Misc/NEWS.d/next/Library/2020-05-11-19-17-23.bpo-40597.4SGfgm.rst
new file mode 100644
index 0000000..1b9fe60
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-05-11-19-17-23.bpo-40597.4SGfgm.rst
@@ -0,0 +1 @@
+If text content lines are longer than policy.max_line_length, always use a content-transfer-encoding to make sure they are wrapped.