summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com>, 2024-03-05 15:49:01 (GMT)
committer: GitHub <noreply@github.com>, 2024-03-05 15:49:01 (GMT)
commit: f97f25ef5dfcdfec0d9a359fd970abd139cf3428 (patch)
tree: 989a8a5b5c21dcff639abc58adadb9b512b73854
parent: df594011089a83d151ac7000954665536f3461b5 (diff)
download: cpython-f97f25ef5dfcdfec0d9a359fd970abd139cf3428.zip
cpython-f97f25ef5dfcdfec0d9a359fd970abd139cf3428.tar.gz
cpython-f97f25ef5dfcdfec0d9a359fd970abd139cf3428.tar.bz2
gh-76511: Fix email.Message.as_string() for non-ASCII message with ASCII charset (GH-116125)
-rw-r--r-- Lib/email/generator.py 2
-rw-r--r-- Lib/email/message.py 2
-rw-r--r-- Lib/test/test_email/test_email.py 15
-rw-r--r-- Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst 4
4 files changed, 21 insertions, 2 deletions
diff --git a/Lib/email/generator.py b/Lib/email/generator.py
index 7ccbe10..c8056ad 100644
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -243,7 +243,7 @@ class Generator:
# existing message.
msg = deepcopy(msg)
del msg['content-transfer-encoding']
- msg.set_payload(payload, charset)
+ msg.set_payload(msg._payload, charset)
payload = msg.get_payload()
self._munge_cte = (msg['content-transfer-encoding'],
msg['content-type'])
diff --git a/Lib/email/message.py b/Lib/email/message.py
index fe76958..a14cca5 100644
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -340,7 +340,7 @@ class Message:
return
if not isinstance(charset, Charset):
charset = Charset(charset)
- payload = payload.encode(charset.output_charset)
+ payload = payload.encode(charset.output_charset, 'surrogateescape')
if hasattr(payload, 'decode'):
self._payload = payload.decode('ascii', 'surrogateescape')
else:
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 39d4ace..d9af05c 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -337,6 +337,21 @@ class TestMessageAPI(TestEmailBase):
msg = email.message_from_bytes(source)
self.assertEqual(msg.as_string(), expected)
+ def test_nonascii_as_string_with_ascii_charset(self):
+ m = textwrap.dedent("""\
+ MIME-Version: 1.0
+ Content-type: text/plain; charset="us-ascii"
+ Content-Transfer-Encoding: 8bit
+
+ Test if non-ascii messages with no Content-Transfer-Encoding set
+ can be as_string'd:
+ Föö bär
+ """)
+ source = m.encode('iso-8859-1')
+ expected = source.decode('ascii', 'replace')
+ msg = email.message_from_bytes(source)
+ self.assertEqual(msg.as_string(), expected)
+
def test_nonascii_as_string_without_content_type_and_cte(self):
m = textwrap.dedent("""\
MIME-Version: 1.0
diff --git a/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst
new file mode 100644
index 0000000..da62f8a
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst
@@ -0,0 +1,4 @@
+Fix UnicodeEncodeError in :meth:`email.Message.as_string` that results when
+a message that claims to be in the ascii character set actually has non-ascii
+characters. Non-ascii characters are now replaced with the U+FFFD replacement
+character, like in the ``replace`` error handler.