diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2024-03-05 15:49:01 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-05 15:49:01 (GMT) |
commit | f97f25ef5dfcdfec0d9a359fd970abd139cf3428 (patch) | |
tree | 989a8a5b5c21dcff639abc58adadb9b512b73854 | |
parent | df594011089a83d151ac7000954665536f3461b5 (diff) | |
download | cpython-f97f25ef5dfcdfec0d9a359fd970abd139cf3428.zip cpython-f97f25ef5dfcdfec0d9a359fd970abd139cf3428.tar.gz cpython-f97f25ef5dfcdfec0d9a359fd970abd139cf3428.tar.bz2 |
gh-76511: Fix email.Message.as_string() for non-ASCII message with ASCII charset (GH-116125)
-rw-r--r-- | Lib/email/generator.py | 2 |
-rw-r--r-- | Lib/email/message.py | 2 |
-rw-r--r-- | Lib/test/test_email/test_email.py | 15 |
-rw-r--r-- | Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst | 4 |
4 files changed, 21 insertions, 2 deletions
diff --git a/Lib/email/generator.py b/Lib/email/generator.py index 7ccbe10..c8056ad 100644 --- a/Lib/email/generator.py +++ b/Lib/email/generator.py @@ -243,7 +243,7 @@ class Generator: # existing message. msg = deepcopy(msg) del msg['content-transfer-encoding'] - msg.set_payload(payload, charset) + msg.set_payload(msg._payload, charset) payload = msg.get_payload() self._munge_cte = (msg['content-transfer-encoding'], msg['content-type']) diff --git a/Lib/email/message.py b/Lib/email/message.py index fe76958..a14cca5 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -340,7 +340,7 @@ class Message: return if not isinstance(charset, Charset): charset = Charset(charset) - payload = payload.encode(charset.output_charset) + payload = payload.encode(charset.output_charset, 'surrogateescape') if hasattr(payload, 'decode'): self._payload = payload.decode('ascii', 'surrogateescape') else: diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 39d4ace..d9af05c 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -337,6 +337,21 @@ class TestMessageAPI(TestEmailBase): msg = email.message_from_bytes(source) self.assertEqual(msg.as_string(), expected) + def test_nonascii_as_string_with_ascii_charset(self): + m = textwrap.dedent("""\ + MIME-Version: 1.0 + Content-type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 8bit + + Test if non-ascii messages with no Content-Transfer-Encoding set + can be as_string'd: + Föö bär + """) + source = m.encode('iso-8859-1') + expected = source.decode('ascii', 'replace') + msg = email.message_from_bytes(source) + self.assertEqual(msg.as_string(), expected) + def test_nonascii_as_string_without_content_type_and_cte(self): m = textwrap.dedent("""\ MIME-Version: 1.0 diff --git a/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst new file mode 100644 index 
0000000..da62f8a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst @@ -0,0 +1,4 @@ +Fix UnicodeEncodeError in :meth:`email.Message.as_string` that results when +a message that claims to be in the ascii character set actually has non-ascii +characters. Non-ascii characters are now replaced with the U+FFFD replacement +character, like in the ``replace`` error handler. |