summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorR David Murray <rdmurray@bitdance.com>2012-03-23 02:17:51 (GMT)
committerR David Murray <rdmurray@bitdance.com>2012-03-23 02:17:51 (GMT)
commit8680bcc5db7c0edc09b09387c145932da34cd889 (patch)
tree5901ccac2b8e03dd7904d2a65eec65570160599c
parent192195a4fcf3aec27a43169df901667b21ea763a (diff)
downloadcpython-8680bcc5db7c0edc09b09387c145932da34cd889.zip
cpython-8680bcc5db7c0edc09b09387c145932da34cd889.tar.gz
cpython-8680bcc5db7c0edc09b09387c145932da34cd889.tar.bz2
#14380: Have MIMEText default to utf-8 when passed non-ASCII unicode
Previously it would just accept the unicode, which would wind up as unicode in the transfer-encoded message object, which is just wrong. Patch by Jeff Knupp.
-rw-r--r--Doc/library/email.mime.rst6
-rw-r--r--Lib/email/mime/text.py10
-rw-r--r--Lib/test/test_email/test_email.py15
-rw-r--r--Misc/ACKS1
-rw-r--r--Misc/NEWS3
5 files changed, 31 insertions, 4 deletions
diff --git a/Doc/library/email.mime.rst b/Doc/library/email.mime.rst
index 9cd94d8..f3f34c8 100644
--- a/Doc/library/email.mime.rst
+++ b/Doc/library/email.mime.rst
@@ -175,7 +175,7 @@ Here are the classes:
.. currentmodule:: email.mime.text
-.. class:: MIMEText(_text, _subtype='plain', _charset='us-ascii')
+.. class:: MIMEText(_text, _subtype='plain', _charset=None)
Module: :mod:`email.mime.text`
@@ -185,5 +185,5 @@ Here are the classes:
minor type and defaults to :mimetype:`plain`. *_charset* is the character
set of the text and is passed as a parameter to the
:class:`~email.mime.nonmultipart.MIMENonMultipart` constructor; it defaults
- to ``us-ascii``. No guessing or encoding is performed on the text data.
-
+ to ``us-ascii`` if the string contains only ``ascii`` codepoints, and
+ ``utf-8`` otherwise.
diff --git a/Lib/email/mime/text.py b/Lib/email/mime/text.py
index 5747db5..5a5e39b 100644
--- a/Lib/email/mime/text.py
+++ b/Lib/email/mime/text.py
@@ -27,4 +27,14 @@ class MIMEText(MIMENonMultipart):
"""
MIMENonMultipart.__init__(self, 'text', _subtype,
**{'charset': _charset})
+
+ # If _charset was defaulted, check to see if there are non-ascii
+ # characters present. Default to utf-8 if there are.
+ # XXX: This can be removed once #7304 is fixed.
+ if _charset =='us-ascii':
+ try:
+ _text.encode(_charset)
+ except UnicodeEncodeError:
+ _charset = 'utf-8'
+
self.set_payload(_text, _charset)
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index e771c20..86aa60c 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -617,6 +617,19 @@ class TestMessageAPI(TestEmailBase):
abc
"""))
+ def test_unicode_body_defaults_to_utf8_encoding(self):
+ # Issue 14291
+ m = MIMEText('É testabc\n')
+ self.assertEqual(str(m),textwrap.dedent("""\
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset="utf-8"
+ Content-Transfer-Encoding: base64
+
+ w4kgdGVzdGFiYwo=
+ """))
+
+
+
# Test the email.encoders module
class TestEncoders(unittest.TestCase):
@@ -642,7 +655,7 @@ class TestEncoders(unittest.TestCase):
eq(msg['content-transfer-encoding'], '7bit')
# Similar, but with 8bit data
msg = MIMEText('hello \xf8 world')
- eq(msg['content-transfer-encoding'], '8bit')
+ eq(msg['content-transfer-encoding'], 'base64')
# And now with a different charset
msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
eq(msg['content-transfer-encoding'], 'quoted-printable')
diff --git a/Misc/ACKS b/Misc/ACKS
index 9ed148e..4e6597d 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -548,6 +548,7 @@ Thomas Kluyver
Kim Knapp
Lenny Kneler
Pat Knight
+Jeff Knupp
Greg Kochanski
Damon Kohler
Marko Kohtala
diff --git a/Misc/NEWS b/Misc/NEWS
index 02bafcd..b6d7dad 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -34,6 +34,9 @@ Core and Builtins
Library
-------
+- Issue #14380: MIMEText now defaults to utf-8 when passed non-ASCII unicode
+ with no charset specified.
+
- Issue #10340: asyncore - properly handle EINVAL in dispatcher constructor on
OSX; avoid calling handle_connect in case of a disconnected socket which
was not meant to connect.