diff options
author | R David Murray <rdmurray@bitdance.com> | 2012-03-23 02:17:51 (GMT) |
---|---|---|
committer | R David Murray <rdmurray@bitdance.com> | 2012-03-23 02:17:51 (GMT) |
commit | 8680bcc5db7c0edc09b09387c145932da34cd889 (patch) | |
tree | 5901ccac2b8e03dd7904d2a65eec65570160599c | |
parent | 192195a4fcf3aec27a43169df901667b21ea763a (diff) | |
download | cpython-8680bcc5db7c0edc09b09387c145932da34cd889.zip cpython-8680bcc5db7c0edc09b09387c145932da34cd889.tar.gz cpython-8680bcc5db7c0edc09b09387c145932da34cd889.tar.bz2 |
#14380: Have MIMEText default to utf-8 when passed non-ASCII unicode
Previously it would just accept the unicode, which would wind up as unicode in
the transfer-encoded message object, which is just wrong.
Patch by Jeff Knupp.
-rw-r--r-- | Doc/library/email.mime.rst | 6 | ||||
-rw-r--r-- | Lib/email/mime/text.py | 10 | ||||
-rw-r--r-- | Lib/test/test_email/test_email.py | 15 | ||||
-rw-r--r-- | Misc/ACKS | 1 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
5 files changed, 31 insertions, 4 deletions
diff --git a/Doc/library/email.mime.rst b/Doc/library/email.mime.rst index 9cd94d8..f3f34c8 100644 --- a/Doc/library/email.mime.rst +++ b/Doc/library/email.mime.rst @@ -175,7 +175,7 @@ Here are the classes: .. currentmodule:: email.mime.text -.. class:: MIMEText(_text, _subtype='plain', _charset='us-ascii') +.. class:: MIMEText(_text, _subtype='plain', _charset=None) Module: :mod:`email.mime.text` @@ -185,5 +185,5 @@ Here are the classes: minor type and defaults to :mimetype:`plain`. *_charset* is the character set of the text and is passed as a parameter to the :class:`~email.mime.nonmultipart.MIMENonMultipart` constructor; it defaults - to ``us-ascii``. No guessing or encoding is performed on the text data. - + to ``us-ascii`` if the string contains only ``ascii`` codepoints, and + ``utf-8`` otherwise. diff --git a/Lib/email/mime/text.py b/Lib/email/mime/text.py index 5747db5..5a5e39b 100644 --- a/Lib/email/mime/text.py +++ b/Lib/email/mime/text.py @@ -27,4 +27,14 @@ class MIMEText(MIMENonMultipart): """ MIMENonMultipart.__init__(self, 'text', _subtype, **{'charset': _charset}) + + # If _charset was defaulted, check to see if there are non-ascii + # characters present. Default to utf-8 if there are. + # XXX: This can be removed once #7304 is fixed. 
+ if _charset =='us-ascii': + try: + _text.encode(_charset) + except UnicodeEncodeError: + _charset = 'utf-8' + self.set_payload(_text, _charset) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index e771c20..86aa60c 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -617,6 +617,19 @@ class TestMessageAPI(TestEmailBase): abc """)) + def test_unicode_body_defaults_to_utf8_encoding(self): + # Issue 14380 + m = MIMEText('É testabc\n') + self.assertEqual(str(m),textwrap.dedent("""\ + MIME-Version: 1.0 + Content-Type: text/plain; charset="utf-8" + Content-Transfer-Encoding: base64 + + w4kgdGVzdGFiYwo= + """)) + + + # Test the email.encoders module class TestEncoders(unittest.TestCase): @@ -642,7 +655,7 @@ class TestEncoders(unittest.TestCase): eq(msg['content-transfer-encoding'], '7bit') # Similar, but with 8bit data msg = MIMEText('hello \xf8 world') - eq(msg['content-transfer-encoding'], '8bit') + eq(msg['content-transfer-encoding'], 'base64') # And now with a different charset msg = MIMEText('hello \xf8 world', _charset='iso-8859-1') eq(msg['content-transfer-encoding'], 'quoted-printable') @@ -548,6 +548,7 @@ Thomas Kluyver Kim Knapp Lenny Kneler Pat Knight +Jeff Knupp Greg Kochanski Damon Kohler Marko Kohtala @@ -34,6 +34,9 @@ Core and Builtins Library ------- +- Issue #14380: MIMEText now defaults to utf-8 when passed non-ASCII unicode + with no charset specified. + - Issue #10340: asyncore - properly handle EINVAL in dispatcher constructor on OSX; avoid to call handle_connect in case of a disconnected socket which was not meant to connect. |