diff options
author | R. David Murray <rdmurray@bitdance.com> | 2010-06-02 22:03:15 (GMT) |
---|---|---|
committer | R. David Murray <rdmurray@bitdance.com> | 2010-06-02 22:03:15 (GMT) |
commit | 52dcd4590652e3c6f57a94fb57a5522880b7d72b (patch) | |
tree | d8361501f980bff3a999186ff2ac1a961584e9c8 /Lib | |
parent | c395545a4ab4b4760c0472af74f526f405389864 (diff) | |
download | cpython-52dcd4590652e3c6f57a94fb57a5522880b7d72b.zip cpython-52dcd4590652e3c6f57a94fb57a5522880b7d72b.tar.gz cpython-52dcd4590652e3c6f57a94fb57a5522880b7d72b.tar.bz2 |
#1368247: make set_charset/MIMEText automatically encode unicode _payload.
Fixes (mysterious, to the end user) UnicodeErrors when using utf-8 as
the charset and unicode as the _text argument. Also makes the way in
which unicode gets encoded to quoted printable for other charsets more
sane (it only worked by accident previously). The _payload is now encoded
to charset.output_charset if it is unicode.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/email/message.py | 2 |
-rw-r--r-- | Lib/email/test/test_email.py | 25 |
2 files changed, 27 insertions, 0 deletions
```diff
diff --git a/Lib/email/message.py b/Lib/email/message.py
index 993a1ac..08423cd 100644
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -256,6 +256,8 @@ class Message:
                             charset=charset.get_output_charset())
         else:
             self.set_param('charset', charset.get_output_charset())
+        if isinstance(self._payload, unicode):
+            self._payload = self._payload.encode(charset.output_charset)
         if str(charset) != charset.get_output_charset():
             self._payload = charset.body_encode(self._payload)
         if 'Content-Transfer-Encoding' not in self:
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py
index bf41be7..7d01079 100644
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -1045,6 +1045,31 @@ class TestMIMEText(unittest.TestCase):
         eq(msg.get_charset().input_charset, 'us-ascii')
         eq(msg['content-type'], 'text/plain; charset="us-ascii"')
 
+    def test_7bit_unicode_input(self):
+        eq = self.assertEqual
+        msg = MIMEText(u'hello there', _charset='us-ascii')
+        eq(msg.get_charset().input_charset, 'us-ascii')
+        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
+
+    def test_7bit_unicode_input_no_charset(self):
+        eq = self.assertEqual
+        msg = MIMEText(u'hello there')
+        eq(msg.get_charset(), 'us-ascii')
+        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
+        self.assertTrue('hello there' in msg.as_string())
+
+    def test_8bit_unicode_input(self):
+        teststr = u'\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
+        eq = self.assertEqual
+        msg = MIMEText(teststr, _charset='utf-8')
+        eq(msg.get_charset().output_charset, 'utf-8')
+        eq(msg['content-type'], 'text/plain; charset="utf-8"')
+        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))
+
+    def test_8bit_unicode_input_no_charset(self):
+        teststr = u'\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
+        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
+
 
 
 # Test complicated multipart/* messages
```