summary | refs | log | tree | commit | diff | stats
path: root/Lib/email
diff options
context:
space:
mode:
author: R. David Murray <rdmurray@bitdance.com> 2010-06-03 02:05:47 (GMT)
committer: R. David Murray <rdmurray@bitdance.com> 2010-06-03 02:05:47 (GMT)
commit: d2d08c6872a118cfd910439420c380101e18c38e (patch)
tree: e83ba939fa3952f397449fd1940a3eae818bf2e0 /Lib/email
parent: 5e50f8b5a19e7bad02f53e9af7e25c7e3ddeee40 (diff)
download: cpython-d2d08c6872a118cfd910439420c380101e18c38e.zip
cpython-d2d08c6872a118cfd910439420c380101e18c38e.tar.gz
cpython-d2d08c6872a118cfd910439420c380101e18c38e.tar.bz2
Merged revisions 81660 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k

................
  r81660 | r.david.murray | 2010-06-02 21:58:28 -0400 (Wed, 02 Jun 2010) | 25 lines

  Fix Charset.body_encode to encode to output_charset before calling base64mime.

  This means that what gets encoded in base64 is the encoded version of the
  unicode payload. This bug was revealed by a forward port of the tests from
  Issue 1368247, but the fix was completely different.

  Note that the merge is only of the tests, the doc changes were inappropriate
  since email5 expects unicode, not bytes.

  I'm also not convinced that quopri works correctly in email5, but that's a
  different issue.

  Merged revisions 81658 via svnmerge from
  svn+ssh://pythondev@svn.python.org/python/trunk

  ........
    r81658 | r.david.murray | 2010-06-02 18:03:15 -0400 (Wed, 02 Jun 2010) | 9 lines

    #1368247: make set_charset/MIMEText automatically encode unicode _payload.

    Fixes (mysterious, to the end user) UnicodeErrors when using utf-8 as the
    charset and unicode as the _text argument. Also makes the way in which
    unicode gets encoded to quoted printable for other charsets more sane (it
    only worked by accident previously). The _payload now is encoded to the
    charset.output_charset if it is unicode.
  ........
................
Diffstat (limited to 'Lib/email')
-rw-r--r-- Lib/email/charset.py | 2
-rw-r--r-- Lib/email/test/test_email.py | 29
2 files changed, 30 insertions, 1 deletions
diff --git a/Lib/email/charset.py b/Lib/email/charset.py
index 9e5ee67..a44b711 100644
--- a/Lib/email/charset.py
+++ b/Lib/email/charset.py
@@ -377,6 +377,8 @@ class Charset:
"""
# 7bit/8bit encodings return the string unchanged (module conversions)
if self.body_encoding is BASE64:
+ if isinstance(string, str):
+ string = string.encode(self.output_charset)
return email.base64mime.body_encode(string)
elif self.body_encoding is QP:
return email.quoprimime.body_encode(string)
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py
index 2ebd39f..5508456 100644
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -531,7 +531,7 @@ class TestEncoders(unittest.TestCase):
# whose output character set is 7bit gets a transfer-encoding
# of 7bit.
eq = self.assertEqual
- msg = MIMEText('\xca\xb8', _charset='euc-jp')
+ msg = MIMEText('文', _charset='euc-jp')
eq(msg['content-transfer-encoding'], '7bit')
@@ -1076,6 +1076,33 @@ class TestMIMEText(unittest.TestCase):
eq(msg.get_charset().input_charset, 'us-ascii')
eq(msg['content-type'], 'text/plain; charset="us-ascii"')
+ def test_7bit_input(self):
+ eq = self.assertEqual
+ msg = MIMEText('hello there', _charset='us-ascii')
+ eq(msg.get_charset().input_charset, 'us-ascii')
+ eq(msg['content-type'], 'text/plain; charset="us-ascii"')
+
+ def test_7bit_input_no_charset(self):
+ eq = self.assertEqual
+ msg = MIMEText('hello there')
+ eq(msg.get_charset(), 'us-ascii')
+ eq(msg['content-type'], 'text/plain; charset="us-ascii"')
+ self.assertTrue('hello there' in msg.as_string())
+
+ def test_utf8_input(self):
+ teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
+ eq = self.assertEqual
+ msg = MIMEText(teststr, _charset='utf-8')
+ eq(msg.get_charset().output_charset, 'utf-8')
+ eq(msg['content-type'], 'text/plain; charset="utf-8"')
+ eq(msg.get_payload(decode=True), teststr.encode('utf-8'))
+
+ @unittest.skip("can't fix because of backward compat in email5, "
+ "will fix in email6")
+ def test_utf8_input_no_charset(self):
+ teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
+ self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
+
# Test complicated multipart/* messages