From f870d8736ad15094b9f5f1f6a51800a562d3ecb1 Mon Sep 17 00:00:00 2001 From: "R. David Murray" Date: Thu, 6 May 2010 01:53:03 +0000 Subject: Merged revisions 79996,80855 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/py3k ................ r79996 | r.david.murray | 2010-04-12 10:48:58 -0400 (Mon, 12 Apr 2010) | 15 lines Merged revisions 79994 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r79994 | r.david.murray | 2010-04-12 10:26:06 -0400 (Mon, 12 Apr 2010) | 9 lines Issue #7472: ISO-2022 charsets now consistently use 7bit CTE. Fixed a typo in the email.encoders module so that messages output using an ISO-2022 character set will use a content-transfer-encoding of 7bit consistently. Previously if the input data had any eight bit characters the output data would get marked as 8bit even though it was actually 7bit. ........ ................ r80855 | r.david.murray | 2010-05-05 21:41:14 -0400 (Wed, 05 May 2010) | 24 lines Merged revisions 80800 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk It turns out that email5 (py3k), because it is using unicode for the payload, doesn't do the encoding to the output character set until later in the process. Specifically, charset.body_encode no longer does the input-to-output charset conversion. So the if test in the exception clause in encoders.encode_7or8bit really is needed in email5. So, this merge only merges the test, not the removal of the 'if'. ........ r80800 | r.david.murray | 2010-05-05 13:31:03 -0400 (Wed, 05 May 2010) | 9 lines Issue #7472: remove unused code from email.encoders.encode_7or8bit. Yukihiro Nakadaira noticed a typo in encode_7or8bit that was trying to special case iso-2022 codecs. It turns out that the code in question is never used, because whereas it was designed to trigger if the payload encoding was eight bit but its output encoding was 7 bit, in practice the payload is always converted to the 7bit encoding before encode_7or8bit is called. Patch by Shawat Anand. ........ ................ --- Lib/email/encoders.py | 2 +- Lib/email/test/test_email.py | 7 +++++++ Misc/NEWS | 4 ++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/Lib/email/encoders.py b/Lib/email/encoders.py index 2e77e61..20feb02 100644 --- a/Lib/email/encoders.py +++ b/Lib/email/encoders.py @@ -62,7 +62,7 @@ def encode_7or8bit(msg): # iso-2022-* is non-ASCII but still 7-bit charset = msg.get_charset() output_cset = charset and charset.output_charset - if output_cset and output_cset.lower().startswith('iso-2202-'): + if output_cset and output_cset.lower().startswith('iso-2022-'): msg['Content-Transfer-Encoding'] = '7bit' else: msg['Content-Transfer-Encoding'] = '8bit' diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index 1f257c7..2ebd39f 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -526,6 +526,13 @@ class TestEncoders(unittest.TestCase): msg = MIMEText('hello \xf8 world', _charset='iso-8859-1') eq(msg['content-transfer-encoding'], 'quoted-printable') + def test_encode7or8bit(self): + # Make sure a charset whose input character set is 8bit but + # whose output character set is 7bit gets a transfer-encoding + # of 7bit. + eq = self.assertEqual + msg = MIMEText('\xca\xb8', _charset='euc-jp') + eq(msg['content-transfer-encoding'], '7bit') # Test long header wrapping diff --git a/Misc/NEWS b/Misc/NEWS index 7147f77..227ebff 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -40,6 +40,10 @@ Core and Builtins Library ------- +- Issue #7472: Fixed typo in email.encoders module; messages using ISO-2022 + character sets will now consistently use a Content-Transfer-Encoding of + 7bit rather than sometimes being marked as 8bit. + - Issue #4265: shutil.copyfile() was leaking file descriptors when disk fills. Patch by Tres Seaver. -- cgit v0.12