summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: R David Murray <rdmurray@bitdance.com> 2012-03-14 07:03:27 (GMT)
committer: R David Murray <rdmurray@bitdance.com> 2012-03-14 07:03:27 (GMT)
commit: e2922835b0d499d6f6d108e89ccb424f74b54c71 (patch)
tree: 23f15f18d1ec1a8f0f7bc03b2c5cd9ed7e772836
parent: b20a019d467daad5a6e2856a36fca4d90904f969 (diff)
parent: 7441a7aedd251d529eb14eff9a16708e9cb32409 (diff)
download: cpython-e2922835b0d499d6f6d108e89ccb424f74b54c71.zip
download: cpython-e2922835b0d499d6f6d108e89ccb424f74b54c71.tar.gz
download: cpython-e2922835b0d499d6f6d108e89ccb424f74b54c71.tar.bz2
Merge #14291: if a header has non-ascii unicode, default to CTE using utf-8
In Python 2, if a unicode string was assigned as the value of a header, email would automatically apply a content transfer encoding (CTE) to it using the UTF-8 charset. This capability was lost in the Python 3 translation, and this patch restores it. Patch by Ali Ikinci, assisted by R. David Murray. I also added a fix for the mailbox test that was depending on non-ASCII input causing message_from_string to raise an error (the test carried a comment noting that this dependency was a bad idea). It now uses support.patch to induce an error during message serialization.
-rw-r--r-- Lib/email/header.py 7
-rw-r--r-- Lib/test/test_email/test_email.py 21
-rw-r--r-- Lib/test/test_mailbox.py 8
-rw-r--r-- Misc/ACKS 1
-rw-r--r-- Misc/NEWS 3
5 files changed, 33 insertions, 7 deletions
diff --git a/Lib/email/header.py b/Lib/email/header.py
index 2e687b7..3250d36 100644
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -283,7 +283,12 @@ class Header:
# character set, otherwise an early error is thrown.
output_charset = charset.output_codec or 'us-ascii'
if output_charset != _charset.UNKNOWN8BIT:
- s.encode(output_charset, errors)
+ try:
+ s.encode(output_charset, errors)
+ except UnicodeEncodeError:
+ if output_charset!='us-ascii':
+ raise
+ charset = UTF8
self._chunks.append((s, charset))
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 121c939..a4d39ab 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -604,6 +604,19 @@ class TestMessageAPI(TestEmailBase):
msg['Dummy'] = 'dummy\nX-Injected-Header: test'
self.assertRaises(errors.HeaderParseError, msg.as_string)
+ def test_unicode_header_defaults_to_utf8_encoding(self):
+ # Issue 14291
+ m = MIMEText('abc\n')
+ m['Subject'] = 'É test'
+ self.assertEqual(str(m),textwrap.dedent("""\
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Subject: =?utf-8?q?=C3=89_test?=
+
+ abc
+ """))
+
# Test the email.encoders module
class TestEncoders(unittest.TestCase):
@@ -1045,9 +1058,13 @@ Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-W
'f\xfcr Offshore-Windkraftprojekte '
'<a-very-long-address@example.com>')
msg['Reply-To'] = header_string
- self.assertRaises(UnicodeEncodeError, msg.as_string)
+ eq(msg.as_string(maxheaderlen=78), """\
+Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
+ =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
+
+""")
msg = Message()
- msg['Reply-To'] = Header(header_string, 'utf-8',
+ msg['Reply-To'] = Header(header_string,
header_name='Reply-To')
eq(msg.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
diff --git a/Lib/test/test_mailbox.py b/Lib/test/test_mailbox.py
index 212ceb9..54963a9 100644
--- a/Lib/test/test_mailbox.py
+++ b/Lib/test/test_mailbox.py
@@ -111,10 +111,10 @@ class TestMailbox(TestBase):
self.assertMailboxEmpty()
def test_add_that_raises_leaves_mailbox_empty(self):
- # XXX This test will start failing when Message learns to handle
- # non-ASCII string headers, and a different internal failure will
- # need to be found or manufactured.
- with self.assertRaises(ValueError):
+ def raiser(*args, **kw):
+ raise Exception("a fake error")
+ support.patch(self, email.generator.BytesGenerator, 'flatten', raiser)
+ with self.assertRaises(Exception):
self._box.add(email.message_from_string("From: Alphöso"))
self.assertEqual(len(self._box), 0)
self._box.close()
diff --git a/Misc/ACKS b/Misc/ACKS
index a11d4eb..eeb61e0 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -470,6 +470,7 @@ Gerhard Häring
Fredrik Håård
Catalin Iacob
Mihai Ibanescu
+Ali Ikinci
Lars Immisch
Bobby Impollonia
Meador Inge
diff --git a/Misc/NEWS b/Misc/NEWS
index 9b4d2c2..0d6cdbb 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -24,6 +24,9 @@ Core and Builtins
Library
-------
+- Issue #14291: Email now defaults to utf-8 for non-ASCII unicode headers
+ instead of raising an error. This fixes a regression relative to 2.7.
+
- Issue #989712: Support using Tk without a mainloop.
- Issue #5219: Prevent event handler cascade in IDLE.