summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Doc/library/email.policy.rst 8
-rw-r--r-- Doc/whatsnew/3.5.rst 6
-rw-r--r-- Lib/email/_header_value_parser.py 11
-rw-r--r-- Lib/email/policy.py 15
-rw-r--r-- Lib/test/test_email/test_generator.py 22
-rw-r--r-- Lib/test/test_email/test_policy.py 4
-rw-r--r-- Misc/NEWS 3
7 files changed, 64 insertions, 5 deletions
diff --git a/Doc/library/email.policy.rst b/Doc/library/email.policy.rst
index d4e3fc1..9fadfb3 100644
--- a/Doc/library/email.policy.rst
+++ b/Doc/library/email.policy.rst
@@ -378,6 +378,14 @@ added matters. To illustrate::
In addition to the settable attributes listed above that apply to all
policies, this policy adds the following additional attributes:
+ .. attribute:: utf8
+
+ If ``False``, follow :rfc:`5322`, supporting non-ASCII characters in
+ headers by encoding them as "encoded words". If ``True``, follow
+ :rfc:`6532` and use ``utf-8`` encoding for headers. Messages
+ formatted in this way may be passed to SMTP servers that support
+ the ``SMTPUTF8`` extension (:rfc:`6531`).
+
.. attribute:: refold_source
If the value for a header in the ``Message`` object originated from a
diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst
index 0360de4..51a3aa3 100644
--- a/Doc/whatsnew/3.5.rst
+++ b/Doc/whatsnew/3.5.rst
@@ -356,6 +356,12 @@ email
header (``None`` if there is no such header). (Contributed by Abhilash Raj
in :issue:`21083`.)
+* A new policy option :attr:`~email.policy.EmailPolicy.utf8` can be set to
+ ``True`` to encode email headers using the UTF-8 charset instead of using
+ encoded words. This allows ``Messages`` to be formatted according to
+ :rfc:`6532` and used with an SMTP server that supports the :rfc:`6531`
+ ``SMTPUTF8`` extension. (Contributed by R. David Murray in :issue:`24211`.)
+
glob
----
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index a9bdf44..f264191 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -320,17 +320,18 @@ class TokenList(list):
return ''.join(res)
def _fold(self, folded):
+ encoding = 'utf-8' if folded.policy.utf8 else 'ascii'
for part in self.parts:
tstr = str(part)
tlen = len(tstr)
try:
- str(part).encode('us-ascii')
+ str(part).encode(encoding)
except UnicodeEncodeError:
if any(isinstance(x, errors.UndecodableBytesDefect)
for x in part.all_defects):
charset = 'unknown-8bit'
else:
- # XXX: this should be a policy setting
+ # XXX: this should be a policy setting when utf8 is False.
charset = 'utf-8'
tstr = part.cte_encode(charset, folded.policy)
tlen = len(tstr)
@@ -394,11 +395,12 @@ class UnstructuredTokenList(TokenList):
def _fold(self, folded):
last_ew = None
+ encoding = 'utf-8' if folded.policy.utf8 else 'ascii'
for part in self.parts:
tstr = str(part)
is_ew = False
try:
- str(part).encode('us-ascii')
+ str(part).encode(encoding)
except UnicodeEncodeError:
if any(isinstance(x, errors.UndecodableBytesDefect)
for x in part.all_defects):
@@ -475,12 +477,13 @@ class Phrase(TokenList):
# comment that becomes a barrier across which we can't compose encoded
# words.
last_ew = None
+ encoding = 'utf-8' if folded.policy.utf8 else 'ascii'
for part in self.parts:
tstr = str(part)
tlen = len(tstr)
has_ew = False
try:
- str(part).encode('us-ascii')
+ str(part).encode(encoding)
except UnicodeEncodeError:
if any(isinstance(x, errors.UndecodableBytesDefect)
for x in part.all_defects):
diff --git a/Lib/email/policy.py b/Lib/email/policy.py
index f0b20f4..6ac64a5 100644
--- a/Lib/email/policy.py
+++ b/Lib/email/policy.py
@@ -35,6 +35,13 @@ class EmailPolicy(Policy):
In addition to the settable attributes listed above that apply to
all Policies, this policy adds the following additional attributes:
+ utf8 -- if False (the default) message headers will be
+ serialized as ASCII, using encoded words to encode
+ any non-ASCII characters in the source strings. If
+ True, the message headers will be serialized using
+ utf8 and will not contain encoded words (see RFC
+ 6532 for more on this serialization format).
+
refold_source -- if the value for a header in the Message object
came from the parsing of some source, this attribute
indicates whether or not a generator should refold
@@ -72,6 +79,7 @@ class EmailPolicy(Policy):
"""
+ utf8 = False
refold_source = 'long'
header_factory = HeaderRegistry()
content_manager = raw_data_manager
@@ -175,9 +183,13 @@ class EmailPolicy(Policy):
refold_header setting, since there is no way to know whether the binary
data consists of single byte characters or multibyte characters.
+ If utf8 is true, headers are encoded to utf8, otherwise to ascii with
+ non-ASCII unicode rendered as encoded words.
+
"""
folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
- return folded.encode('ascii', 'surrogateescape')
+ charset = 'utf8' if self.utf8 else 'ascii'
+ return folded.encode(charset, 'surrogateescape')
def _fold(self, name, value, refold_binary=False):
if hasattr(value, 'name'):
@@ -199,3 +211,4 @@ del default.header_factory
strict = default.clone(raise_on_defect=True)
SMTP = default.clone(linesep='\r\n')
HTTP = default.clone(linesep='\r\n', max_line_length=None)
+SMTPUTF8 = SMTP.clone(utf8=True)
diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py
index 8917408..920f870 100644
--- a/Lib/test/test_email/test_generator.py
+++ b/Lib/test/test_email/test_generator.py
@@ -2,6 +2,7 @@ import io
import textwrap
import unittest
from email import message_from_string, message_from_bytes
+from email.message import EmailMessage
from email.generator import Generator, BytesGenerator
from email import policy
from test.test_email import TestEmailBase, parameterize
@@ -194,6 +195,27 @@ class TestBytesGenerator(TestGeneratorBase, TestEmailBase):
g.flatten(msg)
self.assertEqual(s.getvalue(), expected)
+ def test_smtputf8_policy(self):
+ msg = EmailMessage()
+ msg['From'] = "Páolo <főo@bar.com>"
+ msg['To'] = 'Dinsdale'
+ msg['Subject'] = 'Nudge nudge, wink, wink \u1F609'
+ msg.set_content("oh là là, know what I mean, know what I mean?")
+ expected = textwrap.dedent("""\
+ From: Páolo <főo@bar.com>
+ To: Dinsdale
+ Subject: Nudge nudge, wink, wink \u1F609
+ Content-Type: text/plain; charset="utf-8"
+ Content-Transfer-Encoding: 8bit
+ MIME-Version: 1.0
+
+ oh là là, know what I mean, know what I mean?
+ """).encode('utf-8').replace(b'\n', b'\r\n')
+ s = io.BytesIO()
+ g = BytesGenerator(s, policy=policy.SMTPUTF8)
+ g.flatten(msg)
+ self.assertEqual(s.getvalue(), expected)
+
if __name__ == '__main__':
unittest.main()
diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py
index e797f36..4b0a04e 100644
--- a/Lib/test/test_email/test_policy.py
+++ b/Lib/test/test_email/test_policy.py
@@ -27,6 +27,7 @@ class PolicyAPITests(unittest.TestCase):
# If any of these defaults change, the docs must be updated.
policy_defaults = compat32_defaults.copy()
policy_defaults.update({
+ 'utf8': False,
'raise_on_defect': False,
'header_factory': email.policy.EmailPolicy.header_factory,
'refold_source': 'long',
@@ -42,6 +43,9 @@ class PolicyAPITests(unittest.TestCase):
email.policy.default: make_defaults(policy_defaults, {}),
email.policy.SMTP: make_defaults(policy_defaults,
{'linesep': '\r\n'}),
+ email.policy.SMTPUTF8: make_defaults(policy_defaults,
+ {'linesep': '\r\n',
+ 'utf8': True}),
email.policy.HTTP: make_defaults(policy_defaults,
{'linesep': '\r\n',
'max_line_length': None}),
diff --git a/Misc/NEWS b/Misc/NEWS
index c905bcc..5ae6031 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -47,6 +47,9 @@ Core and Builtins
Library
-------
+- Issue #24211: The email library now supports RFC 6532: it can generate
+ headers using utf-8 instead of encoded words.
+
- Issue #16314: Added support for the LZMA compression in distutils.
- Issue #21804: poplib now supports RFC 6856 (UTF8).