summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: R David Murray <rdmurray@bitdance.com> 2011-04-06 13:35:57 (GMT)
committer: R David Murray <rdmurray@bitdance.com> 2011-04-06 13:35:57 (GMT)
commit: 8debacb51c0a90828c671e2ea1d581e236e26b4b (patch)
tree: 49a178dfbea5b826839206e1b573f7dcd71b7160
parent: a0b1c77a19ecfe58d34d10ba4b60f9bb4ad217f0 (diff)
download: cpython-8debacb51c0a90828c671e2ea1d581e236e26b4b.zip
cpython-8debacb51c0a90828c671e2ea1d581e236e26b4b.tar.gz
cpython-8debacb51c0a90828c671e2ea1d581e236e26b4b.tar.bz2
#1690608: make formataddr RFC2047 aware.
Patch by Torsten Becker.
-rw-r--r-- Doc/library/email.util.rst 9
-rw-r--r-- Lib/email/utils.py 28
-rw-r--r-- Lib/test/test_email/test_email.py 40
-rw-r--r-- Misc/ACKS 1
-rw-r--r-- Misc/NEWS 4
5 files changed, 75 insertions, 7 deletions
diff --git a/Doc/library/email.util.rst b/Doc/library/email.util.rst
index f7b777a..4d96857 100644
--- a/Doc/library/email.util.rst
+++ b/Doc/library/email.util.rst
@@ -29,13 +29,20 @@ There are several useful utilities provided in the :mod:`email.utils` module:
fails, in which case a 2-tuple of ``('', '')`` is returned.
-.. function:: formataddr(pair)
+.. function:: formataddr(pair, charset='utf-8')
The inverse of :meth:`parseaddr`, this takes a 2-tuple of the form ``(realname,
email_address)`` and returns the string value suitable for a :mailheader:`To` or
:mailheader:`Cc` header. If the first element of *pair* is false, then the
second element is returned unmodified.
+ Optional *charset* is the character set that will be used in the :rfc:`2047`
+ encoding of the ``realname`` if the ``realname`` contains non-ASCII
+ characters. Can be an instance of :class:`str` or a
+ :class:`~email.charset.Charset`. Defaults to ``utf-8``.
+
+ .. versionchanged:: 3.3 added the *charset* option
+
.. function:: getaddresses(fieldvalues)
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index ac4da37..82f7283 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -42,6 +42,7 @@ from quopri import decodestring as _qdecode
# Intrapackage imports
from email.encoders import _bencode, _qencode
+from email.charset import Charset
COMMASPACE = ', '
EMPTYSTRING = ''
@@ -56,21 +57,36 @@ escapesre = re.compile(r'[][\\()"]')
# Helpers
-def formataddr(pair):
+def formataddr(pair, charset='utf-8'):
"""The inverse of parseaddr(), this takes a 2-tuple of the form
(realname, email_address) and returns the string value suitable
for an RFC 2822 From, To or Cc header.
If the first element of pair is false, then the second element is
returned unmodified.
+
+ Optional charset if given is the character set that is used to encode
+ realname in case realname is not ASCII safe. Can be an instance of str or
+ a Charset-like object which has a header_encode method. Default is
+ 'utf-8'.
"""
name, address = pair
+ # The address MUST (per RFC) be ascii, so throw a UnicodeError if it isn't.
+ address.encode('ascii')
if name:
- quotes = ''
- if specialsre.search(name):
- quotes = '"'
- name = escapesre.sub(r'\\\g<0>', name)
- return '%s%s%s <%s>' % (quotes, name, quotes, address)
+ try:
+ name.encode('ascii')
+ except UnicodeEncodeError:
+ if isinstance(charset, str):
+ charset = Charset(charset)
+ encoded_name = charset.header_encode(name)
+ return "%s <%s>" % (encoded_name, address)
+ else:
+ quotes = ''
+ if specialsre.search(name):
+ quotes = '"'
+ name = escapesre.sub(r'\\\g<0>', name)
+ return '%s%s%s <%s>' % (quotes, name, quotes, address)
return address
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 44acc9f..8530e5e 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -2376,6 +2376,46 @@ class TestMiscellaneous(TestEmailBase):
b = 'person@dom.ain'
self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
+ def test_quotes_unicode_names(self):
+ # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
+ name = "H\u00e4ns W\u00fcrst"
+ addr = 'person@dom.ain'
+ utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
+ latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
+ self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
+ self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
+ latin1_quopri)
+
+ def test_accepts_any_charset_like_object(self):
+ # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
+ name = "H\u00e4ns W\u00fcrst"
+ addr = 'person@dom.ain'
+ utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
+ foobar = "FOOBAR"
+ class CharsetMock:
+ def header_encode(self, string):
+ return foobar
+ mock = CharsetMock()
+ mock_expected = "%s <%s>" % (foobar, addr)
+ self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
+ self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
+ utf8_base64)
+
+ def test_invalid_charset_like_object_raises_error(self):
+ # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
+ name = "H\u00e4ns W\u00fcrst"
+ addr = 'person@dom.ain'
+ # An object without a header_encode method:
+ bad_charset = object()
+ self.assertRaises(AttributeError, utils.formataddr, (name, addr),
+ bad_charset)
+
+ def test_unicode_address_raises_error(self):
+ # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
+ addr = 'pers\u00f6n@dom.in'
+ self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
+ self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
+
def test_name_with_dot(self):
x = 'John X. Doe <jxd@example.com>'
y = '"John X. Doe" <jxd@example.com>'
diff --git a/Misc/ACKS b/Misc/ACKS
index b1c2eea..f3555f3 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -979,3 +979,4 @@ Uwe Zessin
Kai Zhu
Tarek Ziadé
Peter Åstrand
+Torsten Becker
diff --git a/Misc/NEWS b/Misc/NEWS
index 44d4e9e..37eb250 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -97,6 +97,10 @@ Library
- Issue #11605: email.parser.BytesFeedParser was incorrectly converting multipart
subparts with an 8bit CTE into unicode instead of preserving the bytes.
+- Issue #1690608: email.utils.formataddr is now RFC2047 aware: it now has a
+ charset parameter that defaults to utf-8 which is used as the charset for RFC
+ 2047 encoding when the realname contains non-ASCII characters.
+
- Issue #10963: Ensure that subprocess.communicate() never raises EPIPE.
- Issue #10791: Implement missing method GzipFile.read1(), allowing GzipFile