diff options
author | R David Murray <rdmurray@bitdance.com> | 2011-04-06 13:35:57 (GMT) |
---|---|---|
committer | R David Murray <rdmurray@bitdance.com> | 2011-04-06 13:35:57 (GMT) |
commit | 8debacb51c0a90828c671e2ea1d581e236e26b4b (patch) | |
tree | 49a178dfbea5b826839206e1b573f7dcd71b7160 | |
parent | a0b1c77a19ecfe58d34d10ba4b60f9bb4ad217f0 (diff) | |
download | cpython-8debacb51c0a90828c671e2ea1d581e236e26b4b.zip cpython-8debacb51c0a90828c671e2ea1d581e236e26b4b.tar.gz cpython-8debacb51c0a90828c671e2ea1d581e236e26b4b.tar.bz2 |
#1690608: make formataddr RFC2047 aware.
Patch by Torsten Becker.
-rw-r--r-- | Doc/library/email.util.rst | 9 | ||||
-rw-r--r-- | Lib/email/utils.py | 28 | ||||
-rw-r--r-- | Lib/test/test_email/test_email.py | 40 | ||||
-rw-r--r-- | Misc/ACKS | 1 | ||||
-rw-r--r-- | Misc/NEWS | 4 |
5 files changed, 75 insertions, 7 deletions
diff --git a/Doc/library/email.util.rst b/Doc/library/email.util.rst index f7b777a..4d96857 100644 --- a/Doc/library/email.util.rst +++ b/Doc/library/email.util.rst @@ -29,13 +29,20 @@ There are several useful utilities provided in the :mod:`email.utils` module: fails, in which case a 2-tuple of ``('', '')`` is returned. -.. function:: formataddr(pair) +.. function:: formataddr(pair, charset='utf-8') The inverse of :meth:`parseaddr`, this takes a 2-tuple of the form ``(realname, email_address)`` and returns the string value suitable for a :mailheader:`To` or :mailheader:`Cc` header. If the first element of *pair* is false, then the second element is returned unmodified. + Optional *charset* is the character set that will be used in the :rfc:`2047` + encoding of the ``realname`` if the ``realname`` contains non-ASCII + characters. Can be an instance of :class:`str` or a + :class:`~email.charset.Charset`. Defaults to ``utf-8``. + + .. versionchanged:: 3.3 Added the *charset* option. + .. function:: getaddresses(fieldvalues) diff --git a/Lib/email/utils.py b/Lib/email/utils.py index ac4da37..82f7283 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -42,6 +42,7 @@ from quopri import decodestring as _qdecode # Intrapackage imports from email.encoders import _bencode, _qencode +from email.charset import Charset COMMASPACE = ', ' EMPTYSTRING = '' @@ -56,21 +57,36 @@ escapesre = re.compile(r'[][\\()"]') # Helpers -def formataddr(pair): +def formataddr(pair, charset='utf-8'): """The inverse of parseaddr(), this takes a 2-tuple of the form (realname, email_address) and returns the string value suitable for an RFC 2822 From, To or Cc header. If the first element of pair is false, then the second element is returned unmodified. + + Optional charset if given is the character set that is used to encode + realname in case realname is not ASCII safe. Can be an instance of str or + a Charset-like object which has a header_encode method. Default is + 'utf-8'. 
""" name, address = pair + # The address MUST (per RFC) be ascii, so throw a UnicodeError if it isn't. + address.encode('ascii') if name: - quotes = '' - if specialsre.search(name): - quotes = '"' - name = escapesre.sub(r'\\\g<0>', name) - return '%s%s%s <%s>' % (quotes, name, quotes, address) + try: + name.encode('ascii') + except UnicodeEncodeError: + if isinstance(charset, str): + charset = Charset(charset) + encoded_name = charset.header_encode(name) + return "%s <%s>" % (encoded_name, address) + else: + quotes = '' + if specialsre.search(name): + quotes = '"' + name = escapesre.sub(r'\\\g<0>', name) + return '%s%s%s <%s>' % (quotes, name, quotes, address) return address diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 44acc9f..8530e5e 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -2376,6 +2376,46 @@ class TestMiscellaneous(TestEmailBase): b = 'person@dom.ain' self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) + def test_quotes_unicode_names(self): + # issue 1690608. email.utils.formataddr() should be rfc2047 aware. + name = "H\u00e4ns W\u00fcrst" + addr = 'person@dom.ain' + utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>" + latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>" + self.assertEqual(utils.formataddr((name, addr)), utf8_base64) + self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'), + latin1_quopri) + + def test_accepts_any_charset_like_object(self): + # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 
+ name = "H\u00e4ns W\u00fcrst" + addr = 'person@dom.ain' + utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>" + foobar = "FOOBAR" + class CharsetMock: + def header_encode(self, string): + return foobar + mock = CharsetMock() + mock_expected = "%s <%s>" % (foobar, addr) + self.assertEqual(utils.formataddr((name, addr), mock), mock_expected) + self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')), + utf8_base64) + + def test_invalid_charset_like_object_raises_error(self): + # issue 1690608. email.utils.formataddr() should be rfc2047 aware. + name = "H\u00e4ns W\u00fcrst" + addr = 'person@dom.ain' + # An object without a header_encode method: + bad_charset = object() + self.assertRaises(AttributeError, utils.formataddr, (name, addr), + bad_charset) + + def test_unicode_address_raises_error(self): + # issue 1690608. email.utils.formataddr() should be rfc2047 aware. + addr = 'pers\u00f6n@dom.in' + self.assertRaises(UnicodeError, utils.formataddr, (None, addr)) + self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr)) + def test_name_with_dot(self): x = 'John X. Doe <jxd@example.com>' y = '"John X. Doe" <jxd@example.com>' @@ -979,3 +979,4 @@ Uwe Zessin Kai Zhu Tarek Ziadé Peter Åstrand +Torsten Becker @@ -97,6 +97,10 @@ Library - Issue #11605: email.parser.BytesFeedParser was incorrectly converting multipart subpararts with an 8bit CTE into unicode instead of preserving the bytes. +- Issue #1690608: email.utils.formataddr is now RFC2047 aware: it now has a + charset parameter that defaults to utf-8, which is used as the charset for RFC + 2047 encoding when the realname contains non-ASCII characters. + - Issue #10963: Ensure that subprocess.communicate() never raises EPIPE. - Issue #10791: Implement missing method GzipFile.read1(), allowing GzipFile |