diff options
Diffstat (limited to 'Lib/email')
-rw-r--r-- | Lib/email/_parseaddr.py | 5 | ||||
-rw-r--r-- | Lib/email/header.py | 3 | ||||
-rw-r--r-- | Lib/email/message.py | 6 | ||||
-rw-r--r-- | Lib/email/test/test_email.py | 27 | ||||
-rw-r--r-- | Lib/email/test/test_email_renamed.py | 20 |
5 files changed, 54 insertions, 7 deletions
diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py index 8047df2..81913a3 100644 --- a/Lib/email/_parseaddr.py +++ b/Lib/email/_parseaddr.py @@ -1,4 +1,4 @@ -# Copyright (C) 2002-2006 Python Software Foundation +# Copyright (C) 2002-2007 Python Software Foundation # Contact: email-sig@python.org """Email address parsing code. @@ -172,6 +172,7 @@ class AddrlistClass: self.pos = 0 self.LWS = ' \t' self.CR = '\r\n' + self.FWS = self.LWS + self.CR self.atomends = self.specials + self.LWS + self.CR # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it # is obsolete syntax. RFC 2822 requires that we recognize obsolete @@ -418,7 +419,7 @@ class AddrlistClass: plist = [] while self.pos < len(self.field): - if self.field[self.pos] in self.LWS: + if self.field[self.pos] in self.FWS: self.pos += 1 elif self.field[self.pos] == '"': plist.append(self.getquote()) diff --git a/Lib/email/header.py b/Lib/email/header.py index 3de44f9..ab0d3fc 100644 --- a/Lib/email/header.py +++ b/Lib/email/header.py @@ -39,7 +39,8 @@ ecre = re.compile(r''' \? # literal ? (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string \?= # literal ?= - ''', re.VERBOSE | re.IGNORECASE) + (?=[ \t]|$) # whitespace or the end of the string + ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE) # Field name regexp, including trailing colon, but not separating whitespace, # according to RFC 2822. Character range is from tilde to exclamation mark. diff --git a/Lib/email/message.py b/Lib/email/message.py index 9d25cb0..6fc3af1 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -238,7 +238,7 @@ class Message: self.del_param('charset') self._charset = None return - if isinstance(charset, str): + if isinstance(charset, basestring): charset = email.charset.Charset(charset) if not isinstance(charset, email.charset.Charset): raise TypeError(charset) @@ -756,7 +756,9 @@ class Message: charset = charset[2] # charset character must be in us-ascii range try: - charset = unicode(charset, 'us-ascii').encode('us-ascii') + if isinstance(charset, str): + charset = unicode(charset, 'us-ascii') + charset = charset.encode('us-ascii') except UnicodeError: return failobj # RFC 2046, $4.1.2 says charsets are not case sensitive diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index c3269d7..a2e09fa 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2006 Python Software Foundation +# Copyright (C) 2001-2007 Python Software Foundation # Contact: email-sig@python.org # email package unit tests @@ -501,6 +501,13 @@ class TestMessageAPI(TestEmailBase): msg.set_payload(x) self.assertEqual(msg.get_payload(decode=True), x) + def test_get_content_charset(self): + msg = Message() + msg.set_charset('us-ascii') + self.assertEqual('us-ascii', msg.get_content_charset()) + msg.set_charset(u'us-ascii') + self.assertEqual('us-ascii', msg.get_content_charset()) + # Test the email.Encoders module @@ -1519,6 +1526,18 @@ class TestRFC2047(unittest.TestCase): hu = make_header(dh).__unicode__() eq(hu, u'The quick brown fox jumped over the lazy dog') + def test_rfc2047_without_whitespace(self): + s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord' + dh = decode_header(s) + self.assertEqual(dh, [(s, None)]) + + def test_rfc2047_with_whitespace(self): + s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord' + dh = decode_header(s) + self.assertEqual(dh, [('Sm', None), ('\xf6', 'iso-8859-1'), + ('rg', None), ('\xe5', 'iso-8859-1'), + ('sbord', None)]) + # Test the MIMEMessage class @@ -2164,6 +2183,12 @@ class TestMiscellaneous(TestEmailBase): # formataddr() quotes the name if there's a dot in it self.assertEqual(Utils.formataddr((a, b)), y) + def test_multiline_from_comment(self): + x = """\ +Foo +\tBar <foo@example.com>""" + self.assertEqual(Utils.parseaddr(x), ('Foo Bar', 'foo@example.com')) + def test_quote_dump(self): self.assertEqual( Utils.formataddr(('A Silly; Person', 'person@dom.ain')), diff --git a/Lib/email/test/test_email_renamed.py b/Lib/email/test/test_email_renamed.py index 21061b0..7f72270 100644 --- a/Lib/email/test/test_email_renamed.py +++ b/Lib/email/test/test_email_renamed.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2006 Python Software Foundation +# Copyright (C) 2001-2007 Python Software Foundation # Contact: email-sig@python.org # email package unit tests @@ -1524,6 +1524,18 @@ class TestRFC2047(unittest.TestCase): hu = make_header(dh).__unicode__() eq(hu, u'The quick brown fox jumped over the lazy dog') + def test_rfc2047_missing_whitespace(self): + s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord' + dh = decode_header(s) + self.assertEqual(dh, [(s, None)]) + + def test_rfc2047_with_whitespace(self): + s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord' + dh = decode_header(s) + self.assertEqual(dh, [('Sm', None), ('\xf6', 'iso-8859-1'), + ('rg', None), ('\xe5', 'iso-8859-1'), + ('sbord', None)]) + # Test the MIMEMessage class @@ -2170,6 +2182,12 @@ class TestMiscellaneous(TestEmailBase): # formataddr() quotes the name if there's a dot in it self.assertEqual(utils.formataddr((a, b)), y) + def test_multiline_from_comment(self): + x = """\ +Foo +\tBar <foo@example.com>""" + self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com')) + def test_quote_dump(self): self.assertEqual( utils.formataddr(('A Silly; Person', 'person@dom.ain')), |