diff options
| author | Barry Warsaw <barry@python.org> | 2006-07-26 05:54:46 (GMT) | 
|---|---|---|
| committer | Barry Warsaw <barry@python.org> | 2006-07-26 05:54:46 (GMT) | 
| commit | d92ae78bdbab63a68e88fb561a2bc9555d8cef6c (patch) | |
| tree | 309a26b8b0749aac42b3b947db4df0cebfbe3258 | |
| parent | 9815f8b25238e22fc14f8305b0bb53711bbb3de6 (diff) | |
| download | cpython-d92ae78bdbab63a68e88fb561a2bc9555d8cef6c.zip cpython-d92ae78bdbab63a68e88fb561a2bc9555d8cef6c.tar.gz cpython-d92ae78bdbab63a68e88fb561a2bc9555d8cef6c.tar.bz2 | |
Forward port some fixes that were in email 2.5 but for some reason didn't make
it into email 4.0.  Specifically, in Message.get_content_charset(), handle RFC
2231 headers that name an encoding not known to Python, or whose data contains
a character that is not valid in the declared encoding: in both cases fall back
to the raw parameter value, and return failobj if the resulting charset name is
not pure us-ascii.  Also forward port the appropriate unit tests.
| -rw-r--r-- | Lib/email/message.py | 13 | ||||
| -rw-r--r-- | Lib/email/test/test_email.py | 44 | ||||
| -rw-r--r-- | Lib/email/test/test_email_renamed.py | 44 | 
3 files changed, 100 insertions, 1 deletion
| diff --git a/Lib/email/message.py b/Lib/email/message.py index 50d90b4..79c5c4c 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -747,7 +747,18 @@ class Message:          if isinstance(charset, tuple):              # RFC 2231 encoded, so decode it, and it better end up as ascii.              pcharset = charset[0] or 'us-ascii' -            charset = unicode(charset[2], pcharset).encode('us-ascii') +            try: +                # LookupError will be raised if the charset isn't known to +                # Python.  UnicodeError will be raised if the encoded text +                # contains a character not in the charset. +                charset = unicode(charset[2], pcharset).encode('us-ascii') +            except (LookupError, UnicodeError): +                charset = charset[2] +        # charset character must be in us-ascii range +        try: +            charset = unicode(charset, 'us-ascii').encode('us-ascii') +        except UnicodeError: +            return failobj          # RFC 2046, $4.1.2 says charsets are not case sensitive          return charset.lower() diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index db0c2be..13801dc 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -3086,6 +3086,50 @@ Content-Type: text/plain;          self.assertEqual(msg.get_content_charset(),                           'this is even more ***fun*** is it not.pdf') +    def test_rfc2231_bad_encoding_in_filename(self): +        m = '''\ +Content-Disposition: inline; +\tfilename*0*="bogus'xx'This%20is%20even%20more%20"; +\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; +\tfilename*2="is it not.pdf" + +''' +        msg = email.message_from_string(m) +        self.assertEqual(msg.get_filename(), +                         'This is even more ***fun*** is it not.pdf') + +    def test_rfc2231_bad_encoding_in_charset(self): +        m = """\ +Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D + +""" +        msg 
= email.message_from_string(m) +        # This should return None because non-ascii characters in the charset +        # are not allowed. +        self.assertEqual(msg.get_content_charset(), None) + +    def test_rfc2231_bad_character_in_charset(self): +        m = """\ +Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D + +""" +        msg = email.message_from_string(m) +        # This should return None because non-ascii characters in the charset +        # are not allowed. +        self.assertEqual(msg.get_content_charset(), None) + +    def test_rfc2231_bad_character_in_filename(self): +        m = '''\ +Content-Disposition: inline; +\tfilename*0*="ascii'xx'This%20is%20even%20more%20"; +\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; +\tfilename*2*="is it not.pdf%E2" + +''' +        msg = email.message_from_string(m) +        self.assertEqual(msg.get_filename(), +                         u'This is even more ***fun*** is it not.pdf\ufffd') +      def test_rfc2231_unknown_encoding(self):          m = """\  Content-Transfer-Encoding: 8bit diff --git a/Lib/email/test/test_email_renamed.py b/Lib/email/test/test_email_renamed.py index 680a725..30f39b9 100644 --- a/Lib/email/test/test_email_renamed.py +++ b/Lib/email/test/test_email_renamed.py @@ -3092,6 +3092,50 @@ Content-Type: text/plain;          self.assertEqual(msg.get_content_charset(),                           'this is even more ***fun*** is it not.pdf') +    def test_rfc2231_bad_encoding_in_filename(self): +        m = '''\ +Content-Disposition: inline; +\tfilename*0*="bogus'xx'This%20is%20even%20more%20"; +\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; +\tfilename*2="is it not.pdf" + +''' +        msg = email.message_from_string(m) +        self.assertEqual(msg.get_filename(), +                         'This is even more ***fun*** is it not.pdf') + +    def test_rfc2231_bad_encoding_in_charset(self): +        m = """\ +Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D + +""" +        msg = 
email.message_from_string(m) +        # This should return None because non-ascii characters in the charset +        # are not allowed. +        self.assertEqual(msg.get_content_charset(), None) + +    def test_rfc2231_bad_character_in_charset(self): +        m = """\ +Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D + +""" +        msg = email.message_from_string(m) +        # This should return None because non-ascii characters in the charset +        # are not allowed. +        self.assertEqual(msg.get_content_charset(), None) + +    def test_rfc2231_bad_character_in_filename(self): +        m = '''\ +Content-Disposition: inline; +\tfilename*0*="ascii'xx'This%20is%20even%20more%20"; +\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; +\tfilename*2*="is it not.pdf%E2" + +''' +        msg = email.message_from_string(m) +        self.assertEqual(msg.get_filename(), +                         u'This is even more ***fun*** is it not.pdf\ufffd') +      def test_rfc2231_unknown_encoding(self):          m = """\  Content-Transfer-Encoding: 8bit | 
