summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Barry Warsaw <barry@python.org> 2006-07-26 05:54:46 (GMT)
committer: Barry Warsaw <barry@python.org> 2006-07-26 05:54:46 (GMT)
commit: d92ae78bdbab63a68e88fb561a2bc9555d8cef6c (patch)
tree: 309a26b8b0749aac42b3b947db4df0cebfbe3258
parent: 9815f8b25238e22fc14f8305b0bb53711bbb3de6 (diff)
download: cpython-d92ae78bdbab63a68e88fb561a2bc9555d8cef6c.zip
cpython-d92ae78bdbab63a68e88fb561a2bc9555d8cef6c.tar.gz
cpython-d92ae78bdbab63a68e88fb561a2bc9555d8cef6c.tar.bz2
Forward port some fixes that were in email 2.5 but for some reason didn't make
it into email 4.0. Specifically, in Message.get_content_charset(), handle RFC 2231 headers that contain an encoding not known to Python, or a character in the data that isn't in the charset encoding. Also forward port the appropriate unit tests.
-rw-r--r-- Lib/email/message.py 13
-rw-r--r-- Lib/email/test/test_email.py 44
-rw-r--r-- Lib/email/test/test_email_renamed.py 44
3 files changed, 100 insertions, 1 deletions
diff --git a/Lib/email/message.py b/Lib/email/message.py
index 50d90b4..79c5c4c 100644
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -747,7 +747,18 @@ class Message:
if isinstance(charset, tuple):
# RFC 2231 encoded, so decode it, and it better end up as ascii.
pcharset = charset[0] or 'us-ascii'
- charset = unicode(charset[2], pcharset).encode('us-ascii')
+ try:
+ # LookupError will be raised if the charset isn't known to
+ # Python. UnicodeError will be raised if the encoded text
+ # contains a character not in the charset.
+ charset = unicode(charset[2], pcharset).encode('us-ascii')
+ except (LookupError, UnicodeError):
+ charset = charset[2]
+ # charset character must be in us-ascii range
+ try:
+ charset = unicode(charset, 'us-ascii').encode('us-ascii')
+ except UnicodeError:
+ return failobj
# RFC 2046, $4.1.2 says charsets are not case sensitive
return charset.lower()
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py
index db0c2be..13801dc 100644
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -3086,6 +3086,50 @@ Content-Type: text/plain;
self.assertEqual(msg.get_content_charset(),
'this is even more ***fun*** is it not.pdf')
+ def test_rfc2231_bad_encoding_in_filename(self):
+ m = '''\
+Content-Disposition: inline;
+\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
+\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
+\tfilename*2="is it not.pdf"
+
+'''
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_filename(),
+ 'This is even more ***fun*** is it not.pdf')
+
+ def test_rfc2231_bad_encoding_in_charset(self):
+ m = """\
+Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
+
+"""
+ msg = email.message_from_string(m)
+ # This should return None because non-ascii characters in the charset
+ # are not allowed.
+ self.assertEqual(msg.get_content_charset(), None)
+
+ def test_rfc2231_bad_character_in_charset(self):
+ m = """\
+Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
+
+"""
+ msg = email.message_from_string(m)
+ # This should return None because non-ascii characters in the charset
+ # are not allowed.
+ self.assertEqual(msg.get_content_charset(), None)
+
+ def test_rfc2231_bad_character_in_filename(self):
+ m = '''\
+Content-Disposition: inline;
+\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
+\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
+\tfilename*2*="is it not.pdf%E2"
+
+'''
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_filename(),
+ u'This is even more ***fun*** is it not.pdf\ufffd')
+
def test_rfc2231_unknown_encoding(self):
m = """\
Content-Transfer-Encoding: 8bit
diff --git a/Lib/email/test/test_email_renamed.py b/Lib/email/test/test_email_renamed.py
index 680a725..30f39b9 100644
--- a/Lib/email/test/test_email_renamed.py
+++ b/Lib/email/test/test_email_renamed.py
@@ -3092,6 +3092,50 @@ Content-Type: text/plain;
self.assertEqual(msg.get_content_charset(),
'this is even more ***fun*** is it not.pdf')
+ def test_rfc2231_bad_encoding_in_filename(self):
+ m = '''\
+Content-Disposition: inline;
+\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
+\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
+\tfilename*2="is it not.pdf"
+
+'''
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_filename(),
+ 'This is even more ***fun*** is it not.pdf')
+
+ def test_rfc2231_bad_encoding_in_charset(self):
+ m = """\
+Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
+
+"""
+ msg = email.message_from_string(m)
+ # This should return None because non-ascii characters in the charset
+ # are not allowed.
+ self.assertEqual(msg.get_content_charset(), None)
+
+ def test_rfc2231_bad_character_in_charset(self):
+ m = """\
+Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
+
+"""
+ msg = email.message_from_string(m)
+ # This should return None because non-ascii characters in the charset
+ # are not allowed.
+ self.assertEqual(msg.get_content_charset(), None)
+
+ def test_rfc2231_bad_character_in_filename(self):
+ m = '''\
+Content-Disposition: inline;
+\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
+\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
+\tfilename*2*="is it not.pdf%E2"
+
+'''
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_filename(),
+ u'This is even more ***fun*** is it not.pdf\ufffd')
+
def test_rfc2231_unknown_encoding(self):
m = """\
Content-Transfer-Encoding: 8bit