summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Barry Warsaw <barry@python.org> 2003-08-19 04:56:47 (GMT)
committer: Barry Warsaw <barry@python.org> 2003-08-19 04:56:47 (GMT)
commit: 27dd1d476ebe07bc07cb5b1dd66660e171261905 (patch)
tree: 8b359f1bc0ccfecb43f7ad34aff3fdfd4f34605f
parent: 8374ad281219d596b416565f002a260b799bbfa6 (diff)
download: cpython-27dd1d476ebe07bc07cb5b1dd66660e171261905.zip
cpython-27dd1d476ebe07bc07cb5b1dd66660e171261905.tar.gz
cpython-27dd1d476ebe07bc07cb5b1dd66660e171261905.tar.bz2
Backporting email 2.5.4 fixes from the trunk.
-rw-r--r-- Lib/email/Message.py | 19
-rw-r--r-- Lib/email/Utils.py | 2
-rw-r--r-- Lib/email/__init__.py | 2
-rw-r--r-- Lib/email/test/test_email.py | 37
4 files changed, 51 insertions, 9 deletions
diff --git a/Lib/email/Message.py b/Lib/email/Message.py
index 0f513f5..3a6f32e 100644
--- a/Lib/email/Message.py
+++ b/Lib/email/Message.py
@@ -571,13 +571,16 @@ class Message:
Parameter keys are always compared case insensitively. The return
value can either be a string, or a 3-tuple if the parameter was RFC
2231 encoded. When it's a 3-tuple, the elements of the value are of
- the form (CHARSET, LANGUAGE, VALUE), where LANGUAGE may be the empty
- string. Your application should be prepared to deal with these, and
- can convert the parameter to a Unicode string like so:
+ the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
+ LANGUAGE can be None, in which case you should consider VALUE to be
+ encoded in the us-ascii charset. You can usually ignore LANGUAGE.
+
+ Your application should be prepared to deal with 3-tuple return
+ values, and can convert the parameter to a Unicode string like so:
param = msg.get_param('foo')
if isinstance(param, tuple):
- param = unicode(param[2], param[0])
+ param = unicode(param[2], param[0] or 'us-ascii')
In any case, the parameter value (either the returned string, or the
VALUE item in the 3-tuple) is always unquoted, unless unquote is set
@@ -708,7 +711,7 @@ class Message:
if isinstance(filename, TupleType):
# It's an RFC 2231 encoded parameter
newvalue = _unquotevalue(filename)
- return unicode(newvalue[2], newvalue[0])
+ return unicode(newvalue[2], newvalue[0] or 'us-ascii')
else:
newvalue = _unquotevalue(filename.strip())
return newvalue
@@ -725,7 +728,8 @@ class Message:
return failobj
if isinstance(boundary, TupleType):
# RFC 2231 encoded, so decode. It better end up as ascii
- return unicode(boundary[2], boundary[0]).encode('us-ascii')
+ charset = boundary[0] or 'us-ascii'
+ return unicode(boundary[2], charset).encode('us-ascii')
return _unquotevalue(boundary.strip())
def set_boundary(self, boundary):
@@ -792,7 +796,8 @@ class Message:
return failobj
if isinstance(charset, TupleType):
# RFC 2231 encoded, so decode it, and it better end up as ascii.
- charset = unicode(charset[2], charset[0]).encode('us-ascii')
+ pcharset = charset[0] or 'us-ascii'
+ charset = unicode(charset[2], pcharset).encode('us-ascii')
# RFC 2046, $4.1.2 says charsets are not case sensitive
return charset.lower()
diff --git a/Lib/email/Utils.py b/Lib/email/Utils.py
index 2b8b94f..a409e16 100644
--- a/Lib/email/Utils.py
+++ b/Lib/email/Utils.py
@@ -280,7 +280,7 @@ def decode_rfc2231(s):
import urllib
parts = s.split("'", 2)
if len(parts) == 1:
- return None, None, s
+ return None, None, urllib.unquote(s)
charset, language, s = parts
return charset, language, urllib.unquote(s)
diff --git a/Lib/email/__init__.py b/Lib/email/__init__.py
index b5d8d72..bfd6105 100644
--- a/Lib/email/__init__.py
+++ b/Lib/email/__init__.py
@@ -4,7 +4,7 @@
"""A package for parsing, handling, and generating email messages.
"""
-__version__ = '2.5.3'
+__version__ = '2.5.4'
__all__ = [
'base64MIME',
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py
index a0586e5..37e6230 100644
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -2650,6 +2650,43 @@ Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOC
self.assertEqual(msg.get_param('NAME'),
(None, None, 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm'))
+ def test_rfc2231_no_language_or_charset_in_filename(self):
+ m = '''\
+Content-Disposition: inline;
+\tfilename*0="This%20is%20even%20more%20";
+\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
+\tfilename*2="is it not.pdf"
+
+'''
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_filename(),
+ 'This is even more ***fun*** is it not.pdf')
+
+ def test_rfc2231_no_language_or_charset_in_boundary(self):
+ m = '''\
+Content-Type: multipart/alternative;
+\tboundary*0="This%20is%20even%20more%20";
+\tboundary*1="%2A%2A%2Afun%2A%2A%2A%20";
+\tboundary*2="is it not.pdf"
+
+'''
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_boundary(),
+ 'This is even more ***fun*** is it not.pdf')
+
+ def test_rfc2231_no_language_or_charset_in_charset(self):
+ # This is a nonsensical charset value, but tests the code anyway
+ m = '''\
+Content-Type: text/plain;
+\tcharset*0="This%20is%20even%20more%20";
+\tcharset*1="%2A%2A%2Afun%2A%2A%2A%20";
+\tcharset*2="is it not.pdf"
+
+'''
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_content_charset(),
+ 'this is even more ***fun*** is it not.pdf')
+
def _testclasses():