get_param(): Update the docstring to explain how CHARSET and LANGUAGE can be None, and what to do in that situation.

get_filename(), get_boundary(), get_content_charset(): Make sure these handle RFC 2231 headers without a CHARSET field.

Backport candidate (as was the Utils.py 1.25 change) to both Python 2.3.1 and 2.2.4 -- will do momentarily.
author: Barry Warsaw <barry@python.org> 2003-08-19 03:53:02 (GMT)
committer: Barry Warsaw <barry@python.org> 2003-08-19 03:53:02 (GMT)
commit: 6208369ff3ec94604dd19500c6d14427301751ef (patch)
tree: 41dcc2e62bde9df1439821dba9a1d7898ef618d6 /Lib/email
parent: 0b6f0d88102f86121d67cb10232f89838ee907d5 (diff)
download: cpython-6208369ff3ec94604dd19500c6d14427301751ef.zip
cpython-6208369ff3ec94604dd19500c6d14427301751ef.tar.gz
cpython-6208369ff3ec94604dd19500c6d14427301751ef.tar.bz2
1 file changed, 12 insertions, 7 deletions
diff --git a/Lib/email/Message.py b/Lib/email/Message.py
index 6dfa84b..6bba6ae 100644
--- a/Lib/email/Message.py
+++ b/Lib/email/Message.py
@@ -571,13 +571,16 @@ class Message:
         Parameter keys are always compared case insensitively.  The return
         value can either be a string, or a 3-tuple if the parameter was RFC
         2231 encoded.  When it's a 3-tuple, the elements of the value are of
-        the form (CHARSET, LANGUAGE, VALUE), where LANGUAGE may be the empty
-        string.  Your application should be prepared to deal with these, and
-        can convert the parameter to a Unicode string like so:
+        the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
+        LANGUAGE can be None, in which case you should consider VALUE to be
+        encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
+
+        Your application should be prepared to deal with 3-tuple return
+        values, and can convert the parameter to a Unicode string like so:
 
             param = msg.get_param('foo')
             if isinstance(param, tuple):
-                param = unicode(param[2], param[0])
+                param = unicode(param[2], param[0] or 'us-ascii')
 
         In any case, the parameter value (either the returned string, or the
         VALUE item in the 3-tuple) is always unquoted, unless unquote is set
@@ -708,7 +711,7 @@ class Message:
         if isinstance(filename, TupleType):
             # It's an RFC 2231 encoded parameter
             newvalue = _unquotevalue(filename)
-            return unicode(newvalue[2], newvalue[0])
+            return unicode(newvalue[2], newvalue[0] or 'us-ascii')
         else:
             newvalue = _unquotevalue(filename.strip())
             return newvalue
@@ -725,7 +728,8 @@ class Message:
             return failobj
         if isinstance(boundary, TupleType):
             # RFC 2231 encoded, so decode.  It better end up as ascii
-            return unicode(boundary[2], boundary[0]).encode('us-ascii')
+            charset = boundary[0] or 'us-ascii'
+            return unicode(boundary[2], charset).encode('us-ascii')
         return _unquotevalue(boundary.strip())
 
     def set_boundary(self, boundary):
@@ -792,7 +796,8 @@ class Message:
             return failobj
         if isinstance(charset, TupleType):
             # RFC 2231 encoded, so decode it, and it better end up as ascii.
-            charset = unicode(charset[2], charset[0]).encode('us-ascii')
+            pcharset = charset[0] or 'us-ascii'
+            charset = unicode(charset[2], pcharset).encode('us-ascii')
         # RFC 2046, $4.1.2 says charsets are not case sensitive
         return charset.lower()
author	Barry Warsaw <barry@python.org>	2003-08-19 03:53:02 (GMT)
committer	Barry Warsaw <barry@python.org>	2003-08-19 03:53:02 (GMT)
commit	6208369ff3ec94604dd19500c6d14427301751ef (patch)
tree	41dcc2e62bde9df1439821dba9a1d7898ef618d6 /Lib/email
parent	0b6f0d88102f86121d67cb10232f89838ee907d5 (diff)
download	cpython-6208369ff3ec94604dd19500c6d14427301751ef.zip cpython-6208369ff3ec94604dd19500c6d14427301751ef.tar.gz cpython-6208369ff3ec94604dd19500c6d14427301751ef.tar.bz2