diff options
author | Barry Warsaw <barry@python.org> | 2003-08-19 03:53:02 (GMT) |
---|---|---|
committer | Barry Warsaw <barry@python.org> | 2003-08-19 03:53:02 (GMT) |
commit | 6208369ff3ec94604dd19500c6d14427301751ef (patch) | |
tree | 41dcc2e62bde9df1439821dba9a1d7898ef618d6 /Lib/email | |
parent | 0b6f0d88102f86121d67cb10232f89838ee907d5 (diff) | |
download | cpython-6208369ff3ec94604dd19500c6d14427301751ef.zip cpython-6208369ff3ec94604dd19500c6d14427301751ef.tar.gz cpython-6208369ff3ec94604dd19500c6d14427301751ef.tar.bz2 |
get_param(): Update the docstring to explain how CHARSET and LANGUAGE
can be None, and what to do in that situation.

get_filename(), get_boundary(), get_content_charset(): Make sure these
handle RFC 2231 headers without a CHARSET field.

Backport candidate (as was the Utils.py 1.25 change) to both Python
2.3.1 and 2.2.4 -- will do momentarily.

Diffstat (limited to 'Lib/email')
-rw-r--r-- | Lib/email/Message.py | 19 |
1 file changed, 12 insertions, 7 deletions
```diff
diff --git a/Lib/email/Message.py b/Lib/email/Message.py
index 6dfa84b..6bba6ae 100644
--- a/Lib/email/Message.py
+++ b/Lib/email/Message.py
@@ -571,13 +571,16 @@ class Message:
         Parameter keys are always compared case insensitively. The return
         value can either be a string, or a 3-tuple if the parameter was RFC
         2231 encoded. When it's a 3-tuple, the elements of the value are of
-        the form (CHARSET, LANGUAGE, VALUE), where LANGUAGE may be the empty
-        string. Your application should be prepared to deal with these, and
-        can convert the parameter to a Unicode string like so:
+        the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
+        LANGUAGE can be None, in which case you should consider VALUE to be
+        encoded in the us-ascii charset. You can usually ignore LANGUAGE.
+
+        Your application should be prepared to deal with 3-tuple return
+        values, and can convert the parameter to a Unicode string like so:
 
             param = msg.get_param('foo')
             if isinstance(param, tuple):
-                param = unicode(param[2], param[0])
+                param = unicode(param[2], param[0] or 'us-ascii')
 
         In any case, the parameter value (either the returned string, or the
         VALUE item in the 3-tuple) is always unquoted, unless unquote is set
@@ -708,7 +711,7 @@ class Message:
         if isinstance(filename, TupleType):
             # It's an RFC 2231 encoded parameter
             newvalue = _unquotevalue(filename)
-            return unicode(newvalue[2], newvalue[0])
+            return unicode(newvalue[2], newvalue[0] or 'us-ascii')
         else:
             newvalue = _unquotevalue(filename.strip())
             return newvalue
@@ -725,7 +728,8 @@ class Message:
             return failobj
         if isinstance(boundary, TupleType):
             # RFC 2231 encoded, so decode. It better end up as ascii
-            return unicode(boundary[2], boundary[0]).encode('us-ascii')
+            charset = boundary[0] or 'us-ascii'
+            return unicode(boundary[2], charset).encode('us-ascii')
         return _unquotevalue(boundary.strip())
 
     def set_boundary(self, boundary):
@@ -792,7 +796,8 @@ class Message:
             return failobj
         if isinstance(charset, TupleType):
             # RFC 2231 encoded, so decode it, and it better end up as ascii.
-            charset = unicode(charset[2], charset[0]).encode('us-ascii')
+            pcharset = charset[0] or 'us-ascii'
+            charset = unicode(charset[2], pcharset).encode('us-ascii')
         # RFC 2046, $4.1.2 says charsets are not case sensitive
         return charset.lower()
 
```