diff options
author | Barry Warsaw <barry@python.org> | 2002-04-10 21:01:31 (GMT) |
---|---|---|
committer | Barry Warsaw <barry@python.org> | 2002-04-10 21:01:31 (GMT) |
commit | 409a4c08b545aa064cf8fe3b8de51404756a301e (patch) | |
tree | 06cf8fe44e1fe28fbc0147635ec41961f2df6515 /Lib/email/Message.py | |
parent | 68e69338ae19c37bd3e69cb76e107bfa76231e06 (diff) | |
download | cpython-409a4c08b545aa064cf8fe3b8de51404756a301e.zip cpython-409a4c08b545aa064cf8fe3b8de51404756a301e.tar.gz cpython-409a4c08b545aa064cf8fe3b8de51404756a301e.tar.bz2 |
Sync'ing with standalone email package 2.0.1. This adds support for
non-us-ascii character sets in headers and bodies. Some API changes
(with DeprecationWarnings for the old APIs). Better RFC-compliant
implementations of base64 and quoted-printable.
Updated test cases. Documentation updates to follow (after I finish
writing them ;).
Diffstat (limited to 'Lib/email/Message.py')
-rw-r--r-- | Lib/email/Message.py | 223 |
1 files changed, 205 insertions, 18 deletions
diff --git a/Lib/email/Message.py b/Lib/email/Message.py index 91931a1..71d10c4 100644 --- a/Lib/email/Message.py +++ b/Lib/email/Message.py @@ -1,23 +1,47 @@ -# Copyright (C) 2001 Python Software Foundation +# Copyright (C) 2001,2002 Python Software Foundation # Author: barry@zope.com (Barry Warsaw) """Basic message object for the email package object model. """ -from __future__ import generators - import re -import base64 -import quopri +import warnings from cStringIO import StringIO -from types import ListType +from types import ListType, StringType # Intrapackage imports import Errors import Utils +import Charset SEMISPACE = '; ' + +# Regular expression used to split header parameters. BAW: this may be too +# simple. It isn't strictly RFC 2045 (section 5.1) compliant, but it catches +# most headers found in the wild. We may eventually need a full fledged +# parser eventually. paramre = re.compile(r'\s*;\s*') +# Regular expression that matches `special' characters in parameters, the +# existance of which force quoting of the parameter value. +tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') + + + +# Helper function +def _formatparam(param, value=None, quote=1): + """Convenience function to format and return a key=value pair. + + Will quote the value if needed or if quote is true. + """ + if value is not None and len(value) > 0: + # BAW: Please check this. I think that if quote is set it should + # force quoting even if not necessary. + if quote or tspecials.search(value): + return '%s="%s"' % (param, Utils.quote(value)) + else: + return '%s=%s' % (param, value) + else: + return param @@ -39,6 +63,7 @@ class Message: self._headers = [] self._unixfrom = None self._payload = None + self._charset = None # Defaults for multipart messages self.preamble = self.epilogue = None @@ -83,6 +108,8 @@ class Message: If the current payload is empty, then the current payload will be made a scalar, set to the given value. """ + warnings.warn('add_payload() is deprecated, use attach() instead.', + DeprecationWarning, 2) if self._payload is None: self._payload = payload elif type(self._payload) is ListType: @@ -93,8 +120,18 @@ class Message: else: self._payload = [self._payload, payload] - # A useful synonym - attach = add_payload + def attach(self, payload): + """Add the given payload to the current payload. + + The current payload will always be a list of objects after this method + is called. If you want to set the payload to a scalar object + (e.g. because you're attaching a message/rfc822 subpart), use + set_payload() instead. + """ + if self._payload is None: + self._payload = [payload] + else: + self._payload.append(payload) def get_payload(self, i=None, decode=0): """Return the current payload exactly as is. @@ -128,10 +165,58 @@ class Message: return payload - def set_payload(self, payload): - """Set the payload to the given value.""" + def set_payload(self, payload, charset=None): + """Set the payload to the given value. + + Optionally set the charset, which must be a Charset instance.""" self._payload = payload + if charset is not None: + self.set_charset(charset) + + def set_charset(self, charset): + """Set the charset of the payload to a given character set. + + charset can be a string or a Charset object. If it is a string, it + will be converted to a Charset object by calling Charset's + constructor. If charset is None, the charset parameter will be + removed from the Content-Type: field. Anything else will generate a + TypeError. + + The message will be assumed to be a text message encoded with + charset.input_charset. It will be converted to charset.output_charset + and encoded properly, if needed, when generating the plain text + representation of the message. MIME headers (MIME-Version, + Content-Type, Content-Transfer-Encoding) will be added as needed. + """ + if charset is None: + self.del_param('charset') + self._charset = None + return + if isinstance(charset, StringType): + charset = Charset.Charset(charset) + if not isinstance(charset, Charset.Charset): + raise TypeError, charset + # BAW: should we accept strings that can serve as arguments to the + # Charset constructor? + self._charset = charset + if not self.has_key('MIME-Version'): + self.add_header('MIME-Version', '1.0') + if not self.has_key('Content-Type'): + self.add_header('Content-Type', 'text/plain', + charset=charset.get_output_charset()) + else: + self.set_param('charset', charset.get_output_charset()) + if not self.has_key('Content-Transfer-Encoding'): + cte = charset.get_body_encoding() + if callable(cte): + cte(self) + else: + self.add_header('Content-Transfer-Encoding', cte) + def get_charset(self): + """Return the Charset object associated with the message's payload.""" + return self._charset + # # MAPPING INTERFACE (partial) # @@ -257,7 +342,7 @@ class Message: if v is None: parts.append(k.replace('_', '-')) else: - parts.append('%s="%s"' % (k.replace('_', '-'), v)) + parts.append(_formatparam(k.replace('_', '-'), v)) if _value is not None: parts.insert(0, _value) self._headers.append((_name, SEMISPACE.join(parts))) @@ -308,6 +393,8 @@ class Message: for p in paramre.split(value): try: name, val = p.split('=', 1) + name = name.rstrip() + val = val.lstrip() except ValueError: # Must have been a bare attribute name = p @@ -315,26 +402,29 @@ class Message: params.append((name, val)) return params - def get_params(self, failobj=None, header='content-type'): + def get_params(self, failobj=None, header='content-type', unquote=1): """Return the message's Content-Type: parameters, as a list. The elements of the returned list are 2-tuples of key/value pairs, as split on the `=' sign. The left hand side of the `=' is the key, while the right hand side is the value. If there is no `=' sign in the parameter the value is the empty string. The value is always - unquoted. + unquoted, unless unquote is set to a false value. Optional failobj is the object to return if there is no Content-Type: header. Optional header is the header to search instead of - Content-Type: + Content-Type:. """ missing = [] params = self._get_params_preserve(missing, header) if params is missing: return failobj - return [(k, Utils.unquote(v)) for k, v in params] + if unquote: + return [(k, Utils.unquote(v)) for k, v in params] + else: + return params - def get_param(self, param, failobj=None, header='content-type'): + def get_param(self, param, failobj=None, header='content-type', unquote=1): """Return the parameter value if found in the Content-Type: header. Optional failobj is the object to return if there is no Content-Type: @@ -342,15 +432,112 @@ class Message: Content-Type: Parameter keys are always compared case insensitively. Values are - always unquoted. + always unquoted, unless unquote is set to a false value. """ if not self.has_key(header): return failobj for k, v in self._get_params_preserve(failobj, header): if k.lower() == param.lower(): - return Utils.unquote(v) + if unquote: + return Utils.unquote(v) + else: + return v return failobj + def set_param(self, param, value, header='Content-Type', requote=1): + """Set a parameter in the Content-Type: header. + + If the parameter already exists in the header, its value will be + replaced with the new value. + + If header is Content-Type: and has not yet been defined in this + message, it will be set to "text/plain" and the new parameter and + value will be appended, as per RFC 2045. + + An alternate header can specified in the header argument, and + all parameters will be quoted as appropriate unless requote is + set to a false value. + """ + if not self.has_key(header) and header.lower() == 'content-type': + ctype = 'text/plain' + else: + ctype = self.get(header) + if not self.get_param(param, header=header): + if not ctype: + ctype = _formatparam(param, value, requote) + else: + ctype = SEMISPACE.join( + [ctype, _formatparam(param, value, requote)]) + else: + ctype = '' + for old_param, old_value in self.get_params(header=header, + unquote=requote): + append_param = '' + if old_param.lower() == param.lower(): + append_param = _formatparam(param, value, requote) + else: + append_param = _formatparam(old_param, old_value, requote) + if not ctype: + ctype = append_param + else: + ctype = SEMISPACE.join([ctype, append_param]) + if ctype <> self.get(header): + del self[header] + self[header] = ctype + + def del_param(self, param, header='content-type', requote=1): + """Remove the given parameter completely from the Content-Type header. + + The header will be re-written in place without param or its value. + All values will be quoted as appropriate unless requote is set to a + false value. + """ + if not self.has_key(header): + return + new_ctype = '' + for p, v in self.get_params(header, unquote=requote): + if p.lower() <> param.lower(): + if not new_ctype: + new_ctype = _formatparam(p, v, requote) + else: + new_ctype = SEMISPACE.join([new_ctype, + _formatparam(p, v, requote)]) + if new_ctype <> self.get(header): + del self[header] + self[header] = new_ctype + + def set_type(self, type, header='Content-Type', requote=1): + """Set the main type and subtype for the Content-Type: header. + + type must be a string in the form "maintype/subtype", otherwise a + ValueError is raised. + + This method replaces the Content-Type: header, keeping all the + parameters in place. If requote is false, this leaves the existing + header's quoting as is. Otherwise, the parameters will be quoted (the + default). + + An alternate header can be specified in the header argument. When the + Content-Type: header is set, we'll always also add a MIME-Version: + header. + """ + # BAW: should we be strict? + if not type.count('/') == 1: + raise ValueError + # Set the Content-Type: you get a MIME-Version: + if header.lower() == 'content-type': + del self['mime-version'] + self['MIME-Version'] = '1.0' + if not self.has_key(header): + self[header] = type + return + params = self.get_params(header, unquote=requote) + del self[header] + self[header] = type + # Skip the first param; it's the old type. + for p, v in params[1:]: + self.set_param(p, v, header, requote) + def get_filename(self, failobj=None): """Return the filename associated with the payload if present. |