diff options
Diffstat (limited to 'Lib/email/Message.py')
-rw-r--r-- | Lib/email/Message.py | 422 |
1 files changed, 422 insertions, 0 deletions
diff --git a/Lib/email/Message.py b/Lib/email/Message.py new file mode 100644 index 0000000..35e2dc5 --- /dev/null +++ b/Lib/email/Message.py @@ -0,0 +1,422 @@ +# Copyright (C) 2001 Python Software Foundation +# Author: barry@zope.com (Barry Warsaw) + +"""Basic message object for the email package object model. +""" + +from __future__ import generators + +import re +import base64 +import quopri +from cStringIO import StringIO +from types import ListType + +SEMISPACE = '; ' + +# Intrapackage imports +import Errors +import Utils + + + +class Message: + """Basic message object for use inside the object tree. + + A message object is defined as something that has a bunch of RFC 2822 + headers and a payload. If the body of the message is a multipart, then + the payload is a list of Messages, otherwise it is a string. + + These objects implement part of the `mapping' interface, which assumes + there is exactly one occurrance of the header per message. Some headers + do in fact appear multiple times (e.g. Received:) and for those headers, + you must use the explicit API to set or get all the headers. Not all of + the mapping methods are implemented. + + """ + def __init__(self): + self._headers = [] + self._unixfrom = None + self._payload = None + # Defaults for multipart messages + self.preamble = self.epilogue = None + + def __str__(self): + """Return the entire formatted message as a string. + This includes the headers, body, and `unixfrom' line. + """ + return self.as_string(unixfrom=1) + + def as_string(self, unixfrom=0): + """Return the entire formatted message as a string. + Optional `unixfrom' when true, means include the Unix From_ envelope + header. + """ + from Generator import Generator + fp = StringIO() + g = Generator(fp) + g(self, unixfrom=unixfrom) + return fp.getvalue() + + def is_multipart(self): + """Return true if the message consists of multiple parts.""" + if type(self._payload) is ListType: + return 1 + return 0 + + # + # Unix From_ line + # + def set_unixfrom(self, unixfrom): + self._unixfrom = unixfrom + + def get_unixfrom(self): + return self._unixfrom + + # + # Payload manipulation. + # + def add_payload(self, payload): + """Add the given payload to the current payload. + + If the current payload is empty, then the current payload will be made + a scalar, set to the given value. + """ + if self._payload is None: + self._payload = payload + elif type(self._payload) is ListType: + self._payload.append(payload) + elif self.get_main_type() not in (None, 'multipart'): + raise Errors.MultipartConversionError( + 'Message main Content-Type: must be "multipart" or missing') + else: + self._payload = [self._payload, payload] + + # A useful synonym + attach = add_payload + + def get_payload(self, i=None, decode=0): + """Return the current payload exactly as is. + + Optional i returns that index into the payload. + + Optional decode is a flag indicating whether the payload should be + decoded or not, according to the Content-Transfer-Encoding: header. + When true and the message is not a multipart, the payload will be + decoded if this header's value is `quoted-printable' or `base64'. If + some other encoding is used, or the header is missing, the payload is + returned as-is (undecoded). If the message is a multipart and the + decode flag is true, then None is returned. + """ + if i is None: + payload = self._payload + elif type(self._payload) is not ListType: + raise TypeError, i + else: + payload = self._payload[i] + if decode: + if self.is_multipart(): + return None + cte = self.get('content-transfer-encoding', '') + if cte.lower() == 'quoted-printable': + return Utils._qdecode(payload) + elif cte.lower() == 'base64': + return Utils._bdecode(payload) + # Everything else, including encodings with 8bit or 7bit are returned + # unchanged. + return payload + + + def set_payload(self, payload): + """Set the payload to the given value.""" + self._payload = payload + + # + # MAPPING INTERFACE (partial) + # + def __len__(self): + """Get the total number of headers, including duplicates.""" + return len(self._headers) + + def __getitem__(self, name): + """Get a header value. + + Return None if the header is missing instead of raising an exception. + + Note that if the header appeared multiple times, exactly which + occurrance gets returned is undefined. Use getall() to get all + the values matching a header field name. + """ + return self.get(name) + + def __setitem__(self, name, val): + """Set the value of a header. + + Note: this does not overwrite an existing header with the same field + name. Use __delitem__() first to delete any existing headers. + """ + self._headers.append((name, val)) + + def __delitem__(self, name): + """Delete all occurrences of a header, if present. + + Does not raise an exception if the header is missing. + """ + name = name.lower() + newheaders = [] + for k, v in self._headers: + if k.lower() <> name: + newheaders.append((k, v)) + self._headers = newheaders + + def __contains__(self, key): + return key.lower() in [k.lower() for k, v in self._headers] + + def has_key(self, name): + """Return true if the message contains the header.""" + return self[name] <> None + + def keys(self): + """Return a list of all the message's header field names. + + These will be sorted in the order they appeared in the original + message, and may contain duplicates. Any fields deleted and + re-inserted are always appended to the header list. + """ + return [k for k, v in self._headers] + + def values(self): + """Return a list of all the message's header values. + + These will be sorted in the order they appeared in the original + message, and may contain duplicates. Any fields deleted and + re-inserted are alwyas appended to the header list. + """ + return [v for k, v in self._headers] + + def items(self): + """Get all the message's header fields and values. + + These will be sorted in the order they appeared in the original + message, and may contain duplicates. Any fields deleted and + re-inserted are alwyas appended to the header list. + """ + return self._headers[:] + + def get(self, name, failobj=None): + """Get a header value. + + Like __getitem__() but return failobj instead of None when the field + is missing. + """ + name = name.lower() + for k, v in self._headers: + if k.lower() == name: + return v + return failobj + + # + # Additional useful stuff + # + + def get_all(self, name, failobj=None): + """Return a list of all the values for the named field. + + These will be sorted in the order they appeared in the original + message, and may contain duplicates. Any fields deleted and + re-inserted are alwyas appended to the header list. + """ + values = [] + name = name.lower() + for k, v in self._headers: + if k.lower() == name: + values.append(v) + return values + + def add_header(self, _name, _value, **_params): + """Extended header setting. + + name is the header field to add. keyword arguments can be used to set + additional parameters for the header field, with underscores converted + to dashes. Normally the parameter will be added as key="value" unless + value is None, in which case only the key will be added. + + Example: + + msg.add_header('content-disposition', 'attachment', filename='bud.gif') + + """ + parts = [] + for k, v in _params.items(): + if v is None: + parts.append(k.replace('_', '-')) + else: + parts.append('%s="%s"' % (k.replace('_', '-'), v)) + if _value is not None: + parts.insert(0, _value) + self._headers.append((_name, SEMISPACE.join(parts))) + + def get_type(self, failobj=None): + """Returns the message's content type. + + The returned string is coerced to lowercase and returned as a single + string of the form `maintype/subtype'. If there was no Content-Type: + header in the message, failobj is returned (defaults to None). + """ + missing = [] + value = self.get('content-type', missing) + if value is missing: + return failobj + return re.split(r';\s+', value)[0].lower() + + def get_main_type(self, failobj=None): + """Return the message's main content type if present.""" + missing = [] + ctype = self.get_type(missing) + if ctype is missing: + return failobj + parts = ctype.split('/') + if len(parts) > 0: + return ctype.split('/')[0] + return failobj + + def get_subtype(self, failobj=None): + """Return the message's content subtype if present.""" + missing = [] + ctype = self.get_type(missing) + if ctype is missing: + return failobj + parts = ctype.split('/') + if len(parts) > 1: + return ctype.split('/')[1] + return failobj + + def get_params(self, failobj=None, header='content-type'): + """Return the message's Content-Type: parameters, as a list. + + Optional failobj is the object to return if there is no Content-Type: + header. Optional header is the header to search instead of + Content-Type: + """ + missing = [] + value = self.get(header, missing) + if value is missing: + return failobj + return re.split(r';\s+', value)[1:] + + def get_param(self, param, failobj=None, header='content-type'): + """Return the parameter value if found in the Content-Type: header. + + Optional failobj is the object to return if there is no Content-Type: + header. Optional header is the header to search instead of + Content-Type: + """ + param = param.lower() + missing = [] + params = self.get_params(missing, header=header) + if params is missing: + return failobj + for p in params: + try: + name, val = p.split('=', 1) + except ValueError: + # Must have been a bare attribute + name = p + val = '' + if name.lower() == param: + return Utils.unquote(val) + return failobj + + def get_filename(self, failobj=None): + """Return the filename associated with the payload if present. + + The filename is extracted from the Content-Disposition: header's + `filename' parameter, and it is unquoted. + """ + missing = [] + filename = self.get_param('filename', missing, 'content-disposition') + if filename is missing: + return failobj + return Utils.unquote(filename.strip()) + + def get_boundary(self, failobj=None): + """Return the boundary associated with the payload if present. + + The boundary is extracted from the Content-Type: header's `boundary' + parameter, and it is unquoted. + """ + missing = [] + boundary = self.get_param('boundary', missing) + if boundary is missing: + return failobj + return Utils.unquote(boundary.strip()) + + def set_boundary(self, boundary): + """Set the boundary parameter in Content-Type: to 'boundary'. + + This is subtly different than deleting the Content-Type: header and + adding a new one with a new boundary parameter via add_header(). The + main difference is that using the set_boundary() method preserves the + order of the Content-Type: header in the original message. + + HeaderParseError is raised if the message has no Content-Type: header. + """ + params = self.get_params() + if not params: + # There was no Content-Type: header, and we don't know what type + # to set it to, so raise an exception. + raise Errors.HeaderParseError, 'No Content-Type: header found' + newparams = [] + foundp = 0 + for p in params: + if p.lower().startswith('boundary='): + newparams.append('boundary="%s"' % boundary) + foundp = 1 + else: + newparams.append(p) + if not foundp: + # The original Content-Type: header had no boundary attribute. + # Tack one one the end. BAW: should we raise an exception + # instead??? + newparams.append('boundary="%s"' % boundary) + # Replace the existing Content-Type: header with the new value + newheaders = [] + for h, v in self._headers: + if h.lower() == 'content-type': + value = v.split(';', 1)[0] + newparams.insert(0, value) + newheaders.append((h, SEMISPACE.join(newparams))) + else: + newheaders.append((h, v)) + self._headers = newheaders + + def walk(self): + """Walk over the message tree, yielding each subpart. + + The walk is performed in breadth-first order. This method is a + generator. + """ + if self.is_multipart(): + for subpart in self.get_payload(): + for subsubpart in subpart.walk(): + yield subsubpart + else: + yield self + + def get_charsets(self, failobj=None): + """Return a list containing the charset(s) used in this message. + + The returned list of items describes the Content-Type: headers' + charset parameter for this message and all the subparts in its + payload. + + Each item will either be a string (the value of the charset parameter + in the Content-Type: header of that part) or the value of the + 'failobj' parameter (defaults to None), if the part does not have a + main MIME type of "text", or the charset is not defined. + + The list will contain one string for each part of the message, plus + one for the container message (i.e. self), so that a non-multipart + message will still return a list of length 1. + """ + return [part.get_param('charset', failobj) for part in self.walk()] |