summaryrefslogtreecommitdiffstats
path: root/Lib/email/Message.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/email/Message.py')
-rw-r--r--Lib/email/Message.py422
1 files changed, 422 insertions, 0 deletions
diff --git a/Lib/email/Message.py b/Lib/email/Message.py
new file mode 100644
index 0000000..35e2dc5
--- /dev/null
+++ b/Lib/email/Message.py
@@ -0,0 +1,422 @@
+# Copyright (C) 2001 Python Software Foundation
+# Author: barry@zope.com (Barry Warsaw)
+
+"""Basic message object for the email package object model.
+"""
+
+from __future__ import generators
+
+import re
+import base64
+import quopri
+from cStringIO import StringIO
+from types import ListType
+
+SEMISPACE = '; '
+
+# Intrapackage imports
+import Errors
+import Utils
+
+
+
class Message:
    """Basic message object for use inside the object tree.

    A message object is defined as something that has a bunch of RFC 2822
    headers and a payload.  If the body of the message is a multipart, then
    the payload is a list of Messages, otherwise it is a string.

    These objects implement part of the `mapping' interface, which assumes
    there is exactly one occurrence of the header per message.  Some headers
    do in fact appear multiple times (e.g. Received:) and for those headers,
    you must use the explicit API to set or get all the headers.  Not all of
    the mapping methods are implemented.

    Headers are kept as an ordered list of (name, value) tuples; all header
    name lookups are case-insensitive.
    """
    def __init__(self):
        # Ordered list of (name, value) tuples; duplicates are allowed.
        self._headers = []
        self._unixfrom = None
        # None (empty), a string (scalar payload) or a list (multipart).
        self._payload = None
        # Defaults for multipart messages
        self.preamble = self.epilogue = None

    def __str__(self):
        """Return the entire formatted message as a string.

        This includes the headers, body, and `unixfrom' line.
        """
        return self.as_string(unixfrom=1)

    def as_string(self, unixfrom=0):
        """Return the entire formatted message as a string.

        Optional `unixfrom' when true, means include the Unix From_ envelope
        header.
        """
        # Imported here rather than at module level; the Generator module is
        # a sibling module of this package.
        from Generator import Generator
        fp = StringIO()
        g = Generator(fp)
        g(self, unixfrom=unixfrom)
        return fp.getvalue()

    def is_multipart(self):
        """Return true (1) if the payload is a list of parts, else 0."""
        if isinstance(self._payload, list):
            return 1
        return 0

    #
    # Unix From_ line
    #
    def set_unixfrom(self, unixfrom):
        """Set the Unix From_ envelope line for this message."""
        self._unixfrom = unixfrom

    def get_unixfrom(self):
        """Return the Unix From_ envelope line (None if never set)."""
        return self._unixfrom

    #
    # Payload manipulation.
    #
    def add_payload(self, payload):
        """Add the given payload to the current payload.

        If the current payload is empty, then the current payload will be made
        a scalar, set to the given value.  If the current payload is already a
        list, the given payload is appended to it.  Otherwise the existing
        scalar payload and the new one are combined into a two element list.

        Raises Errors.MultipartConversionError if a scalar payload would have
        to be turned into a list but the message's main content type is
        present and is not "multipart".
        """
        if self._payload is None:
            self._payload = payload
        elif isinstance(self._payload, list):
            self._payload.append(payload)
        elif self.get_main_type() not in (None, 'multipart'):
            raise Errors.MultipartConversionError(
                'Message main Content-Type: must be "multipart" or missing')
        else:
            self._payload = [self._payload, payload]

    # A useful synonym
    attach = add_payload

    def get_payload(self, i=None, decode=0):
        """Return the current payload exactly as is.

        Optional i returns that index into the payload.  TypeError is raised
        if i is given but the payload is not a list.

        Optional decode is a flag indicating whether the payload should be
        decoded or not, according to the Content-Transfer-Encoding: header.
        When true and the message is not a multipart, the payload will be
        decoded if this header's value is `quoted-printable' or `base64'.  If
        some other encoding is used, or the header is missing, the payload is
        returned as-is (undecoded).  If the message is a multipart and the
        decode flag is true, then None is returned.
        """
        if i is None:
            payload = self._payload
        elif not isinstance(self._payload, list):
            raise TypeError(i)
        else:
            payload = self._payload[i]
        if decode:
            if self.is_multipart():
                return None
            cte = self.get('content-transfer-encoding', '').lower()
            if cte == 'quoted-printable':
                return Utils._qdecode(payload)
            elif cte == 'base64':
                return Utils._bdecode(payload)
        # Everything else, including encodings with 8bit or 7bit are returned
        # unchanged.
        return payload

    def set_payload(self, payload):
        """Set the payload to the given value."""
        self._payload = payload

    #
    # MAPPING INTERFACE (partial)
    #
    def __len__(self):
        """Get the total number of headers, including duplicates."""
        return len(self._headers)

    def __getitem__(self, name):
        """Get a header value.

        Return None if the header is missing instead of raising an exception.

        Note that if the header appeared multiple times, the first matching
        occurrence in the header list is returned.  Use get_all() to get all
        the values matching a header field name.
        """
        return self.get(name)

    def __setitem__(self, name, val):
        """Set the value of a header.

        Note: this does not overwrite an existing header with the same field
        name.  Use __delitem__() first to delete any existing headers.
        """
        self._headers.append((name, val))

    def __delitem__(self, name):
        """Delete all occurrences of a header, if present.

        Does not raise an exception if the header is missing.
        """
        name = name.lower()
        self._headers = [(k, v) for k, v in self._headers
                         if k.lower() != name]

    def __contains__(self, key):
        """Return true if a header with the given name is present."""
        return key.lower() in [k.lower() for k, v in self._headers]

    def has_key(self, name):
        """Return true if the message contains the header."""
        # Use a unique sentinel so that a header whose stored value happens to
        # be None is still reported as present, consistent with __contains__.
        missing = []
        return self.get(name, missing) is not missing

    def keys(self):
        """Return a list of all the message's header field names.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.
        """
        return [k for k, v in self._headers]

    def values(self):
        """Return a list of all the message's header values.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.
        """
        return [v for k, v in self._headers]

    def items(self):
        """Get all the message's header fields and values.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.

        A copy of the internal list is returned.
        """
        return self._headers[:]

    def get(self, name, failobj=None):
        """Get a header value.

        Like __getitem__() but return failobj instead of None when the field
        is missing.
        """
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                return v
        return failobj

    #
    # Additional useful stuff
    #

    def get_all(self, name, failobj=None):
        """Return a list of all the values for the named field.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
        re-inserted are always appended to the header list.

        If no such fields exist, failobj is returned (defaults to None).
        """
        name = name.lower()
        values = [v for k, v in self._headers if k.lower() == name]
        if not values:
            return failobj
        return values

    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        _name is the header field to add.  Keyword arguments can be used to
        set additional parameters for the header field, with underscores
        converted to dashes.  Normally the parameter will be added as
        key="value" unless value is None, in which case only the key will be
        added.  If _value is None, only the parameters are written.

        Example:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')

        """
        parts = []
        for k, v in _params.items():
            key = k.replace('_', '-')
            if v is None:
                parts.append(key)
            else:
                parts.append('%s="%s"' % (key, v))
        if _value is not None:
            parts.insert(0, _value)
        self._headers.append((_name, SEMISPACE.join(parts)))

    def get_type(self, failobj=None):
        """Returns the message's content type.

        The returned string is coerced to lowercase and returned as a single
        string of the form `maintype/subtype'.  If there was no Content-Type:
        header in the message, failobj is returned (defaults to None).
        """
        missing = []
        value = self.get('content-type', missing)
        if value is missing:
            return failobj
        # Whitespace around the semicolon is optional, so that
        # `text/plain;charset=us-ascii' is split as well.
        return re.split(r'\s*;\s*', value)[0].strip().lower()

    def get_main_type(self, failobj=None):
        """Return the message's main content type if present."""
        missing = []
        ctype = self.get_type(missing)
        if ctype is missing:
            return failobj
        # str.split() always returns at least one element.
        return ctype.split('/')[0]

    def get_subtype(self, failobj=None):
        """Return the message's content subtype if present."""
        missing = []
        ctype = self.get_type(missing)
        if ctype is missing:
            return failobj
        parts = ctype.split('/')
        if len(parts) > 1:
            return parts[1]
        return failobj

    def get_params(self, failobj=None, header='content-type'):
        """Return the message's Content-Type: parameters, as a list.

        Each item is the raw `key=value' (or bare `key') string; empty
        segments (e.g. from a trailing semicolon) are dropped.

        Optional failobj is the object to return if there is no Content-Type:
        header.  Optional header is the header to search instead of
        Content-Type:
        """
        missing = []
        value = self.get(header, missing)
        if value is missing:
            return failobj
        return [p for p in re.split(r'\s*;\s*', value)[1:] if p]

    def get_param(self, param, failobj=None, header='content-type'):
        """Return the parameter value if found in the Content-Type: header.

        The parameter name is matched case-insensitively and the value is
        passed through Utils.unquote() before being returned.  A bare
        attribute (one without `=value') is treated as having the value ''.

        Optional failobj is the object to return if there is no Content-Type:
        header or the parameter is not found.  Optional header is the header
        to search instead of Content-Type:
        """
        param = param.lower()
        missing = []
        params = self.get_params(missing, header=header)
        if params is missing:
            return failobj
        for p in params:
            try:
                name, val = p.split('=', 1)
            except ValueError:
                # Must have been a bare attribute
                name = p
                val = ''
            if name.strip().lower() == param:
                return Utils.unquote(val.strip())
        return failobj

    def get_filename(self, failobj=None):
        """Return the filename associated with the payload if present.

        The filename is extracted from the Content-Disposition: header's
        `filename' parameter, and it is unquoted.
        """
        missing = []
        filename = self.get_param('filename', missing, 'content-disposition')
        if filename is missing:
            return failobj
        # NOTE(review): get_param() has already applied Utils.unquote(); the
        # second call is kept to preserve the existing behavior.
        return Utils.unquote(filename.strip())

    def get_boundary(self, failobj=None):
        """Return the boundary associated with the payload if present.

        The boundary is extracted from the Content-Type: header's `boundary'
        parameter, and it is unquoted.
        """
        missing = []
        boundary = self.get_param('boundary', missing)
        if boundary is missing:
            return failobj
        # NOTE(review): get_param() has already applied Utils.unquote(); the
        # second call is kept to preserve the existing behavior.
        return Utils.unquote(boundary.strip())

    def set_boundary(self, boundary):
        """Set the boundary parameter in Content-Type: to 'boundary'.

        This is subtly different than deleting the Content-Type: header and
        adding a new one with a new boundary parameter via add_header().  The
        main difference is that using the set_boundary() method preserves the
        order of the Content-Type: header in the original message.

        HeaderParseError is raised if the message has no Content-Type: header.
        """
        missing = []
        params = self.get_params(missing)
        if params is missing:
            # There was no Content-Type: header, and we don't know what type
            # to set it to, so raise an exception.  A header that is present
            # but has no parameters is fine; it yields an empty list here.
            raise Errors.HeaderParseError('No Content-Type: header found')
        newparams = []
        foundp = 0
        for p in params:
            if p.lower().startswith('boundary='):
                newparams.append('boundary="%s"' % boundary)
                foundp = 1
            else:
                newparams.append(p)
        if not foundp:
            # The original Content-Type: header had no boundary attribute.
            # Tack one on the end.  BAW: should we raise an exception
            # instead???
            newparams.append('boundary="%s"' % boundary)
        # Replace the existing Content-Type: header with the new value
        newheaders = []
        for h, v in self._headers:
            if h.lower() == 'content-type':
                value = v.split(';', 1)[0]
                # Build a fresh list per header so that one header's value
                # never leaks into another when the header is duplicated.
                newheaders.append((h, SEMISPACE.join([value] + newparams)))
            else:
                newheaders.append((h, v))
        self._headers = newheaders

    def walk(self):
        """Walk over the message tree, yielding each subpart.

        The walk is performed in depth-first order: the message itself is
        yielded first, followed by the walk of each of its subparts in payload
        order.  This method is a generator.
        """
        yield self
        if self.is_multipart():
            for subpart in self.get_payload():
                for subsubpart in subpart.walk():
                    yield subsubpart

    def get_charsets(self, failobj=None):
        """Return a list containing the charset(s) used in this message.

        The returned list of items describes the Content-Type: headers'
        charset parameter for this message and all the subparts in its
        payload.

        Each item will either be a string (the value of the charset parameter
        in the Content-Type: header of that part) or the value of the
        'failobj' parameter (defaults to None), if the part has no
        Content-Type: header or the charset is not defined.

        The list will contain one string for each part of the message, plus
        one for the container message (i.e. self), so that a non-multipart
        message will still return a list of length 1.
        """
        return [part.get_param('charset', failobj) for part in self.walk()]