summaryrefslogtreecommitdiffstats
path: root/Lib/email
diff options
context:
space:
mode:
authorBarry Warsaw <barry@python.org>2001-09-23 03:17:28 (GMT)
committerBarry Warsaw <barry@python.org>2001-09-23 03:17:28 (GMT)
commitba92580f01b47ba1468c382961ed5122654c2520 (patch)
tree413464c274da1a93dc99d0a1cf13baf9a99c3220 /Lib/email
parentd61d0d3f6dbd960a761c05ff7fea848cb6490aa3 (diff)
downloadcpython-ba92580f01b47ba1468c382961ed5122654c2520.zip
cpython-ba92580f01b47ba1468c382961ed5122654c2520.tar.gz
cpython-ba92580f01b47ba1468c382961ed5122654c2520.tar.bz2
The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>. There /are/ API differences between mimelib and email, but most of the implementations are shared (except where cool Py2.2 stuff like generators are used).
Diffstat (limited to 'Lib/email')
-rw-r--r--Lib/email/Encoders.py68
-rw-r--r--Lib/email/Errors.py26
-rw-r--r--Lib/email/Generator.py326
-rw-r--r--Lib/email/Image.py46
-rw-r--r--Lib/email/Iterators.py33
-rw-r--r--Lib/email/MIMEBase.py24
-rw-r--r--Lib/email/Message.py422
-rw-r--r--Lib/email/MessageRFC822.py24
-rw-r--r--Lib/email/Parser.py154
-rw-r--r--Lib/email/Text.py41
-rw-r--r--Lib/email/Utils.py104
-rw-r--r--Lib/email/__init__.py34
12 files changed, 1302 insertions, 0 deletions
diff --git a/Lib/email/Encoders.py b/Lib/email/Encoders.py
new file mode 100644
index 0000000..36db370
--- /dev/null
+++ b/Lib/email/Encoders.py
@@ -0,0 +1,68 @@
+# Copyright (C) 2001 Python Software Foundation
+# Author: barry@zope.com (Barry Warsaw)
+
+"""Module containing encoding functions for Image.Image and Text.Text.
+"""
+
+import base64
+from quopri import encodestring as _encodestring
+
+
+
+# Helpers
+def _qencode(s):
+ return _encodestring(s, quotetabs=1)
+
+def _bencode(s):
+ # We can't quite use base64.encodestring() since it tacks on a "courtesy
+ # newline". Blech!
+ if not s:
+ return s
+ hasnewline = (s[-1] == '\n')
+ value = base64.encodestring(s)
+ if not hasnewline and value[-1] == '\n':
+ return value[:-1]
+ return value
+
+
+
+def encode_base64(msg):
+ """Encode the message's payload in Base64.
+
+ Also, add an appropriate Content-Transfer-Encoding: header.
+ """
+ orig = msg.get_payload()
+ encdata = _bencode(orig)
+ msg.set_payload(encdata)
+ msg['Content-Transfer-Encoding'] = 'base64'
+
+
+
+def encode_quopri(msg):
+ """Encode the message's payload in Quoted-Printable.
+
+ Also, add an appropriate Content-Transfer-Encoding: header.
+ """
+ orig = msg.get_payload()
+ encdata = _qencode(orig)
+ msg.set_payload(encdata)
+ msg['Content-Transfer-Encoding'] = 'quoted-printable'
+
+
+
+def encode_7or8bit(msg):
+ """Set the Content-Transfer-Encoding: header to 7bit or 8bit."""
+ orig = msg.get_payload()
+ # We play a trick to make this go fast. If encoding to ASCII succeeds, we
+ # know the data must be 7bit, otherwise treat it as 8bit.
+ try:
+ orig.encode('ascii')
+ except UnicodeError:
+ msg['Content-Transfer-Encoding'] = '8bit'
+ else:
+ msg['Content-Transfer-Encoding'] = '7bit'
+
+
+
+def encode_noop(msg):
+ """Do nothing."""
diff --git a/Lib/email/Errors.py b/Lib/email/Errors.py
new file mode 100644
index 0000000..71d7663
--- /dev/null
+++ b/Lib/email/Errors.py
@@ -0,0 +1,26 @@
+# Copyright (C) 2001 Python Software Foundation
+# Author: barry@zope.com (Barry Warsaw)
+
+"""email package exception classes.
+"""
+
+
+
+class MessageError(Exception):
+ """Base class for errors in this module."""
+
+
+class MessageParseError(MessageError):
+ """Base class for message parsing errors."""
+
+
+class HeaderParseError(MessageParseError):
+ """Error while parsing headers."""
+
+
+class BoundaryError(MessageParseError):
+ """Couldn't find terminating boundary."""
+
+
+class MultipartConversionError(MessageError, TypeError):
+ """Conversion to a multipart is prohibited."""
diff --git a/Lib/email/Generator.py b/Lib/email/Generator.py
new file mode 100644
index 0000000..ca9757f
--- /dev/null
+++ b/Lib/email/Generator.py
@@ -0,0 +1,326 @@
+# Copyright (C) 2001 Python Software Foundation
+# Author: barry@zope.com (Barry Warsaw)
+
+"""Classes to generate plain text from a message object tree.
+"""
+
+import time
+import re
+import random
+
+from types import ListType, StringType
+from cStringIO import StringIO
+
+# Intrapackage imports
+import Message
+import Errors
+
+SEMISPACE = '; '
+BAR = '|'
+UNDERSCORE = '_'
+NL = '\n'
+SEMINLTAB = ';\n\t'
+SPACE8 = ' ' * 8
+
+fcre = re.compile(r'^From ', re.MULTILINE)
+
+
+
+class Generator:
+ """Generates output from a Message object tree.
+
+ This basic generator writes the message to the given file object as plain
+ text.
+ """
+ #
+ # Public interface
+ #
+
+ def __init__(self, outfp, mangle_from_=1, maxheaderlen=78):
+ """Create the generator for message flattening.
+
+ outfp is the output file-like object for writing the message to. It
+ must have a write() method.
+
+ Optional mangle_from_ is a flag that, when true, escapes From_ lines
+ in the body of the message by putting a `>' in front of them.
+
+ Optional maxheaderlen specifies the longest length for a non-continued
+ header. When a header line is longer (in characters, with tabs
+ expanded to 8 spaces), than maxheaderlen, the header will be broken on
+ semicolons and continued as per RFC 2822. If no semicolon is found,
+ then the header is left alone. Set to zero to disable wrapping
+ headers. Default is 78, as recommended (but not required) by RFC
+ 2822.
+ """
+ self._fp = outfp
+ self._mangle_from_ = mangle_from_
+ self.__first = 1
+ self.__maxheaderlen = maxheaderlen
+
+ def write(self, s):
+ # Just delegate to the file object
+ self._fp.write(s)
+
+ def __call__(self, msg, unixfrom=0):
+ """Print the message object tree rooted at msg to the output file
+ specified when the Generator instance was created.
+
+ unixfrom is a flag that forces the printing of a Unix From_ delimiter
+ before the first object in the message tree. If the original message
+ has no From_ delimiter, a `standard' one is crafted. By default, this
+ is 0 to inhibit the printing of any From_ delimiter.
+
+ Note that for subobjects, no From_ line is printed.
+ """
+ if unixfrom:
+ ufrom = msg.get_unixfrom()
+ if not ufrom:
+ ufrom = 'From nobody ' + time.ctime(time.time())
+ print >> self._fp, ufrom
+ self._write(msg)
+
+ #
+ # Protected interface - undocumented ;/
+ #
+
+ def _write(self, msg):
+ # We can't write the headers yet because of the following scenario:
+ # say a multipart message includes the boundary string somewhere in
+ # its body. We'd have to calculate the new boundary /before/ we write
+ # the headers so that we can write the correct Content-Type:
+ # parameter.
+ #
+ # The way we do this, so as to make the _handle_*() methods simpler,
+ # is to cache any subpart writes into a StringIO. Then we write the
+ # headers and the StringIO contents. That way, subpart handlers can
+ # Do The Right Thing, and can still modify the Content-Type: header if
+ # necessary.
+ oldfp = self._fp
+ try:
+ self._fp = sfp = StringIO()
+ self._dispatch(msg)
+ finally:
+ self._fp = oldfp
+ # Write the headers. First we see if the message object wants to
+ # handle that itself. If not, we'll do it generically.
+ meth = getattr(msg, '_write_headers', None)
+ if meth is None:
+ self._write_headers(msg)
+ else:
+ meth(self)
+ self._fp.write(sfp.getvalue())
+
+ def _dispatch(self, msg):
+ # Get the Content-Type: for the message, then try to dispatch to
+ # self._handle_maintype_subtype(). If there's no handler for the full
+ # MIME type, then dispatch to self._handle_maintype(). If that's
+ # missing too, then dispatch to self._writeBody().
+ ctype = msg.get_type()
+ if ctype is None:
+ # No Content-Type: header so try the default handler
+ self._writeBody(msg)
+ else:
+ # We do have a Content-Type: header.
+ specific = UNDERSCORE.join(ctype.split('/')).replace('-', '_')
+ meth = getattr(self, '_handle_' + specific, None)
+ if meth is None:
+ generic = msg.get_main_type().replace('-', '_')
+ meth = getattr(self, '_handle_' + generic, None)
+ if meth is None:
+ meth = self._writeBody
+ meth(msg)
+
+ #
+ # Default handlers
+ #
+
+ def _write_headers(self, msg):
+ for h, v in msg.items():
+ # We only write the MIME-Version: header for the outermost
+ # container message. Unfortunately, we can't use same technique
+ # as for the Unix-From above because we don't know when
+ # MIME-Version: will occur.
+ if h.lower() == 'mime-version' and not self.__first:
+ continue
+ # RFC 2822 says that lines SHOULD be no more than maxheaderlen
+ # characters wide, so we're well within our rights to split long
+ # headers.
+ text = '%s: %s' % (h, v)
+ if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
+ text = self._split_header(text)
+ print >> self._fp, text
+ # A blank line always separates headers from body
+ print >> self._fp
+
+ def _split_header(self, text):
+     """Break an over-long header on semicolons into continuation lines.
+
+     text is the complete `Name: value' header, possibly already
+     containing embedded newlines from earlier continuation lines.  If no
+     physical line (with tabs counted as 8 spaces) is wider than the
+     configured maximum header length, text is returned unchanged.
+     Otherwise each too-wide line is split at the last semicolon that
+     fits; a line with no usable semicolon is left alone.  The pieces are
+     joined with `;\n\t'.
+     """
+     maxheaderlen = self.__maxheaderlen
+     # Find out whether any lines in the header are really longer than
+     # maxheaderlen characters wide.  There could be continuation lines
+     # that actually shorten it.  Also, replace hard tabs with 8 spaces.
+     lines = [s.replace('\t', SPACE8) for s in text.split('\n')]
+     for line in lines:
+         if len(line) > maxheaderlen:
+             break
+     else:
+         # No line was actually longer than maxheaderlen characters, so
+         # just return the original unchanged.
+         return text
+     rtn = []
+     for line in text.split('\n'):
+         # Short lines can remain unchanged
+         if len(line.replace('\t', SPACE8)) <= maxheaderlen:
+             rtn.append(line)
+             continue
+         # Try to break this line (not the whole header) on semicolons,
+         # but if that doesn't work, then just leave it alone.
+         while len(line) > maxheaderlen:
+             i = line.rfind(';', 0, maxheaderlen)
+             if i < 0:
+                 break
+             rtn.append(line[:i])
+             line = line[i+1:].lstrip()
+         # Whatever is left over (possibly the whole unsplittable line) is
+         # appended exactly once, so it is never emitted twice.
+         rtn.append(line)
+     return SEMINLTAB.join(rtn)
+
+ #
+ # Handlers for writing types and subtypes
+ #
+
+ def _handle_text(self, msg):
+ payload = msg.get_payload()
+ if not isinstance(payload, StringType):
+ raise TypeError, 'string payload expected'
+ if self._mangle_from_:
+ payload = fcre.sub('>From ', payload)
+ self._fp.write(payload)
+
+ # Default body handler
+ _writeBody = _handle_text
+
+ def _handle_multipart(self, msg, isdigest=0):
+ # The trick here is to write out each part separately, merge them all
+ # together, and then make sure that the boundary we've chosen isn't
+ # present in the payload.
+ msgtexts = []
+ for part in msg.get_payload():
+ s = StringIO()
+ g = self.__class__(s)
+ g(part, unixfrom=0)
+ msgtexts.append(s.getvalue())
+ # Now make sure the boundary we've selected doesn't appear in any of
+ # the message texts.
+ alltext = NL.join(msgtexts)
+ # BAW: What about boundaries that are wrapped in double-quotes?
+ boundary = msg.get_boundary(failobj=_make_boundary(alltext))
+ # If we had to calculate a new boundary because the body text
+ # contained that string, set the new boundary. We don't do it
+ # unconditionally because, while set_boundary() preserves order, it
+ # doesn't preserve newlines/continuations in headers. This is no big
+ # deal in practice, but turns out to be inconvenient for the unittest
+ # suite.
+ if msg.get_boundary() <> boundary:
+ msg.set_boundary(boundary)
+ # Write out any preamble
+ if msg.preamble is not None:
+ self._fp.write(msg.preamble)
+ # First boundary is a bit different; it doesn't have a leading extra
+ # newline.
+ print >> self._fp, '--' + boundary
+ if isdigest:
+ print >> self._fp
+ # Join and write the individual parts
+ joiner = '\n--' + boundary + '\n'
+ if isdigest:
+ # multipart/digest types effectively add an extra newline between
+ # the boundary and the body part.
+ joiner += '\n'
+ self._fp.write(joiner.join(msgtexts))
+ print >> self._fp, '\n--' + boundary + '--',
+ # Write out any epilogue
+ if msg.epilogue is not None:
+ self._fp.write(msg.epilogue)
+
+ def _handle_multipart_digest(self, msg):
+ self._handle_multipart(msg, isdigest=1)
+
+ def _handle_message_rfc822(self, msg):
+ s = StringIO()
+ g = self.__class__(s)
+ # A message/rfc822 should contain a scalar payload which is another
+ # Message object. Extract that object, stringify it, and write that
+ # out.
+ g(msg.get_payload(), unixfrom=0)
+ self._fp.write(s.getvalue())
+
+
+
+class DecodedGenerator(Generator):
+ """Generates a text representation of a message.
+
+ Like the Generator base class, except that non-text parts are substituted
+ with a format string representing the part.
+ """
+ def __init__(self, outfp, mangle_from_=1, maxheaderlen=78, fmt=None):
+ """Like Generator.__init__() except that an additional optional
+ argument is allowed.
+
+ Walks through all subparts of a message. If the subpart is of main
+ type `text', then it prints the decoded payload of the subpart.
+
+ Otherwise, fmt is a format string that is used instead of the message
+ payload. fmt is expanded with the following keywords (in
+ %(keyword)s format):
+
+ type : Full MIME type of the non-text part
+ maintype : Main MIME type of the non-text part
+ subtype : Sub-MIME type of the non-text part
+ filename : Filename of the non-text part
+ description: Description associated with the non-text part
+ encoding : Content transfer encoding of the non-text part
+
+ The default value for fmt is None, meaning
+
+ [Non-text (%(type)s) part of message omitted, filename %(filename)s]
+ """
+ Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
+ if fmt is None:
+ fmt = ('[Non-text (%(type)s) part of message omitted, '
+ 'filename %(filename)s]')
+ self._fmt = fmt
+
+ def _dispatch(self, msg):
+ for part in msg.walk():
+ if part.get_main_type('text') == 'text':
+ print >> self, part.get_payload(decode=1)
+ else:
+ print >> self, self._fmt % {
+ 'type' : part.get_type('[no MIME type]'),
+ 'maintype' : part.get_main_type('[no main MIME type]'),
+ 'subtype' : part.get_subtype('[no sub-MIME type]'),
+ 'filename' : part.get_filename('[no filename]'),
+ 'description': part.get('Content-Description',
+ '[no description]'),
+ 'encoding' : part.get('Content-Transfer-Encoding',
+ '[no encoding]'),
+ }
+
+
+
+# Helper
+def _make_boundary(text=None):
+    """Craft a random MIME boundary string.
+
+    If text is given, the returned boundary is guaranteed not to occur in
+    text as a boundary line (`--boundary' or `--boundary--' on a line by
+    itself); a `.N' counter suffix is appended until that holds.
+    """
+    # This is a module-level helper, not a method, so it must not take a
+    # `self' argument: otherwise the caller's text is bound to `self' and
+    # the collision check below is silently skipped.
+    boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '=='
+    if text is None:
+        return boundary
+    b = boundary
+    counter = 0
+    while 1:
+        cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
+        if not cre.search(text):
+            break
+        b = boundary + '.' + str(counter)
+        counter += 1
+    return b
diff --git a/Lib/email/Image.py b/Lib/email/Image.py
new file mode 100644
index 0000000..d350785
--- /dev/null
+++ b/Lib/email/Image.py
@@ -0,0 +1,46 @@
+# Copyright (C) 2001 Python Software Foundation
+# Author: barry@zope.com (Barry Warsaw)
+
+"""Class representing image/* type MIME documents.
+"""
+
+import imghdr
+
+# Intrapackage imports
+import MIMEBase
+import Errors
+import Encoders
+
+
+
+class Image(MIMEBase.MIMEBase):
+ """Class for generating image/* type MIME documents."""
+
+ def __init__(self, _imagedata, _minor=None,
+ _encoder=Encoders.encode_base64, **_params):
+ """Create an image/* type MIME document.
+
+ _imagedata is a string containing the raw image data. If this data
+ can be decoded by the standard Python `imghdr' module, then the
+ subtype will be automatically included in the Content-Type: header.
+ Otherwise, you can specify the specific image subtype via the _minor
+ parameter.
+
+ _encoder is a function which will perform the actual encoding for
+ transport of the image data. It takes one argument, which is this
+ Image instance. It should use get_payload() and set_payload() to
+ change the payload to the encoded form. It should also add any
+ Content-Transfer-Encoding: or other headers to the message as
+ necessary. The default encoding is Base64.
+
+ Any additional keyword arguments are passed to the base class
+ constructor, which turns them into parameters on the Content-Type:
+ header.
+ """
+ if _minor is None:
+ _minor = imghdr.what(None, _imagedata)
+ if _minor is None:
+ raise TypeError, 'Could not guess image _minor type'
+ MIMEBase.MIMEBase.__init__(self, 'image', _minor, **_params)
+ self.set_payload(_imagedata)
+ _encoder(self)
diff --git a/Lib/email/Iterators.py b/Lib/email/Iterators.py
new file mode 100644
index 0000000..998530f
--- /dev/null
+++ b/Lib/email/Iterators.py
@@ -0,0 +1,33 @@
+# Copyright (C) 2001 Python Software Foundation
+# Author: barry@zope.com (Barry Warsaw)
+
+"""Various types of useful iterators and generators.
+"""
+
+from __future__ import generators
+from cStringIO import StringIO
+from types import StringType
+
+
+
+def body_line_iterator(msg):
+ """Iterator over the parts, returning the lines in a string payload."""
+ for subpart in msg.walk():
+ payload = subpart.get_payload()
+ if type(payload) is StringType:
+ for line in StringIO(payload):
+ yield line
+
+
+
+def typed_subpart_iterator(msg, major='text', minor=None):
+ """Iterator over the subparts with a given MIME type.
+
+ Use `major' as the main MIME type to match against; this defaults to
+ "text". Optional `minor' is the MIME subtype to match against; if
+ omitted, only the main type is matched.
+ """
+ for subpart in msg.walk():
+ if subpart.get_main_type() == major:
+ if minor is None or subpart.get_subtype() == minor:
+ yield subpart
diff --git a/Lib/email/MIMEBase.py b/Lib/email/MIMEBase.py
new file mode 100644
index 0000000..e6dda56
--- /dev/null
+++ b/Lib/email/MIMEBase.py
@@ -0,0 +1,24 @@
+# Copyright (C) 2001 Python Software Foundation
+# Author: barry@zope.com (Barry Warsaw)
+
+"""Base class for MIME specializations.
+"""
+
+import Message
+
+
+
+class MIMEBase(Message.Message):
+ """Base class for MIME specializations."""
+
+ def __init__(self, _major, _minor, **_params):
+ """This constructor adds a Content-Type: and a MIME-Version: header.
+
+ The Content-Type: header is taken from the _major and _minor
+ arguments. Additional parameters for this header are taken from the
+ keyword arguments.
+ """
+ Message.Message.__init__(self)
+ ctype = '%s/%s' % (_major, _minor)
+ self.add_header('Content-Type', ctype, **_params)
+ self['MIME-Version'] = '1.0'
diff --git a/Lib/email/Message.py b/Lib/email/Message.py
new file mode 100644
index 0000000..35e2dc5
--- /dev/null
+++ b/Lib/email/Message.py
@@ -0,0 +1,422 @@
+# Copyright (C) 2001 Python Software Foundation
+# Author: barry@zope.com (Barry Warsaw)
+
+"""Basic message object for the email package object model.
+"""
+
+from __future__ import generators
+
+import re
+import base64
+import quopri
+from cStringIO import StringIO
+from types import ListType
+
+SEMISPACE = '; '
+
+# Intrapackage imports
+import Errors
+import Utils
+
+
+
+class Message:
+ """Basic message object for use inside the object tree.
+
+ A message object is defined as something that has a bunch of RFC 2822
+ headers and a payload. If the body of the message is a multipart, then
+ the payload is a list of Messages, otherwise it is a string.
+
+ These objects implement part of the `mapping' interface, which assumes
+ there is exactly one occurrence of the header per message. Some headers
+ do in fact appear multiple times (e.g. Received:) and for those headers,
+ you must use the explicit API to set or get all the headers. Not all of
+ the mapping methods are implemented.
+
+ """
+ def __init__(self):
+ self._headers = []
+ self._unixfrom = None
+ self._payload = None
+ # Defaults for multipart messages
+ self.preamble = self.epilogue = None
+
+ def __str__(self):
+ """Return the entire formatted message as a string.
+ This includes the headers, body, and `unixfrom' line.
+ """
+ return self.as_string(unixfrom=1)
+
+ def as_string(self, unixfrom=0):
+ """Return the entire formatted message as a string.
+ Optional `unixfrom' when true, means include the Unix From_ envelope
+ header.
+ """
+ from Generator import Generator
+ fp = StringIO()
+ g = Generator(fp)
+ g(self, unixfrom=unixfrom)
+ return fp.getvalue()
+
+ def is_multipart(self):
+ """Return true if the message consists of multiple parts."""
+ if type(self._payload) is ListType:
+ return 1
+ return 0
+
+ #
+ # Unix From_ line
+ #
+ def set_unixfrom(self, unixfrom):
+ self._unixfrom = unixfrom
+
+ def get_unixfrom(self):
+ return self._unixfrom
+
+ #
+ # Payload manipulation.
+ #
+ def add_payload(self, payload):
+ """Add the given payload to the current payload.
+
+ If the current payload is empty, then the current payload will be made
+ a scalar, set to the given value.
+ """
+ if self._payload is None:
+ self._payload = payload
+ elif type(self._payload) is ListType:
+ self._payload.append(payload)
+ elif self.get_main_type() not in (None, 'multipart'):
+ raise Errors.MultipartConversionError(
+ 'Message main Content-Type: must be "multipart" or missing')
+ else:
+ self._payload = [self._payload, payload]
+
+ # A useful synonym
+ attach = add_payload
+
+ def get_payload(self, i=None, decode=0):
+ """Return the current payload exactly as is.
+
+ Optional i returns that index into the payload.
+
+ Optional decode is a flag indicating whether the payload should be
+ decoded or not, according to the Content-Transfer-Encoding: header.
+ When true and the message is not a multipart, the payload will be
+ decoded if this header's value is `quoted-printable' or `base64'. If
+ some other encoding is used, or the header is missing, the payload is
+ returned as-is (undecoded). If the message is a multipart and the
+ decode flag is true, then None is returned.
+ """
+ if i is None:
+ payload = self._payload
+ elif type(self._payload) is not ListType:
+ raise TypeError, i
+ else:
+ payload = self._payload[i]
+ if decode:
+ if self.is_multipart():
+ return None
+ cte = self.get('content-transfer-encoding', '')
+ if cte.lower() == 'quoted-printable':
+ return Utils._qdecode(payload)
+ elif cte.lower() == 'base64':
+ return Utils._bdecode(payload)
+ # Everything else, including encodings with 8bit or 7bit are returned
+ # unchanged.
+ return payload
+
+
+ def set_payload(self, payload):
+ """Set the payload to the given value."""
+ self._payload = payload
+
+ #
+ # MAPPING INTERFACE (partial)
+ #
+ def __len__(self):
+ """Get the total number of headers, including duplicates."""
+ return len(self._headers)
+
+ def __getitem__(self, name):
+ """Get a header value.
+
+ Return None if the header is missing instead of raising an exception.
+
+ Note that if the header appeared multiple times, exactly which
+ occurrence gets returned is undefined. Use get_all() to get all
+ the values matching a header field name.
+ """
+ return self.get(name)
+
+ def __setitem__(self, name, val):
+ """Set the value of a header.
+
+ Note: this does not overwrite an existing header with the same field
+ name. Use __delitem__() first to delete any existing headers.
+ """
+ self._headers.append((name, val))
+
+ def __delitem__(self, name):
+ """Delete all occurrences of a header, if present.
+
+ Does not raise an exception if the header is missing.
+ """
+ name = name.lower()
+ newheaders = []
+ for k, v in self._headers:
+ if k.lower() <> name:
+ newheaders.append((k, v))
+ self._headers = newheaders
+
+ def __contains__(self, key):
+ return key.lower() in [k.lower() for k, v in self._headers]
+
+ def has_key(self, name):
+ """Return true if the message contains the header."""
+ return self[name] <> None
+
+ def keys(self):
+ """Return a list of all the message's header field names.
+
+ These will be sorted in the order they appeared in the original
+ message, and may contain duplicates. Any fields deleted and
+ re-inserted are always appended to the header list.
+ """
+ return [k for k, v in self._headers]
+
+ def values(self):
+ """Return a list of all the message's header values.
+
+ These will be sorted in the order they appeared in the original
+ message, and may contain duplicates. Any fields deleted and
+ re-inserted are always appended to the header list.
+ """
+ return [v for k, v in self._headers]
+
+ def items(self):
+ """Get all the message's header fields and values.
+
+ These will be sorted in the order they appeared in the original
+ message, and may contain duplicates. Any fields deleted and
+ re-inserted are always appended to the header list.
+ """
+ return self._headers[:]
+
+ def get(self, name, failobj=None):
+ """Get a header value.
+
+ Like __getitem__() but return failobj instead of None when the field
+ is missing.
+ """
+ name = name.lower()
+ for k, v in self._headers:
+ if k.lower() == name:
+ return v
+ return failobj
+
+ #
+ # Additional useful stuff
+ #
+
+ def get_all(self, name, failobj=None):
+ """Return a list of all the values for the named field.
+
+ These will be sorted in the order they appeared in the original
+ message, and may contain duplicates. Any fields deleted and
+ re-inserted are always appended to the header list.
+ """
+ values = []
+ name = name.lower()
+ for k, v in self._headers:
+ if k.lower() == name:
+ values.append(v)
+ return values
+
+ def add_header(self, _name, _value, **_params):
+ """Extended header setting.
+
+ name is the header field to add. keyword arguments can be used to set
+ additional parameters for the header field, with underscores converted
+ to dashes. Normally the parameter will be added as key="value" unless
+ value is None, in which case only the key will be added.
+
+ Example:
+
+ msg.add_header('content-disposition', 'attachment', filename='bud.gif')
+
+ """
+ parts = []
+ for k, v in _params.items():
+ if v is None:
+ parts.append(k.replace('_', '-'))
+ else:
+ parts.append('%s="%s"' % (k.replace('_', '-'), v))
+ if _value is not None:
+ parts.insert(0, _value)
+ self._headers.append((_name, SEMISPACE.join(parts)))
+
+ def get_type(self, failobj=None):
+ """Returns the message's content type.
+
+ The returned string is coerced to lowercase and returned as a single
+ string of the form `maintype/subtype'. If there was no Content-Type:
+ header in the message, failobj is returned (defaults to None).
+ """
+ missing = []
+ value = self.get('content-type', missing)
+ if value is missing:
+ return failobj
+ return re.split(r';\s+', value)[0].lower()
+
+ def get_main_type(self, failobj=None):
+ """Return the message's main content type if present."""
+ missing = []
+ ctype = self.get_type(missing)
+ if ctype is missing:
+ return failobj
+ parts = ctype.split('/')
+ if len(parts) > 0:
+ return ctype.split('/')[0]
+ return failobj
+
+ def get_subtype(self, failobj=None):
+ """Return the message's content subtype if present."""
+ missing = []
+ ctype = self.get_type(missing)
+ if ctype is missing:
+ return failobj
+ parts = ctype.split('/')
+ if len(parts) > 1:
+ return ctype.split('/')[1]
+ return failobj
+
+ def get_params(self, failobj=None, header='content-type'):
+ """Return the message's Content-Type: parameters, as a list.
+
+ Optional failobj is the object to return if there is no Content-Type:
+ header. Optional header is the header to search instead of
+ Content-Type:
+ """
+ missing = []
+ value = self.get(header, missing)
+ if value is missing:
+ return failobj
+ return re.split(r';\s+', value)[1:]
+
+ def get_param(self, param, failobj=None, header='content-type'):
+ """Return the parameter value if found in the Content-Type: header.
+
+ Optional failobj is the object to return if there is no Content-Type:
+ header. Optional header is the header to search instead of
+ Content-Type:
+ """
+ param = param.lower()
+ missing = []
+ params = self.get_params(missing, header=header)
+ if params is missing:
+ return failobj
+ for p in params:
+ try:
+ name, val = p.split('=', 1)
+ except ValueError:
+ # Must have been a bare attribute
+ name = p
+ val = ''
+ if name.lower() == param:
+ return Utils.unquote(val)
+ return failobj
+
+ def get_filename(self, failobj=None):
+ """Return the filename associated with the payload if present.
+
+ The filename is extracted from the Content-Disposition: header's
+ `filename' parameter, and it is unquoted.
+ """
+ missing = []
+ filename = self.get_param('filename', missing, 'content-disposition')
+ if filename is missing:
+ return failobj
+ return Utils.unquote(filename.strip())
+
+ def get_boundary(self, failobj=None):
+ """Return the boundary associated with the payload if present.
+
+ The boundary is extracted from the Content-Type: header's `boundary'
+ parameter, and it is unquoted.
+ """
+ missing = []
+ boundary = self.get_param('boundary', missing)
+ if boundary is missing:
+ return failobj
+ return Utils.unquote(boundary.strip())
+
+ def set_boundary(self, boundary):
+     """Set the boundary parameter in Content-Type: to 'boundary'.
+
+     This is subtly different than deleting the Content-Type: header and
+     adding a new one with a new boundary parameter via add_header().  The
+     main difference is that using the set_boundary() method preserves the
+     order of the Content-Type: header in the original message.
+
+     An existing boundary parameter is replaced in place; if there is
+     none, the new one is appended after the other parameters.
+
+     HeaderParseError is raised if the message has no Content-Type: header.
+     """
+     # Use a unique sentinel so that a Content-Type: header which simply
+     # has no parameters (get_params() returns an empty list) is not
+     # mistaken for a missing header.
+     missing = []
+     params = self.get_params(missing)
+     if params is missing:
+         # There was no Content-Type: header, and we don't know what type
+         # to set it to, so raise an exception.
+         raise Errors.HeaderParseError('No Content-Type: header found')
+     newparams = []
+     foundp = 0
+     for p in params:
+         if p.lower().startswith('boundary='):
+             newparams.append('boundary="%s"' % boundary)
+             foundp = 1
+         else:
+             newparams.append(p)
+     if not foundp:
+         # The original Content-Type: header had no boundary attribute.
+         # Tack one on the end.  BAW: should we raise an exception
+         # instead???
+         newparams.append('boundary="%s"' % boundary)
+     # Replace the existing Content-Type: header with the new value,
+     # keeping its position in the header list.
+     newheaders = []
+     for h, v in self._headers:
+         if h.lower() == 'content-type':
+             value = v.split(';', 1)[0]
+             # Build a fresh list so newparams is not mutated per header.
+             newheaders.append((h, SEMISPACE.join([value] + newparams)))
+         else:
+             newheaders.append((h, v))
+     self._headers = newheaders
+
+ def walk(self):
+ """Walk over the message tree, yielding each subpart.
+
+ The walk is performed in depth-first order, and only non-multipart
+ parts are yielded. This method is a generator.
+ """
+ if self.is_multipart():
+ for subpart in self.get_payload():
+ for subsubpart in subpart.walk():
+ yield subsubpart
+ else:
+ yield self
+
+ def get_charsets(self, failobj=None):
+ """Return a list containing the charset(s) used in this message.
+
+ The returned list of items describes the Content-Type: headers'
+ charset parameter for this message and all the subparts in its
+ payload.
+
+ Each item will either be a string (the value of the charset parameter
+ in the Content-Type: header of that part) or the value of the
+ 'failobj' parameter (defaults to None), if the part does not have a
+ main MIME type of "text", or the charset is not defined.
+
+ The list will contain one item for each part yielded by walk(); a
+ multipart container contributes only its subparts, while a non-multipart
+ message will still return a list of length 1.
+ """
+ return [part.get_param('charset', failobj) for part in self.walk()]
diff --git a/Lib/email/MessageRFC822.py b/Lib/email/MessageRFC822.py
new file mode 100644
index 0000000..81cc4dc
--- /dev/null
+++ b/Lib/email/MessageRFC822.py
@@ -0,0 +1,24 @@
+# Copyright (C) 2001 Python Software Foundation
+# Author: barry@zope.com (Barry Warsaw)
+
+"""Class for generating message/rfc822 MIME documents.
+"""
+
+import Message
+import MIMEBase
+
+
+
+class MessageRFC822(MIMEBase.MIMEBase):
+ """Class for generating message/rfc822 MIME documents."""
+
+ def __init__(self, _msg):
+ """Create a message/rfc822 type MIME document.
+
+ _msg is a message object and must be an instance of Message, or a
+ derived class of Message, otherwise a TypeError is raised.
+ """
+ MIMEBase.MIMEBase.__init__(self, 'message', 'rfc822')
+ if not isinstance(_msg, Message.Message):
+ raise TypeError, 'Argument is not an instance of Message'
+ self.set_payload(_msg)
diff --git a/Lib/email/Parser.py b/Lib/email/Parser.py
new file mode 100644
index 0000000..cc23d19
--- /dev/null
+++ b/Lib/email/Parser.py
@@ -0,0 +1,154 @@
+# Copyright (C) 2001 Python Software Foundation
+# Author: barry@zope.com (Barry Warsaw)
+
+"""A parser of RFC 2822 and MIME email messages.
+"""
+
+import re
+from cStringIO import StringIO
+
+# Intrapackage imports
+import Errors
+import Message
+
+bcre = re.compile('boundary="?([^"]+)"?', re.IGNORECASE)
+EMPTYSTRING = ''
+NL = '\n'
+
+
+
+class Parser:
+ def __init__(self, _class=Message.Message):
+ """Parser of RFC 2822 and MIME email messages.
+
+ Creates an in-memory object tree representing the email message, which
+ can then be manipulated and turned over to a Generator to return the
+ textual representation of the message.
+
+ The string must be formatted as a block of RFC 2822 headers and header
+ continuation lines, optionally preceeded by a `Unix-from' header. The
+ header block is terminated either by the end of the string or by a
+ blank line.
+
+ _class is the class to instantiate for new message objects when they
+ must be created. This class must have a constructor that can take
+ zero arguments. Default is Message.Message.
+ """
+ self._class = _class
+
+ def parse(self, fp):
+ root = self._class()
+ self._parseheaders(root, fp)
+ self._parsebody(root, fp)
+ return root
+
+ def parsestr(self, text):
+ return self.parse(StringIO(text))
+
+ def _parseheaders(self, container, fp):
+ # Parse the headers, returning a list of header/value pairs. None as
+ # the header means the Unix-From header.
+ lastheader = ''
+ lastvalue = []
+ lineno = 0
+ while 1:
+ line = fp.readline()[:-1]
+ if not line or not line.strip():
+ break
+ lineno += 1
+ # Check for initial Unix From_ line
+ if line.startswith('From '):
+ if lineno == 1:
+ container.set_unixfrom(line)
+ continue
+ else:
+ raise Errors.HeaderParseError(
+ 'Unix-from in headers after first rfc822 header')
+ #
+ # Header continuation line
+ if line[0] in ' \t':
+ if not lastheader:
+ raise Errors.HeaderParseError(
+ 'Continuation line seen before first header')
+ lastvalue.append(line)
+ continue
+ # Normal, non-continuation header. BAW: this should check to make
+ # sure it's a legal header, e.g. doesn't contain spaces. Also, we
+ # should expose the header matching algorithm in the API, and
+ # allow for a non-strict parsing mode (that ignores the line
+ # instead of raising the exception).
+ i = line.find(':')
+ if i < 0:
+ raise Errors.HeaderParseError(
+ 'Not a header, not a continuation')
+ if lastheader:
+ container[lastheader] = NL.join(lastvalue)
+ lastheader = line[:i]
+ lastvalue = [line[i+1:].lstrip()]
+ # Make sure we retain the last header
+ if lastheader:
+ container[lastheader] = NL.join(lastvalue)
+
+ def _parsebody(self, container, fp):
+ # Parse the body, but first split the payload on the content-type
+ # boundary if present.
+ boundary = isdigest = None
+ ctype = container['content-type']
+ if ctype:
+ mo = bcre.search(ctype)
+ if mo:
+ boundary = mo.group(1)
+ isdigest = container.get_type() == 'multipart/digest'
+ # If there's a boundary, split the payload text into its constituent
+ # parts and parse each separately. Otherwise, just parse the rest of
+ # the body as a single message. Note: any exceptions raised in the
+ # recursive parse need to have their line numbers coerced.
+ if boundary:
+ preamble = epilogue = None
+ # Split into subparts. The first boundary we're looking for won't
+ # have the leading newline since we're at the start of the body
+ # text.
+ separator = '--' + boundary
+ payload = fp.read()
+ start = payload.find(separator)
+ if start < 0:
+ raise Errors.BoundaryError(
+ "Couldn't find starting boundary: %s" % boundary)
+ if start > 0:
+ # there's some pre-MIME boundary preamble
+ preamble = payload[0:start]
+ start += len(separator) + 1 + isdigest
+ terminator = payload.find('\n' + separator + '--', start)
+ if terminator < 0:
+ raise Errors.BoundaryError(
+ "Couldn't find terminating boundary: %s" % boundary)
+ if terminator+len(separator)+3 < len(payload):
+ # there's some post-MIME boundary epilogue
+ epilogue = payload[terminator+len(separator)+3:]
+ # We split the textual payload on the boundary separator, which
+ # includes the trailing newline. If the container is a
+ # multipart/digest then the subparts are by default message/rfc822
+ # instead of text/plain. In that case, they'll have an extra
+ # newline before the headers to distinguish the message's headers
+ # from the subpart headers.
+ if isdigest:
+ separator += '\n\n'
+ else:
+ separator += '\n'
+ parts = payload[start:terminator].split('\n' + separator)
+ for part in parts:
+ msgobj = self.parsestr(part)
+ container.preamble = preamble
+ container.epilogue = epilogue
+ container.add_payload(msgobj)
+ elif ctype == 'message/rfc822':
+ # Create a container for the payload, but watch out for there not
+ # being any headers left
+ try:
+ msg = self.parse(fp)
+ except Errors.HeaderParseError:
+ msg = self._class()
+ self._parsebody(msg, fp)
+ container.add_payload(msg)
+ else:
+ container.add_payload(fp.read())
diff --git a/Lib/email/Text.py b/Lib/email/Text.py
new file mode 100644
index 0000000..5abfd0b
--- /dev/null
+++ b/Lib/email/Text.py
@@ -0,0 +1,41 @@
+# Copyright (C) 2001 Python Software Foundation
+# Author: barry@zope.com (Barry Warsaw)
+
+"""Class representing text/* type MIME documents.
+"""
+
+import MIMEBase
+from Encoders import encode_7or8bit
+
+
+
+class Text(MIMEBase.MIMEBase):
+ """Class for generating text/* type MIME documents."""
+
+ def __init__(self, _text, _minor='plain', _charset='us-ascii',
+ _encoder=encode_7or8bit):
+ """Create a text/* type MIME document.
+
+ _text is the string for this message object. If the text does not end
+ in a newline, one is added.
+
+ _minor is the minor content type, defaulting to "plain".
+
+ _charset is the character set parameter added to the Content-Type:
+ header. This defaults to "us-ascii".
+
+ _encoder is a function which will perform the actual encoding for
+ transport of the text data. It takes one argument, which is this
+ Text instance. It should use get_payload() and set_payload() to
+ change the payload to the encoded form. It should also add any
+ Content-Transfer-Encoding: or other headers to the message as
+ necessary. The default encoding doesn't actually modify the payload,
+ but it does set Content-Transfer-Encoding: to either `7bit' or `8bit'
+ as appropriate.
+ """
+ MIMEBase.MIMEBase.__init__(self, 'text', _minor,
+ **{'charset': _charset})
+ if _text and _text[-1] <> '\n':
+ _text += '\n'
+ self.set_payload(_text)
+ _encoder(self)
diff --git a/Lib/email/Utils.py b/Lib/email/Utils.py
new file mode 100644
index 0000000..6bbf2d3
--- /dev/null
+++ b/Lib/email/Utils.py
@@ -0,0 +1,104 @@
+# Copyright (C) 2001 Python Software Foundation
+# Author: barry@zope.com (Barry Warsaw)
+
+"""Miscellaneous utilities.
+"""
+
+import re
+
+from rfc822 import unquote, quote, parseaddr
+from rfc822 import dump_address_pair
+from rfc822 import AddrlistClass as _AddrlistClass
+from rfc822 import parsedate_tz, parsedate, mktime_tz, formatdate
+
+from quopri import decodestring as _qdecode
+import base64
+
+# Intrapackage imports
+from Encoders import _bencode, _qencode
+
+# Separator getaddresses() uses to join the individual field values.
+COMMASPACE = ', '
+# Empty unicode string decode() uses to join the decoded pieces.
+UEMPTYSTRING = u''
+
+
+
+# Helpers
+
+def _identity(s):
+ """Return s unchanged; decode() uses this as its fallback decoder."""
+ return s
+
+
+def _bdecode(s):
+ if not s:
+ return s
+ # We can't quite use base64.encodestring() since it tacks on a "courtesy
+ # newline". Blech!
+ if not s:
+ return s
+ hasnewline = (s[-1] == '\n')
+ value = base64.decodestring(s)
+ if not hasnewline and value[-1] == '\n':
+ return value[:-1]
+ return value
+
+
+
+def getaddresses(fieldvalues):
+ """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
+ all = COMMASPACE.join(fieldvalues)
+ a = _AddrlistClass(all)
+ return a.getaddrlist()
+
+
+
+# Matches one encoded word of the form =?charset?encoding?atom?=.  The
+# pattern has three groups, so ecre.split() returns the text before a match,
+# then the charset, encoding and atom groups, then the text after the match.
+ecre = re.compile(r'''
+ =\? # literal =?
+ (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
+ \? # literal ?
+ (?P<encoding>[qb]) # either a "q" or a "b", case insensitive
+ \? # literal ?
+ (?P<atom>.*?) # non-greedy up to the next ?= is the atom
+ \?= # literal ?=
+ ''', re.VERBOSE | re.IGNORECASE)
+
+
+def decode(s):
+ """Return a decoded string according to RFC 2047, as a unicode string."""
+ rtn = []
+ parts = ecre.split(s, 1)
+ while parts:
+ # If there are less than 4 parts, it can't be encoded and we're done
+ if len(parts) < 5:
+ rtn.extend(parts)
+ break
+ # The first element is any non-encoded leading text
+ rtn.append(parts[0])
+ charset = parts[1]
+ encoding = parts[2]
+ atom = parts[3]
+ # The next chunk to decode should be in parts[4]
+ parts = ecre.split(parts[4])
+ # The encoding must be either `q' or `b', case-insensitive
+ if encoding.lower() == 'q':
+ func = _qdecode
+ elif encoding.lower() == 'b':
+ func = _bdecode
+ else:
+ func = _identity
+ # Decode and get the unicode in the charset
+ rtn.append(unicode(func(atom), charset))
+ # Now that we've decoded everything, we just need to join all the parts
+ # together into the final string.
+ return UEMPTYSTRING.join(rtn)
+
+
+
+def encode(s, charset='iso-8859-1', encoding='q'):
+ """Encode a string according to RFC 2047."""
+ if encoding.lower() == 'q':
+ estr = _qencode(s)
+ elif encoding.lower() == 'b':
+ estr = _bencode(s)
+ else:
+ raise ValueError, 'Illegal encoding code: ' + encoding
+ return '=?%s?%s?%s?=' % (charset.lower(), encoding.lower(), estr)
diff --git a/Lib/email/__init__.py b/Lib/email/__init__.py
new file mode 100644
index 0000000..4995114
--- /dev/null
+++ b/Lib/email/__init__.py
@@ -0,0 +1,34 @@
+# Copyright (C) 2001 Python Software Foundation
+# Author: barry@zope.com (Barry Warsaw)
+
+"""A package for parsing, handling, and generating email messages.
+"""
+
+# Version of the email package.
+__version__ = '1.0'
+
+# Names exported by the package: its submodules plus the two convenience
+# functions defined further down in this file.
+__all__ = ['Encoders',
+ 'Errors',
+ 'Generator',
+ 'Image',
+ 'Iterators',
+ 'MIMEBase',
+ 'Message',
+ 'MessageRFC822',
+ 'Parser',
+ 'Text',
+ 'Utils',
+ 'message_from_string',
+ 'message_from_file',
+ ]
+
+
+
+# Some convenience routines
+from Parser import Parser as _Parser
+from Message import Message as _Message
+
+def message_from_string(s, _class=_Message):
+ return _Parser(_class).parsestr(s)
+
+def message_from_file(fp, _class=_Message):
+ return _Parser(_class).parse(fp)