diff options
author | Barry Warsaw <barry@python.org> | 2001-09-23 03:17:28 (GMT) |
---|---|---|
committer | Barry Warsaw <barry@python.org> | 2001-09-23 03:17:28 (GMT) |
commit | ba92580f01b47ba1468c382961ed5122654c2520 (patch) | |
tree | 413464c274da1a93dc99d0a1cf13baf9a99c3220 /Lib/email | |
parent | d61d0d3f6dbd960a761c05ff7fea848cb6490aa3 (diff) | |
download | cpython-ba92580f01b47ba1468c382961ed5122654c2520.zip cpython-ba92580f01b47ba1468c382961ed5122654c2520.tar.gz cpython-ba92580f01b47ba1468c382961ed5122654c2520.tar.bz2 |
The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>. There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 features such as generators are used).
Diffstat (limited to 'Lib/email')
-rw-r--r-- | Lib/email/Encoders.py | 68 | ||||
-rw-r--r-- | Lib/email/Errors.py | 26 | ||||
-rw-r--r-- | Lib/email/Generator.py | 326 | ||||
-rw-r--r-- | Lib/email/Image.py | 46 | ||||
-rw-r--r-- | Lib/email/Iterators.py | 33 | ||||
-rw-r--r-- | Lib/email/MIMEBase.py | 24 | ||||
-rw-r--r-- | Lib/email/Message.py | 422 | ||||
-rw-r--r-- | Lib/email/MessageRFC822.py | 24 | ||||
-rw-r--r-- | Lib/email/Parser.py | 154 | ||||
-rw-r--r-- | Lib/email/Text.py | 41 | ||||
-rw-r--r-- | Lib/email/Utils.py | 104 | ||||
-rw-r--r-- | Lib/email/__init__.py | 34 |
12 files changed, 1302 insertions, 0 deletions
diff --git a/Lib/email/Encoders.py b/Lib/email/Encoders.py new file mode 100644 index 0000000..36db370 --- /dev/null +++ b/Lib/email/Encoders.py @@ -0,0 +1,68 @@ +# Copyright (C) 2001 Python Software Foundation +# Author: barry@zope.com (Barry Warsaw) + +"""Module containing encoding functions for Image.Image and Text.Text. +""" + +import base64 +from quopri import encodestring as _encodestring + + + +# Helpers +def _qencode(s): + return _encodestring(s, quotetabs=1) + +def _bencode(s): + # We can't quite use base64.encodestring() since it tacks on a "courtesy + # newline". Blech! + if not s: + return s + hasnewline = (s[-1] == '\n') + value = base64.encodestring(s) + if not hasnewline and value[-1] == '\n': + return value[:-1] + return value + + + +def encode_base64(msg): + """Encode the message's payload in Base64. + + Also, add an appropriate Content-Transfer-Encoding: header. + """ + orig = msg.get_payload() + encdata = _bencode(orig) + msg.set_payload(encdata) + msg['Content-Transfer-Encoding'] = 'base64' + + + +def encode_quopri(msg): + """Encode the message's payload in Quoted-Printable. + + Also, add an appropriate Content-Transfer-Encoding: header. + """ + orig = msg.get_payload() + encdata = _qencode(orig) + msg.set_payload(encdata) + msg['Content-Transfer-Encoding'] = 'quoted-printable' + + + +def encode_7or8bit(msg): + """Set the Content-Transfer-Encoding: header to 7bit or 8bit.""" + orig = msg.get_payload() + # We play a trick to make this go fast. If encoding to ASCII succeeds, we + # know the data must be 7bit, otherwise treat it as 8bit. 
+ try: + orig.encode('ascii') + except UnicodeError: + msg['Content-Transfer-Encoding'] = '8bit' + else: + msg['Content-Transfer-Encoding'] = '7bit' + + + +def encode_noop(msg): + """Do nothing.""" diff --git a/Lib/email/Errors.py b/Lib/email/Errors.py new file mode 100644 index 0000000..71d7663 --- /dev/null +++ b/Lib/email/Errors.py @@ -0,0 +1,26 @@ +# Copyright (C) 2001 Python Software Foundation +# Author: barry@zope.com (Barry Warsaw) + +"""email package exception classes. +""" + + + +class MessageError(Exception): + """Base class for errors in this module.""" + + +class MessageParseError(MessageError): + """Base class for message parsing errors.""" + + +class HeaderParseError(MessageParseError): + """Error while parsing headers.""" + + +class BoundaryError(MessageParseError): + """Couldn't find terminating boundary.""" + + +class MultipartConversionError(MessageError, TypeError): + """Conversion to a multipart is prohibited.""" diff --git a/Lib/email/Generator.py b/Lib/email/Generator.py new file mode 100644 index 0000000..ca9757f --- /dev/null +++ b/Lib/email/Generator.py @@ -0,0 +1,326 @@ +# Copyright (C) 2001 Python Software Foundation +# Author: barry@zope.com (Barry Warsaw) + +"""Classes to generate plain text from a message object tree. +""" + +import time +import re +import random + +from types import ListType, StringType +from cStringIO import StringIO + +# Intrapackage imports +import Message +import Errors + +SEMISPACE = '; ' +BAR = '|' +UNDERSCORE = '_' +NL = '\n' +SEMINLTAB = ';\n\t' +SPACE8 = ' ' * 8 + +fcre = re.compile(r'^From ', re.MULTILINE) + + + +class Generator: + """Generates output from a Message object tree. + + This basic generator writes the message to the given file object as plain + text. + """ + # + # Public interface + # + + def __init__(self, outfp, mangle_from_=1, maxheaderlen=78): + """Create the generator for message flattening. + + outfp is the output file-like object for writing the message to. 
It + must have a write() method. + + Optional mangle_from_ is a flag that, when true, escapes From_ lines + in the body of the message by putting a `>' in front of them. + + Optional maxheaderlen specifies the longest length for a non-continued + header. When a header line is longer (in characters, with tabs + expanded to 8 spaces), than maxheaderlen, the header will be broken on + semicolons and continued as per RFC 2822. If no semicolon is found, + then the header is left alone. Set to zero to disable wrapping + headers. Default is 78, as recommended (but not required by RFC + 2822. + """ + self._fp = outfp + self._mangle_from_ = mangle_from_ + self.__first = 1 + self.__maxheaderlen = maxheaderlen + + def write(self, s): + # Just delegate to the file object + self._fp.write(s) + + def __call__(self, msg, unixfrom=0): + """Print the message object tree rooted at msg to the output file + specified when the Generator instance was created. + + unixfrom is a flag that forces the printing of a Unix From_ delimiter + before the first object in the message tree. If the original message + has no From_ delimiter, a `standard' one is crafted. By default, this + is 0 to inhibit the printing of any From_ delimiter. + + Note that for subobjects, no From_ line is printed. + """ + if unixfrom: + ufrom = msg.get_unixfrom() + if not ufrom: + ufrom = 'From nobody ' + time.ctime(time.time()) + print >> self._fp, ufrom + self._write(msg) + + # + # Protected interface - undocumented ;/ + # + + def _write(self, msg): + # We can't write the headers yet because of the following scenario: + # say a multipart message includes the boundary string somewhere in + # its body. We'd have to calculate the new boundary /before/ we write + # the headers so that we can write the correct Content-Type: + # parameter. + # + # The way we do this, so as to make the _handle_*() methods simpler, + # is to cache any subpart writes into a StringIO. The we write the + # headers and the StringIO contents. 
That way, subpart handlers can + # Do The Right Thing, and can still modify the Content-Type: header if + # necessary. + oldfp = self._fp + try: + self._fp = sfp = StringIO() + self._dispatch(msg) + finally: + self._fp = oldfp + # Write the headers. First we see if the message object wants to + # handle that itself. If not, we'll do it generically. + meth = getattr(msg, '_write_headers', None) + if meth is None: + self._write_headers(msg) + else: + meth(self) + self._fp.write(sfp.getvalue()) + + def _dispatch(self, msg): + # Get the Content-Type: for the message, then try to dispatch to + # self._handle_maintype_subtype(). If there's no handler for the full + # MIME type, then dispatch to self._handle_maintype(). If that's + # missing too, then dispatch to self._writeBody(). + ctype = msg.get_type() + if ctype is None: + # No Content-Type: header so try the default handler + self._writeBody(msg) + else: + # We do have a Content-Type: header. + specific = UNDERSCORE.join(ctype.split('/')).replace('-', '_') + meth = getattr(self, '_handle_' + specific, None) + if meth is None: + generic = msg.get_main_type().replace('-', '_') + meth = getattr(self, '_handle_' + generic, None) + if meth is None: + meth = self._writeBody + meth(msg) + + # + # Default handlers + # + + def _write_headers(self, msg): + for h, v in msg.items(): + # We only write the MIME-Version: header for the outermost + # container message. Unfortunately, we can't use same technique + # as for the Unix-From above because we don't know when + # MIME-Version: will occur. + if h.lower() == 'mime-version' and not self.__first: + continue + # RFC 2822 says that lines SHOULD be no more than maxheaderlen + # characters wide, so we're well within our rights to split long + # headers. 
+ text = '%s: %s' % (h, v) + if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen: + text = self._split_header(text) + print >> self._fp, text + # A blank line always separates headers from body + print >> self._fp + + def _split_header(self, text): + maxheaderlen = self.__maxheaderlen + # Find out whether any lines in the header are really longer than + # maxheaderlen characters wide. There could be continuation lines + # that actually shorten it. Also, replace hard tabs with 8 spaces. + lines = [s.replace('\t', SPACE8) for s in text.split('\n')] + for line in lines: + if len(line) > maxheaderlen: + break + else: + # No line was actually longer than maxheaderlen characters, so + # just return the original unchanged. + return text + rtn = [] + for line in text.split('\n'): + # Short lines can remain unchanged + if len(line.replace('\t', SPACE8)) <= maxheaderlen: + rtn.append(line) + else: + # Try to break the line on semicolons, but if that doesn't + # work, then just leave it alone. + while len(text) > maxheaderlen: + i = text.rfind(';', 0, maxheaderlen) + if i < 0: + rtn.append(text) + break + rtn.append(text[:i]) + text = text[i+1:].lstrip() + rtn.append(text) + return SEMINLTAB.join(rtn) + + # + # Handlers for writing types and subtypes + # + + def _handle_text(self, msg): + payload = msg.get_payload() + if not isinstance(payload, StringType): + raise TypeError, 'string payload expected' + if self._mangle_from_: + payload = fcre.sub('>From ', payload) + self._fp.write(payload) + + # Default body handler + _writeBody = _handle_text + + def _handle_multipart(self, msg, isdigest=0): + # The trick here is to write out each part separately, merge them all + # together, and then make sure that the boundary we've chosen isn't + # present in the payload. 
+ msgtexts = [] + for part in msg.get_payload(): + s = StringIO() + g = self.__class__(s) + g(part, unixfrom=0) + msgtexts.append(s.getvalue()) + # Now make sure the boundary we've selected doesn't appear in any of + # the message texts. + alltext = NL.join(msgtexts) + # BAW: What about boundaries that are wrapped in double-quotes? + boundary = msg.get_boundary(failobj=_make_boundary(alltext)) + # If we had to calculate a new boundary because the body text + # contained that string, set the new boundary. We don't do it + # unconditionally because, while set_boundary() preserves order, it + # doesn't preserve newlines/continuations in headers. This is no big + # deal in practice, but turns out to be inconvenient for the unittest + # suite. + if msg.get_boundary() <> boundary: + msg.set_boundary(boundary) + # Write out any preamble + if msg.preamble is not None: + self._fp.write(msg.preamble) + # First boundary is a bit different; it doesn't have a leading extra + # newline. + print >> self._fp, '--' + boundary + if isdigest: + print >> self._fp + # Join and write the individual parts + joiner = '\n--' + boundary + '\n' + if isdigest: + # multipart/digest types effectively add an extra newline between + # the boundary and the body part. + joiner += '\n' + self._fp.write(joiner.join(msgtexts)) + print >> self._fp, '\n--' + boundary + '--', + # Write out any epilogue + if msg.epilogue is not None: + self._fp.write(msg.epilogue) + + def _handle_multipart_digest(self, msg): + self._handle_multipart(msg, isdigest=1) + + def _handle_message_rfc822(self, msg): + s = StringIO() + g = self.__class__(s) + # A message/rfc822 should contain a scalar payload which is another + # Message object. Extract that object, stringify it, and write that + # out. + g(msg.get_payload(), unixfrom=0) + self._fp.write(s.getvalue()) + + + +class DecodedGenerator(Generator): + """Generator a text representation of a message. 
+ + Like the Generator base class, except that non-text parts are substituted + with a format string representing the part. + """ + def __init__(self, outfp, mangle_from_=1, maxheaderlen=78, fmt=None): + """Like Generator.__init__() except that an additional optional + argument is allowed. + + Walks through all subparts of a message. If the subpart is of main + type `text', then it prints the decoded payload of the subpart. + + Otherwise, fmt is a format string that is used instead of the message + payload. fmt is expanded with the following keywords (in + %(keyword)s format): + + type : Full MIME type of the non-text part + maintype : Main MIME type of the non-text part + subtype : Sub-MIME type of the non-text part + filename : Filename of the non-text part + description: Description associated with the non-text part + encoding : Content transfer encoding of the non-text part + + The default value for fmt is None, meaning + + [Non-text (%(type)s) part of message omitted, filename %(filename)s] + """ + Generator.__init__(self, outfp, mangle_from_, maxheaderlen) + if fmt is None: + fmt = ('[Non-text (%(type)s) part of message omitted, ' + 'filename %(filename)s]') + self._fmt = fmt + + def _dispatch(self, msg): + for part in msg.walk(): + if part.get_main_type('text') == 'text': + print >> self, part.get_payload(decode=1) + else: + print >> self, self._fmt % { + 'type' : part.get_type('[no MIME type]'), + 'maintype' : part.get_main_type('[no main MIME type]'), + 'subtype' : part.get_subtype('[no sub-MIME type]'), + 'filename' : part.get_filename('[no filename]'), + 'description': part.get('Content-Description', + '[no description]'), + 'encoding' : part.get('Content-Transfer-Encoding', + '[no encoding]'), + } + + + +# Helper +def _make_boundary(self, text=None): + # Craft a random boundary. If text is given, ensure that the chosen + # boundary doesn't appear in the text. 
+ boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '==' + if text is None: + return boundary + b = boundary + counter = 0 + while 1: + cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE) + if not cre.search(text): + break + b = boundary + '.' + str(counter) + counter += 1 + return b diff --git a/Lib/email/Image.py b/Lib/email/Image.py new file mode 100644 index 0000000..d350785 --- /dev/null +++ b/Lib/email/Image.py @@ -0,0 +1,46 @@ +# Copyright (C) 2001 Python Software Foundation +# Author: barry@zope.com (Barry Warsaw) + +"""Class representing image/* type MIME documents. +""" + +import imghdr + +# Intrapackage imports +import MIMEBase +import Errors +import Encoders + + + +class Image(MIMEBase.MIMEBase): + """Class for generating image/* type MIME documents.""" + + def __init__(self, _imagedata, _minor=None, + _encoder=Encoders.encode_base64, **_params): + """Create an image/* type MIME document. + + _imagedata is a string containing the raw image data. If this data + can be decoded by the standard Python `imghdr' module, then the + subtype will be automatically included in the Content-Type: header. + Otherwise, you can specify the specific image subtype via the _minor + parameter. + + _encoder is a function which will perform the actual encoding for + transport of the image data. It takes one argument, which is this + Image instance. It should use get_payload() and set_payload() to + change the payload to the encoded form. It should also add any + Content-Transfer-Encoding: or other headers to the message as + necessary. The default encoding is Base64. + + Any additional keyword arguments are passed to the base class + constructor, which turns them into parameters on the Content-Type: + header. 
+ """ + if _minor is None: + _minor = imghdr.what(None, _imagedata) + if _minor is None: + raise TypeError, 'Could not guess image _minor type' + MIMEBase.MIMEBase.__init__(self, 'image', _minor, **_params) + self.set_payload(_imagedata) + _encoder(self) diff --git a/Lib/email/Iterators.py b/Lib/email/Iterators.py new file mode 100644 index 0000000..998530f --- /dev/null +++ b/Lib/email/Iterators.py @@ -0,0 +1,33 @@ +# Copyright (C) 2001 Python Software Foundation +# Author: barry@zope.com (Barry Warsaw) + +"""Various types of useful iterators and generators. +""" + +from __future__ import generators +from cStringIO import StringIO +from types import StringType + + + +def body_line_iterator(msg): + """Iterator over the parts, returning the lines in a string payload.""" + for subpart in msg.walk(): + payload = subpart.get_payload() + if type(payload) is StringType: + for line in StringIO(payload): + yield line + + + +def typed_subpart_iterator(msg, major='text', minor=None): + """Iterator over the subparts with a given MIME type. + + Use `major' as the main MIME type to match against; this defaults to + "text". Optional `minor' is the MIME subtype to match against; if + omitted, only the main type is matched. + """ + for subpart in msg.walk(): + if subpart.get_main_type() == major: + if minor is None or subpart.get_subtype() == minor: + yield subpart diff --git a/Lib/email/MIMEBase.py b/Lib/email/MIMEBase.py new file mode 100644 index 0000000..e6dda56 --- /dev/null +++ b/Lib/email/MIMEBase.py @@ -0,0 +1,24 @@ +# Copyright (C) 2001 Python Software Foundation +# Author: barry@zope.com (Barry Warsaw) + +"""Base class for MIME specializations. +""" + +import Message + + + +class MIMEBase(Message.Message): + """Base class for MIME specializations.""" + + def __init__(self, _major, _minor, **_params): + """This constructor adds a Content-Type: and a MIME-Version: header. + + The Content-Type: header is taken from the _major and _minor + arguments. 
Additional parameters for this header are taken from the + keyword arguments. + """ + Message.Message.__init__(self) + ctype = '%s/%s' % (_major, _minor) + self.add_header('Content-Type', ctype, **_params) + self['MIME-Version'] = '1.0' diff --git a/Lib/email/Message.py b/Lib/email/Message.py new file mode 100644 index 0000000..35e2dc5 --- /dev/null +++ b/Lib/email/Message.py @@ -0,0 +1,422 @@ +# Copyright (C) 2001 Python Software Foundation +# Author: barry@zope.com (Barry Warsaw) + +"""Basic message object for the email package object model. +""" + +from __future__ import generators + +import re +import base64 +import quopri +from cStringIO import StringIO +from types import ListType + +SEMISPACE = '; ' + +# Intrapackage imports +import Errors +import Utils + + + +class Message: + """Basic message object for use inside the object tree. + + A message object is defined as something that has a bunch of RFC 2822 + headers and a payload. If the body of the message is a multipart, then + the payload is a list of Messages, otherwise it is a string. + + These objects implement part of the `mapping' interface, which assumes + there is exactly one occurrance of the header per message. Some headers + do in fact appear multiple times (e.g. Received:) and for those headers, + you must use the explicit API to set or get all the headers. Not all of + the mapping methods are implemented. + + """ + def __init__(self): + self._headers = [] + self._unixfrom = None + self._payload = None + # Defaults for multipart messages + self.preamble = self.epilogue = None + + def __str__(self): + """Return the entire formatted message as a string. + This includes the headers, body, and `unixfrom' line. + """ + return self.as_string(unixfrom=1) + + def as_string(self, unixfrom=0): + """Return the entire formatted message as a string. + Optional `unixfrom' when true, means include the Unix From_ envelope + header. 
+ """ + from Generator import Generator + fp = StringIO() + g = Generator(fp) + g(self, unixfrom=unixfrom) + return fp.getvalue() + + def is_multipart(self): + """Return true if the message consists of multiple parts.""" + if type(self._payload) is ListType: + return 1 + return 0 + + # + # Unix From_ line + # + def set_unixfrom(self, unixfrom): + self._unixfrom = unixfrom + + def get_unixfrom(self): + return self._unixfrom + + # + # Payload manipulation. + # + def add_payload(self, payload): + """Add the given payload to the current payload. + + If the current payload is empty, then the current payload will be made + a scalar, set to the given value. + """ + if self._payload is None: + self._payload = payload + elif type(self._payload) is ListType: + self._payload.append(payload) + elif self.get_main_type() not in (None, 'multipart'): + raise Errors.MultipartConversionError( + 'Message main Content-Type: must be "multipart" or missing') + else: + self._payload = [self._payload, payload] + + # A useful synonym + attach = add_payload + + def get_payload(self, i=None, decode=0): + """Return the current payload exactly as is. + + Optional i returns that index into the payload. + + Optional decode is a flag indicating whether the payload should be + decoded or not, according to the Content-Transfer-Encoding: header. + When true and the message is not a multipart, the payload will be + decoded if this header's value is `quoted-printable' or `base64'. If + some other encoding is used, or the header is missing, the payload is + returned as-is (undecoded). If the message is a multipart and the + decode flag is true, then None is returned. 
+ """ + if i is None: + payload = self._payload + elif type(self._payload) is not ListType: + raise TypeError, i + else: + payload = self._payload[i] + if decode: + if self.is_multipart(): + return None + cte = self.get('content-transfer-encoding', '') + if cte.lower() == 'quoted-printable': + return Utils._qdecode(payload) + elif cte.lower() == 'base64': + return Utils._bdecode(payload) + # Everything else, including encodings with 8bit or 7bit are returned + # unchanged. + return payload + + + def set_payload(self, payload): + """Set the payload to the given value.""" + self._payload = payload + + # + # MAPPING INTERFACE (partial) + # + def __len__(self): + """Get the total number of headers, including duplicates.""" + return len(self._headers) + + def __getitem__(self, name): + """Get a header value. + + Return None if the header is missing instead of raising an exception. + + Note that if the header appeared multiple times, exactly which + occurrance gets returned is undefined. Use getall() to get all + the values matching a header field name. + """ + return self.get(name) + + def __setitem__(self, name, val): + """Set the value of a header. + + Note: this does not overwrite an existing header with the same field + name. Use __delitem__() first to delete any existing headers. + """ + self._headers.append((name, val)) + + def __delitem__(self, name): + """Delete all occurrences of a header, if present. + + Does not raise an exception if the header is missing. + """ + name = name.lower() + newheaders = [] + for k, v in self._headers: + if k.lower() <> name: + newheaders.append((k, v)) + self._headers = newheaders + + def __contains__(self, key): + return key.lower() in [k.lower() for k, v in self._headers] + + def has_key(self, name): + """Return true if the message contains the header.""" + return self[name] <> None + + def keys(self): + """Return a list of all the message's header field names. 
+ + These will be sorted in the order they appeared in the original + message, and may contain duplicates. Any fields deleted and + re-inserted are always appended to the header list. + """ + return [k for k, v in self._headers] + + def values(self): + """Return a list of all the message's header values. + + These will be sorted in the order they appeared in the original + message, and may contain duplicates. Any fields deleted and + re-inserted are alwyas appended to the header list. + """ + return [v for k, v in self._headers] + + def items(self): + """Get all the message's header fields and values. + + These will be sorted in the order they appeared in the original + message, and may contain duplicates. Any fields deleted and + re-inserted are alwyas appended to the header list. + """ + return self._headers[:] + + def get(self, name, failobj=None): + """Get a header value. + + Like __getitem__() but return failobj instead of None when the field + is missing. + """ + name = name.lower() + for k, v in self._headers: + if k.lower() == name: + return v + return failobj + + # + # Additional useful stuff + # + + def get_all(self, name, failobj=None): + """Return a list of all the values for the named field. + + These will be sorted in the order they appeared in the original + message, and may contain duplicates. Any fields deleted and + re-inserted are alwyas appended to the header list. + """ + values = [] + name = name.lower() + for k, v in self._headers: + if k.lower() == name: + values.append(v) + return values + + def add_header(self, _name, _value, **_params): + """Extended header setting. + + name is the header field to add. keyword arguments can be used to set + additional parameters for the header field, with underscores converted + to dashes. Normally the parameter will be added as key="value" unless + value is None, in which case only the key will be added. 
+ + Example: + + msg.add_header('content-disposition', 'attachment', filename='bud.gif') + + """ + parts = [] + for k, v in _params.items(): + if v is None: + parts.append(k.replace('_', '-')) + else: + parts.append('%s="%s"' % (k.replace('_', '-'), v)) + if _value is not None: + parts.insert(0, _value) + self._headers.append((_name, SEMISPACE.join(parts))) + + def get_type(self, failobj=None): + """Returns the message's content type. + + The returned string is coerced to lowercase and returned as a single + string of the form `maintype/subtype'. If there was no Content-Type: + header in the message, failobj is returned (defaults to None). + """ + missing = [] + value = self.get('content-type', missing) + if value is missing: + return failobj + return re.split(r';\s+', value)[0].lower() + + def get_main_type(self, failobj=None): + """Return the message's main content type if present.""" + missing = [] + ctype = self.get_type(missing) + if ctype is missing: + return failobj + parts = ctype.split('/') + if len(parts) > 0: + return ctype.split('/')[0] + return failobj + + def get_subtype(self, failobj=None): + """Return the message's content subtype if present.""" + missing = [] + ctype = self.get_type(missing) + if ctype is missing: + return failobj + parts = ctype.split('/') + if len(parts) > 1: + return ctype.split('/')[1] + return failobj + + def get_params(self, failobj=None, header='content-type'): + """Return the message's Content-Type: parameters, as a list. + + Optional failobj is the object to return if there is no Content-Type: + header. Optional header is the header to search instead of + Content-Type: + """ + missing = [] + value = self.get(header, missing) + if value is missing: + return failobj + return re.split(r';\s+', value)[1:] + + def get_param(self, param, failobj=None, header='content-type'): + """Return the parameter value if found in the Content-Type: header. + + Optional failobj is the object to return if there is no Content-Type: + header. 
Optional header is the header to search instead of + Content-Type: + """ + param = param.lower() + missing = [] + params = self.get_params(missing, header=header) + if params is missing: + return failobj + for p in params: + try: + name, val = p.split('=', 1) + except ValueError: + # Must have been a bare attribute + name = p + val = '' + if name.lower() == param: + return Utils.unquote(val) + return failobj + + def get_filename(self, failobj=None): + """Return the filename associated with the payload if present. + + The filename is extracted from the Content-Disposition: header's + `filename' parameter, and it is unquoted. + """ + missing = [] + filename = self.get_param('filename', missing, 'content-disposition') + if filename is missing: + return failobj + return Utils.unquote(filename.strip()) + + def get_boundary(self, failobj=None): + """Return the boundary associated with the payload if present. + + The boundary is extracted from the Content-Type: header's `boundary' + parameter, and it is unquoted. + """ + missing = [] + boundary = self.get_param('boundary', missing) + if boundary is missing: + return failobj + return Utils.unquote(boundary.strip()) + + def set_boundary(self, boundary): + """Set the boundary parameter in Content-Type: to 'boundary'. + + This is subtly different than deleting the Content-Type: header and + adding a new one with a new boundary parameter via add_header(). The + main difference is that using the set_boundary() method preserves the + order of the Content-Type: header in the original message. + + HeaderParseError is raised if the message has no Content-Type: header. + """ + params = self.get_params() + if not params: + # There was no Content-Type: header, and we don't know what type + # to set it to, so raise an exception. 
+ raise Errors.HeaderParseError, 'No Content-Type: header found' + newparams = [] + foundp = 0 + for p in params: + if p.lower().startswith('boundary='): + newparams.append('boundary="%s"' % boundary) + foundp = 1 + else: + newparams.append(p) + if not foundp: + # The original Content-Type: header had no boundary attribute. + # Tack one one the end. BAW: should we raise an exception + # instead??? + newparams.append('boundary="%s"' % boundary) + # Replace the existing Content-Type: header with the new value + newheaders = [] + for h, v in self._headers: + if h.lower() == 'content-type': + value = v.split(';', 1)[0] + newparams.insert(0, value) + newheaders.append((h, SEMISPACE.join(newparams))) + else: + newheaders.append((h, v)) + self._headers = newheaders + + def walk(self): + """Walk over the message tree, yielding each subpart. + + The walk is performed in breadth-first order. This method is a + generator. + """ + if self.is_multipart(): + for subpart in self.get_payload(): + for subsubpart in subpart.walk(): + yield subsubpart + else: + yield self + + def get_charsets(self, failobj=None): + """Return a list containing the charset(s) used in this message. + + The returned list of items describes the Content-Type: headers' + charset parameter for this message and all the subparts in its + payload. + + Each item will either be a string (the value of the charset parameter + in the Content-Type: header of that part) or the value of the + 'failobj' parameter (defaults to None), if the part does not have a + main MIME type of "text", or the charset is not defined. + + The list will contain one string for each part of the message, plus + one for the container message (i.e. self), so that a non-multipart + message will still return a list of length 1. 
class Parser:
    """Parser of RFC 2822 and MIME email messages."""

    def __init__(self, _class=Message.Message):
        """Parser of RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The string must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block is terminated either by the end of the string or by a
        blank line.

        _class is the class to instantiate for new message objects when they
        must be created.  This class must have a constructor that can take
        zero arguments.  Default is Message.Message.
        """
        self._class = _class

    def parse(self, fp):
        """Parse a message from the file-like object fp.

        The header block is read first, then the body.  Returns the root
        message object, a new instance of the class given to the constructor.
        """
        root = self._class()
        self._parseheaders(root, fp)
        self._parsebody(root, fp)
        return root

    def parsestr(self, text):
        """Parse a message held in the string text; see parse()."""
        return self.parse(StringIO(text))

    def _parseheaders(self, container, fp):
        """Read the header block from fp and store it on container.

        Lines are consumed up to and including the first blank (or
        whitespace-only) line, or up to the end of the input.  Each header is
        stored with container[name] = value; continuation lines are kept and
        joined to the value with newlines.  A `From ' line is accepted only
        as the very first line and is recorded with container.set_unixfrom().

        Raises Errors.HeaderParseError for a misplaced Unix-from line, for a
        continuation line that precedes any header, and for a line that is
        neither a header nor a continuation.
        """
        lastheader = ''
        lastvalue = []
        lineno = 0
        while 1:
            line = fp.readline()
            # Drop only the line terminator.  Blindly slicing off the last
            # character would eat real data when the final line of the input
            # has no trailing newline.
            if line[-1:] == NL:
                line = line[:-1]
            if not line or not line.strip():
                break
            lineno += 1
            # Check for initial Unix From_ line
            if line.startswith('From '):
                if lineno == 1:
                    container.set_unixfrom(line)
                    continue
                else:
                    raise Errors.HeaderParseError(
                        'Unix-from in headers after first rfc822 header')
            # Header continuation line
            if line[0] in ' \t':
                if not lastheader:
                    raise Errors.HeaderParseError(
                        'Continuation line seen before first header')
                lastvalue.append(line)
                continue
            # Normal, non-continuation header.  BAW: this should check to make
            # sure it's a legal header, e.g. doesn't contain spaces.  Also, we
            # should expose the header matching algorithm in the API, and
            # allow for a non-strict parsing mode (that ignores the line
            # instead of raising the exception).
            i = line.find(':')
            if i < 0:
                raise Errors.HeaderParseError(
                    'Not a header, not a continuation')
            # A new header starts here, so the previous one is now complete.
            if lastheader:
                container[lastheader] = NL.join(lastvalue)
            lastheader = line[:i]
            lastvalue = [line[i+1:].lstrip()]
        # Make sure we retain the last header
        if lastheader:
            container[lastheader] = NL.join(lastvalue)

    def _parsebody(self, container, fp):
        """Read the rest of fp as the body of container.

        If the Content-Type: header carries a boundary parameter, the body is
        split on that boundary and every piece is parsed as a message of its
        own and added to container with add_payload(); any text before the
        first boundary or after the closing one is stored as
        container.preamble / container.epilogue (None when absent).  A
        message/rfc822 body is parsed as one nested message.  Anything else
        is added as a single string payload.

        Raises Errors.BoundaryError when the opening or closing boundary
        cannot be found in the body.
        """
        boundary = isdigest = None
        ctype = container['content-type']
        if ctype:
            mo = bcre.search(ctype)
            if mo:
                boundary = mo.group(1)
                isdigest = container.get_type() == 'multipart/digest'
        # If there's a boundary, split the payload text into its constituent
        # parts and parse each separately.  Otherwise, just parse the rest of
        # the body as a single message.
        if boundary:
            preamble = epilogue = None
            # Split into subparts.  The first boundary we're looking for won't
            # have the leading newline since we're at the start of the body
            # text.
            separator = '--' + boundary
            payload = fp.read()
            start = payload.find(separator)
            if start < 0:
                raise Errors.BoundaryError(
                    "Couldn't find starting boundary: %s" % boundary)
            if start > 0:
                # there's some pre-MIME boundary preamble
                preamble = payload[0:start]
            # Step over the boundary line and its newline; in a digest there
            # is one more newline before the subpart (see below).
            start += len(separator) + 1 + isdigest
            terminator = payload.find('\n' + separator + '--', start)
            if terminator < 0:
                raise Errors.BoundaryError(
                    "Couldn't find terminating boundary: %s" % boundary)
            # The closing delimiter is '\n' + separator + '--', i.e. three
            # characters longer than the separator itself.
            if terminator+len(separator)+3 < len(payload):
                # there's some post-MIME boundary epilogue
                epilogue = payload[terminator+len(separator)+3:]
            # We split the textual payload on the boundary separator, which
            # includes the trailing newline.  If the container is a
            # multipart/digest then the subparts are by default message/rfc822
            # instead of text/plain.  In that case, they'll have an extra
            # newline before the headers to distinguish the message's headers
            # from the subpart headers.
            if isdigest:
                separator += '\n\n'
            else:
                separator += '\n'
            parts = payload[start:terminator].split('\n' + separator)
            # These do not depend on the individual part, so set them once.
            container.preamble = preamble
            container.epilogue = epilogue
            for part in parts:
                msgobj = self.parsestr(part)
                container.add_payload(msgobj)
        elif ctype and container.get_type() == 'message/rfc822':
            # Compare the parsed content type, as the digest check above
            # does, rather than the raw header text: the raw text stops
            # matching as soon as the header carries any parameter.
            #
            # Create a container for the payload, but watch out for there not
            # being any headers left
            try:
                msg = self.parse(fp)
            except Errors.HeaderParseError:
                msg = self._class()
                self._parsebody(msg, fp)
            container.add_payload(msg)
        else:
            container.add_payload(fp.read())
+ """ + MIMEBase.MIMEBase.__init__(self, 'text', _minor, + **{'charset': _charset}) + if _text and _text[-1] <> '\n': + _text += '\n' + self.set_payload(_text) + _encoder(self) diff --git a/Lib/email/Utils.py b/Lib/email/Utils.py new file mode 100644 index 0000000..6bbf2d3 --- /dev/null +++ b/Lib/email/Utils.py @@ -0,0 +1,104 @@ +# Copyright (C) 2001 Python Software Foundation +# Author: barry@zope.com (Barry Warsaw) + +"""Miscellaneous utilities. +""" + +import re + +from rfc822 import unquote, quote, parseaddr +from rfc822 import dump_address_pair +from rfc822 import AddrlistClass as _AddrlistClass +from rfc822 import parsedate_tz, parsedate, mktime_tz, formatdate + +from quopri import decodestring as _qdecode +import base64 + +# Intrapackage imports +from Encoders import _bencode, _qencode + +COMMASPACE = ', ' +UEMPTYSTRING = u'' + + + +# Helpers + +def _identity(s): + return s + + +def _bdecode(s): + if not s: + return s + # We can't quite use base64.encodestring() since it tacks on a "courtesy + # newline". Blech! + if not s: + return s + hasnewline = (s[-1] == '\n') + value = base64.decodestring(s) + if not hasnewline and value[-1] == '\n': + return value[:-1] + return value + + + +def getaddresses(fieldvalues): + """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" + all = COMMASPACE.join(fieldvalues) + a = _AddrlistClass(all) + return a.getaddrlist() + + + +ecre = re.compile(r''' + =\? # literal =? + (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset + \? # literal ? + (?P<encoding>[qb]) # either a "q" or a "b", case insensitive + \? # literal ? + (?P<atom>.*?) 
def decode(s):
    """Return a decoded string according to RFC 2047, as a unicode string.

    Every encoded-word of the form =?charset?encoding?atom?= found in s is
    replaced by its atom, decoded with quoted-printable (`q') or base64
    (`b') and converted to unicode using the named charset.  Text outside
    the encoded-words is copied through unchanged.
    """
    rtn = []
    # Peel off one encoded-word at a time.  With maxsplit=1 the split yields
    # either a single element (no encoded-word left) or exactly five: the
    # leading text, the three groups of the match, and the unprocessed rest.
    parts = ecre.split(s, 1)
    while parts:
        # Fewer than 5 parts means there is no encoded-word, so we're done
        if len(parts) < 5:
            rtn.extend(parts)
            break
        leading, charset, encoding, atom, rest = parts
        # The first element is any non-encoded leading text
        rtn.append(leading)
        # The encoding must be either `q' or `b', case-insensitive
        code = encoding.lower()
        if code == 'q':
            func = _qdecode
        elif code == 'b':
            func = _bdecode
        else:
            func = _identity
        # Decode and get the unicode in the charset
        rtn.append(unicode(func(atom), charset))
        # Split the rest on the next encoded-word only.  An unlimited split
        # here would return every remaining match at once, and everything
        # past the first of them would be silently dropped.
        parts = ecre.split(rest, 1)
    # Now that we've decoded everything, we just need to join all the parts
    # together into the final string.
    return UEMPTYSTRING.join(rtn)


def encode(s, charset='iso-8859-1', encoding='q'):
    """Encode a string according to RFC 2047.

    s is the text to encode.  charset names its character set and is
    written in lower case into the result.  encoding selects the transfer
    encoding: `q' for quoted-printable or `b' for base64, case-insensitive.

    Returns the string =?charset?encoding?encoded-text?=.  Raises ValueError
    for any other encoding code.
    """
    code = encoding.lower()
    if code == 'q':
        estr = _qencode(s)
    elif code == 'b':
        estr = _bencode(s)
    else:
        raise ValueError('Illegal encoding code: ' + encoding)
    return '=?%s?%s?%s?=' % (charset.lower(), code, estr)
def message_from_string(s, _class=_Message):
    """Parse the string s into a message object tree and return its root.

    _class is the class used for the message objects that get created; it
    is handed unchanged to the Parser constructor.
    """
    parser = _Parser(_class)
    return parser.parsestr(s)

def message_from_file(fp, _class=_Message):
    """Parse the contents of the file-like object fp and return the root.

    _class is the class used for the message objects that get created; it
    is handed unchanged to the Parser constructor.
    """
    parser = _Parser(_class)
    return parser.parse(fp)