diff options
Diffstat (limited to 'Lib/email/Generator.py')
-rw-r--r-- | Lib/email/Generator.py | 326 |
1 files changed, 326 insertions, 0 deletions
diff --git a/Lib/email/Generator.py b/Lib/email/Generator.py new file mode 100644 index 0000000..ca9757f --- /dev/null +++ b/Lib/email/Generator.py @@ -0,0 +1,326 @@ +# Copyright (C) 2001 Python Software Foundation +# Author: barry@zope.com (Barry Warsaw) + +"""Classes to generate plain text from a message object tree. +""" + +import time +import re +import random + +from types import ListType, StringType +from cStringIO import StringIO + +# Intrapackage imports +import Message +import Errors + +SEMISPACE = '; ' +BAR = '|' +UNDERSCORE = '_' +NL = '\n' +SEMINLTAB = ';\n\t' +SPACE8 = ' ' * 8 + +fcre = re.compile(r'^From ', re.MULTILINE) + + + +class Generator: + """Generates output from a Message object tree. + + This basic generator writes the message to the given file object as plain + text. + """ + # + # Public interface + # + + def __init__(self, outfp, mangle_from_=1, maxheaderlen=78): + """Create the generator for message flattening. + + outfp is the output file-like object for writing the message to. It + must have a write() method. + + Optional mangle_from_ is a flag that, when true, escapes From_ lines + in the body of the message by putting a `>' in front of them. + + Optional maxheaderlen specifies the longest length for a non-continued + header. When a header line is longer (in characters, with tabs + expanded to 8 spaces), than maxheaderlen, the header will be broken on + semicolons and continued as per RFC 2822. If no semicolon is found, + then the header is left alone. Set to zero to disable wrapping + headers. Default is 78, as recommended (but not required by RFC + 2822. + """ + self._fp = outfp + self._mangle_from_ = mangle_from_ + self.__first = 1 + self.__maxheaderlen = maxheaderlen + + def write(self, s): + # Just delegate to the file object + self._fp.write(s) + + def __call__(self, msg, unixfrom=0): + """Print the message object tree rooted at msg to the output file + specified when the Generator instance was created. + + unixfrom is a flag that forces the printing of a Unix From_ delimiter + before the first object in the message tree. If the original message + has no From_ delimiter, a `standard' one is crafted. By default, this + is 0 to inhibit the printing of any From_ delimiter. + + Note that for subobjects, no From_ line is printed. + """ + if unixfrom: + ufrom = msg.get_unixfrom() + if not ufrom: + ufrom = 'From nobody ' + time.ctime(time.time()) + print >> self._fp, ufrom + self._write(msg) + + # + # Protected interface - undocumented ;/ + # + + def _write(self, msg): + # We can't write the headers yet because of the following scenario: + # say a multipart message includes the boundary string somewhere in + # its body. We'd have to calculate the new boundary /before/ we write + # the headers so that we can write the correct Content-Type: + # parameter. + # + # The way we do this, so as to make the _handle_*() methods simpler, + # is to cache any subpart writes into a StringIO. The we write the + # headers and the StringIO contents. That way, subpart handlers can + # Do The Right Thing, and can still modify the Content-Type: header if + # necessary. + oldfp = self._fp + try: + self._fp = sfp = StringIO() + self._dispatch(msg) + finally: + self._fp = oldfp + # Write the headers. First we see if the message object wants to + # handle that itself. If not, we'll do it generically. + meth = getattr(msg, '_write_headers', None) + if meth is None: + self._write_headers(msg) + else: + meth(self) + self._fp.write(sfp.getvalue()) + + def _dispatch(self, msg): + # Get the Content-Type: for the message, then try to dispatch to + # self._handle_maintype_subtype(). If there's no handler for the full + # MIME type, then dispatch to self._handle_maintype(). If that's + # missing too, then dispatch to self._writeBody(). + ctype = msg.get_type() + if ctype is None: + # No Content-Type: header so try the default handler + self._writeBody(msg) + else: + # We do have a Content-Type: header. + specific = UNDERSCORE.join(ctype.split('/')).replace('-', '_') + meth = getattr(self, '_handle_' + specific, None) + if meth is None: + generic = msg.get_main_type().replace('-', '_') + meth = getattr(self, '_handle_' + generic, None) + if meth is None: + meth = self._writeBody + meth(msg) + + # + # Default handlers + # + + def _write_headers(self, msg): + for h, v in msg.items(): + # We only write the MIME-Version: header for the outermost + # container message. Unfortunately, we can't use same technique + # as for the Unix-From above because we don't know when + # MIME-Version: will occur. + if h.lower() == 'mime-version' and not self.__first: + continue + # RFC 2822 says that lines SHOULD be no more than maxheaderlen + # characters wide, so we're well within our rights to split long + # headers. + text = '%s: %s' % (h, v) + if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen: + text = self._split_header(text) + print >> self._fp, text + # A blank line always separates headers from body + print >> self._fp + + def _split_header(self, text): + maxheaderlen = self.__maxheaderlen + # Find out whether any lines in the header are really longer than + # maxheaderlen characters wide. There could be continuation lines + # that actually shorten it. Also, replace hard tabs with 8 spaces. + lines = [s.replace('\t', SPACE8) for s in text.split('\n')] + for line in lines: + if len(line) > maxheaderlen: + break + else: + # No line was actually longer than maxheaderlen characters, so + # just return the original unchanged. + return text + rtn = [] + for line in text.split('\n'): + # Short lines can remain unchanged + if len(line.replace('\t', SPACE8)) <= maxheaderlen: + rtn.append(line) + else: + # Try to break the line on semicolons, but if that doesn't + # work, then just leave it alone. + while len(text) > maxheaderlen: + i = text.rfind(';', 0, maxheaderlen) + if i < 0: + rtn.append(text) + break + rtn.append(text[:i]) + text = text[i+1:].lstrip() + rtn.append(text) + return SEMINLTAB.join(rtn) + + # + # Handlers for writing types and subtypes + # + + def _handle_text(self, msg): + payload = msg.get_payload() + if not isinstance(payload, StringType): + raise TypeError, 'string payload expected' + if self._mangle_from_: + payload = fcre.sub('>From ', payload) + self._fp.write(payload) + + # Default body handler + _writeBody = _handle_text + + def _handle_multipart(self, msg, isdigest=0): + # The trick here is to write out each part separately, merge them all + # together, and then make sure that the boundary we've chosen isn't + # present in the payload. + msgtexts = [] + for part in msg.get_payload(): + s = StringIO() + g = self.__class__(s) + g(part, unixfrom=0) + msgtexts.append(s.getvalue()) + # Now make sure the boundary we've selected doesn't appear in any of + # the message texts. + alltext = NL.join(msgtexts) + # BAW: What about boundaries that are wrapped in double-quotes? + boundary = msg.get_boundary(failobj=_make_boundary(alltext)) + # If we had to calculate a new boundary because the body text + # contained that string, set the new boundary. We don't do it + # unconditionally because, while set_boundary() preserves order, it + # doesn't preserve newlines/continuations in headers. This is no big + # deal in practice, but turns out to be inconvenient for the unittest + # suite. + if msg.get_boundary() <> boundary: + msg.set_boundary(boundary) + # Write out any preamble + if msg.preamble is not None: + self._fp.write(msg.preamble) + # First boundary is a bit different; it doesn't have a leading extra + # newline. + print >> self._fp, '--' + boundary + if isdigest: + print >> self._fp + # Join and write the individual parts + joiner = '\n--' + boundary + '\n' + if isdigest: + # multipart/digest types effectively add an extra newline between + # the boundary and the body part. + joiner += '\n' + self._fp.write(joiner.join(msgtexts)) + print >> self._fp, '\n--' + boundary + '--', + # Write out any epilogue + if msg.epilogue is not None: + self._fp.write(msg.epilogue) + + def _handle_multipart_digest(self, msg): + self._handle_multipart(msg, isdigest=1) + + def _handle_message_rfc822(self, msg): + s = StringIO() + g = self.__class__(s) + # A message/rfc822 should contain a scalar payload which is another + # Message object. Extract that object, stringify it, and write that + # out. + g(msg.get_payload(), unixfrom=0) + self._fp.write(s.getvalue()) + + + +class DecodedGenerator(Generator): + """Generator a text representation of a message. + + Like the Generator base class, except that non-text parts are substituted + with a format string representing the part. + """ + def __init__(self, outfp, mangle_from_=1, maxheaderlen=78, fmt=None): + """Like Generator.__init__() except that an additional optional + argument is allowed. + + Walks through all subparts of a message. If the subpart is of main + type `text', then it prints the decoded payload of the subpart. + + Otherwise, fmt is a format string that is used instead of the message + payload. fmt is expanded with the following keywords (in + %(keyword)s format): + + type : Full MIME type of the non-text part + maintype : Main MIME type of the non-text part + subtype : Sub-MIME type of the non-text part + filename : Filename of the non-text part + description: Description associated with the non-text part + encoding : Content transfer encoding of the non-text part + + The default value for fmt is None, meaning + + [Non-text (%(type)s) part of message omitted, filename %(filename)s] + """ + Generator.__init__(self, outfp, mangle_from_, maxheaderlen) + if fmt is None: + fmt = ('[Non-text (%(type)s) part of message omitted, ' + 'filename %(filename)s]') + self._fmt = fmt + + def _dispatch(self, msg): + for part in msg.walk(): + if part.get_main_type('text') == 'text': + print >> self, part.get_payload(decode=1) + else: + print >> self, self._fmt % { + 'type' : part.get_type('[no MIME type]'), + 'maintype' : part.get_main_type('[no main MIME type]'), + 'subtype' : part.get_subtype('[no sub-MIME type]'), + 'filename' : part.get_filename('[no filename]'), + 'description': part.get('Content-Description', + '[no description]'), + 'encoding' : part.get('Content-Transfer-Encoding', + '[no encoding]'), + } + + + +# Helper +def _make_boundary(self, text=None): + # Craft a random boundary. If text is given, ensure that the chosen + # boundary doesn't appear in the text. + boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '==' + if text is None: + return boundary + b = boundary + counter = 0 + while 1: + cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE) + if not cre.search(text): + break + b = boundary + '.' + str(counter) + counter += 1 + return b |