diff options
Diffstat (limited to 'Lib/dos-8x3/mimepars.py')
-rw-r--r-- | Lib/dos-8x3/mimepars.py | 246 |
1 files changed, 246 insertions, 0 deletions
diff --git a/Lib/dos-8x3/mimepars.py b/Lib/dos-8x3/mimepars.py new file mode 100644 index 0000000..46fe9eb --- /dev/null +++ b/Lib/dos-8x3/mimepars.py @@ -0,0 +1,246 @@ +"""Generic MIME parser. + +Classes: + + MimeParser - Generic MIME parser. + +Exceptions: + + MimeError - Exception raised by MimeParser class. + +XXX To do: + +- Content-transfer-encoding issues +- Use Content-length header in rawbody()? +- Cache parts instead of reparsing each time +- The message strings in exceptions could use some work + +""" + +from types import * # Python types, not MIME types :-) +import string +import regex +import SubFile +import mimetools + + +MimeError = "MimeParser.MimeError" # Exception raised by this class + + +class MimeParser: + + """Generic MIME parser. + + This requires a seekable file. + + """ + + def __init__(self, fp): + """Constructor: store the file pointer and parse the headers.""" + self._fp = fp + self._start = fp.tell() + self._headers = h = mimetools.Message(fp) + self._bodystart = fp.tell() + self._multipart = h.getmaintype() == 'multipart' + + def multipart(self): + """Return whether this is a multipart message.""" + return self._multipart + + def headers(self): + """Return the headers of the MIME message, as a Message object.""" + return self._headers + + def rawbody(self): + """Return the raw body of the MIME message, as a file-like object. + + This is a fairly low-level interface -- for a multipart + message, you'd have to parse the body yourself, and it doesn't + translate the Content-transfer-encoding. + + """ + # XXX Use Content-length to set end if it exists? + return SubFile.SubFile(self._fp, self._bodystart) + + def body(self): + """Return the body of a 1-part MIME message, as a file-like object. + + This should interpret the Content-transfer-encoding, if any + (XXX currently it doesn't). + + """ + if self._multipart: + raise MimeError, "body() only works for 1-part messages" + return self.rawbody() + + _re_content_length = regex.compile('content-length:[ \t]*\([0-9]+\)', + regex.casefold) + + def rawparts(self): + """Return the raw body parts of a multipart MIME message. + + This returns a list of SubFile() objects corresponding to the + parts. Note that the phantom part before the first separator + is returned too, as list item 0. If the final part is not + followed by a terminator, it is ignored, and this error is not + reported. (XXX: the error should be raised). + + """ + if not self._multipart: + raise MimeError, "[raw]parts() only works for multipart messages" + h = self._headers + separator = h.getparam('boundary') + if not separator: + raise MimeError, "multipart boundary not specified" + separator = "--" + separator + terminator = separator + "--" + ns = len(separator) + list = [] + f = self._fp + start = f.tell() + clength = -1 + bodystart = -1 + inheaders = 0 + while 1: + end = f.tell() + line = f.readline() + if not line: + break + if line[:2] != "--" or line[:ns] != separator: + if inheaders: + re = self._re_content_length + if re.match(line) > 0: + try: + clength = string.atoi(re.group(1)) + except string.atoi_error: + pass + if not string.strip(line): + inheaders = 0 + bodystart = f.tell() + if clength > 0: + # Skip binary data + f.read(clength) + continue + line = string.strip(line) + if line == terminator or line == separator: + if clength >= 0: + # The Content-length header determines the subfile size + end = bodystart + clength + else: + # The final newline is not part of the content + end = end-1 + list.append(SubFile.SubFile(f, start, end)) + start = f.tell() + clength = -1 + inheaders = 1 + if line == terminator: + break + return list + + def parts(self): + """Return the parsed body parts of a multipart MIME message. + + This returns a list of MimeParser() instances corresponding to + the parts. The phantom part before the first separator is not + included. + + """ + return map(MimeParser, self.rawparts()[1:]) + + def getsubpartbyposition(self, indices): + part = self + for i in indices: + part = part.parts()[i] + return part + + def getsubpartbyid(self, id): + h = self._headers + cid = h.getheader('content-id') + if cid and cid == id: + return self + if self._multipart: + for part in self.parts(): + parser = MimeParser(part) + hit = parser.getsubpartbyid(id) + if hit: + return hit + return None + + def index(self): + """Return an index of the MIME file. + + This parses the entire file and returns index information + about it, in the form of a tuple + + (ctype, headers, body) + + where 'ctype' is the content type string of the message + (e.g. `text/plain' or `multipart/mixed') and 'headers' is a + Message instance containing the message headers (which should + be treated as read-only). + + The 'body' item depends on the content type: + + - If it is an atomic message (anything except for content type + multipart/*), it is the file-like object returned by + self.body(). + + - For a content type of multipart/*, it is the list of + MimeParser() objects returned by self.parts(). + + """ + if self._multipart: + body = self.parts() + else: + body = self.body() + return self._headers.gettype(), self._headers, body + + +def _show(parser, level=0): + """Helper for _test().""" + ctype, headers, body = parser.index() + print ctype, + if type(body) == ListType: + nparts = len(body) + print "(%d part%s):" % (nparts, nparts != 1 and "s" or "") + n = 0 + for part in body: + n = n+1 + print "%*d." % (4*level+2, n), + _show(part, level+1) + else: + bodylines = body.readlines() + print "(%d header lines, %d body lines)" % ( + len(headers.headers), len(bodylines)) + for line in headers.headers + ['\n'] + bodylines: + if line[-1:] == '\n': line = line[:-1] + print " "*level + line + +def _test(args = None): + """Test program invoked when run as a script. + + When a filename argument is specified, it reads from that file. + When no arguments are present, it defaults to 'testkp.txt' if it + exists, else it defaults to stdin. + + """ + if not args: + import sys + args = sys.argv[1:] + if args: + fn = args[0] + else: + import os + fn = 'testkp.txt' + if not os.path.exists(fn): + fn = '-' + if fn == '-': + fp = sys.stdin + else: + fp = open(fn) + mp = MimeParser(fp) + _show(mp) + +if __name__ == '__main__': + import sys + _test() |