summaryrefslogtreecommitdiffstats
path: root/Lib/dos-8x3/mimepars.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/dos-8x3/mimepars.py')
-rw-r--r--Lib/dos-8x3/mimepars.py246
1 files changed, 246 insertions, 0 deletions
diff --git a/Lib/dos-8x3/mimepars.py b/Lib/dos-8x3/mimepars.py
new file mode 100644
index 0000000..46fe9eb
--- /dev/null
+++ b/Lib/dos-8x3/mimepars.py
@@ -0,0 +1,246 @@
+"""Generic MIME parser.
+
+Classes:
+
+ MimeParser - Generic MIME parser.
+
+Exceptions:
+
+ MimeError - Exception raised by MimeParser class.
+
+XXX To do:
+
+- Content-transfer-encoding issues
+- Use Content-length header in rawbody()?
+- Cache parts instead of reparsing each time
+- The message strings in exceptions could use some work
+
+"""
+
+from types import * # Python types, not MIME types :-)
+import string
+import regex
+import SubFile
+import mimetools
+
+
+MimeError = "MimeParser.MimeError" # Exception raised by this class
+
+
+class MimeParser:
+
+ """Generic MIME parser.
+
+ This requires a seekable file.
+
+ """
+
+ def __init__(self, fp):
+ """Constructor: store the file pointer and parse the headers."""
+ self._fp = fp
+ self._start = fp.tell()
+ self._headers = h = mimetools.Message(fp)
+ self._bodystart = fp.tell()
+ self._multipart = h.getmaintype() == 'multipart'
+
+ def multipart(self):
+ """Return whether this is a multipart message."""
+ return self._multipart
+
+ def headers(self):
+ """Return the headers of the MIME message, as a Message object."""
+ return self._headers
+
+ def rawbody(self):
+ """Return the raw body of the MIME message, as a file-like object.
+
+ This is a fairly low-level interface -- for a multipart
+ message, you'd have to parse the body yourself, and it doesn't
+ translate the Content-transfer-encoding.
+
+ """
+ # XXX Use Content-length to set end if it exists?
+ return SubFile.SubFile(self._fp, self._bodystart)
+
+ def body(self):
+ """Return the body of a 1-part MIME message, as a file-like object.
+
+ This should interpret the Content-transfer-encoding, if any
+ (XXX currently it doesn't).
+
+ """
+ if self._multipart:
+ raise MimeError, "body() only works for 1-part messages"
+ return self.rawbody()
+
+ _re_content_length = regex.compile('content-length:[ \t]*\([0-9]+\)',
+ regex.casefold)
+
+ def rawparts(self):
+ """Return the raw body parts of a multipart MIME message.
+
+ This returns a list of SubFile() objects corresponding to the
+ parts. Note that the phantom part before the first separator
+ is returned too, as list item 0. If the final part is not
+ followed by a terminator, it is ignored, and this error is not
+ reported. (XXX: the error should be raised).
+
+ """
+ if not self._multipart:
+ raise MimeError, "[raw]parts() only works for multipart messages"
+ h = self._headers
+ separator = h.getparam('boundary')
+ if not separator:
+ raise MimeError, "multipart boundary not specified"
+ separator = "--" + separator
+ terminator = separator + "--"
+ ns = len(separator)
+ list = []
+ f = self._fp
+ start = f.tell()
+ clength = -1
+ bodystart = -1
+ inheaders = 0
+ while 1:
+ end = f.tell()
+ line = f.readline()
+ if not line:
+ break
+ if line[:2] != "--" or line[:ns] != separator:
+ if inheaders:
+ re = self._re_content_length
+ if re.match(line) > 0:
+ try:
+ clength = string.atoi(re.group(1))
+ except string.atoi_error:
+ pass
+ if not string.strip(line):
+ inheaders = 0
+ bodystart = f.tell()
+ if clength > 0:
+ # Skip binary data
+ f.read(clength)
+ continue
+ line = string.strip(line)
+ if line == terminator or line == separator:
+ if clength >= 0:
+ # The Content-length header determines the subfile size
+ end = bodystart + clength
+ else:
+ # The final newline is not part of the content
+ end = end-1
+ list.append(SubFile.SubFile(f, start, end))
+ start = f.tell()
+ clength = -1
+ inheaders = 1
+ if line == terminator:
+ break
+ return list
+
+ def parts(self):
+ """Return the parsed body parts of a multipart MIME message.
+
+ This returns a list of MimeParser() instances corresponding to
+ the parts. The phantom part before the first separator is not
+ included.
+
+ """
+ return map(MimeParser, self.rawparts()[1:])
+
+ def getsubpartbyposition(self, indices):
+ part = self
+ for i in indices:
+ part = part.parts()[i]
+ return part
+
+ def getsubpartbyid(self, id):
+ h = self._headers
+ cid = h.getheader('content-id')
+ if cid and cid == id:
+ return self
+ if self._multipart:
+ for part in self.parts():
+ parser = MimeParser(part)
+ hit = parser.getsubpartbyid(id)
+ if hit:
+ return hit
+ return None
+
+ def index(self):
+ """Return an index of the MIME file.
+
+ This parses the entire file and returns index information
+ about it, in the form of a tuple
+
+ (ctype, headers, body)
+
+ where 'ctype' is the content type string of the message
+ (e.g. `text/plain' or `multipart/mixed') and 'headers' is a
+ Message instance containing the message headers (which should
+ be treated as read-only).
+
+ The 'body' item depends on the content type:
+
+ - If it is an atomic message (anything except for content type
+ multipart/*), it is the file-like object returned by
+ self.body().
+
+ - For a content type of multipart/*, it is the list of
+ MimeParser() objects returned by self.parts().
+
+ """
+ if self._multipart:
+ body = self.parts()
+ else:
+ body = self.body()
+ return self._headers.gettype(), self._headers, body
+
+
+def _show(parser, level=0):
+ """Helper for _test()."""
+ ctype, headers, body = parser.index()
+ print ctype,
+ if type(body) == ListType:
+ nparts = len(body)
+ print "(%d part%s):" % (nparts, nparts != 1 and "s" or "")
+ n = 0
+ for part in body:
+ n = n+1
+ print "%*d." % (4*level+2, n),
+ _show(part, level+1)
+ else:
+ bodylines = body.readlines()
+ print "(%d header lines, %d body lines)" % (
+ len(headers.headers), len(bodylines))
+ for line in headers.headers + ['\n'] + bodylines:
+ if line[-1:] == '\n': line = line[:-1]
+ print " "*level + line
+
+def _test(args = None):
+ """Test program invoked when run as a script.
+
+ When a filename argument is specified, it reads from that file.
+ When no arguments are present, it defaults to 'testkp.txt' if it
+ exists, else it defaults to stdin.
+
+ """
+ if not args:
+ import sys
+ args = sys.argv[1:]
+ if args:
+ fn = args[0]
+ else:
+ import os
+ fn = 'testkp.txt'
+ if not os.path.exists(fn):
+ fn = '-'
+ if fn == '-':
+ fp = sys.stdin
+ else:
+ fp = open(fn)
+ mp = MimeParser(fp)
+ _show(mp)
+
+if __name__ == '__main__':
+ import sys
+ _test()