From 01ca336ed101dc5dd8dcd6942df58740dbba81df Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 13 Jul 1992 14:28:59 +0000 Subject: New modules mimetools and rfc822. Minor, minor changes to commands.py and sndhdr.py. --- Lib/commands.py | 2 +- Lib/mimetools.py | 113 +++++++++++++++++++++++++++++ Lib/rfc822.py | 211 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 325 insertions(+), 1 deletion(-) create mode 100644 Lib/mimetools.py create mode 100644 Lib/rfc822.py diff --git a/Lib/commands.py b/Lib/commands.py index 5e4a9cf..d8c6e65 100644 --- a/Lib/commands.py +++ b/Lib/commands.py @@ -49,7 +49,7 @@ def mkarg(x): return ' \'' + x + '\'' s = ' "' for c in x: - if c in '\\$"': + if c in '\\$"`': s = s + '\\' s = s + c s = s + '"' diff --git a/Lib/mimetools.py b/Lib/mimetools.py new file mode 100644 index 0000000..79c6fb1 --- /dev/null +++ b/Lib/mimetools.py @@ -0,0 +1,113 @@ +# Various tools used by MIME-reading or MIME-writing programs. + + +import string +import rfc822 + + +# A derived class of rfc822.Message that knows about MIME headers and +# contains some hooks for decoding encoded and multipart messages. 
class Message(rfc822.Message):
    """An rfc822.Message that also interprets the MIME headers.

    After init() has run, the instance carries these attributes:

    encodingheader -- value of Content-Transfer-Encoding, or None
    typeheader     -- value of Content-Type, or None
    type           -- lower-cased 'maintype/subtype'; 'text/plain' when
                      the message has no Content-Type header
    maintype       -- the part of type before the first '/'
    subtype        -- the part of type after the first '/'
    plisttext      -- raw parameter text that follows the type, starting
                      at the first ';' ('' when there is none)
    plist          -- list of parameters as 'name=value' strings with
                      the name lower-cased (see parseplist())
    """

    def init(self, fp):
        """Read the headers from fp and parse the MIME-specific ones.

        Returns self, so that ``Message().init(fp)`` works.
        """
        rfc822.Message.init(self, fp)
        self.encodingheader = self.getheader('content-transfer-encoding')
        self.typeheader = self.getheader('content-type')
        self.parsetype()
        self.parseplist()
        return self

    def parsetype(self):
        """Split self.typeheader into type, maintype, subtype, plisttext."""
        text = self.typeheader
        if text is None:
            text = 'text/plain'
        if ';' in text:
            # Everything from the first ';' on is the parameter list.
            i = text.index(';')
            self.plisttext = text[i:]
            text = text[:i]
        else:
            self.plisttext = ''
        fields = [field.strip().lower() for field in text.split('/')]
        self.type = '/'.join(fields)
        self.maintype = fields[0]
        self.subtype = '/'.join(fields[1:])

    def parseplist(self):
        """Split self.plisttext into the list self.plist.

        Every ';'-introduced parameter yields one entry.  An entry that
        contains '=' is stored as 'name=value' with the name stripped
        and lower-cased and the value stripped (quotes are kept; use
        getparam() to get the unquoted value).  Other entries are stored
        stripped.  A ';' inside a double-quoted value does not end the
        parameter.
        """
        text = self.plisttext
        self.plist = []
        while text[:1] == ';':
            text = text[1:]
            end = self._paramend(text)
            field = text[:end]
            if '=' in field:
                i = field.index('=')
                field = field[:i].strip().lower() + '=' + field[i+1:].strip()
            self.plist.append(field.strip())
            # Move on to the next parameter; without this only the
            # first parameter would ever be seen.
            text = text[end:]

    def _paramend(self, text):
        """Return the index of the first ';' in text outside double quotes.

        Returns len(text) if there is none.  Inside quotes a backslash
        escapes the character that follows it.
        """
        quoted = False
        i = 0
        n = len(text)
        while i < n:
            c = text[i]
            if quoted and c == '\\':
                i = i + 1  # skip the escaped character as well
            elif c == '"':
                quoted = not quoted
            elif c == ';' and not quoted:
                return i
            i = i + 1
        return n

    def getplist(self):
        """Return the parameter list built by parseplist()."""
        return self.plist

    def getparam(self, name):
        """Return the unquoted value of parameter name, or None.

        The name is matched case-insensitively; the first match wins.
        """
        key = name.lower() + '='
        n = len(key)
        for param in self.plist:
            if param[:n] == key:
                return rfc822.unquote(param[n:])
        return None

    def getencoding(self):
        """Return the Content-Transfer-Encoding value, default '7bit'."""
        if self.encodingheader is None:
            return '7bit'
        return self.encodingheader

    def gettype(self):
        """Return the lower-cased 'maintype/subtype' string."""
        return self.type

    def getmaintype(self):
        """Return the lower-cased main type."""
        return self.maintype

    def getsubtype(self):
        """Return the lower-cased subtype."""
        return self.subtype


# Utility functions
# -----------------


# Return a random string usable as a multipart boundary.
# The method used is so that it is *very* unlikely that the same
# string of characters will ever occur again in the Universe,
# so the caller needn't check the data it is packing for the
# occurrence of the boundary.
#
# The boundary contains dots so you have to quote it in the header.
# Host/uid/pid part of the boundary; computed on first use and then reused.
_prefix = None


def choose_boundary():
    """Return a string usable as a multipart boundary.

    The result has the form 'hostid.uid.pid.timestamp.seed'.  The
    host/uid/pid prefix is computed once and cached in the module
    global _prefix; the timestamp and the random seed are fresh on
    every call.
    """
    global _prefix
    import random
    import time
    if _prefix is None:
        import os
        import socket
        try:
            hostid = socket.gethostbyname(socket.gethostname())
        except socket.error:
            # The local host name does not resolve; any fixed
            # address will do since uid, pid, time and seed still vary.
            hostid = '127.0.0.1'
        try:
            uid = repr(os.getuid())
        except AttributeError:
            # Platform without os.getuid().
            uid = '1'
        pid = repr(os.getpid())
        _prefix = hostid + '.' + uid + '.' + pid
    timestamp = repr(time.time())
    seed = repr(random.randrange(32768))
    return _prefix + '.' + timestamp + '.' + seed


# ----------------------------------------------------------------------
# diff --git a/Lib/rfc822.py b/Lib/rfc822.py
# new file mode 100644
# index 0000000..63f2fb6
# --- /dev/null
# +++ b/Lib/rfc822.py
# @@ -0,0 +1,211 @@
# ----------------------------------------------------------------------

# RFC-822 message manipulation class.
#
# XXX This is only a very rough sketch of a full RFC-822 parser;
# additional methods are needed to parse addresses and dates, and to
# tokenize lines according to various other syntax rules.
#
# Directions for use:
#
# To create a Message object: first open a file, e.g.:
#   fp = open(file, 'r')
# (or use any other legal way of getting an open file object, e.g. use
# sys.stdin or call os.popen()).
# Then pass the open file object to the init() method of Message:
#   m = Message().init(fp)
#
# To get the text of a particular header there are several methods:
#   str = m.getheader(name)
#   str = m.getrawheader(name)
# where name is the name of the header, e.g. 'Subject'.
# The difference is that getheader() strips the leading and trailing
# whitespace, while getrawheader() doesn't.  Both functions retain
# embedded whitespace (including newlines) exactly as they are
# specified in the header, and leave the case of the text unchanged.
#
# See the class definition for lower level access methods.
#
# There are also some utility functions here.


import re
import string


# A header line starts with one or more printable ASCII characters
# other than space and ':', followed by a ':'.
_headerpat = re.compile('[!-9;-~]+:')


class Message:
    """The headers of an RFC-822 message read from an open text file."""

    def init(self, fp):
        """Initialize the instance and read the headers from fp.

        Records the file positions of the start of the headers and of
        the body in self.startofheaders and self.startofbody; either is
        None when fp.tell() raises IOError.  Returns self.
        """
        self.fp = fp
        try:
            self.startofheaders = self.fp.tell()
        except IOError:
            self.startofheaders = None
        self.readheaders()
        try:
            self.startofbody = self.fp.tell()
        except IOError:
            self.startofbody = None
        return self

    def rewindbody(self):
        """Rewind the file to the start of the body.

        Raises IOError if the start of the body is not known because
        the file was not seekable when the headers were read.
        """
        if self.startofbody is None:
            raise IOError('unseekable file')
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines up to the line that terminates them.

        The (normally blank) line that ends the headers is skipped, but
        not included in self.headers.  If a non-header line ends the
        headers (which is an error), an attempt is made to back up over
        it; it is never included in self.headers.

        self.status is set to the empty string if all went well,
        otherwise to an error message.  self.headers is a completely
        uninterpreted list of the lines contained in the header (so
        printing them reproduces the header exactly as it appears in
        the file).
        """
        self.headers = lines = []
        self.status = ''
        headerseen = False
        while True:
            # Remember where the line starts so that it can be pushed
            # back; text streams reject relative backwards seeks.
            try:
                linestart = self.fp.tell()
            except IOError:
                linestart = None
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            if self.islast(line):
                break
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lines.append(line)
            elif _headerpat.match(line):
                # It's a header line.
                lines.append(line)
                headerseen = True
            else:
                # It's not a header line; stop here.
                if headerseen:
                    self.status = 'Bad header'
                else:
                    self.status = 'No headers'
                # Try to undo the read.
                try:
                    if linestart is None:
                        raise IOError('unseekable file')
                    self.fp.seek(linestart)
                except IOError:
                    self.status = self.status + '; bad seek'
                break

    def islast(self, line):
        """Return true if line is a legal end of RFC-822 headers.

        You may override this method if your application wants to bend
        the rules, e.g. to accept lines ending in '\\r\\n', to strip
        trailing whitespace, or to recognise MH template separators
        ('--------').
        """
        return line == '\n'

    def getallmatchingheaders(self, name):
        """Return all header lines matching name, with continuations.

        The lines are returned as a list, without interpretation.  If
        the header does not occur, an empty list is returned.  If the
        header occurs multiple times, all occurrences are returned.
        Case is not important in the header name.
        """
        prefix = name.lower() + ':'
        n = len(prefix)
        found = []
        hit = False
        for line in self.headers:
            if line[:n].lower() == prefix:
                hit = True
            elif line[:1] not in string.whitespace:
                # Some other header starts here.
                hit = False
            if hit:
                found.append(line)
        return found

    def getfirstmatchingheader(self, name):
        """Return the first header matching name, with its continuations.

        Like getallmatchingheaders(), but stops at the first line after
        the match that is not a continuation line, even when that line
        is another occurrence of the same header.
        """
        prefix = name.lower() + ':'
        n = len(prefix)
        found = []
        hit = False
        for line in self.headers:
            if hit:
                if line[:1] not in string.whitespace:
                    break
            elif line[:n].lower() == prefix:
                hit = True
            if hit:
                found.append(line)
        return found

    def getrawheader(self, name):
        """Return the literal text of the first header called name.

        The keyword and its ':' are removed; all leading, trailing and
        embedded whitespace is kept.  Returns None if the header does
        not occur.
        """
        lines = self.getfirstmatchingheader(name)
        if not lines:
            return None
        lines[0] = lines[0][len(name) + 1:]
        return ''.join(lines)

    def getheader(self, name):
        """Like getrawheader(), with leading/trailing whitespace stripped.

        Returns None if the header does not occur.
        """
        text = self.getrawheader(name)
        if text is None:
            return None
        return text.strip()

    # XXX The next step would be to define self.getaddr(name)
    # and self.getaddrlist(name) which would parse a header
    # consisting of a single mail address and a number of mail
    # addresses, respectively.  Lower level functions would be
    # parseaddr(string) and parseaddrlist(string).

    # XXX Similar, there would be a function self.getdate(name) to
    # return a date in canonical form (perhaps a number compatible
    # to time.time()) and a function parsedate(string).

    # XXX The inverses of the parse functions may also be useful.


# Utility functions
# -----------------


# XXX Should fix this to be really conformant.

def unquote(str):
    """Remove one pair of enclosing "..." or <...> from a string.

    Strings shorter than two characters, and strings without such a
    pair, are returned unchanged.
    """
    if len(str) > 1:
        if str[0] == '"' and str[-1:] == '"':
            return str[1:-1]
        if str[0] == '<' and str[-1:] == '>':
            return str[1:-1]
    return str

# --
# cgit v0.12