summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
authorBarry Warsaw <barry@python.org>2001-01-31 22:13:15 (GMT)
committerBarry Warsaw <barry@python.org>2001-01-31 22:13:15 (GMT)
commit81ad67cdc6e89465d1503267c78ee570406bcc60 (patch)
tree80fe22163ca89081921dcb70e190c2c617afdb65 /Lib
parentb416290d5d646dc90ff288af3f4bbf58e46f3f67 (diff)
downloadcpython-81ad67cdc6e89465d1503267c78ee570406bcc60.zip
cpython-81ad67cdc6e89465d1503267c78ee570406bcc60.tar.gz
cpython-81ad67cdc6e89465d1503267c78ee570406bcc60.tar.bz2
Two changes:
- All constructors grow an optional argument `factory' which is a callable used when new message instances are created by the next() methods. Defaults to the rfc822.Message class. - A new subclass of UnixMailbox is added, called PortableUnixMailbox. It's identical to UnixMailbox, but uses a more portable test for From_ delimiter lines. With PortableUnixMailbox, any line that starts with "From " is considered a delimiter (this should really check for two newlines before the F, but it doesn't.
Diffstat (limited to 'Lib')
-rwxr-xr-xLib/mailbox.py55
1 files changed, 43 insertions, 12 deletions
diff --git a/Lib/mailbox.py b/Lib/mailbox.py
index a565948..2f96106 100755
--- a/Lib/mailbox.py
+++ b/Lib/mailbox.py
@@ -9,9 +9,10 @@ import os
__all__ = ["UnixMailbox","MmdfMailbox","MHMailbox","Maildir","BabylMailbox"]
class _Mailbox:
- def __init__(self, fp):
+ def __init__(self, fp, factory=rfc822.Message):
self.fp = fp
self.seekp = 0
+ self.factory = factory
def seek(self, pos, whence=0):
if whence==1: # Relative to current position
@@ -34,7 +35,7 @@ class _Mailbox:
self.seekp = stop = self.fp.tell()
if start != stop:
break
- return rfc822.Message(_Subfile(self.fp, start, stop))
+ return self.factory(_Subfile(self.fp, start, stop))
class _Subfile:
@@ -117,22 +118,50 @@ class UnixMailbox(_Mailbox):
self.fp.seek(pos)
return
- # An overridable mechanism to test for From-line-ness.
- # You can either specify a different regular expression
- # or define a whole new _isrealfromline() method.
- # Note that this only gets called for lines starting with
- # the 5 characters "From ".
+ # An overridable mechanism to test for From-line-ness. You can either
+ # specify a different regular expression or define a whole new
+ # _isrealfromline() method. Note that this only gets called for lines
+ # starting with the 5 characters "From ".
+ #
+ # BAW: According to
+ #http://home.netscape.com/eng/mozilla/2.0/relnotes/demo/content-length.html
+ # the only portable, reliable way to find message delimiters in a BSD (i.e
+ # Unix mailbox) style folder is to search for "\n\nFrom .*\n", or at the
+ # beginning of the file, "^From .*\n". While _fromlinepattern below seems
+ # like a good idea, in practice, there are too many variations for more
+ # strict parsing of the line to be completely accurate.
+ #
+ # _strict_isrealfromline() is the old version which tries to do stricter
+ # parsing of the From_ line. _portable_isrealfromline() simply returns
+ # true, since it's never called if the line doesn't already start with
+ # "From ".
+ #
+ # This algorithm, and the way it interacts with _search_start() and
+ # _search_end() may not be completely correct, because it doesn't check
+ # that the two characters preceding "From " are \n\n or the beginning of
+ # the file. Fixing this would require a more extensive rewrite than is
+ # necessary. For convenience, we've added a StrictUnixMailbox class which
+ # uses the older, more strict _fromlinepattern regular expression.
_fromlinepattern = r"From \s*[^\s]+\s+\w\w\w\s+\w\w\w\s+\d?\d\s+" \
r"\d?\d:\d\d(:\d\d)?(\s+[^\s]+)?\s+\d\d\d\d\s*$"
_regexp = None
- def _isrealfromline(self, line):
+ def _strict_isrealfromline(self, line):
if not self._regexp:
import re
self._regexp = re.compile(self._fromlinepattern)
return self._regexp.match(line)
+ def _portable_isrealfromline(self, line):
+ return 1
+
+ _isrealfromline = _strict_isrealfromline
+
+
+class PortableUnixMailbox(UnixMailbox):
+ _isrealfromline = UnixMailbox._portable_isrealfromline
+
class MmdfMailbox(_Mailbox):
def _search_start(self):
@@ -155,7 +184,7 @@ class MmdfMailbox(_Mailbox):
class MHMailbox:
- def __init__(self, dirname):
+ def __init__(self, dirname, factory=rfc822.Message):
import re
pat = re.compile('^[1-9][0-9]*$')
self.dirname = dirname
@@ -168,6 +197,7 @@ class MHMailbox:
# This only works in Python 1.6 or later;
# before that str() added 'L':
self.boxes = map(str, list)
+ self.factory = factory
def next(self):
if not self.boxes:
@@ -175,14 +205,15 @@ class MHMailbox:
fn = self.boxes[0]
del self.boxes[0]
fp = open(os.path.join(self.dirname, fn))
- return rfc822.Message(fp)
+ return self.factory(fp)
class Maildir:
# Qmail directory mailbox
- def __init__(self, dirname):
+ def __init__(self, dirname, factory=rfc822.Message):
self.dirname = dirname
+ self.factory = factory
# check for new mail
newdir = os.path.join(self.dirname, 'new')
@@ -202,7 +233,7 @@ class Maildir:
fn = self.boxes[0]
del self.boxes[0]
fp = open(fn)
- return rfc822.Message(fp)
+ return self.factory(fp)
class BabylMailbox(_Mailbox):