summaryrefslogtreecommitdiffstats
path: root/Doc/lib/libmailbox.tex
blob: a65619e14ee5fc6cf529f30b26f1d0216def53d4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
\section{\module{mailbox} ---
         Read various mailbox formats}

\declaremodule{standard}{mailbox}
\modulesynopsis{Read various mailbox formats.}


This module defines a number of classes that allow easy and uniform
access to mail messages in a (\UNIX{}) mailbox.

\begin{classdesc}{UnixMailbox}{fp\optional{, factory}}
Access to a classic \UNIX-style mailbox, where all messages are
contained in a single file and separated by \samp{From }
(a.k.a.\ \samp{From_}) lines.  The file object \var{fp} points to the
mailbox file.  The optional \var{factory} parameter is a callable that
should create new message objects.  \var{factory} is called with one
argument, \var{fp}, by the \method{next()} method of the mailbox
object.  The default is the \class{rfc822.Message} class (see the
\refmodule{rfc822} module).

For maximum portability, messages in a \UNIX-style mailbox are
separated by any line that begins exactly with the string \code{'From
'} (note the trailing space) if preceded by exactly two newlines.
Because of the wide range of variations in practice, nothing else on
the From_ line should be considered.  However, the current
implementation doesn't check for the leading two newlines.  This is
usually fine for most applications.

The \class{UnixMailbox} class implements a more strict version of
From_ line checking, using a regular expression that usually correctly
matches From_ delimiters.  It considers messages to be separated
by \samp{From \var{name} \var{time}} lines.  For maximum portability,
use the \class{PortableUnixMailbox} class instead.  This class is
identical to \class{UnixMailbox} except that individual messages are
separated by only \samp{From } lines.

For more information, see
\citetitle[http://home.netscape.com/eng/mozilla/2.0/relnotes/demo/content-length.html]{Configuring
Netscape Mail on \UNIX: Why the Content-Length Format is Bad}.
\end{classdesc}

\begin{classdesc}{PortableUnixMailbox}{fp\optional{, factory}}
A less-strict version of \class{UnixMailbox}, which considers only the
\samp{From } at the beginning of the line separating messages.  The
``\var{name} \var{time}'' portion of the From line is ignored, to
protect against some variations that are observed in practice.  This
works since lines in the message which begin with \code{'From '} are
quoted by mail handling software well before delivery.
\end{classdesc}

\begin{classdesc}{MmdfMailbox}{fp\optional{, factory}}
Access an MMDF-style mailbox, where all messages are contained
in a single file and separated by lines consisting of 4 control-A
characters.  The file object \var{fp} points to the mailbox file.
Optional \var{factory} is as with the \class{UnixMailbox} class.
\end{classdesc}

\begin{classdesc}{MHMailbox}{dirname\optional{, factory}}
Access an MH mailbox, a directory with each message in a separate
file with a numeric name.
The name of the mailbox directory is passed in \var{dirname}.
\var{factory} is as with the \class{UnixMailbox} class.
\end{classdesc}

\begin{classdesc}{Maildir}{dirname\optional{, factory}}
Access a Qmail mail directory.  All new and current mail for the
mailbox specified by \var{dirname} is made available.
\var{factory} is as with the \class{UnixMailbox} class.
\end{classdesc}

\begin{classdesc}{BabylMailbox}{fp\optional{, factory}}
Access a Babyl mailbox, which is similar to an MMDF mailbox.  In
Babyl format, each message has two sets of headers, the
\emph{original} headers and the \emph{visible} headers.  The original
headers appear before a line containing only \code{'*** EOOH ***'}
(End-Of-Original-Headers) and the visible headers appear after the
\code{EOOH} line.  Babyl-compliant mail readers will show you only the
visible headers, and \class{BabylMailbox} objects will return messages
containing only the visible headers.  You'll have to do your own
parsing of the mailbox file to get at the original headers.  Mail
messages start with the EOOH line and end with a line containing only
\code{'\e{}037\e{}014'}.  \var{factory} is as with the
\class{UnixMailbox} class.
\end{classdesc}


\begin{seealso}
  \seetitle[http://www.qmail.org/man/man5/mbox.html]{mbox -
            file containing mail messages}{Description of the
            traditional ``mbox'' mailbox format.}
  \seetitle[http://www.qmail.org/man/man5/maildir.html]{maildir -
            directory for incoming mail messages}{Description of the
            ``maildir'' mailbox format.}
  \seetitle[http://home.netscape.com/eng/mozilla/2.0/relnotes/demo/content-length.html]{Configuring
            Netscape Mail on \UNIX: Why the Content-Length Format is
            Bad}{A description of problems with relying on the
            Content-Length header for messages stored in mailbox
            files.}
\end{seealso}


\subsection{Mailbox Objects \label{mailbox-objects}}

All implementations of Mailbox objects are iterator objects, and so
have one externally visible method:

\begin{methoddesc}[mailbox]{next}{}
Return the next message in the mailbox, created with the optional
\var{factory} argument passed into the mailbox object's constructor.
By default this is an \class{rfc822.Message}
object (see the \refmodule{rfc822} module).  Depending on the mailbox
implementation the \var{fp} attribute of this object may be a true
file object or a class instance simulating a file object, taking care
of things like message boundaries if multiple mail messages are
contained in a single file, etc.  If no more messages are available,
this method returns \code{None}.
\end{methoddesc}