summaryrefslogtreecommitdiffstats
path: root/Doc/lib/email.tex
blob: aa9f3e552ec8824ae53f584136d32f6eff45229d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
% Copyright (C) 2001,2002 Python Software Foundation
% Author: barry@zope.com (Barry Warsaw)

\section{\module{email} ---
	 An email and MIME handling package}

\declaremodule{standard}{email}
\modulesynopsis{Package supporting the parsing, manipulation, and
    generation of email messages, including MIME documents.}
\moduleauthor{Barry A. Warsaw}{barry@zope.com}
\sectionauthor{Barry A. Warsaw}{barry@zope.com}

\versionadded{2.2}

The \module{email} package is a library for managing email messages,
including MIME and other \rfc{2822}-based message documents.  It
subsumes most of the functionality in several older standard modules
such as \refmodule{rfc822}, \refmodule{mimetools},
\refmodule{multifile}, and other non-standard packages such as
\module{mimecntl}.  It is specifically \emph{not} designed to do any
sending of email messages to SMTP (\rfc{2821}) servers; that is the
function of the \refmodule{smtplib} module.  The \module{email}
package attempts to be as RFC-compliant as possible, supporting in
addition to \rfc{2822}, such MIME-related RFCs as
\rfc{2045}-\rfc{2047}, and \rfc{2231}.

The primary distinguishing feature of the \module{email} package is
that it splits the parsing and generating of email messages from the
internal \emph{object model} representation of email.  Applications
using the \module{email} package deal primarily with objects; you can
add sub-objects to messages, remove sub-objects from messages,
completely re-arrange the contents, etc.  There is a separate parser
and a separate generator which handle the transformation from flat
text to the object model, and then back to flat text again.  There
are also handy subclasses for some common MIME object types, and a few
miscellaneous utilities that help with such common tasks as extracting
and parsing message field values, creating RFC-compliant dates, etc.

The following sections describe the functionality of the
\module{email} package.  The ordering follows a progression that
should be common in applications: an email message is read as flat
text from a file or other source, the text is parsed to produce an
object model representation of the email message, this model is
manipulated, and finally the model is rendered back into
flat text.

It is perfectly feasible to create the object model out of whole cloth
--- i.e. completely from scratch.  From there, a similar progression
can be taken as above.  

Also included are detailed specifications of all the classes and
modules that the \module{email} package provides, the exception
classes you might encounter while using the \module{email} package,
some auxiliary utilities, and a few examples.  For users of the older
\module{mimelib} package, or previous versions of the \module{email}
package, a section on differences and porting is provided.

\begin{seealso}
    \seemodule{smtplib}{SMTP protocol client}
\end{seealso}

\subsection{Representing an email message}
\input{emailmessage}

\subsection{Parsing email messages}
\input{emailparser}

\subsection{Generating MIME documents}
\input{emailgenerator}

\subsection{Creating email and MIME objects from scratch}
\input{emailmimebase}

\subsection{Headers, Character sets, and Internationalization}
\input{emailheaders}

\subsection{Encoders}
\input{emailencoders}

\subsection{Exception classes}
\input{emailexc}

\subsection{Miscellaneous utilities}
\input{emailutil}

\subsection{Iterators}
\input{emailiter}

\subsection{Differences from \module{email} v1 (up to Python 2.2.1)}

Version 1 of the \module{email} package was bundled with Python
releases up to Python 2.2.1.  Version 2 was developed for the Python
2.3 release, and backported to Python 2.2.2.  It was also available as
a separate distutils based package.  \module{email} version 2 is
almost entirely backwards compatible with version 1, with the
following differences:

\begin{itemize}
\item The \module{email.Header} and \module{email.Charset} modules
      have been added.
\item The pickle format for \class{Message} instances has changed.
      Since this was never (and still isn't) formally defined, this
      isn't considered a backwards incompatibility.  However if your
      application pickles and unpickles \class{Message} instances, be
      aware that in \module{email} version 2, \class{Message}
      instances now have private variables \var{_charset} and
      \var{_default_type}.
\item Several methods in the \class{Message} class have been
      deprecated, or their signatures changed.  Also, many new methods
      have been added.  See the documentation for the \class{Message}
      class for details.  The changes should be completely backwards
      compatible.
\item The object structure has changed in the face of
      \mimetype{message/rfc822} content types.  In \module{email}
      version 1, such a type would be represented by a scalar payload,
      i.e. the container message's \method{is_multipart()} returned
      false, \method{get_payload()} was not a list object, and was
      actually a \class{Message} instance.

      This structure was inconsistent with the rest of the package, so
      the object representation for \mimetype{message/rfc822} content
      types was changed.  In \module{email} version 2, the container
      \emph{does} return \code{True} from \method{is_multipart()}, and
      \method{get_payload()} returns a list containing a single
      \class{Message} item.

      Note that this is one place that backwards compatibility could
      not be completely maintained.  However, if you're already
      testing the return type of \method{get_payload()}, you should be
      fine.  You just need to make sure your code doesn't do a
      \method{set_payload()} with a \class{Message} instance on a
      container with a content type of \mimetype{message/rfc822}.
\item The \class{Parser} constructor's \var{strict} argument was
      added, and its \method{parse()} and \method{parsestr()} methods
      grew a \var{headersonly} argument.  The \var{strict} flag was
      also added to functions \function{email.message_from_file()}
      and \function{email.message_from_string()}.
\item \method{Generator.__call__()} is deprecated; use
      \method{Generator.flatten()} instead.  The \class{Generator}
      class has also grown the \method{clone()} method.
\item The \class{DecodedGenerator} class in the
      \module{email.Generator} module was added.
\item The intermediate base classes \class{MIMENonMultipart} and
      \class{MIMEMultipart} have been added, and interposed in the
      class hierarchy for most of the other MIME-related derived
      classes.
\item The \var{_encoder} argument to the \class{MIMEText} constructor
      has been deprecated.  Encoding  now happens implicitly based
      on the \var{_charset} argument.
\item The following functions in the \module{email.Utils} module have
      been deprecated: \function{dump_address_pairs()},
      \function{decode()}, and \function{encode()}.  The following
      functions have been added to the module:
      \function{make_msgid()}, \function{decode_rfc2231()},
      \function{encode_rfc2231()}, and \function{decode_params()}.
\item The non-public function \function{email.Iterators._structure()}
      was added.
\end{itemize}

\subsection{Differences from \module{mimelib}}

The \module{email} package was originally prototyped as a separate
library called
\ulink{\module{mimelib}}{http://mimelib.sf.net/}.
Changes have been made so that
method names are more consistent, and some methods or modules have
either been added or removed.  The semantics of some of the methods
have also changed.  For the most part, any functionality available in
\module{mimelib} is still available in the \refmodule{email} package,
albeit often in a different way.

Here is a brief description of the differences between the
\module{mimelib} and the \refmodule{email} packages, along with hints on
how to port your applications.

Of course, the most visible difference between the two packages is
that the package name has been changed to \refmodule{email}.  In
addition, the top-level package has the following differences:

\begin{itemize}
\item \function{messageFromString()} has been renamed to
      \function{message_from_string()}.
\item \function{messageFromFile()} has been renamed to
      \function{message_from_file()}.
\end{itemize}

The \class{Message} class has the following differences:

\begin{itemize}
\item The method \method{asString()} was renamed to \method{as_string()}.
\item The method \method{ismultipart()} was renamed to
      \method{is_multipart()}.
\item The \method{get_payload()} method has grown a \var{decode}
      optional argument.
\item The method \method{getall()} was renamed to \method{get_all()}.
\item The method \method{addheader()} was renamed to \method{add_header()}.
\item The method \method{gettype()} was renamed to \method{get_type()}.
\item The method \method{getmaintype()} was renamed to
      \method{get_main_type()}.
\item The method \method{getsubtype()} was renamed to
      \method{get_subtype()}.
\item The method \method{getparams()} was renamed to
      \method{get_params()}.
      Also, whereas \method{getparams()} returned a list of strings,
      \method{get_params()} returns a list of 2-tuples, effectively
      the key/value pairs of the parameters, split on the \character{=}
      sign.
\item The method \method{getparam()} was renamed to \method{get_param()}.
\item The method \method{getcharsets()} was renamed to
      \method{get_charsets()}.
\item The method \method{getfilename()} was renamed to
      \method{get_filename()}.
\item The method \method{getboundary()} was renamed to
      \method{get_boundary()}.
\item The method \method{setboundary()} was renamed to
      \method{set_boundary()}.
\item The method \method{getdecodedpayload()} was removed.  To get
      similar functionality, pass the value 1 to the \var{decode} flag
      of the \method{get_payload()} method.
\item The method \method{getpayloadastext()} was removed.  Similar
      functionality
      is supported by the \class{DecodedGenerator} class in the
      \refmodule{email.Generator} module.
\item The method \method{getbodyastext()} was removed.  You can get
      similar functionality by creating an iterator with
      \function{typed_subpart_iterator()} in the
      \refmodule{email.Iterators} module.
\end{itemize}

The \class{Parser} class has no differences in its public interface.
It does have some additional smarts to recognize
\mimetype{message/delivery-status} type messages, which it represents as
a \class{Message} instance containing separate \class{Message}
subparts for each header block in the delivery status
notification\footnote{Delivery Status Notifications (DSN) are defined
in \rfc{1894}.}.

The \class{Generator} class has no differences in its public
interface.  There is a new class in the \refmodule{email.Generator}
module though, called \class{DecodedGenerator} which provides most of
the functionality previously available in the
\method{Message.getpayloadastext()} method.

The following modules and classes have been changed:

\begin{itemize}
\item The \class{MIMEBase} class constructor arguments \var{_major}
      and \var{_minor} have changed to \var{_maintype} and
      \var{_subtype} respectively.
\item The \code{Image} class/module has been renamed to
      \code{MIMEImage}.  The \var{_minor} argument has been renamed to
      \var{_subtype}.
\item The \code{Text} class/module has been renamed to
      \code{MIMEText}.  The \var{_minor} argument has been renamed to
      \var{_subtype}.
\item The \code{MessageRFC822} class/module has been renamed to
      \code{MIMEMessage}.  Note that an earlier version of
      \module{mimelib} called this class/module \code{RFC822}, but
      that clashed with the Python standard library module
      \refmodule{rfc822} on some case-insensitive file systems.

      Also, the \class{MIMEMessage} class now represents any kind of
      MIME message with main type \mimetype{message}.  It takes an
      optional argument \var{_subtype} which is used to set the MIME
      subtype.  \var{_subtype} defaults to \mimetype{rfc822}.
\end{itemize}

\module{mimelib} provided some utility functions in its
\module{address} and \module{date} modules.  All of these functions
have been moved to the \refmodule{email.Utils} module.

The \code{MsgReader} class/module has been removed.  Its functionality
is most closely supported in the \function{body_line_iterator()}
function in the \refmodule{email.Iterators} module.

\subsection{Examples}

Here are a few examples of how to use the \module{email} package to
read, write, and send simple email messages, as well as more complex
MIME messages.

First, let's see how to create and send a simple text message:

\begin{verbatim}
# Import smtplib for the actual sending function
import smtplib

# Here is the email package class we'll need
from email.MIMEText import MIMEText

# Open a plain text file for reading.  For this example, assume that the
# text file contains only ASCII characters.
fp = open(textfile)
# Create a text/plain message.  No encoder is passed explicitly; the
# encoding is chosen implicitly from the message's character set.
msg = MIMEText(fp.read())
fp.close()

# me == the sender's email address
# you == the recipient's email address
msg['Subject'] = 'The contents of %s' % textfile
msg['From'] = me
msg['To'] = you

# Send the message via our own SMTP server.  Use msg.as_string() with
# unixfrom=0 so as not to confuse SMTP.
s = smtplib.SMTP()
s.connect()
s.sendmail(me, [you], msg.as_string(unixfrom=0))
s.close()
\end{verbatim}

Here's an example of how to send a MIME message containing a bunch of
family pictures:

\begin{verbatim}
# smtplib provides the function that actually sends the mail
import smtplib

# The email package classes used below
from email.MIMEImage import MIMEImage
from email.MIMEBase import MIMEBase

COMMASPACE = ', '

# Build the container (outer) email message.
# me == the sender's email address
# family = the list of all recipients' email addresses
msg = MIMEBase('multipart', 'mixed')
msg['Subject'] = 'Our family reunion'
msg['From'] = me
msg['To'] = COMMASPACE.join(family)
msg.preamble = 'Our family reunion'
# Guarantees the message ends in a newline
msg.epilogue = ''

# Assume we know that the image files are all in PNG format
for png_path in pngfiles:
    # Read each file in binary mode and let the MIMEImage class guess the
    # specific image type on its own.
    png_file = open(png_path, 'rb')
    image_part = MIMEImage(png_file.read())
    png_file.close()
    msg.attach(image_part)

# Hand the finished message to our own SMTP server.
server = smtplib.SMTP()
server.connect()
server.sendmail(me, family, msg.as_string(unixfrom=0))
server.close()
\end{verbatim}

Here's an example\footnote{Thanks to Matthew Dixon Cowles for the
original inspiration and examples.} of how to send the entire contents
of a directory as an email message:

\begin{verbatim}
#!/usr/bin/env python

"""Send the contents of a directory as a MIME message.

Usage: dirmail [options] from to [to ...]*

Options:
    -h / --help
        Print this message and exit.

    -d directory
    --directory=directory
        Mail the contents of the specified directory, otherwise use the
        current directory.  Only the regular files in the directory are sent,
        and we don't recurse to subdirectories.

`from' is the email address of the sender of the message.

`to' is the email address of the recipient of the message, and multiple
recipients may be given.

The email is sent by forwarding to your local SMTP server, which then does the
normal delivery process.  Your local machine must be running an SMTP server.
"""

import sys
import os
import getopt
import smtplib
# For guessing MIME type based on file name extension
import mimetypes

from email import Encoders
from email.Message import Message
from email.MIMEAudio import MIMEAudio
from email.MIMEBase import MIMEBase
from email.MIMEImage import MIMEImage
from email.MIMEText import MIMEText

COMMASPACE = ', '


def usage(code, msg=''):
    """Print the module help text to stderr and exit with status code.

    A non-empty msg (for example the option parsing error) is printed on
    its own line after the help text.
    """
    err = sys.stderr
    print >> err, __doc__
    if msg:
        print >> err, msg
    sys.exit(code)


def main():
    """Mail the regular files of a directory as one multipart message.

    The options and addresses are taken from sys.argv as described in the
    module docstring.  Every regular file becomes one attachment whose MIME
    class is picked from the content type guessed from the file name, and
    the assembled message is handed to the local SMTP server.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hd:', ['help', 'directory='])
    except getopt.error, msg:
        usage(1, msg)

    directory = os.curdir
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-d', '--directory'):
            directory = arg

    # A sender and at least one recipient are required
    if len(args) < 2:
        usage(1)

    sender = args[0]
    recips = args[1:]

    # Create the enclosing (outer) message
    outer = MIMEBase('multipart', 'mixed')
    outer['Subject'] = 'Contents of directory %s' % os.path.abspath(directory)
    outer['To'] = COMMASPACE.join(recips)
    outer['From'] = sender
    outer.preamble = 'You will not see this in a MIME-aware mail reader.\n'
    # To guarantee the message ends with a newline
    outer.epilogue = ''

    for filename in os.listdir(directory):
        path = os.path.join(directory, filename)
        if not os.path.isfile(path):
            continue
        # Guess the Content-Type: based on the file's extension.  Encoding
        # will be ignored, although we should check for simple things like
        # gzip'd or compressed files
        ctype, encoding = mimetypes.guess_type(path)
        if ctype is None or encoding is not None:
            # No guess could be made, or the file is encoded (compressed), so
            # use a generic bag-of-bits type.
            ctype = 'application/octet-stream'
        maintype, subtype = ctype.split('/', 1)
        if maintype == 'text':
            fp = open(path)
            # Note: we should handle calculating the charset
            part = MIMEText(fp.read(), _subtype=subtype)
            fp.close()
        elif maintype == 'image':
            fp = open(path, 'rb')
            part = MIMEImage(fp.read(), _subtype=subtype)
            fp.close()
        elif maintype == 'audio':
            fp = open(path, 'rb')
            part = MIMEAudio(fp.read(), _subtype=subtype)
            fp.close()
        else:
            fp = open(path, 'rb')
            part = MIMEBase(maintype, subtype)
            # Store the raw file contents as this part's payload
            part.set_payload(fp.read())
            fp.close()
            # Encode the payload using Base64
            Encoders.encode_base64(part)
        # Set the filename parameter
        part.add_header('Content-Disposition', 'attachment', filename=filename)
        outer.attach(part)

    # Now send the message
    s = smtplib.SMTP()
    s.connect()
    s.sendmail(sender, recips, outer.as_string(0))
    s.close()


# Run main() only when this file is executed as a script
if __name__ == '__main__':
    main()
\end{verbatim}

And finally, here's an example of how to unpack a MIME message like
the one above, into a directory of files:

\begin{verbatim}
#!/usr/bin/env python

"""Unpack a MIME message into a directory of files.

Usage: unpackmail [options] msgfile

Options:
    -h / --help
        Print this message and exit.

    -d directory
    --directory=directory
        Unpack the MIME message into the named directory, which will be
        created if it doesn't already exist.

msgfile is the path to the file containing the MIME message.
"""

import sys
import os
import getopt
import errno
import mimetypes
import email


def usage(code, msg=''):
    """Print the module help text to stderr and exit with status code.

    A non-empty msg (for example the option parsing error) is printed on
    its own line after the help text.
    """
    err = sys.stderr
    print >> err, __doc__
    if msg:
        print >> err, msg
    sys.exit(code)


def main():
    """Unpack every non-container part of a MIME message into a directory.

    The message file and the optional target directory are taken from
    sys.argv as described in the module docstring.  Each part is written to
    its own file, named after the part's filename parameter when it has
    one and after a running counter otherwise.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hd:', ['help', 'directory='])
    except getopt.error, msg:
        usage(1, msg)

    directory = os.curdir
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-d', '--directory'):
            directory = arg

    try:
        msgfile = args[0]
    except IndexError:
        usage(1)

    try:
        os.mkdir(directory)
    except OSError, e:
        # Ignore directory exists error
        if e.errno != errno.EEXIST:
            raise

    fp = open(msgfile)
    msg = email.message_from_file(fp)
    fp.close()

    counter = 1
    for part in msg.walk():
        # multipart/* are just containers
        if part.get_main_type() == 'multipart':
            continue
        filename = part.get_filename()
        if filename:
            # The name comes from the message itself, so keep only its last
            # path component.  Otherwise an absolute name or one containing
            # ../ could make us write outside the target directory.
            # Applications should still sanitize the result further.
            filename = os.path.basename(filename)
        if not filename:
            ext = mimetypes.guess_extension(part.get_type())
            if not ext:
                # Use a generic bag-of-bits extension
                ext = '.bin'
            filename = 'part-%03d%s' % (counter, ext)
        counter += 1
        fp = open(os.path.join(directory, filename), 'wb')
        try:
            fp.write(part.get_payload(decode=1))
        finally:
            # Close the file even if decoding or writing fails
            fp.close()


# Run main() only when this file is executed as a script
if __name__ == '__main__':
    main()
\end{verbatim}