From 5b70f66bf61717ff286d6da959d05c43514a1d14 Mon Sep 17 00:00:00 2001 From: cvs2svn Date: Fri, 4 Oct 2002 17:24:24 +0000 Subject: This commit was manufactured by cvs2svn to create branch 'release22-maint'. --- Doc/lib/email-dir.py | 123 +++ Doc/lib/email-mime.py | 34 + Doc/lib/email-simple.py | 25 + Doc/lib/email-unpack.py | 83 ++ Doc/lib/emailcharsets.tex | 240 +++++ Doc/lib/emailheaders.tex | 167 +++ Doc/lib/emailmimebase.tex | 159 +++ Lib/email/Charset.py | 355 +++++++ Lib/email/Header.py | 401 +++++++ Lib/email/MIMEMultipart.py | 37 + Lib/email/MIMENonMultipart.py | 24 + Lib/email/base64MIME.py | 187 ++++ Lib/email/quopriMIME.py | 318 ++++++ Lib/email/test/data/msg_32.txt | 14 + Lib/email/test/data/msg_33.txt | 29 + Lib/email/test/test_email.py | 2280 ++++++++++++++++++++++++++++++++++++++++ 16 files changed, 4476 insertions(+) create mode 100644 Doc/lib/email-dir.py create mode 100644 Doc/lib/email-mime.py create mode 100644 Doc/lib/email-simple.py create mode 100644 Doc/lib/email-unpack.py create mode 100644 Doc/lib/emailcharsets.tex create mode 100644 Doc/lib/emailheaders.tex create mode 100644 Doc/lib/emailmimebase.tex create mode 100644 Lib/email/Charset.py create mode 100644 Lib/email/Header.py create mode 100644 Lib/email/MIMEMultipart.py create mode 100644 Lib/email/MIMENonMultipart.py create mode 100644 Lib/email/base64MIME.py create mode 100644 Lib/email/quopriMIME.py create mode 100644 Lib/email/test/data/msg_32.txt create mode 100644 Lib/email/test/data/msg_33.txt create mode 100644 Lib/email/test/test_email.py diff --git a/Doc/lib/email-dir.py b/Doc/lib/email-dir.py new file mode 100644 index 0000000..aa3b5e5 --- /dev/null +++ b/Doc/lib/email-dir.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python + +"""Send the contents of a directory as a MIME message. + +Usage: dirmail [options] from to [to ...]* + +Options: + -h / --help + Print this message and exit. + + -d directory + --directory=directory + Mail the contents of the specified directory, otherwise use the + current directory. Only the regular files in the directory are sent, + and we don't recurse to subdirectories. + +`from' is the email address of the sender of the message. + +`to' is the email address of the recipient of the message, and multiple +recipients may be given. + +The email is sent by forwarding to your local SMTP server, which then does the +normal delivery process. Your local machine must be running an SMTP server. +""" + +import sys +import os +import getopt +import smtplib +# For guessing MIME type based on file name extension +import mimetypes + +from email import Encoders +from email.Message import Message +from email.MIMEAudio import MIMEAudio +from email.MIMEMultipart import MIMEMultipart +from email.MIMEImage import MIMEImage +from email.MIMEText import MIMEText + +COMMASPACE = ', ' + + +def usage(code, msg=''): + print >> sys.stderr, __doc__ + if msg: + print >> sys.stderr, msg + sys.exit(code) + + +def main(): + try: + opts, args = getopt.getopt(sys.argv[1:], 'hd:', ['help', 'directory=']) + except getopt.error, msg: + usage(1, msg) + + dir = os.curdir + for opt, arg in opts: + if opt in ('-h', '--help'): + usage(0) + elif opt in ('-d', '--directory'): + dir = arg + + if len(args) < 2: + usage(1) + + sender = args[0] + recips = args[1:] + + # Create the enclosing (outer) message + outer = MIMEMultipart() + outer['Subject'] = 'Contents of directory %s' % os.path.abspath(dir) + outer['To'] = COMMASPACE.join(recips) + outer['From'] = sender + outer.preamble = 'You will not see this in a MIME-aware mail reader.\n' + # To guarantee the message ends with a newline + outer.epilogue = '' + + for filename in os.listdir(dir): + path = os.path.join(dir, filename) + if not os.path.isfile(path): + continue + # Guess the content type based on the file's extension. Encoding + # will be ignored, although we should check for simple things like + # gzip'd or compressed files. + ctype, encoding = mimetypes.guess_type(path) + if ctype is None or encoding is not None: + # No guess could be made, or the file is encoded (compressed), so + # use a generic bag-of-bits type. + ctype = 'application/octet-stream' + maintype, subtype = ctype.split('/', 1) + if maintype == 'text': + fp = open(path) + # Note: we should handle calculating the charset + msg = MIMEText(fp.read(), _subtype=subtype) + fp.close() + elif maintype == 'image': + fp = open(path, 'rb') + msg = MIMEImage(fp.read(), _subtype=subtype) + fp.close() + elif maintype == 'audio': + fp = open(path, 'rb') + msg = MIMEAudio(fp.read(), _subtype=subtype) + fp.close() + else: + fp = open(path, 'rb') + msg = MIMEBase(maintype, subtype) + msg.set_payload(fp.read()) + fp.close() + # Encode the payload using Base64 + Encoders.encode_base64(msg) + # Set the filename parameter + msg.add_header('Content-Disposition', 'attachment', filename=filename) + outer.attach(msg) + + # Now send the message + s = smtplib.SMTP() + s.connect() + s.sendmail(sender, recips, outer.as_string()) + s.close() + + +if __name__ == '__main__': + main() diff --git a/Doc/lib/email-mime.py b/Doc/lib/email-mime.py new file mode 100644 index 0000000..28c8d2e --- /dev/null +++ b/Doc/lib/email-mime.py @@ -0,0 +1,34 @@ +# Import smtplib for the actual sending function +import smtplib + +# Here are the email pacakge modules we'll need +from email.MIMEImage import MIMEImage +from email.MIMEMultipart import MIMEMultipart + +COMMASPACE = ', ' + +# Create the container (outer) email message. +msg = MIMEMultipart() +msg['Subject'] = 'Our family reunion' +# me == the sender's email address +# family = the list of all recipients' email addresses +msg['From'] = me +msg['To'] = COMMASPACE.join(family) +msg.preamble = 'Our family reunion' +# Guarantees the message ends in a newline +msg.epilogue = '' + +# Assume we know that the image files are all in PNG format +for file in pngfiles: + # Open the files in binary mode. Let the MIMEImage class automatically + # guess the specific image type. + fp = open(file, 'rb') + img = MIMEImage(fp.read()) + fp.close() + msg.attach(img) + +# Send the email via our own SMTP server. +s = smtplib.SMTP() +s.connect() +s.sendmail(me, family, msg.as_string()) +s.close() diff --git a/Doc/lib/email-simple.py b/Doc/lib/email-simple.py new file mode 100644 index 0000000..a445f1b --- /dev/null +++ b/Doc/lib/email-simple.py @@ -0,0 +1,25 @@ +# Import smtplib for the actual sending function +import smtplib + +# Import the email modules we'll need +from email.MIMEText import MIMEText + +# Open a plain text file for reading. For this example, assume that +# the text file contains only ASCII characters. +fp = open(textfile, 'rb') +# Create a text/plain message +msg = MIMEText(fp.read()) +fp.close() + +# me == the sender's email address +# you == the recipient's email address +msg['Subject'] = 'The contents of %s' % textfile +msg['From'] = me +msg['To'] = you + +# Send the message via our own SMTP server, but don't include the +# envelope header. +s = smtplib.SMTP() +s.connect() +s.sendmail(me, [you], msg.as_string()) +s.close() diff --git a/Doc/lib/email-unpack.py b/Doc/lib/email-unpack.py new file mode 100644 index 0000000..b166fdb --- /dev/null +++ b/Doc/lib/email-unpack.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python + +"""Unpack a MIME message into a directory of files. + +Usage: unpackmail [options] msgfile + +Options: + -h / --help + Print this message and exit. + + -d directory + --directory=directory + Unpack the MIME message into the named directory, which will be + created if it doesn't already exist. + +msgfile is the path to the file containing the MIME message. +""" + +import sys +import os +import getopt +import errno +import mimetypes +import email + + +def usage(code, msg=''): + print >> sys.stderr, __doc__ + if msg: + print >> sys.stderr, msg + sys.exit(code) + + +def main(): + try: + opts, args = getopt.getopt(sys.argv[1:], 'hd:', ['help', 'directory=']) + except getopt.error, msg: + usage(1, msg) + + dir = os.curdir + for opt, arg in opts: + if opt in ('-h', '--help'): + usage(0) + elif opt in ('-d', '--directory'): + dir = arg + + try: + msgfile = args[0] + except IndexError: + usage(1) + + try: + os.mkdir(dir) + except OSError, e: + # Ignore directory exists error + if e.errno <> errno.EEXIST: raise + + fp = open(msgfile) + msg = email.message_from_file(fp) + fp.close() + + counter = 1 + for part in msg.walk(): + # multipart/* are just containers + if part.get_content_maintype() == 'multipart': + continue + # Applications should really sanitize the given filename so that an + # email message can't be used to overwrite important files + filename = part.get_filename() + if not filename: + ext = mimetypes.guess_extension(part.get_type()) + if not ext: + # Use a generic bag-of-bits extension + ext = '.bin' + filename = 'part-%03d%s' % (counter, ext) + counter += 1 + fp = open(os.path.join(dir, filename), 'wb') + fp.write(part.get_payload(decode=1)) + fp.close() + + +if __name__ == '__main__': + main() diff --git a/Doc/lib/emailcharsets.tex b/Doc/lib/emailcharsets.tex new file mode 100644 index 0000000..d1ae728 --- /dev/null +++ b/Doc/lib/emailcharsets.tex @@ -0,0 +1,240 @@ +\declaremodule{standard}{email.Charset} +\modulesynopsis{Character Sets} + +This module provides a class \class{Charset} for representing +character sets and character set conversions in email messages, as +well as a character set registry and several convenience methods for +manipulating this registry. Instances of \class{Charset} are used in +several other modules within the \module{email} package. + +\versionadded{2.2.2} + +\begin{classdesc}{Charset}{\optional{input_charset}} +Map character sets to their email properties. + +This class provides information about the requirements imposed on +email for a specific character set. It also provides convenience +routines for converting between character sets, given the availability +of the applicable codecs. Given a character set, it will do its best +to provide information on how to use that character set in an email +message in an RFC-compliant way. + +Certain character sets must be encoded with quoted-printable or base64 +when used in email headers or bodies. Certain character sets must be +converted outright, and are not allowed in email. + +Optional \var{input_charset} is as described below. After being alias +normalized it is also used as a lookup into the registry of character +sets to find out the header encoding, body encoding, and output +conversion codec to be used for the character set. For example, if +\var{input_charset} is \code{iso-8859-1}, then headers and bodies will +be encoded using quoted-printable and no output conversion codec is +necessary. If \var{input_charset} is \code{euc-jp}, then headers will +be encoded with base64, bodies will not be encoded, but output text +will be converted from the \code{euc-jp} character set to the +\code{iso-2022-jp} character set. +\end{classdesc} + +\class{Charset} instances have the following data attributes: + +\begin{datadesc}{input_charset} +The initial character set specified. Common aliases are converted to +their \emph{official} email names (e.g. \code{latin_1} is converted to +\code{iso-8859-1}). Defaults to 7-bit \code{us-ascii}. +\end{datadesc} + +\begin{datadesc}{header_encoding} +If the character set must be encoded before it can be used in an +email header, this attribute will be set to \code{Charset.QP} (for +quoted-printable), \code{Charset.BASE64} (for base64 encoding), or +\code{Charset.SHORTEST} for the shortest of QP or BASE64 encoding. +Otherwise, it will be \code{None}. +\end{datadesc} + +\begin{datadesc}{body_encoding} +Same as \var{header_encoding}, but describes the encoding for the +mail message's body, which indeed may be different than the header +encoding. \code{Charset.SHORTEST} is not allowed for +\var{body_encoding}. +\end{datadesc} + +\begin{datadesc}{output_charset} +Some character sets must be converted before they can be used in +email headers or bodies. If the \var{input_charset} is one of +them, this attribute will contain the name of the character set +output will be converted to. Otherwise, it will be \code{None}. +\end{datadesc} + +\begin{datadesc}{input_codec} +The name of the Python codec used to convert the \var{input_charset} to +Unicode. If no conversion codec is necessary, this attribute will be +\code{None}. +\end{datadesc} + +\begin{datadesc}{output_codec} +The name of the Python codec used to convert Unicode to the +\var{output_charset}. If no conversion codec is necessary, this +attribute will have the same value as the \var{input_codec}. +\end{datadesc} + +\class{Charset} instances also have the following methods: + +\begin{methoddesc}[Charset]{get_body_encoding}{} +Return the content transfer encoding used for body encoding. + +This is either the string \samp{quoted-printable} or \samp{base64} +depending on the encoding used, or it is a function, in which case you +should call the function with a single argument, the Message object +being encoded. The function should then set the +\mailheader{Content-Transfer-Encoding} header itself to whatever is +appropriate. + +Returns the string \samp{quoted-printable} if +\var{body_encoding} is \code{QP}, returns the string +\samp{base64} if \var{body_encoding} is \code{BASE64}, and returns the +string \samp{7bit} otherwise. +\end{methoddesc} + +\begin{methoddesc}{convert}{s} +Convert the string \var{s} from the \var{input_codec} to the +\var{output_codec}. +\end{methoddesc} + +\begin{methoddesc}{to_splittable}{s} +Convert a possibly multibyte string to a safely splittable format. +\var{s} is the string to split. + +Uses the \var{input_codec} to try and convert the string to Unicode, +so it can be safely split on character boundaries (even for multibyte +characters). + +Returns the string as-is if it isn't known how to convert \var{s} to +Unicode with the \var{input_charset}. + +Characters that could not be converted to Unicode will be replaced +with the Unicode replacement character \character{U+FFFD}. +\end{methoddesc} + +\begin{methoddesc}{from_splittable}{ustr\optional{, to_output}} +Convert a splittable string back into an encoded string. \var{ustr} +is a Unicode string to ``unsplit''. + +This method uses the proper codec to try and convert the string from +Unicode back into an encoded format. Return the string as-is if it is +not Unicode, or if it could not be converted from Unicode. + +Characters that could not be converted from Unicode will be replaced +with an appropriate character (usually \character{?}). + +If \var{to_output} is \code{True} (the default), uses +\var{output_codec} to convert to an +encoded format. If \var{to_output} is \code{False}, it uses +\var{input_codec}. +\end{methoddesc} + +\begin{methoddesc}{get_output_charset}{} +Return the output character set. + +This is the \var{output_charset} attribute if that is not \code{None}, +otherwise it is \var{input_charset}. +\end{methoddesc} + +\begin{methoddesc}{encoded_header_len}{} +Return the length of the encoded header string, properly calculating +for quoted-printable or base64 encoding. +\end{methoddesc} + +\begin{methoddesc}{header_encode}{s\optional{, convert}} +Header-encode the string \var{s}. + +If \var{convert} is \code{True}, the string will be converted from the +input charset to the output charset automatically. This is not useful +for multibyte character sets, which have line length issues (multibyte +characters must be split on a character, not a byte boundary); use the +higher-level \class{Header} class to deal with these issues (see +\refmodule{email.Header}). \var{convert} defaults to \code{False}. + +The type of encoding (base64 or quoted-printable) will be based on +the \var{header_encoding} attribute. +\end{methoddesc} + +\begin{methoddesc}{body_encode}{s\optional{, convert}} +Body-encode the string \var{s}. + +If \var{convert} is \code{True} (the default), the string will be +converted from the input charset to output charset automatically. +Unlike \method{header_encode()}, there are no issues with byte +boundaries and multibyte charsets in email bodies, so this is usually +pretty safe. + +The type of encoding (base64 or quoted-printable) will be based on +the \var{body_encoding} attribute. +\end{methoddesc} + +The \class{Charset} class also provides a number of methods to support +standard operations and built-in functions. + +\begin{methoddesc}[Charset]{__str__}{} +Returns \var{input_charset} as a string coerced to lower case. +\end{methoddesc} + +\begin{methoddesc}[Charset]{__eq__}{other} +This method allows you to compare two \class{Charset} instances for equality. +\end{methoddesc} + +\begin{methoddesc}[Header]{__ne__}{other} +This method allows you to compare two \class{Charset} instances for inequality. +\end{methoddesc} + +The \module{email.Charset} module also provides the following +functions for adding new entries to the global character set, alias, +and codec registries: + +\begin{funcdesc}{add_charset}{charset\optional{, header_enc\optional{, + body_enc\optional{, output_charset}}}} +Add character properties to the global registry. + +\var{charset} is the input character set, and must be the canonical +name of a character set. + +Optional \var{header_enc} and \var{body_enc} is either +\code{Charset.QP} for quoted-printable, \code{Charset.BASE64} for +base64 encoding, \code{Charset.SHORTEST} for the shortest of +quoted-printable or base64 encoding, or \code{None} for no encoding. +\code{SHORTEST} is only valid for \var{header_enc}. The default is +\code{None} for no encoding. + +Optional \var{output_charset} is the character set that the output +should be in. Conversions will proceed from input charset, to +Unicode, to the output charset when the method +\method{Charset.convert()} is called. The default is to output in the +same character set as the input. + +Both \var{input_charset} and \var{output_charset} must have Unicode +codec entries in the module's character set-to-codec mapping; use +\function{add_codec()} to add codecs the module does +not know about. See the \refmodule{codecs} module's documentation for +more information. + +The global character set registry is kept in the module global +dictionary \code{CHARSETS}. +\end{funcdesc} + +\begin{funcdesc}{add_alias}{alias, canonical} +Add a character set alias. \var{alias} is the alias name, +e.g. \code{latin-1}. \var{canonical} is the character set's canonical +name, e.g. \code{iso-8859-1}. + +The global charset alias registry is kept in the module global +dictionary \code{ALIASES}. +\end{funcdesc} + +\begin{funcdesc}{add_codec}{charset, codecname} +Add a codec that map characters in the given character set to and from +Unicode. + +\var{charset} is the canonical name of a character set. +\var{codecname} is the name of a Python codec, as appropriate for the +second argument to the \function{unicode()} built-in, or to the +\method{encode()} method of a Unicode string. +\end{funcdesc} diff --git a/Doc/lib/emailheaders.tex b/Doc/lib/emailheaders.tex new file mode 100644 index 0000000..66eb716 --- /dev/null +++ b/Doc/lib/emailheaders.tex @@ -0,0 +1,167 @@ +\declaremodule{standard}{email.Header} +\modulesynopsis{Representing non-ASCII headers} + +\rfc{2822} is the base standard that describes the format of email +messages. It derives from the older \rfc{822} standard which came +into widespread use at a time when most email was composed of \ASCII{} +characters only. \rfc{2822} is a specification written assuming email +contains only 7-bit \ASCII{} characters. + +Of course, as email has been deployed worldwide, it has become +internationalized, such that language specific character sets can now +be used in email messages. The base standard still requires email +messages to be transfered using only 7-bit \ASCII{} characters, so a +slew of RFCs have been written describing how to encode email +containing non-\ASCII{} characters into \rfc{2822}-compliant format. +These RFCs include \rfc{2045}, \rfc{2046}, \rfc{2047}, and \rfc{2231}. +The \module{email} package supports these standards in its +\module{email.Header} and \module{email.Charset} modules. + +If you want to include non-\ASCII{} characters in your email headers, +say in the \mailheader{Subject} or \mailheader{To} fields, you should +use the \class{Header} class and assign the field in the +\class{Message} object to an instance of \class{Header} instead of +using a string for the header value. For example: + +\begin{verbatim} +>>> from email.Message import Message +>>> from email.Header import Header +>>> msg = Message() +>>> h = Header('p\xf6stal', 'iso-8859-1') +>>> msg['Subject'] = h +>>> print msg.as_string() +Subject: =?iso-8859-1?q?p=F6stal?= + + +\end{verbatim} + +Notice here how we wanted the \mailheader{Subject} field to contain a +non-\ASCII{} character? We did this by creating a \class{Header} +instance and passing in the character set that the byte string was +encoded in. When the subsequent \class{Message} instance was +flattened, the \mailheader{Subject} field was properly \rfc{2047} +encoded. MIME-aware mail readers would show this header using the +embedded ISO-8859-1 character. + +\versionadded{2.2.2} + +Here is the \class{Header} class description: + +\begin{classdesc}{Header}{\optional{s\optional{, charset\optional{, + maxlinelen\optional{, header_name\optional{, continuation_ws}}}}}} +Create a MIME-compliant header that can contain strings in different +character sets. + +Optional \var{s} is the initial header value. If \code{None} (the +default), the initial header value is not set. You can later append +to the header with \method{append()} method calls. \var{s} may be a +byte string or a Unicode string, but see the \method{append()} +documentation for semantics. + +Optional \var{charset} serves two purposes: it has the same meaning as +the \var{charset} argument to the \method{append()} method. It also +sets the default character set for all subsequent \method{append()} +calls that omit the \var{charset} argument. If \var{charset} is not +provided in the constructor (the default), the \code{us-ascii} +character set is used both as \var{s}'s initial charset and as the +default for subsequent \method{append()} calls. + +The maximum line length can be specified explicit via +\var{maxlinelen}. For splitting the first line to a shorter value (to +account for the field header which isn't included in \var{s}, +e.g. \mailheader{Subject}) pass in the name of the field in +\var{header_name}. The default \var{maxlinelen} is 76, and the +default value for \var{header_name} is \code{None}, meaning it is not +taken into account for the first line of a long, split header. + +Optional \var{continuation_ws} must be \rfc{2822}-compliant folding +whitespace, and is usually either a space or a hard tab character. +This character will be prepended to continuation lines. +\end{classdesc} + +\begin{methoddesc}[Header]{append}{s\optional{, charset}} +Append the string \var{s} to the MIME header. + +Optional \var{charset}, if given, should be a \class{Charset} instance +(see \refmodule{email.Charset}) or the name of a character set, which +will be converted to a \class{Charset} instance. A value of +\code{None} (the default) means that the \var{charset} given in the +constructor is used. + +\var{s} may be a byte string or a Unicode string. If it is a byte +string (i.e. \code{isinstance(s, str)} is true), then +\var{charset} is the encoding of that byte string, and a +\exception{UnicodeError} will be raised if the string cannot be +decoded with that character set. + +If \var{s} is a Unicode string, then \var{charset} is a hint +specifying the character set of the characters in the string. In this +case, when producing an \rfc{2822}-compliant header using \rfc{2047} +rules, the Unicode string will be encoded using the following charsets +in order: \code{us-ascii}, the \var{charset} hint, \code{utf-8}. The +first character set to not provoke a \exception{UnicodeError} is used. +\end{methoddesc} + +\begin{methoddesc}[Header]{encode}{} +Encode a message header into an RFC-compliant format, possibly +wrapping long lines and encapsulating non-\ASCII{} parts in base64 or +quoted-printable encodings. +\end{methoddesc} + +The \class{Header} class also provides a number of methods to support +standard operators and built-in functions. + +\begin{methoddesc}[Header]{__str__}{} +A synonym for \method{Header.encode()}. Useful for +\code{str(aHeader)}. +\end{methoddesc} + +\begin{methoddesc}[Header]{__unicode__}{} +A helper for the built-in \function{unicode()} function. Returns the +header as a Unicode string. +\end{methoddesc} + +\begin{methoddesc}[Header]{__eq__}{other} +This method allows you to compare two \class{Header} instances for equality. +\end{methoddesc} + +\begin{methoddesc}[Header]{__ne__}{other} +This method allows you to compare two \class{Header} instances for inequality. +\end{methoddesc} + +The \module{email.Header} module also provides the following +convenient functions. + +\begin{funcdesc}{decode_header}{header} +Decode a message header value without converting the character set. +The header value is in \var{header}. + +This function returns a list of \code{(decoded_string, charset)} pairs +containing each of the decoded parts of the header. \var{charset} is +\code{None} for non-encoded parts of the header, otherwise a lower +case string containing the name of the character set specified in the +encoded string. + +Here's an example: + +\begin{verbatim} +>>> from email.Header import decode_header +>>> decode_header('=?iso-8859-1?q?p=F6stal?=') +[('p\\xf6stal', 'iso-8859-1')] +\end{verbatim} +\end{funcdesc} + +\begin{funcdesc}{make_header}{decoded_seq\optional{, maxlinelen\optional{, + header_name\optional{, continuation_ws}}}} +Create a \class{Header} instance from a sequence of pairs as returned +by \function{decode_header()}. + +\function{decode_header()} takes a header value string and returns a +sequence of pairs of the format \code{(decoded_string, charset)} where +\var{charset} is the name of the character set. + +This function takes one of those sequence of pairs and returns a +\class{Header} instance. Optional \var{maxlinelen}, +\var{header_name}, and \var{continuation_ws} are as in the +\class{Header} constructor. +\end{funcdesc} diff --git a/Doc/lib/emailmimebase.tex b/Doc/lib/emailmimebase.tex new file mode 100644 index 0000000..6bbd5dd --- /dev/null +++ b/Doc/lib/emailmimebase.tex @@ -0,0 +1,159 @@ +Ordinarily, you get a message object structure by passing a file or +some text to a parser, which parses the text and returns the root +message object. However you can also build a complete message +structure from scratch, or even individual \class{Message} objects by +hand. In fact, you can also take an existing structure and add new +\class{Message} objects, move them around, etc. This makes a very +convenient interface for slicing-and-dicing MIME messages. + +You can create a new object structure by creating \class{Message} +instances, adding attachments and all the appropriate headers manually. +For MIME messages though, the \module{email} package provides some +convenient subclasses to make things easier. Each of these classes +should be imported from a module with the same name as the class, from +within the \module{email} package. E.g.: + +\begin{verbatim} +import email.MIMEImage.MIMEImage +\end{verbatim} + +or + +\begin{verbatim} +from email.MIMEText import MIMEText +\end{verbatim} + +Here are the classes: + +\begin{classdesc}{MIMEBase}{_maintype, _subtype, **_params} +This is the base class for all the MIME-specific subclasses of +\class{Message}. Ordinarily you won't create instances specifically +of \class{MIMEBase}, although you could. \class{MIMEBase} is provided +primarily as a convenient base class for more specific MIME-aware +subclasses. + +\var{_maintype} is the \mailheader{Content-Type} major type +(e.g. \mimetype{text} or \mimetype{image}), and \var{_subtype} is the +\mailheader{Content-Type} minor type +(e.g. \mimetype{plain} or \mimetype{gif}). \var{_params} is a parameter +key/value dictionary and is passed directly to +\method{Message.add_header()}. + +The \class{MIMEBase} class always adds a \mailheader{Content-Type} header +(based on \var{_maintype}, \var{_subtype}, and \var{_params}), and a +\mailheader{MIME-Version} header (always set to \code{1.0}). +\end{classdesc} + +\begin{classdesc}{MIMENonMultipart}{} +A subclass of \class{MIMEBase}, this is an intermediate base class for +MIME messages that are not \mimetype{multipart}. The primary purpose +of this class is to prevent the use of the \method{attach()} method, +which only makes sense for \mimetype{multipart} messages. If +\method{attach()} is called, a \exception{MultipartConversionError} +exception is raised. + +\versionadded{2.2.2} +\end{classdesc} + +\begin{classdesc}{MIMEMultipart}{\optional{subtype\optional{, + boundary\optional{, _subparts\optional{, _params}}}}} + +A subclass of \class{MIMEBase}, this is an intermediate base class for +MIME messages that are \mimetype{multipart}. Optional \var{_subtype} +defaults to \mimetype{mixed}, but can be used to specify the subtype +of the message. A \mailheader{Content-Type} header of +\mimetype{multipart/}\var{_subtype} will be added to the message +object. A \mailheader{MIME-Version} header will also be added. + +Optional \var{boundary} is the multipart boundary string. When +\code{None} (the default), the boundary is calculated when needed. + +\var{_subparts} is a sequence of initial subparts for the payload. It +must be possible to convert this sequence to a list. You can always +attach new subparts to the message by using the +\method{Message.attach()} method. + +Additional parameters for the \mailheader{Content-Type} header are +taken from the keyword arguments, or passed into the \var{_params} +argument, which is a keyword dictionary. + +\versionadded{2.2.2} +\end{classdesc} + +\begin{classdesc}{MIMEAudio}{_audiodata\optional{, _subtype\optional{, + _encoder\optional{, **_params}}}} + +A subclass of \class{MIMENonMultipart}, the \class{MIMEAudio} class +is used to create MIME message objects of major type \mimetype{audio}. +\var{_audiodata} is a string containing the raw audio data. If this +data can be decoded by the standard Python module \refmodule{sndhdr}, +then the subtype will be automatically included in the +\mailheader{Content-Type} header. Otherwise you can explicitly specify the +audio subtype via the \var{_subtype} parameter. If the minor type could +not be guessed and \var{_subtype} was not given, then \exception{TypeError} +is raised. + +Optional \var{_encoder} is a callable (i.e. function) which will +perform the actual encoding of the audio data for transport. This +callable takes one argument, which is the \class{MIMEAudio} instance. +It should use \method{get_payload()} and \method{set_payload()} to +change the payload to encoded form. It should also add any +\mailheader{Content-Transfer-Encoding} or other headers to the message +object as necessary. The default encoding is base64. See the +\refmodule{email.Encoders} module for a list of the built-in encoders. + +\var{_params} are passed straight through to the base class constructor. +\end{classdesc} + +\begin{classdesc}{MIMEImage}{_imagedata\optional{, _subtype\optional{, + _encoder\optional{, **_params}}}} + +A subclass of \class{MIMENonMultipart}, the \class{MIMEImage} class is +used to create MIME message objects of major type \mimetype{image}. +\var{_imagedata} is a string containing the raw image data. If this +data can be decoded by the standard Python module \refmodule{imghdr}, +then the subtype will be automatically included in the +\mailheader{Content-Type} header. Otherwise you can explicitly specify the +image subtype via the \var{_subtype} parameter. If the minor type could +not be guessed and \var{_subtype} was not given, then \exception{TypeError} +is raised. + +Optional \var{_encoder} is a callable (i.e. function) which will +perform the actual encoding of the image data for transport. This +callable takes one argument, which is the \class{MIMEImage} instance. +It should use \method{get_payload()} and \method{set_payload()} to +change the payload to encoded form. It should also add any +\mailheader{Content-Transfer-Encoding} or other headers to the message +object as necessary. The default encoding is base64. See the +\refmodule{email.Encoders} module for a list of the built-in encoders. + +\var{_params} are passed straight through to the \class{MIMEBase} +constructor. +\end{classdesc} + +\begin{classdesc}{MIMEMessage}{_msg\optional{, _subtype}} +A subclass of \class{MIMENonMultipart}, the \class{MIMEMessage} class +is used to create MIME objects of main type \mimetype{message}. +\var{_msg} is used as the payload, and must be an instance of class +\class{Message} (or a subclass thereof), otherwise a +\exception{TypeError} is raised. + +Optional \var{_subtype} sets the subtype of the message; it defaults +to \mimetype{rfc822}. +\end{classdesc} + +\begin{classdesc}{MIMEText}{_text\optional{, _subtype\optional{, + _charset\optional{, _encoder}}}} + +A subclass of \class{MIMENonMultipart}, the \class{MIMEText} class is +used to create MIME objects of major type \mimetype{text}. +\var{_text} is the string for the payload. \var{_subtype} is the +minor type and defaults to \mimetype{plain}. \var{_charset} is the +character set of the text and is passed as a parameter to the +\class{MIMENonMultipart} constructor; it defaults to \code{us-ascii}. No +guessing or encoding is performed on the text data, but a newline is +appended to \var{_text} if it doesn't already end with a newline. + +\deprecated{2.2.2}{The \var{_encoding} argument has been deprecated. +Encoding now happens implicitly based on the \var{_charset} argument.} +\end{classdesc} diff --git a/Lib/email/Charset.py b/Lib/email/Charset.py new file mode 100644 index 0000000..9a7e510 --- /dev/null +++ b/Lib/email/Charset.py @@ -0,0 +1,355 @@ +# Copyright (C) 2001,2002 Python Software Foundation +# Author: che@debian.org (Ben Gertzfield), barry@zope.com (Barry Warsaw) + +from types import UnicodeType +from email.Encoders import encode_7or8bit +import email.base64MIME +import email.quopriMIME + +def _isunicode(s): + return isinstance(s, UnicodeType) + +# Python 2.2.1 and beyond has these symbols +try: + True, False +except NameError: + True = 1 + False = 0 + + + +# Flags for types of header encodings +QP = 1 # Quoted-Printable +BASE64 = 2 # Base64 +SHORTEST = 3 # the shorter of QP and base64, but only for headers + +# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7 +MISC_LEN = 7 + +DEFAULT_CHARSET = 'us-ascii' + + + +# Defaults +CHARSETS = { + # input header enc body enc output conv + 'iso-8859-1': (QP, QP, None), + 'iso-8859-2': (QP, QP, None), + 'us-ascii': (None, None, None), + 'big5': (BASE64, BASE64, None), + 'gb2312': (BASE64, BASE64, None), + 'euc-jp': (BASE64, None, 'iso-2022-jp'), + 'shift_jis': (BASE64, None, 'iso-2022-jp'), + 'iso-2022-jp': (BASE64, None, None), + 'koi8-r': (BASE64, BASE64, None), + 'utf-8': (SHORTEST, BASE64, 'utf-8'), + } + +# Aliases for other commonly-used names for character sets. Map +# them to the real ones used in email. +ALIASES = { + 'latin_1': 'iso-8859-1', + 'latin-1': 'iso-8859-1', + 'ascii': 'us-ascii', + } + +# Map charsets to their Unicode codec strings. Note that the Japanese +# examples included below do not (yet) come with Python! They are available +# from http://pseudo.grad.sccs.chukyo-u.ac.jp/~kajiyama/python/ + +# The Chinese and Korean codecs are available from SourceForge: +# +# http://sourceforge.net/projects/python-codecs/ +# +# although you'll need to check them out of cvs since they haven't been file +# released yet. You might also try to use +# +# http://www.freshports.org/port-description.php3?port=6702 +# +# if you can get logged in. AFAICT, both the Chinese and Korean codecs are +# fairly experimental at this point. +CODEC_MAP = { + 'euc-jp': 'japanese.euc-jp', + 'iso-2022-jp': 'japanese.iso-2022-jp', + 'shift_jis': 'japanese.shift_jis', + 'gb2132': 'eucgb2312_cn', + 'big5': 'big5_tw', + 'utf-8': 'utf-8', + # Hack: We don't want *any* conversion for stuff marked us-ascii, as all + # sorts of garbage might be sent to us in the guise of 7-bit us-ascii. + # Let that stuff pass through without conversion to/from Unicode. + 'us-ascii': None, + } + + + +# Convenience functions for extending the above mappings +def add_charset(charset, header_enc=None, body_enc=None, output_charset=None): + """Add character set properties to the global registry. + + charset is the input character set, and must be the canonical name of a + character set. + + Optional header_enc and body_enc is either Charset.QP for + quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for + the shortest of qp or base64 encoding, or None for no encoding. SHORTEST + is only valid for header_enc. It describes how message headers and + message bodies in the input charset are to be encoded. Default is no + encoding. + + Optional output_charset is the character set that the output should be + in. Conversions will proceed from input charset, to Unicode, to the + output charset when the method Charset.convert() is called. The default + is to output in the same character set as the input. + + Both input_charset and output_charset must have Unicode codec entries in + the module's charset-to-codec mapping; use add_codec(charset, codecname) + to add codecs the module does not know about. See the codecs module's + documentation for more information. + """ + if body_enc == SHORTEST: + raise ValueError, 'SHORTEST not allowed for body_enc' + CHARSETS[charset] = (header_enc, body_enc, output_charset) + + +def add_alias(alias, canonical): + """Add a character set alias. + + alias is the alias name, e.g. latin-1 + canonical is the character set's canonical name, e.g. iso-8859-1 + """ + ALIASES[alias] = canonical + + +def add_codec(charset, codecname): + """Add a codec that map characters in the given charset to/from Unicode. + + charset is the canonical name of a character set. codecname is the name + of a Python codec, as appropriate for the second argument to the unicode() + built-in, or to the encode() method of a Unicode string. + """ + CODEC_MAP[charset] = codecname + + + +class Charset: + """Map character sets to their email properties. + + This class provides information about the requirements imposed on email + for a specific character set. It also provides convenience routines for + converting between character sets, given the availability of the + applicable codecs. Given a character set, it will do its best to provide + information on how to use that character set in an email in an + RFC-compliant way. + + Certain character sets must be encoded with quoted-printable or base64 + when used in email headers or bodies. Certain character sets must be + converted outright, and are not allowed in email. Instances of this + module expose the following information about a character set: + + input_charset: The initial character set specified. Common aliases + are converted to their `official' email names (e.g. latin_1 + is converted to iso-8859-1). Defaults to 7-bit us-ascii. + + header_encoding: If the character set must be encoded before it can be + used in an email header, this attribute will be set to + Charset.QP (for quoted-printable), Charset.BASE64 (for + base64 encoding), or Charset.SHORTEST for the shortest of + QP or BASE64 encoding. Otherwise, it will be None. + + body_encoding: Same as header_encoding, but describes the encoding for the + mail message's body, which indeed may be different than the + header encoding. Charset.SHORTEST is not allowed for + body_encoding. + + output_charset: Some character sets must be converted before the can be + used in email headers or bodies. If the input_charset is + one of them, this attribute will contain the name of the + charset output will be converted to. Otherwise, it will + be None. + + input_codec: The name of the Python codec used to convert the + input_charset to Unicode. If no conversion codec is + necessary, this attribute will be None. + + output_codec: The name of the Python codec used to convert Unicode + to the output_charset. If no conversion codec is necessary, + this attribute will have the same value as the input_codec. + """ + def __init__(self, input_charset=DEFAULT_CHARSET): + # Set the input charset after filtering through the aliases + self.input_charset = ALIASES.get(input_charset, input_charset) + # We can try to guess which encoding and conversion to use by the + # charset_map dictionary. Try that first, but let the user override + # it. + henc, benc, conv = CHARSETS.get(self.input_charset, + (SHORTEST, SHORTEST, None)) + # Set the attributes, allowing the arguments to override the default. + self.header_encoding = henc + self.body_encoding = benc + self.output_charset = ALIASES.get(conv, conv) + # Now set the codecs. If one isn't defined for input_charset, + # guess and try a Unicode codec with the same name as input_codec. + self.input_codec = CODEC_MAP.get(self.input_charset, + self.input_charset) + self.output_codec = CODEC_MAP.get(self.output_charset, + self.input_codec) + + def __str__(self): + return self.input_charset.lower() + + def __eq__(self, other): + return str(self) == str(other).lower() + + def __ne__(self, other): + return not self.__eq__(other) + + def get_body_encoding(self): + """Return the content-transfer-encoding used for body encoding. + + This is either the string `quoted-printable' or `base64' depending on + the encoding used, or it is a function in which case you should call + the function with a single argument, the Message object being + encoded. The function should then set the Content-Transfer-Encoding + header itself to whatever is appropriate. + + Returns "quoted-printable" if self.body_encoding is QP. + Returns "base64" if self.body_encoding is BASE64. + Returns "7bit" otherwise. + """ + assert self.body_encoding <> SHORTEST + if self.body_encoding == QP: + return 'quoted-printable' + elif self.body_encoding == BASE64: + return 'base64' + else: + return encode_7or8bit + + def convert(self, s): + """Convert a string from the input_codec to the output_codec.""" + if self.input_codec <> self.output_codec: + return unicode(s, self.input_codec).encode(self.output_codec) + else: + return s + + def to_splittable(self, s): + """Convert a possibly multibyte string to a safely splittable format. + + Uses the input_codec to try and convert the string to Unicode, so it + can be safely split on character boundaries (even for multibyte + characters). + + Returns the string as-is if it isn't known how to convert it to + Unicode with the input_charset. + + Characters that could not be converted to Unicode will be replaced + with the Unicode replacement character U+FFFD. + """ + if _isunicode(s) or self.input_codec is None: + return s + try: + return unicode(s, self.input_codec, 'replace') + except LookupError: + # Input codec not installed on system, so return the original + # string unchanged. + return s + + def from_splittable(self, ustr, to_output=True): + """Convert a splittable string back into an encoded string. + + Uses the proper codec to try and convert the string from Unicode back + into an encoded format. Return the string as-is if it is not Unicode, + or if it could not be converted from Unicode. + + Characters that could not be converted from Unicode will be replaced + with an appropriate character (usually '?'). + + If to_output is True (the default), uses output_codec to convert to an + encoded format. If to_output is False, uses input_codec. + """ + if to_output: + codec = self.output_codec + else: + codec = self.input_codec + if not _isunicode(ustr) or codec is None: + return ustr + try: + return ustr.encode(codec, 'replace') + except LookupError: + # Output codec not installed + return ustr + + def get_output_charset(self): + """Return the output character set. + + This is self.output_charset if that is not None, otherwise it is + self.input_charset. + """ + return self.output_charset or self.input_charset + + def encoded_header_len(self, s): + """Return the length of the encoded header string.""" + cset = self.get_output_charset() + # The len(s) of a 7bit encoding is len(s) + if self.header_encoding == BASE64: + return email.base64MIME.base64_len(s) + len(cset) + MISC_LEN + elif self.header_encoding == QP: + return email.quopriMIME.header_quopri_len(s) + len(cset) + MISC_LEN + elif self.header_encoding == SHORTEST: + lenb64 = email.base64MIME.base64_len(s) + lenqp = email.quopriMIME.header_quopri_len(s) + return min(lenb64, lenqp) + len(cset) + MISC_LEN + else: + return len(s) + + def header_encode(self, s, convert=False): + """Header-encode a string, optionally converting it to output_charset. + + If convert is True, the string will be converted from the input + charset to the output charset automatically. This is not useful for + multibyte character sets, which have line length issues (multibyte + characters must be split on a character, not a byte boundary); use the + high-level Header class to deal with these issues. convert defaults + to False. + + The type of encoding (base64 or quoted-printable) will be based on + self.header_encoding. + """ + cset = self.get_output_charset() + if convert: + s = self.convert(s) + # 7bit/8bit encodings return the string unchanged (modulo conversions) + if self.header_encoding == BASE64: + return email.base64MIME.header_encode(s, cset) + elif self.header_encoding == QP: + return email.quopriMIME.header_encode(s, cset) + elif self.header_encoding == SHORTEST: + lenb64 = email.base64MIME.base64_len(s) + lenqp = email.quopriMIME.header_quopri_len(s) + if lenb64 < lenqp: + return email.base64MIME.header_encode(s, cset) + else: + return email.quopriMIME.header_encode(s, cset) + else: + return s + + def body_encode(self, s, convert=True): + """Body-encode a string and convert it to output_charset. + + If convert is True (the default), the string will be converted from + the input charset to output charset automatically. Unlike + header_encode(), there are no issues with byte boundaries and + multibyte charsets in email bodies, so this is usually pretty safe. + + The type of encoding (base64 or quoted-printable) will be based on + self.body_encoding. + """ + if convert: + s = self.convert(s) + # 7bit/8bit encodings return the string unchanged (module conversions) + if self.body_encoding is BASE64: + return email.base64MIME.body_encode(s) + elif self.header_encoding is QP: + return email.quopriMIME.body_encode(s) + else: + return s diff --git a/Lib/email/Header.py b/Lib/email/Header.py new file mode 100644 index 0000000..a40226d --- /dev/null +++ b/Lib/email/Header.py @@ -0,0 +1,401 @@ +# Copyright (C) 2002 Python Software Foundation +# Author: che@debian.org (Ben Gertzfield), barry@zope.com (Barry Warsaw) + +"""Header encoding and decoding functionality.""" + +import re +from types import StringType, UnicodeType + +import email.quopriMIME +import email.base64MIME +from email.Charset import Charset + +try: + from email._compat22 import _floordiv +except SyntaxError: + # Python 2.1 spells integer division differently + from email._compat21 import _floordiv + +try: + True, False +except NameError: + True = 1 + False = 0 + +CRLFSPACE = '\r\n ' +CRLF = '\r\n' +NL = '\n' +SPACE8 = ' ' * 8 +EMPTYSTRING = '' + +MAXLINELEN = 76 + +ENCODE = 1 +DECODE = 2 + +USASCII = Charset('us-ascii') +UTF8 = Charset('utf-8') + +# Match encoded-word strings in the form =?charset?q?Hello_World?= +ecre = re.compile(r''' + =\? # literal =? + (?P[^?]*?) # non-greedy up to the next ? is the charset + \? # literal ? + (?P[qb]) # either a "q" or a "b", case insensitive + \? # literal ? + (?P.*?) # non-greedy up to the next ?= is the encoded string + \?= # literal ?= + ''', re.VERBOSE | re.IGNORECASE) + + + +# Helpers +_max_append = email.quopriMIME._max_append + + + +def decode_header(header): + """Decode a message header value without converting charset. + + Returns a list of (decoded_string, charset) pairs containing each of the + decoded parts of the header. Charset is None for non-encoded parts of the + header, otherwise a lower-case string containing the name of the character + set specified in the encoded string. + """ + # If no encoding, just return the header + header = str(header) + if not ecre.search(header): + return [(header, None)] + decoded = [] + dec = '' + for line in header.splitlines(): + # This line might not have an encoding in it + if not ecre.search(line): + decoded.append((line, None)) + continue + parts = ecre.split(line) + while parts: + unenc = parts.pop(0).strip() + if unenc: + # Should we continue a long line? + if decoded and decoded[-1][1] is None: + decoded[-1] = (decoded[-1][0] + dec, None) + else: + decoded.append((unenc, None)) + if parts: + charset, encoding = [s.lower() for s in parts[0:2]] + encoded = parts[2] + dec = '' + if encoding == 'q': + dec = email.quopriMIME.header_decode(encoded) + elif encoding == 'b': + dec = email.base64MIME.decode(encoded) + else: + dec = encoded + + if decoded and decoded[-1][1] == charset: + decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1]) + else: + decoded.append((dec, charset)) + del parts[0:3] + return decoded + + + +def make_header(decoded_seq, maxlinelen=None, header_name=None, + continuation_ws=' '): + """Create a Header from a sequence of pairs as returned by decode_header() + + decode_header() takes a header value string and returns a sequence of + pairs of the format (decoded_string, charset) where charset is the string + name of the character set. + + This function takes one of those sequence of pairs and returns a Header + instance. Optional maxlinelen, header_name, and continuation_ws are as in + the Header constructor. + """ + h = Header(maxlinelen=maxlinelen, header_name=header_name, + continuation_ws=continuation_ws) + for s, charset in decoded_seq: + # None means us-ascii but we can simply pass it on to h.append() + if charset is not None and not isinstance(charset, Charset): + charset = Charset(charset) + h.append(s, charset) + return h + + + +class Header: + def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None, + continuation_ws=' '): + """Create a MIME-compliant header that can contain many character sets. + + Optional s is the initial header value. If None, the initial header + value is not set. You can later append to the header with .append() + method calls. s may be a byte string or a Unicode string, but see the + .append() documentation for semantics. + + Optional charset serves two purposes: it has the same meaning as the + charset argument to the .append() method. It also sets the default + character set for all subsequent .append() calls that omit the charset + argument. If charset is not provided in the constructor, the us-ascii + charset is used both as s's initial charset and as the default for + subsequent .append() calls. + + The maximum line length can be specified explicit via maxlinelen. For + splitting the first line to a shorter value (to account for the field + header which isn't included in s, e.g. `Subject') pass in the name of + the field in header_name. The default maxlinelen is 76. + + continuation_ws must be RFC 2822 compliant folding whitespace (usually + either a space or a hard tab) which will be prepended to continuation + lines. + """ + if charset is None: + charset = USASCII + self._charset = charset + self._continuation_ws = continuation_ws + cws_expanded_len = len(continuation_ws.replace('\t', SPACE8)) + # BAW: I believe `chunks' and `maxlinelen' should be non-public. + self._chunks = [] + if s is not None: + self.append(s, charset) + if maxlinelen is None: + maxlinelen = MAXLINELEN + if header_name is None: + # We don't know anything about the field header so the first line + # is the same length as subsequent lines. + self._firstlinelen = maxlinelen + else: + # The first line should be shorter to take into account the field + # header. Also subtract off 2 extra for the colon and space. + self._firstlinelen = maxlinelen - len(header_name) - 2 + # Second and subsequent lines should subtract off the length in + # columns of the continuation whitespace prefix. + self._maxlinelen = maxlinelen - cws_expanded_len + + def __str__(self): + """A synonym for self.encode().""" + return self.encode() + + def __unicode__(self): + """Helper for the built-in unicode function.""" + # charset item is a Charset instance so we need to stringify it. + uchunks = [unicode(s, str(charset)) for s, charset in self._chunks] + return u''.join(uchunks) + + # Rich comparison operators for equality only. BAW: does it make sense to + # have or explicitly disable <, <=, >, >= operators? + def __eq__(self, other): + # other may be a Header or a string. Both are fine so coerce + # ourselves to a string, swap the args and do another comparison. + return other == self.encode() + + def __ne__(self, other): + return not self == other + + def append(self, s, charset=None): + """Append a string to the MIME header. + + Optional charset, if given, should be a Charset instance or the name + of a character set (which will be converted to a Charset instance). A + value of None (the default) means that the charset given in the + constructor is used. + + s may be a byte string or a Unicode string. If it is a byte string + (i.e. isinstance(s, StringType) is true), then charset is the encoding + of that byte string, and a UnicodeError will be raised if the string + cannot be decoded with that charset. If s is a Unicode string, then + charset is a hint specifying the character set of the characters in + the string. In this case, when producing an RFC 2822 compliant header + using RFC 2047 rules, the Unicode string will be encoded using the + following charsets in order: us-ascii, the charset hint, utf-8. The + first character set not to provoke a UnicodeError is used. + """ + if charset is None: + charset = self._charset + elif not isinstance(charset, Charset): + charset = Charset(charset) + # Normalize and check the string + if isinstance(s, StringType): + # Possibly raise UnicodeError if it can't e encoded + unicode(s, charset.get_output_charset()) + elif isinstance(s, UnicodeType): + # Convert Unicode to byte string for later concatenation + for charset in USASCII, charset, UTF8: + try: + s = s.encode(charset.get_output_charset()) + break + except UnicodeError: + pass + else: + assert False, 'Could not encode to utf-8' + self._chunks.append((s, charset)) + + def _split(self, s, charset, firstline=False): + # Split up a header safely for use with encode_chunks. BAW: this + # appears to be a private convenience method. + splittable = charset.to_splittable(s) + encoded = charset.from_splittable(splittable) + elen = charset.encoded_header_len(encoded) + + if elen <= self._maxlinelen: + return [(encoded, charset)] + # BAW: I'm not sure what the right test here is. What we're trying to + # do is be faithful to RFC 2822's recommendation that ($2.2.3): + # + # "Note: Though structured field bodies are defined in such a way that + # folding can take place between many of the lexical tokens (and even + # within some of the lexical tokens), folding SHOULD be limited to + # placing the CRLF at higher-level syntactic breaks." + # + # For now, I can only imagine doing this when the charset is us-ascii, + # although it's possible that other charsets may also benefit from the + # higher-level syntactic breaks. + # + elif charset == 'us-ascii': + return self._ascii_split(s, charset, firstline) + # BAW: should we use encoded? + elif elen == len(s): + # We can split on _maxlinelen boundaries because we know that the + # encoding won't change the size of the string + splitpnt = self._maxlinelen + first = charset.from_splittable(splittable[:splitpnt], False) + last = charset.from_splittable(splittable[splitpnt:], False) + else: + # Divide and conquer. + halfway = _floordiv(len(splittable), 2) + first = charset.from_splittable(splittable[:halfway], False) + last = charset.from_splittable(splittable[halfway:], False) + # Do the split + return self._split(first, charset, firstline) + \ + self._split(last, charset) + + def _ascii_split(self, s, charset, firstline): + # Attempt to split the line at the highest-level syntactic break + # possible. Note that we don't have a lot of smarts about field + # syntax; we just try to break on semi-colons, then whitespace. + rtn = [] + lines = s.splitlines() + while lines: + line = lines.pop(0) + if firstline: + maxlinelen = self._firstlinelen + firstline = False + else: + #line = line.lstrip() + maxlinelen = self._maxlinelen + # Short lines can remain unchanged + if len(line.replace('\t', SPACE8)) <= maxlinelen: + rtn.append(line) + else: + oldlen = len(line) + # Try to break the line on semicolons, but if that doesn't + # work, try to split on folding whitespace. + while len(line) > maxlinelen: + i = line.rfind(';', 0, maxlinelen) + if i < 0: + break + rtn.append(line[:i] + ';') + line = line[i+1:] + # Is the remaining stuff still longer than maxlinelen? + if len(line) <= maxlinelen: + # Splitting on semis worked + rtn.append(line) + continue + # Splitting on semis didn't finish the job. If it did any + # work at all, stick the remaining junk on the front of the + # `lines' sequence and let the next pass do its thing. + if len(line) <> oldlen: + lines.insert(0, line) + continue + # Otherwise, splitting on semis didn't help at all. + parts = re.split(r'(\s+)', line) + if len(parts) == 1 or (len(parts) == 3 and + parts[0].endswith(':')): + # This line can't be split on whitespace. There's now + # little we can do to get this into maxlinelen. BAW: + # We're still potentially breaking the RFC by possibly + # allowing lines longer than the absolute maximum of 998 + # characters. For now, let it slide. + # + # len(parts) will be 1 if this line has no `Field: ' + # prefix, otherwise it will be len(3). + rtn.append(line) + continue + # There is whitespace we can split on. + first = parts.pop(0) + sublines = [first] + acc = len(first) + while parts: + len0 = len(parts[0]) + len1 = len(parts[1]) + if acc + len0 + len1 <= maxlinelen: + sublines.append(parts.pop(0)) + sublines.append(parts.pop(0)) + acc += len0 + len1 + else: + # Split it here, but don't forget to ignore the + # next whitespace-only part + if first <> '': + rtn.append(EMPTYSTRING.join(sublines)) + del parts[0] + first = parts.pop(0) + sublines = [first] + acc = len(first) + rtn.append(EMPTYSTRING.join(sublines)) + return [(chunk, charset) for chunk in rtn] + + def _encode_chunks(self): + """MIME-encode a header with many different charsets and/or encodings. + + Given a list of pairs (string, charset), return a MIME-encoded string + suitable for use in a header field. Each pair may have different + charsets and/or encodings, and the resulting header will accurately + reflect each setting. + + Each encoding can be email.Utils.QP (quoted-printable, for ASCII-like + character sets like iso-8859-1), email.Utils.BASE64 (Base64, for + non-ASCII like character sets like KOI8-R and iso-2022-jp), or None + (no encoding). + + Each pair will be represented on a separate line; the resulting string + will be in the format: + + "=?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n + =?charset2?b?SvxyZ2VuIEL2aW5n?=" + """ + chunks = [] + for header, charset in self._chunks: + if charset is None or charset.header_encoding is None: + # There's no encoding for this chunk's charsets + _max_append(chunks, header, self._maxlinelen) + else: + _max_append(chunks, charset.header_encode(header), + self._maxlinelen, ' ') + joiner = NL + self._continuation_ws + return joiner.join(chunks) + + def encode(self): + """Encode a message header into an RFC-compliant format. + + There are many issues involved in converting a given string for use in + an email header. Only certain character sets are readable in most + email clients, and as header strings can only contain a subset of + 7-bit ASCII, care must be taken to properly convert and encode (with + Base64 or quoted-printable) header strings. In addition, there is a + 75-character length limit on any given encoded header field, so + line-wrapping must be performed, even with double-byte character sets. + + This method will do its best to convert the string to the correct + character set used in email, and encode and line wrap it safely with + the appropriate scheme for that character set. + + If the given charset is not known or an error occurs during + conversion, this function will return the header untouched. + """ + newchunks = [] + for s, charset in self._chunks: + newchunks += self._split(s, charset, True) + self._chunks = newchunks + return self._encode_chunks() diff --git a/Lib/email/MIMEMultipart.py b/Lib/email/MIMEMultipart.py new file mode 100644 index 0000000..16add2f --- /dev/null +++ b/Lib/email/MIMEMultipart.py @@ -0,0 +1,37 @@ +# Copyright (C) 2002 Python Software Foundation +# Author: barry@zope.com (Barry Warsaw) + +"""Base class for MIME multipart/* type messages. +""" + +from email import MIMEBase + + + +class MIMEMultipart(MIMEBase.MIMEBase): + """Base class for MIME multipart/* type messages.""" + + def __init__(self, _subtype='mixed', boundary=None, *_subparts, **_params): + """Creates a multipart/* type message. + + By default, creates a multipart/mixed message, with proper + Content-Type and MIME-Version headers. + + _subtype is the subtype of the multipart content type, defaulting to + `mixed'. + + boundary is the multipart boundary string. By default it is + calculated as needed. + + _subparts is a sequence of initial subparts for the payload. It + must be possible to convert this sequence to a list. You can always + attach new subparts to the message by using the attach() method. + + Additional parameters for the Content-Type header are taken from the + keyword arguments (or passed into the _params argument). + """ + MIMEBase.MIMEBase.__init__(self, 'multipart', _subtype, **_params) + if _subparts: + self.attach(*list(_subparts)) + if boundary: + self.set_boundary(boundary) diff --git a/Lib/email/MIMENonMultipart.py b/Lib/email/MIMENonMultipart.py new file mode 100644 index 0000000..1b3bcfd --- /dev/null +++ b/Lib/email/MIMENonMultipart.py @@ -0,0 +1,24 @@ +# Copyright (C) 2002 Python Software Foundation +# Author: barry@zope.com (Barry Warsaw) + +"""Base class for MIME type messages that are not multipart. +""" + +from email import Errors +from email import MIMEBase + + + +class MIMENonMultipart(MIMEBase.MIMEBase): + """Base class for MIME multipart/* type messages.""" + + __pychecker__ = 'unusednames=payload' + + def attach(self, payload): + # The public API prohibits attaching multiple subparts to MIMEBase + # derived subtypes since none of them are, by definition, of content + # type multipart/* + raise Errors.MultipartConversionError( + 'Cannot attach additional subparts to non-multipart/*') + + del __pychecker__ diff --git a/Lib/email/base64MIME.py b/Lib/email/base64MIME.py new file mode 100644 index 0000000..56e44e1 --- /dev/null +++ b/Lib/email/base64MIME.py @@ -0,0 +1,187 @@ +# Copyright (C) 2002 Python Software Foundation +# Author: che@debian.org (Ben Gertzfield) + +"""Base64 content transfer encoding per RFCs 2045-2047. + +This module handles the content transfer encoding method defined in RFC 2045 +to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit +characters encoding known as Base64. + +It is used in the MIME standards for email to attach images, audio, and text +using some 8-bit character sets to messages. + +This module provides an interface to encode and decode both headers and bodies +with Base64 encoding. + +RFC 2045 defines a method for including character set information in an +`encoded-word' in a header. This method is commonly used for 8-bit real names +in To:, From:, Cc:, etc. fields, as well as Subject: lines. + +This module does not do the line wrapping or end-of-line character conversion +necessary for proper internationalized headers; it only does dumb encoding and +decoding. To deal with the various line wrapping issues, use the email.Header +module. +""" + +import re +from binascii import b2a_base64, a2b_base64 +from email.Utils import fix_eols + +try: + from email._compat22 import _floordiv +except SyntaxError: + # Python 2.1 spells integer division differently + from email._compat21 import _floordiv + + +CRLF = '\r\n' +NL = '\n' +EMPTYSTRING = '' + +# See also Charset.py +MISC_LEN = 7 + +try: + True, False +except NameError: + True = 1 + False = 0 + + + +# Helpers +def base64_len(s): + """Return the length of s when it is encoded with base64.""" + groups_of_3, leftover = divmod(len(s), 3) + # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in. + # Thanks, Tim! + n = groups_of_3 * 4 + if leftover: + n += 4 + return n + + + +def header_encode(header, charset='iso-8859-1', keep_eols=False, + maxlinelen=76, eol=NL): + """Encode a single header line with Base64 encoding in a given charset. + + Defined in RFC 2045, this Base64 encoding is identical to normal Base64 + encoding, except that each line must be intelligently wrapped (respecting + the Base64 encoding), and subsequent lines must start with a space. + + charset names the character set to use to encode the header. It defaults + to iso-8859-1. + + End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted + to the canonical email line separator \\r\\n unless the keep_eols + parameter is True (the default is False). + + Each line of the header will be terminated in the value of eol, which + defaults to "\\n". Set this to "\\r\\n" if you are using the result of + this function directly in email. + + The resulting string will be in the form: + + "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n + =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?=" + + with each line wrapped at, at most, maxlinelen characters (defaults to 76 + characters). + """ + # Return empty headers unchanged + if not header: + return header + + if not keep_eols: + header = fix_eols(header) + + # Base64 encode each line, in encoded chunks no greater than maxlinelen in + # length, after the RFC chrome is added in. + base64ed = [] + max_encoded = maxlinelen - len(charset) - MISC_LEN + max_unencoded = _floordiv(max_encoded * 3, 4) + + # BAW: Ben's original code used a step of max_unencoded, but I think it + # ought to be max_encoded. Otherwise, where's max_encoded used? I'm + # still not sure what the + for i in range(0, len(header), max_unencoded): + base64ed.append(b2a_base64(header[i:i+max_unencoded])) + + # Now add the RFC chrome to each encoded chunk + lines = [] + for line in base64ed: + # Ignore the last character of each line if it is a newline + if line.endswith(NL): + line = line[:-1] + # Add the chrome + lines.append('=?%s?b?%s?=' % (charset, line)) + # Glue the lines together and return it. BAW: should we be able to + # specify the leading whitespace in the joiner? + joiner = eol + ' ' + return joiner.join(lines) + + + +def encode(s, binary=True, maxlinelen=76, eol=NL): + """Encode a string with base64. + + Each line will be wrapped at, at most, maxlinelen characters (defaults to + 76 characters). + + If binary is False, end-of-line characters will be converted to the + canonical email end-of-line sequence \\r\\n. Otherwise they will be left + verbatim (this is the default). + + Each line of encoded text will end with eol, which defaults to "\\n". Set + this to "\r\n" if you will be using the result of this function directly + in an email. + """ + if not s: + return s + + if not binary: + s = fix_eols(s) + + encvec = [] + max_unencoded = _floordiv(maxlinelen * 3, 4) + for i in range(0, len(s), max_unencoded): + # BAW: should encode() inherit b2a_base64()'s dubious behavior in + # adding a newline to the encoded string? + enc = b2a_base64(s[i:i + max_unencoded]) + if enc.endswith(NL) and eol <> NL: + enc = enc[:-1] + eol + encvec.append(enc) + return EMPTYSTRING.join(encvec) + + +# For convenience and backwards compatibility w/ standard base64 module +body_encode = encode +encodestring = encode + + + +def decode(s, convert_eols=None): + """Decode a raw base64 string. + + If convert_eols is set to a string value, all canonical email linefeeds, + e.g. "\\r\\n", in the decoded text will be converted to the value of + convert_eols. os.linesep is a good choice for convert_eols if you are + decoding a text attachment. + + This function does not parse a full MIME header value encoded with + base64 (like =?iso-8895-1?b?bmloISBuaWgh?=) -- please use the high + level email.Header class for that functionality. + """ + if not s: + return s + + dec = a2b_base64(s) + if convert_eols: + return dec.replace(CRLF, convert_eols) + return dec + + +# For convenience and backwards compatibility w/ standard base64 module +body_decode = decode +decodestring = decode diff --git a/Lib/email/quopriMIME.py b/Lib/email/quopriMIME.py new file mode 100644 index 0000000..18ddd89 --- /dev/null +++ b/Lib/email/quopriMIME.py @@ -0,0 +1,318 @@ +# Copyright (C) 2001,2002 Python Software Foundation +# Author: che@debian.org (Ben Gertzfield) + +"""Quoted-printable content transfer encoding per RFCs 2045-2047. + +This module handles the content transfer encoding method defined in RFC 2045 +to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to +safely encode text that is in a character set similar to the 7-bit US ASCII +character set, but that includes some 8-bit characters that are normally not +allowed in email bodies or headers. + +Quoted-printable is very space-inefficient for encoding binary files; use the +email.base64MIME module for that instead. + +This module provides an interface to encode and decode both headers and bodies +with quoted-printable encoding. + +RFC 2045 defines a method for including character set information in an +`encoded-word' in a header. This method is commonly used for 8-bit real names +in To:/From:/Cc: etc. fields, as well as Subject: lines. + +This module does not do the line wrapping or end-of-line character +conversion necessary for proper internationalized headers; it only +does dumb encoding and decoding. To deal with the various line +wrapping issues, use the email.Header module. +""" + +import re +from string import hexdigits +from email.Utils import fix_eols + +CRLF = '\r\n' +NL = '\n' + +# See also Charset.py +MISC_LEN = 7 + +hqre = re.compile(r'[^-a-zA-Z0-9!*+/ ]') +bqre = re.compile(r'[^ !-<>-~\t]') + +try: + True, False +except NameError: + True = 1 + False = 0 + + + +# Helpers +def header_quopri_check(c): + """Return True if the character should be escaped with header quopri.""" + return hqre.match(c) and True + + +def body_quopri_check(c): + """Return True if the character should be escaped with body quopri.""" + return bqre.match(c) and True + + +def header_quopri_len(s): + """Return the length of str when it is encoded with header quopri.""" + count = 0 + for c in s: + if hqre.match(c): + count += 3 + else: + count += 1 + return count + + +def body_quopri_len(str): + """Return the length of str when it is encoded with body quopri.""" + count = 0 + for c in str: + if bqre.match(c): + count += 3 + else: + count += 1 + return count + + +def _max_append(L, s, maxlen, extra=''): + if not L: + L.append(s.lstrip()) + elif len(L[-1]) + len(s) < maxlen: + L[-1] += extra + s + else: + L.append(s.lstrip()) + + +def unquote(s): + """Turn a string in the form =AB to the ASCII character with value 0xab""" + return chr(int(s[1:3], 16)) + + +def quote(c): + return "=%02X" % ord(c) + + + +def header_encode(header, charset="iso-8859-1", keep_eols=False, + maxlinelen=76, eol=NL): + """Encode a single header line with quoted-printable (like) encoding. + + Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but + used specifically for email header fields to allow charsets with mostly 7 + bit characters (and some 8 bit) to remain more or less readable in non-RFC + 2045 aware mail clients. + + charset names the character set to use to encode the header. It defaults + to iso-8859-1. + + The resulting string will be in the form: + + "=?charset?q?I_f=E2rt_in_your_g=E8n=E8ral_dire=E7tion?\\n + =?charset?q?Silly_=C8nglish_Kn=EEghts?=" + + with each line wrapped safely at, at most, maxlinelen characters (defaults + to 76 characters). + + End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted + to the canonical email line separator \\r\\n unless the keep_eols + parameter is True (the default is False). + + Each line of the header will be terminated in the value of eol, which + defaults to "\\n". Set this to "\\r\\n" if you are using the result of + this function directly in email. + """ + # Return empty headers unchanged + if not header: + return header + + if not keep_eols: + header = fix_eols(header) + + # Quopri encode each line, in encoded chunks no greater than maxlinelen in + # lenght, after the RFC chrome is added in. + quoted = [] + max_encoded = maxlinelen - len(charset) - MISC_LEN + + for c in header: + # Space may be represented as _ instead of =20 for readability + if c == ' ': + _max_append(quoted, '_', max_encoded) + # These characters can be included verbatim + elif not hqre.match(c): + _max_append(quoted, c, max_encoded) + # Otherwise, replace with hex value like =E2 + else: + _max_append(quoted, "=%02X" % ord(c), max_encoded) + + # Now add the RFC chrome to each encoded chunk and glue the chunks + # together. BAW: should we be able to specify the leading whitespace in + # the joiner? + joiner = eol + ' ' + return joiner.join(['=?%s?q?%s?=' % (charset, line) for line in quoted]) + + + +def encode(body, binary=False, maxlinelen=76, eol=NL): + """Encode with quoted-printable, wrapping at maxlinelen characters. + + If binary is False (the default), end-of-line characters will be converted + to the canonical email end-of-line sequence \\r\\n. Otherwise they will + be left verbatim. + + Each line of encoded text will end with eol, which defaults to "\\n". Set + this to "\\r\\n" if you will be using the result of this function directly + in an email. + + Each line will be wrapped at, at most, maxlinelen characters (defaults to + 76 characters). Long lines will have the `soft linefeed' quoted-printable + character "=" appended to them, so the decoded text will be identical to + the original text. + """ + if not body: + return body + + if not binary: + body = fix_eols(body) + + # BAW: We're accumulating the body text by string concatenation. That + # can't be very efficient, but I don't have time now to rewrite it. It + # just feels like this algorithm could be more efficient. + encoded_body = '' + lineno = -1 + # Preserve line endings here so we can check later to see an eol needs to + # be added to the output later. + lines = body.splitlines(1) + for line in lines: + # But strip off line-endings for processing this line. + if line.endswith(CRLF): + line = line[:-2] + elif line[-1] in CRLF: + line = line[:-1] + + lineno += 1 + encoded_line = '' + prev = None + linelen = len(line) + # Now we need to examine every character to see if it needs to be + # quopri encoded. BAW: again, string concatenation is inefficient. + for j in range(linelen): + c = line[j] + prev = c + if bqre.match(c): + c = quote(c) + elif j+1 == linelen: + # Check for whitespace at end of line; special case + if c not in ' \t': + encoded_line += c + prev = c + continue + # Check to see to see if the line has reached its maximum length + if len(encoded_line) + len(c) >= maxlinelen: + encoded_body += encoded_line + '=' + eol + encoded_line = '' + encoded_line += c + # Now at end of line.. + if prev and prev in ' \t': + # Special case for whitespace at end of file + if lineno + 1 == len(lines): + prev = quote(prev) + if len(encoded_line) + len(prev) > maxlinelen: + encoded_body += encoded_line + '=' + eol + prev + else: + encoded_body += encoded_line + prev + # Just normal whitespace at end of line + else: + encoded_body += encoded_line + prev + '=' + eol + encoded_line = '' + # Now look at the line we just finished and it has a line ending, we + # need to add eol to the end of the line. + if lines[lineno].endswith(CRLF) or lines[lineno][-1] in CRLF: + encoded_body += encoded_line + eol + else: + encoded_body += encoded_line + encoded_line = '' + return encoded_body + + +# For convenience and backwards compatibility w/ standard base64 module +body_encode = encode +encodestring = encode + + + +# BAW: I'm not sure if the intent was for the signature of this function to be +# the same as base64MIME.decode() or not... +def decode(encoded, eol=NL): + """Decode a quoted-printable string. + + Lines are separated with eol, which defaults to \\n. + """ + if not encoded: + return encoded + # BAW: see comment in encode() above. Again, we're building up the + # decoded string with string concatenation, which could be done much more + # efficiently. + decoded = '' + + for line in encoded.splitlines(): + line = line.rstrip() + if not line: + decoded += eol + continue + + i = 0 + n = len(line) + while i < n: + c = line[i] + if c <> '=': + decoded += c + i += 1 + # Otherwise, c == "=". Are we at the end of the line? If so, add + # a soft line break. + elif i+1 == n: + i += 1 + continue + # Decode if in form =AB + elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits: + decoded += unquote(line[i:i+3]) + i += 3 + # Otherwise, not in form =AB, pass literally + else: + decoded += c + i += 1 + + if i == n: + decoded += eol + # Special case if original string did not end with eol + if not encoded.endswith(eol) and decoded.endswith(eol): + decoded = decoded[:-1] + return decoded + + +# For convenience and backwards compatibility w/ standard base64 module +body_decode = decode +decodestring = decode + + + +def _unquote_match(match): + """Turn a match in the form =AB to the ASCII character with value 0xab""" + s = match.group(0) + return unquote(s) + + +# Header decoding is done a bit differently +def header_decode(s): + """Decode a string encoded with RFC 2045 MIME header `Q' encoding. + + This function does not parse a full MIME header value encoded with + quoted-printable (like =?iso-8895-1?q?Hello_World?=) -- please use + the high level email.Header class for that functionality. + """ + s = s.replace('_', ' ') + return re.sub(r'=\w{2}', _unquote_match, s) diff --git a/Lib/email/test/data/msg_32.txt b/Lib/email/test/data/msg_32.txt new file mode 100644 index 0000000..07ec5af --- /dev/null +++ b/Lib/email/test/data/msg_32.txt @@ -0,0 +1,14 @@ +Delivered-To: freebsd-isp@freebsd.org +Date: Tue, 26 Sep 2000 12:23:03 -0500 +From: Anne Person +To: Barney Dude +Subject: Re: Limiting Perl CPU Utilization... +Mime-Version: 1.0 +Content-Type: text/plain; charset*=ansi-x3.4-1968''us-ascii +Content-Disposition: inline +User-Agent: Mutt/1.3.8i +Sender: owner-freebsd-isp@FreeBSD.ORG +Precedence: bulk +X-Loop: FreeBSD.org + +Some message. diff --git a/Lib/email/test/data/msg_33.txt b/Lib/email/test/data/msg_33.txt new file mode 100644 index 0000000..042787a --- /dev/null +++ b/Lib/email/test/data/msg_33.txt @@ -0,0 +1,29 @@ +Delivered-To: freebsd-isp@freebsd.org +Date: Wed, 27 Sep 2000 11:11:09 -0500 +From: Anne Person +To: Barney Dude +Subject: Re: Limiting Perl CPU Utilization... +Mime-Version: 1.0 +Content-Type: multipart/signed; micalg*=ansi-x3.4-1968''pgp-md5; + protocol*=ansi-x3.4-1968''application%2Fpgp-signature; + boundary*="ansi-x3.4-1968''EeQfGwPcQSOJBaQU" +Content-Disposition: inline +Sender: owner-freebsd-isp@FreeBSD.ORG +Precedence: bulk +X-Loop: FreeBSD.org + + +--EeQfGwPcQSOJBaQU +Content-Type: text/plain; charset*=ansi-x3.4-1968''us-ascii +Content-Disposition: inline +Content-Transfer-Encoding: quoted-printable + +part 1 + +--EeQfGwPcQSOJBaQU +Content-Type: text/plain +Content-Disposition: inline + +part 2 + +--EeQfGwPcQSOJBaQU-- diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py new file mode 100644 index 0000000..926c6bf --- /dev/null +++ b/Lib/email/test/test_email.py @@ -0,0 +1,2280 @@ +# Copyright (C) 2001,2002 Python Software Foundation +# email package unit tests + +import sys +import os +import time +import unittest +import base64 +import difflib +from cStringIO import StringIO +from types import StringType, ListType +import warnings + +import email + +from email.Charset import Charset +from email.Header import Header, decode_header, make_header +from email.Parser import Parser, HeaderParser +from email.Generator import Generator, DecodedGenerator +from email.Message import Message +from email.MIMEAudio import MIMEAudio +from email.MIMEText import MIMEText +from email.MIMEImage import MIMEImage +from email.MIMEBase import MIMEBase +from email.MIMEMessage import MIMEMessage +from email.MIMEMultipart import MIMEMultipart +from email import Utils +from email import Errors +from email import Encoders +from email import Iterators +from email import base64MIME +from email import quopriMIME + +from test.test_support import findfile, run_unittest +from email.test import __file__ as landmark + + +NL = '\n' +EMPTYSTRING = '' +SPACE = ' ' + +# We don't care about DeprecationWarnings +warnings.filterwarnings('ignore', '', DeprecationWarning, __name__) + + + +def openfile(filename): + path = os.path.join(os.path.dirname(landmark), 'data', filename) + return open(path, 'rb') + + + +# Base test class +class TestEmailBase(unittest.TestCase): + if hasattr(difflib, 'ndiff'): + # Python 2.2 and beyond + def ndiffAssertEqual(self, first, second): + """Like failUnlessEqual except use ndiff for readable output.""" + if first <> second: + sfirst = str(first) + ssecond = str(second) + diff = difflib.ndiff(sfirst.splitlines(), ssecond.splitlines()) + fp = StringIO() + print >> fp, NL, NL.join(diff) + raise self.failureException, fp.getvalue() + else: + # Python 2.1 + ndiffAssertEqual = unittest.TestCase.assertEqual + + def _msgobj(self, filename): + fp = openfile(findfile(filename)) + try: + msg = email.message_from_file(fp) + finally: + fp.close() + return msg + + + +# Test various aspects of the Message class's API +class TestMessageAPI(TestEmailBase): + def test_get_all(self): + eq = self.assertEqual + msg = self._msgobj('msg_20.txt') + eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org']) + eq(msg.get_all('xx', 'n/a'), 'n/a') + + def test_getset_charset(self): + eq = self.assertEqual + msg = Message() + eq(msg.get_charset(), None) + charset = Charset('iso-8859-1') + msg.set_charset(charset) + eq(msg['mime-version'], '1.0') + eq(msg.get_type(), 'text/plain') + eq(msg['content-type'], 'text/plain; charset="iso-8859-1"') + eq(msg.get_param('charset'), 'iso-8859-1') + eq(msg['content-transfer-encoding'], 'quoted-printable') + eq(msg.get_charset().input_charset, 'iso-8859-1') + # Remove the charset + msg.set_charset(None) + eq(msg.get_charset(), None) + eq(msg['content-type'], 'text/plain') + # Try adding a charset when there's already MIME headers present + msg = Message() + msg['MIME-Version'] = '2.0' + msg['Content-Type'] = 'text/x-weird' + msg['Content-Transfer-Encoding'] = 'quinted-puntable' + msg.set_charset(charset) + eq(msg['mime-version'], '2.0') + eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"') + eq(msg['content-transfer-encoding'], 'quinted-puntable') + + def test_set_charset_from_string(self): + eq = self.assertEqual + msg = Message() + msg.set_charset('us-ascii') + eq(msg.get_charset().input_charset, 'us-ascii') + eq(msg['content-type'], 'text/plain; charset="us-ascii"') + + def test_set_payload_with_charset(self): + msg = Message() + charset = Charset('iso-8859-1') + msg.set_payload('This is a string payload', charset) + self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1') + + def test_get_charsets(self): + eq = self.assertEqual + + msg = self._msgobj('msg_08.txt') + charsets = msg.get_charsets() + eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r']) + + msg = self._msgobj('msg_09.txt') + charsets = msg.get_charsets('dingbat') + eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat', + 'koi8-r']) + + msg = self._msgobj('msg_12.txt') + charsets = msg.get_charsets() + eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2', + 'iso-8859-3', 'us-ascii', 'koi8-r']) + + def test_get_filename(self): + eq = self.assertEqual + + msg = self._msgobj('msg_04.txt') + filenames = [p.get_filename() for p in msg.get_payload()] + eq(filenames, ['msg.txt', 'msg.txt']) + + msg = self._msgobj('msg_07.txt') + subpart = msg.get_payload(1) + eq(subpart.get_filename(), 'dingusfish.gif') + + def test_get_boundary(self): + eq = self.assertEqual + msg = self._msgobj('msg_07.txt') + # No quotes! + eq(msg.get_boundary(), 'BOUNDARY') + + def test_set_boundary(self): + eq = self.assertEqual + # This one has no existing boundary parameter, but the Content-Type: + # header appears fifth. + msg = self._msgobj('msg_01.txt') + msg.set_boundary('BOUNDARY') + header, value = msg.items()[4] + eq(header.lower(), 'content-type') + eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"') + # This one has a Content-Type: header, with a boundary, stuck in the + # middle of its headers. Make sure the order is preserved; it should + # be fifth. + msg = self._msgobj('msg_04.txt') + msg.set_boundary('BOUNDARY') + header, value = msg.items()[4] + eq(header.lower(), 'content-type') + eq(value, 'multipart/mixed; boundary="BOUNDARY"') + # And this one has no Content-Type: header at all. + msg = self._msgobj('msg_03.txt') + self.assertRaises(Errors.HeaderParseError, + msg.set_boundary, 'BOUNDARY') + + def test_get_decoded_payload(self): + eq = self.assertEqual + msg = self._msgobj('msg_10.txt') + # The outer message is a multipart + eq(msg.get_payload(decode=1), None) + # Subpart 1 is 7bit encoded + eq(msg.get_payload(0).get_payload(decode=1), + 'This is a 7bit encoded message.\n') + # Subpart 2 is quopri + eq(msg.get_payload(1).get_payload(decode=1), + '\xa1This is a Quoted Printable encoded message!\n') + # Subpart 3 is base64 + eq(msg.get_payload(2).get_payload(decode=1), + 'This is a Base64 encoded message.') + # Subpart 4 has no Content-Transfer-Encoding: header. + eq(msg.get_payload(3).get_payload(decode=1), + 'This has no Content-Transfer-Encoding: header.\n') + + def test_decoded_generator(self): + eq = self.assertEqual + msg = self._msgobj('msg_07.txt') + fp = openfile('msg_17.txt') + try: + text = fp.read() + finally: + fp.close() + s = StringIO() + g = DecodedGenerator(s) + g.flatten(msg) + eq(s.getvalue(), text) + + def test__contains__(self): + msg = Message() + msg['From'] = 'Me' + msg['to'] = 'You' + # Check for case insensitivity + self.failUnless('from' in msg) + self.failUnless('From' in msg) + self.failUnless('FROM' in msg) + self.failUnless('to' in msg) + self.failUnless('To' in msg) + self.failUnless('TO' in msg) + + def test_as_string(self): + eq = self.assertEqual + msg = self._msgobj('msg_01.txt') + fp = openfile('msg_01.txt') + try: + text = fp.read() + finally: + fp.close() + eq(text, msg.as_string()) + fullrepr = str(msg) + lines = fullrepr.split('\n') + self.failUnless(lines[0].startswith('From ')) + eq(text, NL.join(lines[1:])) + + def test_bad_param(self): + msg = email.message_from_string("Content-Type: blarg; baz; boo\n") + self.assertEqual(msg.get_param('baz'), '') + + def test_missing_filename(self): + msg = email.message_from_string("From: foo\n") + self.assertEqual(msg.get_filename(), None) + + def test_bogus_filename(self): + msg = email.message_from_string( + "Content-Disposition: blarg; filename\n") + self.assertEqual(msg.get_filename(), '') + + def test_missing_boundary(self): + msg = email.message_from_string("From: foo\n") + self.assertEqual(msg.get_boundary(), None) + + def test_get_params(self): + eq = self.assertEqual + msg = email.message_from_string( + 'X-Header: foo=one; bar=two; baz=three\n') + eq(msg.get_params(header='x-header'), + [('foo', 'one'), ('bar', 'two'), ('baz', 'three')]) + msg = email.message_from_string( + 'X-Header: foo; bar=one; baz=two\n') + eq(msg.get_params(header='x-header'), + [('foo', ''), ('bar', 'one'), ('baz', 'two')]) + eq(msg.get_params(), None) + msg = email.message_from_string( + 'X-Header: foo; bar="one"; baz=two\n') + eq(msg.get_params(header='x-header'), + [('foo', ''), ('bar', 'one'), ('baz', 'two')]) + + def test_get_param_liberal(self): + msg = Message() + msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"' + self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG') + + def test_get_param(self): + eq = self.assertEqual + msg = email.message_from_string( + "X-Header: foo=one; bar=two; baz=three\n") + eq(msg.get_param('bar', header='x-header'), 'two') + eq(msg.get_param('quuz', header='x-header'), None) + eq(msg.get_param('quuz'), None) + msg = email.message_from_string( + 'X-Header: foo; bar="one"; baz=two\n') + eq(msg.get_param('foo', header='x-header'), '') + eq(msg.get_param('bar', header='x-header'), 'one') + eq(msg.get_param('baz', header='x-header'), 'two') + # XXX: We are not RFC-2045 compliant! We cannot parse: + # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"' + # msg.get_param("weird") + # yet. + + def test_get_param_funky_continuation_lines(self): + msg = self._msgobj('msg_22.txt') + self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG') + + def test_has_key(self): + msg = email.message_from_string('Header: exists') + self.failUnless(msg.has_key('header')) + self.failUnless(msg.has_key('Header')) + self.failUnless(msg.has_key('HEADER')) + self.failIf(msg.has_key('headeri')) + + def test_set_param(self): + eq = self.assertEqual + msg = Message() + msg.set_param('charset', 'iso-2022-jp') + eq(msg.get_param('charset'), 'iso-2022-jp') + msg.set_param('importance', 'high value') + eq(msg.get_param('importance'), 'high value') + eq(msg.get_param('importance', unquote=0), '"high value"') + eq(msg.get_params(), [('text/plain', ''), + ('charset', 'iso-2022-jp'), + ('importance', 'high value')]) + eq(msg.get_params(unquote=0), [('text/plain', ''), + ('charset', '"iso-2022-jp"'), + ('importance', '"high value"')]) + msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy') + eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx') + + def test_del_param(self): + eq = self.assertEqual + msg = self._msgobj('msg_05.txt') + eq(msg.get_params(), + [('multipart/report', ''), ('report-type', 'delivery-status'), + ('boundary', 'D1690A7AC1.996856090/mail.example.com')]) + old_val = msg.get_param("report-type") + msg.del_param("report-type") + eq(msg.get_params(), + [('multipart/report', ''), + ('boundary', 'D1690A7AC1.996856090/mail.example.com')]) + msg.set_param("report-type", old_val) + eq(msg.get_params(), + [('multipart/report', ''), + ('boundary', 'D1690A7AC1.996856090/mail.example.com'), + ('report-type', old_val)]) + + def test_set_type(self): + eq = self.assertEqual + msg = Message() + self.assertRaises(ValueError, msg.set_type, 'text') + msg.set_type('text/plain') + eq(msg['content-type'], 'text/plain') + msg.set_param('charset', 'us-ascii') + eq(msg['content-type'], 'text/plain; charset="us-ascii"') + msg.set_type('text/html') + eq(msg['content-type'], 'text/html; charset="us-ascii"') + + def test_get_content_type_missing(self): + msg = Message() + self.assertEqual(msg.get_content_type(), 'text/plain') + + def test_get_content_type_missing_with_default_type(self): + msg = Message() + msg.set_default_type('message/rfc822') + self.assertEqual(msg.get_content_type(), 'message/rfc822') + + def test_get_content_type_from_message_implicit(self): + msg = self._msgobj('msg_30.txt') + self.assertEqual(msg.get_payload(0).get_content_type(), + 'message/rfc822') + + def test_get_content_type_from_message_explicit(self): + msg = self._msgobj('msg_28.txt') + self.assertEqual(msg.get_payload(0).get_content_type(), + 'message/rfc822') + + def test_get_content_type_from_message_text_plain_implicit(self): + msg = self._msgobj('msg_03.txt') + self.assertEqual(msg.get_content_type(), 'text/plain') + + def test_get_content_type_from_message_text_plain_explicit(self): + msg = self._msgobj('msg_01.txt') + self.assertEqual(msg.get_content_type(), 'text/plain') + + def test_get_content_maintype_missing(self): + msg = Message() + self.assertEqual(msg.get_content_maintype(), 'text') + + def test_get_content_maintype_missing_with_default_type(self): + msg = Message() + msg.set_default_type('message/rfc822') + self.assertEqual(msg.get_content_maintype(), 'message') + + def test_get_content_maintype_from_message_implicit(self): + msg = self._msgobj('msg_30.txt') + self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message') + + def test_get_content_maintype_from_message_explicit(self): + msg = self._msgobj('msg_28.txt') + self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message') + + def test_get_content_maintype_from_message_text_plain_implicit(self): + msg = self._msgobj('msg_03.txt') + self.assertEqual(msg.get_content_maintype(), 'text') + + def test_get_content_maintype_from_message_text_plain_explicit(self): + msg = self._msgobj('msg_01.txt') + self.assertEqual(msg.get_content_maintype(), 'text') + + def test_get_content_subtype_missing(self): + msg = Message() + self.assertEqual(msg.get_content_subtype(), 'plain') + + def test_get_content_subtype_missing_with_default_type(self): + msg = Message() + msg.set_default_type('message/rfc822') + self.assertEqual(msg.get_content_subtype(), 'rfc822') + + def test_get_content_subtype_from_message_implicit(self): + msg = self._msgobj('msg_30.txt') + self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822') + + def test_get_content_subtype_from_message_explicit(self): + msg = self._msgobj('msg_28.txt') + self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822') + + def test_get_content_subtype_from_message_text_plain_implicit(self): + msg = self._msgobj('msg_03.txt') + self.assertEqual(msg.get_content_subtype(), 'plain') + + def test_get_content_subtype_from_message_text_plain_explicit(self): + msg = self._msgobj('msg_01.txt') + self.assertEqual(msg.get_content_subtype(), 'plain') + + def test_get_content_maintype_error(self): + msg = Message() + msg['Content-Type'] = 'no-slash-in-this-string' + self.assertEqual(msg.get_content_maintype(), 'text') + + def test_get_content_subtype_error(self): + msg = Message() + msg['Content-Type'] = 'no-slash-in-this-string' + self.assertEqual(msg.get_content_subtype(), 'plain') + + def test_replace_header(self): + eq = self.assertEqual + msg = Message() + msg.add_header('First', 'One') + msg.add_header('Second', 'Two') + msg.add_header('Third', 'Three') + eq(msg.keys(), ['First', 'Second', 'Third']) + eq(msg.values(), ['One', 'Two', 'Three']) + msg.replace_header('Second', 'Twenty') + eq(msg.keys(), ['First', 'Second', 'Third']) + eq(msg.values(), ['One', 'Twenty', 'Three']) + msg.add_header('First', 'Eleven') + msg.replace_header('First', 'One Hundred') + eq(msg.keys(), ['First', 'Second', 'Third', 'First']) + eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven']) + self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing') + + + +# Test the email.Encoders module +class TestEncoders(unittest.TestCase): + def test_encode_noop(self): + eq = self.assertEqual + msg = MIMEText('hello world', _encoder=Encoders.encode_noop) + eq(msg.get_payload(), 'hello world\n') + + def test_encode_7bit(self): + eq = self.assertEqual + msg = MIMEText('hello world', _encoder=Encoders.encode_7or8bit) + eq(msg.get_payload(), 'hello world\n') + eq(msg['content-transfer-encoding'], '7bit') + msg = MIMEText('hello \x7f world', _encoder=Encoders.encode_7or8bit) + eq(msg.get_payload(), 'hello \x7f world\n') + eq(msg['content-transfer-encoding'], '7bit') + + def test_encode_8bit(self): + eq = self.assertEqual + msg = MIMEText('hello \x80 world', _encoder=Encoders.encode_7or8bit) + eq(msg.get_payload(), 'hello \x80 world\n') + eq(msg['content-transfer-encoding'], '8bit') + + def test_encode_empty_payload(self): + eq = self.assertEqual + msg = Message() + msg.set_charset('us-ascii') + eq(msg['content-transfer-encoding'], '7bit') + + def test_encode_base64(self): + eq = self.assertEqual + msg = MIMEText('hello world', _encoder=Encoders.encode_base64) + eq(msg.get_payload(), 'aGVsbG8gd29ybGQK\n') + eq(msg['content-transfer-encoding'], 'base64') + + def test_encode_quoted_printable(self): + eq = self.assertEqual + msg = MIMEText('hello world', _encoder=Encoders.encode_quopri) + eq(msg.get_payload(), 'hello=20world\n') + eq(msg['content-transfer-encoding'], 'quoted-printable') + + def test_default_cte(self): + eq = self.assertEqual + msg = MIMEText('hello world') + eq(msg['content-transfer-encoding'], '7bit') + + def test_default_cte(self): + eq = self.assertEqual + # With no explicit _charset its us-ascii, and all are 7-bit + msg = MIMEText('hello world') + eq(msg['content-transfer-encoding'], '7bit') + # Similar, but with 8-bit data + msg = MIMEText('hello \xf8 world') + eq(msg['content-transfer-encoding'], '8bit') + # And now with a different charset + msg = MIMEText('hello \xf8 world', _charset='iso-8859-1') + eq(msg['content-transfer-encoding'], 'quoted-printable') + + + +# Test long header wrapping +class TestLongHeaders(TestEmailBase): + def test_split_long_continuation(self): + eq = self.ndiffAssertEqual + msg = email.message_from_string("""\ +Subject: bug demonstration +\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 +\tmore text + +test +""") + sfp = StringIO() + g = Generator(sfp) + g.flatten(msg) + eq(sfp.getvalue(), """\ +Subject: bug demonstration +\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 +\tmore text + +test +""") + + def test_another_long_almost_unsplittable_header(self): + eq = self.ndiffAssertEqual + hstr = """\ +bug demonstration +\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 +\tmore text""" + h = Header(hstr, continuation_ws='\t') + eq(h.encode(), """\ +bug demonstration +\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 +\tmore text""") + h = Header(hstr) + eq(h.encode(), """\ +bug demonstration + 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 + more text""") + + def test_long_nonstring(self): + eq = self.ndiffAssertEqual + g = Charset("iso-8859-1") + cz = Charset("iso-8859-2") + utf8 = Charset("utf-8") + g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. " + cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. " + utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8") + h = Header(g_head, g) + h.append(cz_head, cz) + h.append(utf8_head, utf8) + msg = Message() + msg['Subject'] = h + sfp = StringIO() + g = Generator(sfp) + g.flatten(msg) + eq(sfp.getvalue(), '''\ +Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_eine?= + =?iso-8859-1?q?m_Foerderband_komfortabel_den_Korridor_ent?= + =?iso-8859-1?q?lang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei?= + =?iso-8859-1?q?=2C_gegen_die_rotierenden_Klingen_bef=F6rdert=2E_?= + =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutil?= + =?iso-8859-2?q?y_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= + =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv?= + =?utf-8?b?44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?= + =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM?= + =?utf-8?b?44CB44GC44Go44Gv44Gn44Gf44KJ44KB44Gn?= + =?utf-8?b?44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGE=?= + =?utf-8?q?s_Nunstuck_git_und?= + =?utf-8?q?_Slotermeyer=3F_Ja!_Beiherhund_das_Ode?= + =?utf-8?q?r_die_Flipperwaldt?= + =?utf-8?b?IGdlcnNwdXQu44CN44Go6KiA44Gj44Gm44GE44G+44GZ44CC?= + +''') + eq(h.encode(), '''\ +=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_eine?= + =?iso-8859-1?q?m_Foerderband_komfortabel_den_Korridor_ent?= + =?iso-8859-1?q?lang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei?= + =?iso-8859-1?q?=2C_gegen_die_rotierenden_Klingen_bef=F6rdert=2E_?= + =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutil?= + =?iso-8859-2?q?y_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= + =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv?= + =?utf-8?b?44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?= + =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM?= + =?utf-8?b?44CB44GC44Go44Gv44Gn44Gf44KJ44KB44Gn?= + =?utf-8?b?44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGE=?= + =?utf-8?q?s_Nunstuck_git_und?= + =?utf-8?q?_Slotermeyer=3F_Ja!_Beiherhund_das_Ode?= + =?utf-8?q?r_die_Flipperwaldt?= + =?utf-8?b?IGdlcnNwdXQu44CN44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=''') + + def test_long_header_encode(self): + eq = self.ndiffAssertEqual + h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' + 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', + header_name='X-Foobar-Spoink-Defrobnit') + eq(h.encode(), '''\ +wasnipoop; giraffes="very-long-necked-animals"; + spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') + + def test_long_header_encode_with_tab_continuation(self): + eq = self.ndiffAssertEqual + h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' + 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', + header_name='X-Foobar-Spoink-Defrobnit', + continuation_ws='\t') + eq(h.encode(), '''\ +wasnipoop; giraffes="very-long-necked-animals"; +\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') + + def test_header_splitter(self): + eq = self.ndiffAssertEqual + msg = MIMEText('') + # It'd be great if we could use add_header() here, but that doesn't + # guarantee an order of the parameters. + msg['X-Foobar-Spoink-Defrobnit'] = ( + 'wasnipoop; giraffes="very-long-necked-animals"; ' + 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"') + sfp = StringIO() + g = Generator(sfp) + g.flatten(msg) + eq(sfp.getvalue(), '''\ +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals"; +\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey" + +''') + + def test_no_semis_header_splitter(self): + eq = self.ndiffAssertEqual + msg = Message() + msg['From'] = 'test@dom.ain' + msg['References'] = SPACE.join(['<%d@dom.ain>' % i for i in range(10)]) + msg.set_payload('Test') + sfp = StringIO() + g = Generator(sfp) + g.flatten(msg) + eq(sfp.getvalue(), """\ +From: test@dom.ain +References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain> +\t<5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain> + +Test""") + + def test_no_split_long_header(self): + eq = self.ndiffAssertEqual + hstr = 'References: ' + 'x' * 80 + h = Header(hstr, continuation_ws='\t') + eq(h.encode(), """\ +References: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""") + + def test_splitting_multiple_long_lines(self): + eq = self.ndiffAssertEqual + hstr = """\ +from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for ; Sat, 2 Feb 2002 17:00:06 -0800 (PST) +\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for ; Sat, 2 Feb 2002 17:00:06 -0800 (PST) +\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for ; Sat, 2 Feb 2002 17:00:06 -0800 (PST) +""" + h = Header(hstr, continuation_ws='\t') + eq(h.encode(), """\ +from babylon.socal-raves.org (localhost [127.0.0.1]); +\tby babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; +\tfor ; +\tSat, 2 Feb 2002 17:00:06 -0800 (PST) +\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); +\tby babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; +\tfor ; +\tSat, 2 Feb 2002 17:00:06 -0800 (PST) +\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); +\tby babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; +\tfor ; +\tSat, 2 Feb 2002 17:00:06 -0800 (PST)""") + + def test_splitting_first_line_only_is_long(self): + eq = self.ndiffAssertEqual + hstr = """\ +from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca) +\tby kronos.mems-exchange.org with esmtp (Exim 4.05) +\tid 17k4h5-00034i-00 +\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""" + h = Header(hstr, maxlinelen=78, header_name='Received', + continuation_ws='\t') + eq(h.encode(), """\ +from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] +\thelo=cthulhu.gerg.ca) +\tby kronos.mems-exchange.org with esmtp (Exim 4.05) +\tid 17k4h5-00034i-00 +\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""") + + + +# Test mangling of "From " lines in the body of a message +class TestFromMangling(unittest.TestCase): + def setUp(self): + self.msg = Message() + self.msg['From'] = 'aaa@bbb.org' + self.msg.set_payload("""\ +From the desk of A.A.A.: +Blah blah blah +""") + + def test_mangled_from(self): + s = StringIO() + g = Generator(s, mangle_from_=1) + g.flatten(self.msg) + self.assertEqual(s.getvalue(), """\ +From: aaa@bbb.org + +>From the desk of A.A.A.: +Blah blah blah +""") + + def test_dont_mangle_from(self): + s = StringIO() + g = Generator(s, mangle_from_=0) + g.flatten(self.msg) + self.assertEqual(s.getvalue(), """\ +From: aaa@bbb.org + +From the desk of A.A.A.: +Blah blah blah +""") + + + +# Test the basic MIMEAudio class +class TestMIMEAudio(unittest.TestCase): + def setUp(self): + # In Python, audiotest.au lives in Lib/test not Lib/test/data + fp = open(findfile('audiotest.au'), 'rb') + try: + self._audiodata = fp.read() + finally: + fp.close() + self._au = MIMEAudio(self._audiodata) + + def test_guess_minor_type(self): + self.assertEqual(self._au.get_type(), 'audio/basic') + + def test_encoding(self): + payload = self._au.get_payload() + self.assertEqual(base64.decodestring(payload), self._audiodata) + + def checkSetMinor(self): + au = MIMEAudio(self._audiodata, 'fish') + self.assertEqual(im.get_type(), 'audio/fish') + + def test_custom_encoder(self): + eq = self.assertEqual + def encoder(msg): + orig = msg.get_payload() + msg.set_payload(0) + msg['Content-Transfer-Encoding'] = 'broken64' + au = MIMEAudio(self._audiodata, _encoder=encoder) + eq(au.get_payload(), 0) + eq(au['content-transfer-encoding'], 'broken64') + + def test_add_header(self): + eq = self.assertEqual + unless = self.failUnless + self._au.add_header('Content-Disposition', 'attachment', + filename='audiotest.au') + eq(self._au['content-disposition'], + 'attachment; filename="audiotest.au"') + eq(self._au.get_params(header='content-disposition'), + [('attachment', ''), ('filename', 'audiotest.au')]) + eq(self._au.get_param('filename', header='content-disposition'), + 'audiotest.au') + missing = [] + eq(self._au.get_param('attachment', header='content-disposition'), '') + unless(self._au.get_param('foo', failobj=missing, + header='content-disposition') is missing) + # Try some missing stuff + unless(self._au.get_param('foobar', missing) is missing) + unless(self._au.get_param('attachment', missing, + header='foobar') is missing) + + + +# Test the basic MIMEImage class +class TestMIMEImage(unittest.TestCase): + def setUp(self): + fp = openfile('PyBanner048.gif') + try: + self._imgdata = fp.read() + finally: + fp.close() + self._im = MIMEImage(self._imgdata) + + def test_guess_minor_type(self): + self.assertEqual(self._im.get_type(), 'image/gif') + + def test_encoding(self): + payload = self._im.get_payload() + self.assertEqual(base64.decodestring(payload), self._imgdata) + + def checkSetMinor(self): + im = MIMEImage(self._imgdata, 'fish') + self.assertEqual(im.get_type(), 'image/fish') + + def test_custom_encoder(self): + eq = self.assertEqual + def encoder(msg): + orig = msg.get_payload() + msg.set_payload(0) + msg['Content-Transfer-Encoding'] = 'broken64' + im = MIMEImage(self._imgdata, _encoder=encoder) + eq(im.get_payload(), 0) + eq(im['content-transfer-encoding'], 'broken64') + + def test_add_header(self): + eq = self.assertEqual + unless = self.failUnless + self._im.add_header('Content-Disposition', 'attachment', + filename='dingusfish.gif') + eq(self._im['content-disposition'], + 'attachment; filename="dingusfish.gif"') + eq(self._im.get_params(header='content-disposition'), + [('attachment', ''), ('filename', 'dingusfish.gif')]) + eq(self._im.get_param('filename', header='content-disposition'), + 'dingusfish.gif') + missing = [] + eq(self._im.get_param('attachment', header='content-disposition'), '') + unless(self._im.get_param('foo', failobj=missing, + header='content-disposition') is missing) + # Try some missing stuff + unless(self._im.get_param('foobar', missing) is missing) + unless(self._im.get_param('attachment', missing, + header='foobar') is missing) + + + +# Test the basic MIMEText class +class TestMIMEText(unittest.TestCase): + def setUp(self): + self._msg = MIMEText('hello there') + + def test_types(self): + eq = self.assertEqual + unless = self.failUnless + eq(self._msg.get_type(), 'text/plain') + eq(self._msg.get_param('charset'), 'us-ascii') + missing = [] + unless(self._msg.get_param('foobar', missing) is missing) + unless(self._msg.get_param('charset', missing, header='foobar') + is missing) + + def test_payload(self): + self.assertEqual(self._msg.get_payload(), 'hello there\n') + self.failUnless(not self._msg.is_multipart()) + + def test_charset(self): + eq = self.assertEqual + msg = MIMEText('hello there', _charset='us-ascii') + eq(msg.get_charset().input_charset, 'us-ascii') + eq(msg['content-type'], 'text/plain; charset="us-ascii"') + + + +# Test a more complicated multipart/mixed type message +class TestMultipartMixed(unittest.TestCase): + def setUp(self): + fp = openfile('PyBanner048.gif') + try: + data = fp.read() + finally: + fp.close() + + container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY') + image = MIMEImage(data, name='dingusfish.gif') + image.add_header('content-disposition', 'attachment', + filename='dingusfish.gif') + intro = MIMEText('''\ +Hi there, + +This is the dingus fish. +''') + container.attach(intro) + container.attach(image) + container['From'] = 'Barry ' + container['To'] = 'Dingus Lovers ' + container['Subject'] = 'Here is your dingus fish' + + now = 987809702.54848599 + timetuple = time.localtime(now) + if timetuple[-1] == 0: + tzsecs = time.timezone + else: + tzsecs = time.altzone + if tzsecs > 0: + sign = '-' + else: + sign = '+' + tzoffset = ' %s%04d' % (sign, tzsecs / 36) + container['Date'] = time.strftime( + '%a, %d %b %Y %H:%M:%S', + time.localtime(now)) + tzoffset + self._msg = container + self._im = image + self._txt = intro + + def test_hierarchy(self): + # convenience + eq = self.assertEqual + unless = self.failUnless + raises = self.assertRaises + # tests + m = self._msg + unless(m.is_multipart()) + eq(m.get_type(), 'multipart/mixed') + eq(len(m.get_payload()), 2) + raises(IndexError, m.get_payload, 2) + m0 = m.get_payload(0) + m1 = m.get_payload(1) + unless(m0 is self._txt) + unless(m1 is self._im) + eq(m.get_payload(), [m0, m1]) + unless(not m0.is_multipart()) + unless(not m1.is_multipart()) + + def test_no_parts_in_a_multipart(self): + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.preamble = '' + outer.epilogue = '' + outer.set_boundary('BOUNDARY') + msg = MIMEText('hello world') + self.assertEqual(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY + + +--BOUNDARY-- +''') + + def test_one_part_in_a_multipart(self): + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.preamble = '' + outer.epilogue = '' + outer.set_boundary('BOUNDARY') + msg = MIMEText('hello world') + outer.attach(msg) + self.assertEqual(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +hello world + +--BOUNDARY-- +''') + + def test_seq_parts_in_a_multipart(self): + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.preamble = '' + outer.epilogue = '' + msg = MIMEText('hello world') + outer.attach(msg) + outer.set_boundary('BOUNDARY') + self.assertEqual(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +hello world + +--BOUNDARY-- +''') + + + +# Test some badly formatted messages +class TestNonConformant(TestEmailBase): + def test_parse_missing_minor_type(self): + eq = self.assertEqual + msg = self._msgobj('msg_14.txt') + eq(msg.get_type(), 'text') + eq(msg.get_main_type(), None) + eq(msg.get_subtype(), None) + + def test_bogus_boundary(self): + fp = openfile(findfile('msg_15.txt')) + try: + data = fp.read() + finally: + fp.close() + p = Parser(strict=1) + # Note, under a future non-strict parsing mode, this would parse the + # message into the intended message tree. + self.assertRaises(Errors.BoundaryError, p.parsestr, data) + + def test_multipart_no_boundary(self): + fp = openfile(findfile('msg_25.txt')) + try: + self.assertRaises(Errors.BoundaryError, + email.message_from_file, fp) + finally: + fp.close() + + def test_invalid_content_type(self): + eq = self.assertEqual + neq = self.ndiffAssertEqual + msg = Message() + # RFC 2045, $5.2 says invalid yields text/plain + msg['Content-Type'] = 'text' + eq(msg.get_content_maintype(), 'text') + eq(msg.get_content_subtype(), 'plain') + eq(msg.get_content_type(), 'text/plain') + # Clear the old value and try something /really/ invalid + del msg['content-type'] + msg['Content-Type'] = 'foo' + eq(msg.get_content_maintype(), 'text') + eq(msg.get_content_subtype(), 'plain') + eq(msg.get_content_type(), 'text/plain') + # Still, make sure that the message is idempotently generated + s = StringIO() + g = Generator(s) + g.flatten(msg) + neq(s.getvalue(), 'Content-Type: foo\n\n') + + def test_no_start_boundary(self): + eq = self.ndiffAssertEqual + msg = self._msgobj('msg_31.txt') + eq(msg.get_payload(), """\ +--BOUNDARY +Content-Type: text/plain + +message 1 + +--BOUNDARY +Content-Type: text/plain + +message 2 + +--BOUNDARY-- +""") + + + +# Test RFC 2047 header encoding and decoding +class TestRFC2047(unittest.TestCase): + def test_iso_8859_1(self): + eq = self.assertEqual + s = '=?iso-8859-1?q?this=20is=20some=20text?=' + eq(Utils.decode(s), 'this is some text') + s = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=' + eq(Utils.decode(s), u'Keld J\xf8rn Simonsen') + s = '=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=' \ + '=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=' + eq(Utils.decode(s), 'If you can read this you understand the example.') + s = '=?iso-8859-8?b?7eXs+SDv4SDp7Oj08A==?=' + eq(Utils.decode(s), + u'\u05dd\u05d5\u05dc\u05e9 \u05df\u05d1 \u05d9\u05dc\u05d8\u05e4\u05e0') + s = '=?iso-8859-1?q?this=20is?= =?iso-8859-1?q?some=20text?=' + eq(Utils.decode(s), u'this issome text') + s = '=?iso-8859-1?q?this=20is_?= =?iso-8859-1?q?some=20text?=' + eq(Utils.decode(s), u'this is some text') + + def test_encode_header(self): + eq = self.assertEqual + s = 'this is some text' + eq(Utils.encode(s), '=?iso-8859-1?q?this=20is=20some=20text?=') + s = 'Keld_J\xf8rn_Simonsen' + eq(Utils.encode(s), '=?iso-8859-1?q?Keld_J=F8rn_Simonsen?=') + s1 = 'If you can read this yo' + s2 = 'u understand the example.' + eq(Utils.encode(s1, encoding='b'), + '=?iso-8859-1?b?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=') + eq(Utils.encode(s2, charset='iso-8859-2', encoding='b'), + '=?iso-8859-2?b?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=') + + + +# Test the MIMEMessage class +class TestMIMEMessage(TestEmailBase): + def setUp(self): + fp = openfile('msg_11.txt') + try: + self._text = fp.read() + finally: + fp.close() + + def test_type_error(self): + self.assertRaises(TypeError, MIMEMessage, 'a plain string') + + def test_valid_argument(self): + eq = self.assertEqual + unless = self.failUnless + subject = 'A sub-message' + m = Message() + m['Subject'] = subject + r = MIMEMessage(m) + eq(r.get_type(), 'message/rfc822') + payload = r.get_payload() + unless(type(payload), ListType) + eq(len(payload), 1) + subpart = payload[0] + unless(subpart is m) + eq(subpart['subject'], subject) + + def test_bad_multipart(self): + eq = self.assertEqual + msg1 = Message() + msg1['Subject'] = 'subpart 1' + msg2 = Message() + msg2['Subject'] = 'subpart 2' + r = MIMEMessage(msg1) + self.assertRaises(Errors.MultipartConversionError, r.attach, msg2) + + def test_generate(self): + # First craft the message to be encapsulated + m = Message() + m['Subject'] = 'An enclosed message' + m.set_payload('Here is the body of the message.\n') + r = MIMEMessage(m) + r['Subject'] = 'The enclosing message' + s = StringIO() + g = Generator(s) + g.flatten(r) + self.assertEqual(s.getvalue(), """\ +Content-Type: message/rfc822 +MIME-Version: 1.0 +Subject: The enclosing message + +Subject: An enclosed message + +Here is the body of the message. +""") + + def test_parse_message_rfc822(self): + eq = self.assertEqual + unless = self.failUnless + msg = self._msgobj('msg_11.txt') + eq(msg.get_type(), 'message/rfc822') + payload = msg.get_payload() + unless(isinstance(payload, ListType)) + eq(len(payload), 1) + submsg = payload[0] + self.failUnless(isinstance(submsg, Message)) + eq(submsg['subject'], 'An enclosed message') + eq(submsg.get_payload(), 'Here is the body of the message.\n') + + def test_dsn(self): + eq = self.assertEqual + unless = self.failUnless + # msg 16 is a Delivery Status Notification, see RFC 1894 + msg = self._msgobj('msg_16.txt') + eq(msg.get_type(), 'multipart/report') + unless(msg.is_multipart()) + eq(len(msg.get_payload()), 3) + # Subpart 1 is a text/plain, human readable section + subpart = msg.get_payload(0) + eq(subpart.get_type(), 'text/plain') + eq(subpart.get_payload(), """\ +This report relates to a message you sent with the following header fields: + + Message-id: <002001c144a6$8752e060$56104586@oxy.edu> + Date: Sun, 23 Sep 2001 20:10:55 -0700 + From: "Ian T. Henry" + To: SoCal Raves + Subject: [scr] yeah for Ians!! + +Your message cannot be delivered to the following recipients: + + Recipient address: jangel1@cougar.noc.ucla.edu + Reason: recipient reached disk quota + +""") + # Subpart 2 contains the machine parsable DSN information. It + # consists of two blocks of headers, represented by two nested Message + # objects. + subpart = msg.get_payload(1) + eq(subpart.get_type(), 'message/delivery-status') + eq(len(subpart.get_payload()), 2) + # message/delivery-status should treat each block as a bunch of + # headers, i.e. a bunch of Message objects. + dsn1 = subpart.get_payload(0) + unless(isinstance(dsn1, Message)) + eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu') + eq(dsn1.get_param('dns', header='reporting-mta'), '') + # Try a missing one + eq(dsn1.get_param('nsd', header='reporting-mta'), None) + dsn2 = subpart.get_payload(1) + unless(isinstance(dsn2, Message)) + eq(dsn2['action'], 'failed') + eq(dsn2.get_params(header='original-recipient'), + [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')]) + eq(dsn2.get_param('rfc822', header='final-recipient'), '') + # Subpart 3 is the original message + subpart = msg.get_payload(2) + eq(subpart.get_type(), 'message/rfc822') + payload = subpart.get_payload() + unless(isinstance(payload, ListType)) + eq(len(payload), 1) + subsubpart = payload[0] + unless(isinstance(subsubpart, Message)) + eq(subsubpart.get_type(), 'text/plain') + eq(subsubpart['message-id'], + '<002001c144a6$8752e060$56104586@oxy.edu>') + + def test_epilogue(self): + fp = openfile('msg_21.txt') + try: + text = fp.read() + finally: + fp.close() + msg = Message() + msg['From'] = 'aperson@dom.ain' + msg['To'] = 'bperson@dom.ain' + msg['Subject'] = 'Test' + msg.preamble = 'MIME message\n' + msg.epilogue = 'End of MIME message\n' + msg1 = MIMEText('One') + msg2 = MIMEText('Two') + msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY') + msg.attach(msg1) + msg.attach(msg2) + sfp = StringIO() + g = Generator(sfp) + g.flatten(msg) + self.assertEqual(sfp.getvalue(), text) + + def test_default_type(self): + eq = self.assertEqual + fp = openfile('msg_30.txt') + try: + msg = email.message_from_file(fp) + finally: + fp.close() + container1 = msg.get_payload(0) + eq(container1.get_default_type(), 'message/rfc822') + eq(container1.get_type(), None) + container2 = msg.get_payload(1) + eq(container2.get_default_type(), 'message/rfc822') + eq(container2.get_type(), None) + container1a = container1.get_payload(0) + eq(container1a.get_default_type(), 'text/plain') + eq(container1a.get_type(), 'text/plain') + container2a = container2.get_payload(0) + eq(container2a.get_default_type(), 'text/plain') + eq(container2a.get_type(), 'text/plain') + + def test_default_type_with_explicit_container_type(self): + eq = self.assertEqual + fp = openfile('msg_28.txt') + try: + msg = email.message_from_file(fp) + finally: + fp.close() + container1 = msg.get_payload(0) + eq(container1.get_default_type(), 'message/rfc822') + eq(container1.get_type(), 'message/rfc822') + container2 = msg.get_payload(1) + eq(container2.get_default_type(), 'message/rfc822') + eq(container2.get_type(), 'message/rfc822') + container1a = container1.get_payload(0) + eq(container1a.get_default_type(), 'text/plain') + eq(container1a.get_type(), 'text/plain') + container2a = container2.get_payload(0) + eq(container2a.get_default_type(), 'text/plain') + eq(container2a.get_type(), 'text/plain') + + def test_default_type_non_parsed(self): + eq = self.assertEqual + neq = self.ndiffAssertEqual + # Set up container + container = MIMEMultipart('digest', 'BOUNDARY') + container.epilogue = '\n' + # Set up subparts + subpart1a = MIMEText('message 1\n') + subpart2a = MIMEText('message 2\n') + subpart1 = MIMEMessage(subpart1a) + subpart2 = MIMEMessage(subpart2a) + container.attach(subpart1) + container.attach(subpart2) + eq(subpart1.get_type(), 'message/rfc822') + eq(subpart1.get_default_type(), 'message/rfc822') + eq(subpart2.get_type(), 'message/rfc822') + eq(subpart2.get_default_type(), 'message/rfc822') + neq(container.as_string(0), '''\ +Content-Type: multipart/digest; boundary="BOUNDARY" +MIME-Version: 1.0 + +--BOUNDARY +Content-Type: message/rfc822 +MIME-Version: 1.0 + +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +message 1 + +--BOUNDARY +Content-Type: message/rfc822 +MIME-Version: 1.0 + +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +message 2 + +--BOUNDARY-- +''') + del subpart1['content-type'] + del subpart1['mime-version'] + del subpart2['content-type'] + del subpart2['mime-version'] + eq(subpart1.get_type(), None) + eq(subpart1.get_default_type(), 'message/rfc822') + eq(subpart2.get_type(), None) + eq(subpart2.get_default_type(), 'message/rfc822') + neq(container.as_string(0), '''\ +Content-Type: multipart/digest; boundary="BOUNDARY" +MIME-Version: 1.0 + +--BOUNDARY + +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +message 1 + +--BOUNDARY + +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +message 2 + +--BOUNDARY-- +''') + + + +# A general test of parser->model->generator idempotency. IOW, read a message +# in, parse it into a message object tree, then without touching the tree, +# regenerate the plain text. The original text and the transformed text +# should be identical. Note: that we ignore the Unix-From since that may +# contain a changed date. +class TestIdempotent(TestEmailBase): + def _msgobj(self, filename): + fp = openfile(filename) + try: + data = fp.read() + finally: + fp.close() + msg = email.message_from_string(data) + return msg, data + + def _idempotent(self, msg, text): + eq = self.ndiffAssertEqual + s = StringIO() + g = Generator(s, maxheaderlen=0) + g.flatten(msg) + eq(text, s.getvalue()) + + def test_parse_text_message(self): + eq = self.assertEquals + msg, text = self._msgobj('msg_01.txt') + eq(msg.get_type(), 'text/plain') + eq(msg.get_main_type(), 'text') + eq(msg.get_subtype(), 'plain') + eq(msg.get_params()[1], ('charset', 'us-ascii')) + eq(msg.get_param('charset'), 'us-ascii') + eq(msg.preamble, None) + eq(msg.epilogue, None) + self._idempotent(msg, text) + + def test_parse_untyped_message(self): + eq = self.assertEquals + msg, text = self._msgobj('msg_03.txt') + eq(msg.get_type(), None) + eq(msg.get_params(), None) + eq(msg.get_param('charset'), None) + self._idempotent(msg, text) + + def test_simple_multipart(self): + msg, text = self._msgobj('msg_04.txt') + self._idempotent(msg, text) + + def test_MIME_digest(self): + msg, text = self._msgobj('msg_02.txt') + self._idempotent(msg, text) + + def test_long_header(self): + msg, text = self._msgobj('msg_27.txt') + self._idempotent(msg, text) + + def test_MIME_digest_with_part_headers(self): + msg, text = self._msgobj('msg_28.txt') + self._idempotent(msg, text) + + def test_mixed_with_image(self): + msg, text = self._msgobj('msg_06.txt') + self._idempotent(msg, text) + + def test_multipart_report(self): + msg, text = self._msgobj('msg_05.txt') + self._idempotent(msg, text) + + def test_dsn(self): + msg, text = self._msgobj('msg_16.txt') + self._idempotent(msg, text) + + def test_preamble_epilogue(self): + msg, text = self._msgobj('msg_21.txt') + self._idempotent(msg, text) + + def test_multipart_one_part(self): + msg, text = self._msgobj('msg_23.txt') + self._idempotent(msg, text) + + def test_multipart_no_parts(self): + msg, text = self._msgobj('msg_24.txt') + self._idempotent(msg, text) + + def test_no_start_boundary(self): + msg, text = self._msgobj('msg_31.txt') + self._idempotent(msg, text) + + def test_rfc2231_charset(self): + msg, text = self._msgobj('msg_32.txt') + self._idempotent(msg, text) + + def test_more_rfc2231_parameters(self): + msg, text = self._msgobj('msg_33.txt') + self._idempotent(msg, text) + + def test_content_type(self): + eq = self.assertEquals + unless = self.failUnless + # Get a message object and reset the seek pointer for other tests + msg, text = self._msgobj('msg_05.txt') + eq(msg.get_type(), 'multipart/report') + # Test the Content-Type: parameters + params = {} + for pk, pv in msg.get_params(): + params[pk] = pv + eq(params['report-type'], 'delivery-status') + eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com') + eq(msg.preamble, 'This is a MIME-encapsulated message.\n\n') + eq(msg.epilogue, '\n\n') + eq(len(msg.get_payload()), 3) + # Make sure the subparts are what we expect + msg1 = msg.get_payload(0) + eq(msg1.get_type(), 'text/plain') + eq(msg1.get_payload(), 'Yadda yadda yadda\n') + msg2 = msg.get_payload(1) + eq(msg2.get_type(), None) + eq(msg2.get_payload(), 'Yadda yadda yadda\n') + msg3 = msg.get_payload(2) + eq(msg3.get_type(), 'message/rfc822') + self.failUnless(isinstance(msg3, Message)) + payload = msg3.get_payload() + unless(isinstance(payload, ListType)) + eq(len(payload), 1) + msg4 = payload[0] + unless(isinstance(msg4, Message)) + eq(msg4.get_payload(), 'Yadda yadda yadda\n') + + def test_parser(self): + eq = self.assertEquals + unless = self.failUnless + msg, text = self._msgobj('msg_06.txt') + # Check some of the outer headers + eq(msg.get_type(), 'message/rfc822') + # Make sure the payload is a list of exactly one sub-Message, and that + # that submessage has a type of text/plain + payload = msg.get_payload() + unless(isinstance(payload, ListType)) + eq(len(payload), 1) + msg1 = payload[0] + self.failUnless(isinstance(msg1, Message)) + eq(msg1.get_type(), 'text/plain') + self.failUnless(isinstance(msg1.get_payload(), StringType)) + eq(msg1.get_payload(), '\n') + + + +# Test various other bits of the package's functionality +class TestMiscellaneous(unittest.TestCase): + def test_message_from_string(self): + fp = openfile('msg_01.txt') + try: + text = fp.read() + finally: + fp.close() + msg = email.message_from_string(text) + s = StringIO() + # Don't wrap/continue long headers since we're trying to test + # idempotency. + g = Generator(s, maxheaderlen=0) + g.flatten(msg) + self.assertEqual(text, s.getvalue()) + + def test_message_from_file(self): + fp = openfile('msg_01.txt') + try: + text = fp.read() + fp.seek(0) + msg = email.message_from_file(fp) + s = StringIO() + # Don't wrap/continue long headers since we're trying to test + # idempotency. + g = Generator(s, maxheaderlen=0) + g.flatten(msg) + self.assertEqual(text, s.getvalue()) + finally: + fp.close() + + def test_message_from_string_with_class(self): + unless = self.failUnless + fp = openfile('msg_01.txt') + try: + text = fp.read() + finally: + fp.close() + # Create a subclass + class MyMessage(Message): + pass + + msg = email.message_from_string(text, MyMessage) + unless(isinstance(msg, MyMessage)) + # Try something more complicated + fp = openfile('msg_02.txt') + try: + text = fp.read() + finally: + fp.close() + msg = email.message_from_string(text, MyMessage) + for subpart in msg.walk(): + unless(isinstance(subpart, MyMessage)) + + def test_message_from_file_with_class(self): + unless = self.failUnless + # Create a subclass + class MyMessage(Message): + pass + + fp = openfile('msg_01.txt') + try: + msg = email.message_from_file(fp, MyMessage) + finally: + fp.close() + unless(isinstance(msg, MyMessage)) + # Try something more complicated + fp = openfile('msg_02.txt') + try: + msg = email.message_from_file(fp, MyMessage) + finally: + fp.close() + for subpart in msg.walk(): + unless(isinstance(subpart, MyMessage)) + + def test__all__(self): + module = __import__('email') + all = module.__all__ + all.sort() + self.assertEqual(all, ['Charset', 'Encoders', 'Errors', 'Generator', + 'Header', 'Iterators', 'MIMEAudio', 'MIMEBase', + 'MIMEImage', 'MIMEMessage', 'MIMEMultipart', + 'MIMENonMultipart', 'MIMEText', 'Message', + 'Parser', 'Utils', 'base64MIME', + 'message_from_file', 'message_from_string', + 'quopriMIME']) + + def test_formatdate(self): + now = time.time() + self.assertEqual(Utils.parsedate(Utils.formatdate(now))[:6], + time.gmtime(now)[:6]) + + def test_formatdate_localtime(self): + now = time.time() + self.assertEqual( + Utils.parsedate(Utils.formatdate(now, localtime=1))[:6], + time.localtime(now)[:6]) + + def test_parsedate_none(self): + self.assertEqual(Utils.parsedate(''), None) + + def test_parseaddr_empty(self): + self.assertEqual(Utils.parseaddr('<>'), ('', '')) + self.assertEqual(Utils.formataddr(Utils.parseaddr('<>')), '') + + def test_noquote_dump(self): + self.assertEqual( + Utils.formataddr(('A Silly Person', 'person@dom.ain')), + 'A Silly Person ') + + def test_escape_dump(self): + self.assertEqual( + Utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')), + r'"A \(Very\) Silly Person" ') + a = r'A \(Special\) Person' + b = 'person@dom.ain' + self.assertEqual(Utils.parseaddr(Utils.formataddr((a, b))), (a, b)) + + def test_quote_dump(self): + self.assertEqual( + Utils.formataddr(('A Silly; Person', 'person@dom.ain')), + r'"A Silly; Person" ') + + def test_fix_eols(self): + eq = self.assertEqual + eq(Utils.fix_eols('hello'), 'hello') + eq(Utils.fix_eols('hello\n'), 'hello\r\n') + eq(Utils.fix_eols('hello\r'), 'hello\r\n') + eq(Utils.fix_eols('hello\r\n'), 'hello\r\n') + eq(Utils.fix_eols('hello\n\r'), 'hello\r\n\r\n') + + def test_charset_richcomparisons(self): + eq = self.assertEqual + ne = self.failIfEqual + cset1 = Charset() + cset2 = Charset() + eq(cset1, 'us-ascii') + eq(cset1, 'US-ASCII') + eq(cset1, 'Us-AsCiI') + eq('us-ascii', cset1) + eq('US-ASCII', cset1) + eq('Us-AsCiI', cset1) + ne(cset1, 'usascii') + ne(cset1, 'USASCII') + ne(cset1, 'UsAsCiI') + ne('usascii', cset1) + ne('USASCII', cset1) + ne('UsAsCiI', cset1) + eq(cset1, cset2) + eq(cset2, cset1) + + def test_getaddresses(self): + eq = self.assertEqual + eq(Utils.getaddresses(['aperson@dom.ain (Al Person)', + 'Bud Person ']), + [('Al Person', 'aperson@dom.ain'), + ('Bud Person', 'bperson@dom.ain')]) + + def test_utils_quote_unquote(self): + eq = self.assertEqual + msg = Message() + msg.add_header('content-disposition', 'attachment', + filename='foo\\wacky"name') + eq(msg.get_filename(), 'foo\\wacky"name') + + + +# Test the iterator/generators +class TestIterators(TestEmailBase): + def test_body_line_iterator(self): + eq = self.assertEqual + # First a simple non-multipart message + msg = self._msgobj('msg_01.txt') + it = Iterators.body_line_iterator(msg) + lines = list(it) + eq(len(lines), 6) + eq(EMPTYSTRING.join(lines), msg.get_payload()) + # Now a more complicated multipart + msg = self._msgobj('msg_02.txt') + it = Iterators.body_line_iterator(msg) + lines = list(it) + eq(len(lines), 43) + fp = openfile('msg_19.txt') + try: + eq(EMPTYSTRING.join(lines), fp.read()) + finally: + fp.close() + + def test_typed_subpart_iterator(self): + eq = self.assertEqual + msg = self._msgobj('msg_04.txt') + it = Iterators.typed_subpart_iterator(msg, 'text') + lines = [] + subparts = 0 + for subpart in it: + subparts += 1 + lines.append(subpart.get_payload()) + eq(subparts, 2) + eq(EMPTYSTRING.join(lines), """\ +a simple kind of mirror +to reflect upon our own +a simple kind of mirror +to reflect upon our own +""") + + def test_typed_subpart_iterator_default_type(self): + eq = self.assertEqual + msg = self._msgobj('msg_03.txt') + it = Iterators.typed_subpart_iterator(msg, 'text', 'plain') + lines = [] + subparts = 0 + for subpart in it: + subparts += 1 + lines.append(subpart.get_payload()) + eq(subparts, 1) + eq(EMPTYSTRING.join(lines), """\ + +Hi, + +Do you like this message? + +-Me +""") + + + +class TestParsers(TestEmailBase): + def test_header_parser(self): + eq = self.assertEqual + # Parse only the headers of a complex multipart MIME document + fp = openfile('msg_02.txt') + try: + msg = HeaderParser().parse(fp) + finally: + fp.close() + eq(msg['from'], 'ppp-request@zzz.org') + eq(msg['to'], 'ppp@zzz.org') + eq(msg.get_type(), 'multipart/mixed') + eq(msg.is_multipart(), 0) + self.failUnless(isinstance(msg.get_payload(), StringType)) + + def test_whitespace_continuaton(self): + eq = self.assertEqual + # This message contains a line after the Subject: header that has only + # whitespace, but it is not empty! + msg = email.message_from_string("""\ +From: aperson@dom.ain +To: bperson@dom.ain +Subject: the next line has a space on it +\x20 +Date: Mon, 8 Apr 2002 15:09:19 -0400 +Message-ID: spam + +Here's the message body +""") + eq(msg['subject'], 'the next line has a space on it\n ') + eq(msg['message-id'], 'spam') + eq(msg.get_payload(), "Here's the message body\n") + + def test_crlf_separation(self): + eq = self.assertEqual + fp = openfile('msg_26.txt') + try: + msg = Parser().parse(fp) + finally: + fp.close() + eq(len(msg.get_payload()), 2) + part1 = msg.get_payload(0) + eq(part1.get_type(), 'text/plain') + eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n') + part2 = msg.get_payload(1) + eq(part2.get_type(), 'application/riscos') + + def test_multipart_digest_with_extra_mime_headers(self): + eq = self.assertEqual + neq = self.ndiffAssertEqual + fp = openfile('msg_28.txt') + try: + msg = email.message_from_file(fp) + finally: + fp.close() + # Structure is: + # multipart/digest + # message/rfc822 + # text/plain + # message/rfc822 + # text/plain + eq(msg.is_multipart(), 1) + eq(len(msg.get_payload()), 2) + part1 = msg.get_payload(0) + eq(part1.get_type(), 'message/rfc822') + eq(part1.is_multipart(), 1) + eq(len(part1.get_payload()), 1) + part1a = part1.get_payload(0) + eq(part1a.is_multipart(), 0) + eq(part1a.get_type(), 'text/plain') + neq(part1a.get_payload(), 'message 1\n') + # next message/rfc822 + part2 = msg.get_payload(1) + eq(part2.get_type(), 'message/rfc822') + eq(part2.is_multipart(), 1) + eq(len(part2.get_payload()), 1) + part2a = part2.get_payload(0) + eq(part2a.is_multipart(), 0) + eq(part2a.get_type(), 'text/plain') + neq(part2a.get_payload(), 'message 2\n') + + def test_three_lines(self): + # A bug report by Andrew McNamara + lines = ['From: Andrew Person +Delivered-To: bbb@zzz.org +Received: by mail.zzz.org (Postfix, from userid 889) +\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Message-ID: <15090.61304.110929.45684@aaa.zzz.org> +From: bbb@ddd.com (John X. Doe) +To: bbb@zzz.org +Subject: This is a test message +Date: Fri, 4 May 2001 14:05:44 -0400 +Content-Type: text/plain; charset=us-ascii; +\ttitle*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21" + + +Hi, + +Do you like this message? + +-Me +""") + + def test_del_param(self): + eq = self.ndiffAssertEqual + msg = self._msgobj('msg_01.txt') + msg.set_param('foo', 'bar', charset='us-ascii', language='en') + msg.set_param('title', 'This is even more ***fun*** isn\'t it!', + charset='us-ascii', language='en') + msg.del_param('foo', header='Content-Type') + eq(msg.as_string(), """\ +Return-Path: +Delivered-To: bbb@zzz.org +Received: by mail.zzz.org (Postfix, from userid 889) +\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Message-ID: <15090.61304.110929.45684@aaa.zzz.org> +From: bbb@ddd.com (John X. Doe) +To: bbb@zzz.org +Subject: This is a test message +Date: Fri, 4 May 2001 14:05:44 -0400 +Content-Type: text/plain; charset="us-ascii"; +\ttitle*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21" + + +Hi, + +Do you like this message? + +-Me +""") + + def test_rfc2231_get_content_charset(self): + eq = self.assertEqual + msg = self._msgobj('msg_32.txt') + eq(msg.get_content_charset(), 'us-ascii') + + + +def _testclasses(): + mod = sys.modules[__name__] + return [getattr(mod, name) for name in dir(mod) if name.startswith('Test')] + + +def suite(): + suite = unittest.TestSuite() + for testclass in _testclasses(): + suite.addTest(unittest.makeSuite(testclass)) + return suite + + +def test_main(): + for testclass in _testclasses(): + run_unittest(testclass) + + + +if __name__ == '__main__': + unittest.main(defaultTest='suite') -- cgit v0.12