From bb113867305f8ab70947bffb77961a60d10730dc Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Sun, 3 Oct 2004 03:16:19 +0000 Subject: Big email 3.0 API changes, with updated unit tests and documentation. Briefly (from the NEWS file): - Updates for the email package: + All deprecated APIs that in email 2.x issued warnings have been removed: _encoder argument to the MIMEText constructor, Message.add_payload(), Utils.dump_address_pair(), Utils.decode(), Utils.encode() + New deprecations: Generator.__call__(), Message.get_type(), Message.get_main_type(), Message.get_subtype(), the 'strict' argument to the Parser constructor. These will be removed in email 3.1. + Support for Python earlier than 2.3 has been removed (see PEP 291). + All defect classes have been renamed to end in 'Defect'. + Some FeedParser fixes; also a MultipartInvariantViolationDefect will be added to messages that claim to be multipart but really aren't. + Updates to documentation. --- Doc/lib/email.tex | 47 +++++++++--- Doc/lib/emailencoders.tex | 10 +-- Doc/lib/emailexc.tex | 33 +++++++++ Doc/lib/emailmessage.tex | 46 +++++------- Doc/lib/emailmimebase.tex | 9 ++- Doc/lib/emailparser.tex | 96 +++++++++++++++++++------ Doc/lib/emailutil.tex | 35 +++++---- Lib/email/Charset.py | 24 ++----- Lib/email/Encoders.py | 35 ++------- Lib/email/Errors.py | 18 +++-- Lib/email/FeedParser.py | 22 +++--- Lib/email/Generator.py | 30 ++++---- Lib/email/Header.py | 3 +- Lib/email/Iterators.py | 6 +- Lib/email/MIMEAudio.py | 7 +- Lib/email/MIMEBase.py | 8 +-- Lib/email/MIMEImage.py | 10 +-- Lib/email/MIMEMessage.py | 10 +-- Lib/email/MIMEMultipart.py | 6 +- Lib/email/MIMENonMultipart.py | 8 +-- Lib/email/MIMEText.py | 27 ++----- Lib/email/Message.py | 89 +++++++++-------------- Lib/email/Parser.py | 34 ++++++--- Lib/email/Utils.py | 64 +++++------------ Lib/email/__init__.py | 17 ++--- Lib/email/_parseaddr.py | 3 +- Lib/email/base64MIME.py | 5 +- Lib/email/quopriMIME.py | 7 +- Lib/email/test/__init__.py | 2 - 
Lib/email/test/data/msg_41.txt | 8 +++ Lib/email/test/test_email.py | 158 +++++++++++------------------------------ Misc/NEWS | 13 ++++ 32 files changed, 438 insertions(+), 452 deletions(-) create mode 100644 Lib/email/test/data/msg_41.txt diff --git a/Doc/lib/email.tex b/Doc/lib/email.tex index debed70..56affa5 100644 --- a/Doc/lib/email.tex +++ b/Doc/lib/email.tex @@ -1,5 +1,5 @@ -% Copyright (C) 2001,2002 Python Software Foundation -% Author: barry@zope.com (Barry Warsaw) +% Copyright (C) 2001-2004 Python Software Foundation +% Author: barry@python.org (Barry Warsaw) \section{\module{email} --- An email and MIME handling package} @@ -7,8 +7,8 @@ \declaremodule{standard}{email} \modulesynopsis{Package supporting the parsing, manipulating, and generating email messages, including MIME documents.} -\moduleauthor{Barry A. Warsaw}{barry@zope.com} -\sectionauthor{Barry A. Warsaw}{barry@zope.com} +\moduleauthor{Barry A. Warsaw}{barry@python.org} +\sectionauthor{Barry A. Warsaw}{barry@python.org} \versionadded{2.2} @@ -22,7 +22,7 @@ sending of email messages to SMTP (\rfc{2821}) servers; that is the function of the \refmodule{smtplib} module. The \module{email} package attempts to be as RFC-compliant as possible, supporting in addition to \rfc{2822}, such MIME-related RFCs as -\rfc{2045}-\rfc{2047}, and \rfc{2231}. +\rfc{2045}, \rfc{2046}, \rfc{2047}, and \rfc{2231}. The primary distinguishing feature of the \module{email} package is that it splits the parsing and generating of email messages from the @@ -79,7 +79,7 @@ package, a section on differences and porting is provided. \subsection{Encoders} \input{emailencoders} -\subsection{Exception classes} +\subsection{Exception and Defect classes} \input{emailexc} \subsection{Miscellaneous utilities} @@ -88,14 +88,41 @@ package, a section on differences and porting is provided. 
\subsection{Iterators} \input{emailiter} -\subsection{Differences from \module{email} v1 (up to Python 2.2.1)} +\subsection{Package History} Version 1 of the \module{email} package was bundled with Python releases up to Python 2.2.1. Version 2 was developed for the Python 2.3 release, and backported to Python 2.2.2. It was also available as -a separate distutils based package. \module{email} version 2 is -almost entirely backward compatible with version 1, with the -following differences: +a separate distutils-based package, and is compatible back to Python 2.1. + +\module{email} version 3.0 was released with Python 2.4 and as a separate +distutils-based package. It is compatible back to Python 2.3. + +Here are the differences between \module{email} version 3 and version 2: + +\begin{itemize} +\item The \class{FeedParser} class was introduced, and the \class{Parser} + class was implemented in terms of the \class{FeedParser}. All parsing + therefore is non-strict, and parsing will make a best effort never to + raise an exception. Problems found while parsing messages are stored in + the message's \var{defects} attribute. + +\item All aspects of the API which raised \exception{DeprecationWarning}s in + version 2 have been removed. These include the \var{_encoder} argument + to the \class{MIMEText} constructor, the \method{Message.add_payload()} + method, the \function{Utils.dump_address_pair()} function, and the + functions \function{Utils.decode()} and \function{Utils.encode()}. + +\item New \exception{DeprecationWarning}s have been added to: + \method{Generator.__call__()}, \method{Message.get_type()}, + \method{Message.get_main_type()}, \method{Message.get_subtype()}, and + the \var{strict} argument to the \class{Parser} class. These are + expected to be removed in email 3.1. + +\item Support for Pythons earlier than 2.3 has been removed. 
+\end{itemize} + +Here are the differences between \module{email} version 2 and version 1: \begin{itemize} \item The \module{email.Header} and \module{email.Charset} modules diff --git a/Doc/lib/emailencoders.tex b/Doc/lib/emailencoders.tex index cd54d68..a49e04d 100644 --- a/Doc/lib/emailencoders.tex +++ b/Doc/lib/emailencoders.tex @@ -8,11 +8,11 @@ type messages containing binary data. The \module{email} package provides some convenient encodings in its \module{Encoders} module. These encoders are actually used by the -\class{MIMEImage} and \class{MIMEText} class constructors to provide default -encodings. All encoder functions take exactly one argument, the -message object to encode. They usually extract the payload, encode -it, and reset the payload to this newly encoded value. They should also -set the \mailheader{Content-Transfer-Encoding} header as appropriate. +\class{MIMEAudio} and \class{MIMEImage} class constructors to provide default +encodings. All encoder functions take exactly one argument, the message +object to encode. They usually extract the payload, encode it, and reset the +payload to this newly encoded value. They should also set the +\mailheader{Content-Transfer-Encoding} header as appropriate. Here are the encoding functions provided: diff --git a/Doc/lib/emailexc.tex b/Doc/lib/emailexc.tex index 824a276..6ac0889 100644 --- a/Doc/lib/emailexc.tex +++ b/Doc/lib/emailexc.tex @@ -52,3 +52,36 @@ rarely raised in practice. However the exception may also be raised if the \method{attach()} method is called on an instance of a class derived from \class{MIMENonMultipart} (e.g. \class{MIMEImage}). \end{excclassdesc} + +Here's the list of the defects that the \class{FeedParser} can find while +parsing messages. 
Note that the defects are added to the message where the +problem was found, so for example, if a message nested inside a +\mimetype{multipart/alternative} had a malformed header, that nested message +object would have a defect, but the containing messages would not. + +All defect classes are subclassed from \class{email.Errors.MessageDefect}, but +this class is \emph{not} an exception! + +\versionadded[All the defect classes were added]{2.4} + +\begin{itemize} +\item \class{NoBoundaryInMultipartDefect} -- A message claimed to be a + multipart, but had no \mimetype{boundary} parameter. + +\item \class{StartBoundaryNotFoundDefect} -- The start boundary claimed in the + \mailheader{Content-Type} header was never found. + +\item \class{FirstHeaderLineIsContinuationDefect} -- The message had a + continuation line as its first header line. + +\item \class{MisplacedEnvelopeHeaderDefect} -- A ``Unix From'' header was found + in the middle of a header block. + +\item \class{MalformedHeaderDefect} -- A header was found that was missing a + colon, or was otherwise malformed. + +\item \class{MultipartInvariantViolationDefect} -- A message claimed to be a + \mimetype{multipart}, but no subparts were found. Note that when a + message has this defect, its \method{is_multipart()} method may return + false even though its content type claims to be \mimetype{multipart}. +\end{itemize} diff --git a/Doc/lib/emailmessage.tex b/Doc/lib/emailmessage.tex index 1943273..f732054 100644 --- a/Doc/lib/emailmessage.tex +++ b/Doc/lib/emailmessage.tex @@ -359,13 +359,16 @@ the form \code{(CHARSET, LANGUAGE, VALUE)}. Note that both \code{CHARSET} and \code{VALUE} to be encoded in the \code{us-ascii} charset. You can usually ignore \code{LANGUAGE}. 
-Your application should be prepared to deal with 3-tuple return -values, and can convert the parameter to a Unicode string like so: +If your application doesn't care whether the parameter was encoded as in +\rfc{2231}, you can collapse the parameter value by calling +\function{email.Utils.collapse_rfc2231_value()}, passing in the return value +from \method{get_param()}. This will return a suitably decoded Unicode string +when the value is a tuple, or the original string unquoted if it isn't. For +example: \begin{verbatim} -param = msg.get_param('foo') -if isinstance(param, tuple): - param = unicode(param[2], param[0] or 'us-ascii') +rawparam = msg.get_param('foo') +param = email.Utils.collapse_rfc2231_value(rawparam) \end{verbatim} In any case, the parameter value (either the returned string, or the @@ -549,32 +552,21 @@ newline get printed after your closing \mimetype{multipart} boundary, set the \var{epilogue} to the empty string. \end{datadesc} -\subsubsection{Deprecated methods} - -The following methods are deprecated in \module{email} version 2. -They are documented here for completeness. +\begin{datadesc}{defects} +The \var{defects} attribute contains a list of all the problems found when +parsing this message. See \refmodule{email.Errors} for a detailed description +of the possible parsing defects. -\begin{methoddesc}[Message]{add_payload}{payload} -Add \var{payload} to the message object's existing payload. If, prior -to calling this method, the object's payload was \code{None} -(i.e. never before set), then after this method is called, the payload -will be the argument \var{payload}. +\versionadded{2.4} +\end{datadesc} -If the object's payload was already a list -(i.e. \method{is_multipart()} returns \code{True}), then \var{payload} is -appended to the end of the existing payload list. 
+\subsubsection{Deprecated methods} -For any other type of existing payload, \method{add_payload()} will -transform the new payload into a list consisting of the old payload -and \var{payload}, but only if the document is already a MIME -multipart document. This condition is satisfied if the message's -\mailheader{Content-Type} header's main type is either -\mimetype{multipart}, or there is no \mailheader{Content-Type} -header. In any other situation, -\exception{MultipartConversionError} is raised. +\versionchanged[The \method{add_payload()} method was removed; use the +\method{attach()} method instead]{2.4} -\deprecated{2.2.2}{Use the \method{attach()} method instead.} -\end{methoddesc} +The following methods are deprecated. They are documented here for +completeness. \begin{methoddesc}[Message]{get_type}{\optional{failobj}} Return the message's content type, as a string of the form diff --git a/Doc/lib/emailmimebase.tex b/Doc/lib/emailmimebase.tex index 3318d6a..070c9a2 100644 --- a/Doc/lib/emailmimebase.tex +++ b/Doc/lib/emailmimebase.tex @@ -142,9 +142,7 @@ Optional \var{_subtype} sets the subtype of the message; it defaults to \mimetype{rfc822}. \end{classdesc} -\begin{classdesc}{MIMEText}{_text\optional{, _subtype\optional{, - _charset\optional{, _encoder}}}} - +\begin{classdesc}{MIMEText}{_text\optional{, _subtype\optional{, _charset}}} A subclass of \class{MIMENonMultipart}, the \class{MIMEText} class is used to create MIME objects of major type \mimetype{text}. \var{_text} is the string for the payload. \var{_subtype} is the @@ -153,6 +151,7 @@ character set of the text and is passed as a parameter to the \class{MIMENonMultipart} constructor; it defaults to \code{us-ascii}. No guessing or encoding is performed on the text data. -\deprecated{2.2.2}{The \var{_encoding} argument has been deprecated. -Encoding now happens implicitly based on the \var{_charset} argument.} +\versionchanged[The previously deprecated \var{_encoder} argument has +been removed. 
Encoding happens implicitly based on the \var{_charset} +argument]{2.4} \end{classdesc} diff --git a/Doc/lib/emailparser.tex b/Doc/lib/emailparser.tex index 1e8597c..5fac92f 100644 --- a/Doc/lib/emailparser.tex +++ b/Doc/lib/emailparser.tex @@ -18,29 +18,79 @@ messages, the root object will return \code{True} from its \method{is_multipart()} method, and the subparts can be accessed via the \method{get_payload()} and \method{walk()} methods. +There are actually two parser interfaces available for use, the classic +\class{Parser} API and the incremental \class{FeedParser} API. The classic +\class{Parser} API is fine if you have the entire text of the message in +memory as a string, or if the entire message lives in a file on the file +system. \class{FeedParser} is more appropriate for when you're reading the +message from a stream which might block waiting for more input (e.g. reading +an email message from a socket). The \class{FeedParser} can consume and parse +the message incrementally, and only returns the root object when you close the +parser\footnote{As of email package version 3.0, introduced in +Python 2.4, the classic \class{Parser} was re-implemented in terms of the +\class{FeedParser}, so the semantics and results are identical between the two +parsers.}. + Note that the parser can be extended in limited ways, and of course you can implement your own parser completely from scratch. There is no magical connection between the \module{email} package's bundled parser and the \class{Message} class, so your custom parser can create message object trees any way it finds necessary. -The primary parser class is \class{Parser} which parses both the -headers and the payload of the message. In the case of -\mimetype{multipart} messages, it will recursively parse the body of -the container message. 
Two modes of parsing are supported, -\emph{strict} parsing, which will usually reject any non-RFC compliant -message, and \emph{lax} parsing, which attempts to adjust for common -MIME formatting problems. +\subsubsection{FeedParser API} + +\versionadded{2.4} + +The \class{FeedParser} provides an API that is conducive to incremental +parsing of email messages, such as would be necessary when reading the text of +an email message from a source that can block (e.g. a socket). The +\class{FeedParser} can of course be used to parse an email message fully +contained in a string or a file, but the classic \class{Parser} API may be +more convenient for such use cases. The semantics and results of the two +parser APIs are identical. + +The \class{FeedParser}'s API is simple; you create an instance, feed it a +bunch of text until there's no more to feed it, then close the parser to +retrieve the root message object. The \class{FeedParser} is extremely +accurate when parsing standards-compliant messages, and it does a very good +job of parsing non-compliant messages, providing information about how a +message was deemed broken. It will populate a message object's \var{defects} +attribute with a list of any problems it found in a message. See the +\refmodule{email.Errors} module for the list of defects that it can find. + +Here is the API for the \class{FeedParser}: + +\begin{classdesc}{FeedParser}{\optional{_factory}} +Create a \class{FeedParser} instance. Optional \var{_factory} is a +no-argument callable that will be called whenever a new message object is +needed. It defaults to the \class{email.Message.Message} class. +\end{classdesc} + +\begin{methoddesc}[FeedParser]{feed}{data} +Feed the \class{FeedParser} some more data. \var{data} should be a +string containing one or more lines. The lines can be partial and the +\class{FeedParser} will stitch such partial lines together properly. 
The +lines in the string can have any of the common three line endings, carriage +return, newline, or carriage return and newline (they can even be mixed). +\end{methoddesc} + +\begin{methoddesc}[FeedParser]{close}{} +Closing a \class{FeedParser} completes the parsing of all previously fed data, +and returns the root message object. It is undefined what happens if you feed +more data to a closed \class{FeedParser}. +\end{methoddesc} -The \module{email.Parser} module also provides a second class, called +\subsubsection{Parser class API} + +The \class{Parser} provides an API that can be used to parse a message when +the complete contents of the message are available in a string or file. The +\module{email.Parser} module also provides a second class, called \class{HeaderParser} which can be used if you're only interested in the headers of the message. \class{HeaderParser} can be much faster in these situations, since it does not attempt to parse the message body, instead setting the payload to the raw body as a string. \class{HeaderParser} has the same API as the \class{Parser} class. -\subsubsection{Parser class API} - \begin{classdesc}{Parser}{\optional{_class\optional{, strict}}} The constructor for the \class{Parser} class takes an optional argument \var{_class}. This must be a callable factory (such as a @@ -49,19 +99,14 @@ needs to be created. It defaults to \class{Message} (see \refmodule{email.Message}). The factory will be called without arguments. -The optional \var{strict} flag specifies whether strict or lax parsing -should be performed. Normally, when things like MIME terminating -boundaries are missing, or when messages contain other formatting -problems, the \class{Parser} will raise a -\exception{MessageParseError}. 
However, when lax parsing is enabled, -the \class{Parser} will attempt to work around such broken formatting -to produce a usable message structure (this doesn't mean -\exception{MessageParseError}s are never raised; some ill-formatted -messages just can't be parsed). The \var{strict} flag defaults to -\code{False} since lax parsing usually provides the most convenient -behavior. +The optional \var{strict} flag is ignored. \deprecated{2.4}{Because the +\class{Parser} class is a backward compatible API wrapper around the +new-in-Python 2.4 \class{FeedParser}, \emph{all} parsing is effectively +non-strict. You should simply stop passing a \var{strict} flag to the +\class{Parser} constructor.} \versionchanged[The \var{strict} flag was added]{2.2.2} +\versionchanged[The \var{strict} flag was deprecated]{2.4} \end{classdesc} The other public \class{Parser} methods are: @@ -149,4 +194,13 @@ Here are some notes on the parsing semantics: object containing a list payload of length 1. Their \method{is_multipart()} method will return \code{True}. The single element in the list payload will be a sub-message object. + +\item Some non-standards compliant messages may not be internally consistent + about their \mimetype{multipart}-edness. Such messages may have a + \mailheader{Content-Type} header of type \mimetype{multipart}, but their + \method{is_multipart()} method may return \code{False}. If such + messages were parsed with the \class{FeedParser}, they will have an + instance of the \class{MultipartInvariantViolationDefect} class in their + \var{defects} attribute list. See \refmodule{email.Errors} for + details. \end{itemize} diff --git a/Doc/lib/emailutil.tex b/Doc/lib/emailutil.tex index 80f0acf..c41f066 100644 --- a/Doc/lib/emailutil.tex +++ b/Doc/lib/emailutil.tex @@ -119,24 +119,33 @@ as-is. If \var{charset} is given but \var{language} is not, the string is encoded using the empty string for \var{language}. 
\end{funcdesc} +\begin{funcdesc}{collapse_rfc2231_value}{value\optional{, errors\optional{, + fallback_charset}}} +When a header parameter is encoded in \rfc{2231} format, +\method{Message.get_param()} may return a 3-tuple containing the character +set, language, and value. \function{collapse_rfc2231_value()} turns this into +a unicode string. Optional \var{errors} is passed to the \var{errors} +argument of the built-in \function{unicode()} function; it defaults to +\code{replace}. Optional \var{fallback_charset} specifies the character set +to use if the one in the \rfc{2231} header is not known by Python; it defaults +to \code{us-ascii}. + +For convenience, if the \var{value} passed to +\function{collapse_rfc2231_value()} is not a tuple, it should be a string and +it is returned unquoted. +\end{funcdesc} + \begin{funcdesc}{decode_params}{params} Decode parameters list according to \rfc{2231}. \var{params} is a sequence of 2-tuples containing elements of the form \code{(content-type, string-value)}. 
\end{funcdesc} -The following functions have been deprecated: - -\begin{funcdesc}{dump_address_pair}{pair} -\deprecated{2.2.2}{Use \function{formataddr()} instead.} -\end{funcdesc} - -\begin{funcdesc}{decode}{s} -\deprecated{2.2.2}{Use \method{Header.decode_header()} instead.} -\end{funcdesc} - +\versionchanged[The \function{dump_address_pair()} function has been removed; +use \function{formataddr()} instead.]{2.4} -\begin{funcdesc}{encode}{s\optional{, charset\optional{, encoding}}} -\deprecated{2.2.2}{Use \method{Header.encode()} instead.} -\end{funcdesc} +\versionchanged[The \function{decode()} function has been removed; use the +\method{Header.decode_header()} method instead.]{2.4} +\versionchanged[The \function{encode()} function has been removed; use the +\method{Header.encode()} method instead.]{2.4} diff --git a/Lib/email/Charset.py b/Lib/email/Charset.py index 3c8f7a4..6a3e3ca 100644 --- a/Lib/email/Charset.py +++ b/Lib/email/Charset.py @@ -1,18 +1,6 @@ # Copyright (C) 2001-2004 Python Software Foundation -# Author: che@debian.org (Ben Gertzfield), barry@python.org (Barry Warsaw) - -# XXX The following information needs updating. - -# Python 2.3 doesn't come with any Asian codecs by default. 
Two packages are -# currently available and supported as of this writing (30-Dec-2003): -# -# CJKCodecs -# http://cjkpython.i18n.org -# This package contains Chinese, Japanese, and Korean codecs - -# JapaneseCodecs -# http://www.asahi-net.or.jp/~rd6t-kjym/python -# Some Japanese users prefer this codec package +# Author: Ben Gertzfield, Barry Warsaw +# Contact: email-sig@python.org import email.base64MIME import email.quopriMIME @@ -21,9 +9,9 @@ from email.Encoders import encode_7or8bit # Flags for types of header encodings -QP = 1 # Quoted-Printable -BASE64 = 2 # Base64 -SHORTEST = 3 # the shorter of QP and base64, but only for headers +QP = 1 # Quoted-Printable +BASE64 = 2 # Base64 +SHORTEST = 3 # the shorter of QP and base64, but only for headers # In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7 MISC_LEN = 7 @@ -128,7 +116,7 @@ def add_charset(charset, header_enc=None, body_enc=None, output_charset=None): documentation for more information. """ if body_enc == SHORTEST: - raise ValueError, 'SHORTEST not allowed for body_enc' + raise ValueError('SHORTEST not allowed for body_enc') CHARSETS[charset] = (header_enc, body_enc, output_charset) diff --git a/Lib/email/Encoders.py b/Lib/email/Encoders.py index 6851094..baac2a3 100644 --- a/Lib/email/Encoders.py +++ b/Lib/email/Encoders.py @@ -1,37 +1,16 @@ # Copyright (C) 2001-2004 Python Software Foundation -# Author: barry@python.org (Barry Warsaw) +# Author: Barry Warsaw +# Contact: email-sig@python.org """Encodings and related functions.""" import base64 +from quopri import encodestring as _encodestring - - -# Helpers -try: - from quopri import encodestring as _encodestring - - def _qencode(s): - enc = _encodestring(s, quotetabs=1) - # Must encode spaces, which quopri.encodestring() doesn't do - return enc.replace(' ', '=20') -except ImportError: - # Python 2.1 doesn't have quopri.encodestring() - from cStringIO import StringIO - import quopri as _quopri - - def _qencode(s): - if not s: - return s - 
hasnewline = (s[-1] == '\n') - infp = StringIO(s) - outfp = StringIO() - _quopri.encode(infp, outfp, quotetabs=1) - # Python 2.x's encode() doesn't encode spaces even when quotetabs==1 - value = outfp.getvalue().replace(' ', '=20') - if not hasnewline and value[-1] == '\n': - return value[:-1] - return value +def _qencode(s): + enc = _encodestring(s, quotetabs=True) + # Must encode spaces, which quopri.encodestring() doesn't do + return enc.replace(' ', '=20') def _bencode(s): diff --git a/Lib/email/Errors.py b/Lib/email/Errors.py index e233219..e13a2c7 100644 --- a/Lib/email/Errors.py +++ b/Lib/email/Errors.py @@ -1,5 +1,6 @@ # Copyright (C) 2001-2004 Python Software Foundation -# Author: barry@python.org (Barry Warsaw) +# Author: Barry Warsaw +# Contact: email-sig@python.org """email package exception classes.""" @@ -33,17 +34,20 @@ class MessageDefect: def __init__(self, line=None): self.line = line -class NoBoundaryInMultipart(MessageDefect): +class NoBoundaryInMultipartDefect(MessageDefect): """A message claimed to be a multipart but had no boundary parameter.""" -class StartBoundaryNotFound(MessageDefect): +class StartBoundaryNotFoundDefect(MessageDefect): """The claimed start boundary was never found.""" -class FirstHeaderLineIsContinuation(MessageDefect): +class FirstHeaderLineIsContinuationDefect(MessageDefect): """A message had a continuation line as its first header line.""" -class MisplacedEnvelopeHeader(MessageDefect): +class MisplacedEnvelopeHeaderDefect(MessageDefect): """A 'Unix-from' header was found in the middle of a header block.""" -class MalformedHeader(MessageDefect): - """Found a header that was missing a colon, or was otherwise malformed""" +class MalformedHeaderDefect(MessageDefect): + """Found a header that was missing a colon, or was otherwise malformed.""" + +class MultipartInvariantViolationDefect(MessageDefect): + """A message claimed to be a multipart but no subparts were found.""" diff --git a/Lib/email/FeedParser.py 
b/Lib/email/FeedParser.py index dc3027d..de2754e 100644 --- a/Lib/email/FeedParser.py +++ b/Lib/email/FeedParser.py @@ -1,5 +1,6 @@ # Copyright (C) 2004 Python Software Foundation # Authors: Baxter, Wouters and Warsaw +# Contact: email-sig@python.org """FeedParser - An email feed parser. @@ -15,7 +16,7 @@ This completes the parsing and returns the root message object. The other advantage of this parser is that it will never throw a parsing exception. Instead, when it finds something unexpected, it adds a 'defect' to the current message. Defects are just instances that live on the message -object's .defect attribute. +object's .defects attribute. """ import re @@ -100,7 +101,7 @@ class BufferedSubFile(object): # and the eol character(s). Gather up a list of lines after # re-attaching the newlines. lines = [] - for i in range(len(parts) / 2): + for i in range(len(parts) // 2): lines.append(parts[i*2] + parts[i*2+1]) self.pushlines(lines) @@ -156,6 +157,10 @@ class FeedParser: self._call_parse() root = self._pop_message() assert not self._msgstack + # Look for final set of defects + if root.get_content_maintype() == 'multipart' \ + and not root.is_multipart(): + root.defects.append(Errors.MultipartInvariantViolationDefect()) return root def _new_message(self): @@ -166,7 +171,6 @@ class FeedParser: self._msgstack[-1].attach(msg) self._msgstack.append(msg) self._cur = msg - self._cur.defects = [] self._last = msg def _pop_message(self): @@ -259,7 +263,7 @@ class FeedParser: # defined a boundary. That's a problem which we'll handle by # reading everything until the EOF and marking the message as # defective. 
- self._cur.defects.append(Errors.NoBoundaryInMultipart()) + self._cur.defects.append(Errors.NoBoundaryInMultipartDefect()) lines = [] for line in self._input: if line is NeedMoreData: @@ -305,6 +309,8 @@ class FeedParser: if eolmo: preamble[-1] = lastline[:-len(eolmo.group(0))] self._cur.preamble = EMPTYSTRING.join(preamble) + #import pdb ; pdb.set_trace() + # See SF bug #1030941 capturing_preamble = False self._input.unreadline(line) continue @@ -363,7 +369,7 @@ class FeedParser: # that as a defect and store the captured text as the payload. # Otherwise everything from here to the EOF is epilogue. if capturing_preamble: - self._cur.defects.append(Errors.StartBoundaryNotFound()) + self._cur.defects.append(Errors.StartBoundaryNotFoundDefect()) self._cur.set_payload(EMPTYSTRING.join(preamble)) return # If the end boundary ended in a newline, we'll need to make sure @@ -408,7 +414,7 @@ class FeedParser: # The first line of the headers was a continuation. This # is illegal, so let's note the defect, store the illegal # line, and ignore it for purposes of headers. - defect = Errors.FirstHeaderLineIsContinuation(line) + defect = Errors.FirstHeaderLineIsContinuationDefect(line) self._cur.defects.append(defect) continue lastvalue.append(line) @@ -436,13 +442,13 @@ class FeedParser: else: # Weirdly placed unix-from line. Note this as a defect # and ignore it. - defect = Errors.MisplacedEnvelopeHeader(line) + defect = Errors.MisplacedEnvelopeHeaderDefect(line) self._cur.defects.append(defect) continue # Split the line on the colon separating field name from value. 
i = line.find(':') if i < 0: - defect = Errors.MalformedHeader(line) + defect = Errors.MalformedHeaderDefect(line) self._cur.defects.append(defect) continue lastheader = line[:i] diff --git a/Lib/email/Generator.py b/Lib/email/Generator.py index 7fe634f..9411a9e 100644 --- a/Lib/email/Generator.py +++ b/Lib/email/Generator.py @@ -1,13 +1,14 @@ # Copyright (C) 2001-2004 Python Software Foundation -# Author: barry@python.org (Barry Warsaw) +# Author: Barry Warsaw +# Contact: email-sig@python.org -"""Classes to generate plain text from a message object tree. -""" +"""Classes to generate plain text from a message object tree.""" import re import sys import time import random +import warnings from cStringIO import StringIO from email.Header import Header @@ -81,7 +82,10 @@ class Generator: self._write(msg) # For backwards compatibility, but this is slower - __call__ = flatten + def __call__(self, msg, unixfrom=False): + warnings.warn('__call__() deprecated; use flatten()', + DeprecationWarning, 2) + self.flatten(msg, unixfrom) def clone(self, fp): """Clone this generator with the exact same options.""" @@ -175,7 +179,7 @@ class Generator: if cset is not None: payload = cset.body_encode(payload) if not isinstance(payload, basestring): - raise TypeError, 'string payload expected: %s' % type(payload) + raise TypeError('string payload expected: %s' % type(payload)) if self._mangle_from_: payload = fcre.sub('>From ', payload) self._fp.write(payload) @@ -271,6 +275,8 @@ class Generator: +_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]' + class DecodedGenerator(Generator): """Generator a text representation of a message. 
@@ -301,13 +307,13 @@ class DecodedGenerator(Generator): """ Generator.__init__(self, outfp, mangle_from_, maxheaderlen) if fmt is None: - fmt = ('[Non-text (%(type)s) part of message omitted, ' - 'filename %(filename)s]') - self._fmt = fmt + self._fmt = _FMT + else: + self._fmt = fmt def _dispatch(self, msg): for part in msg.walk(): - maintype = part.get_main_type('text') + maintype = part.get_content_maintype() if maintype == 'text': print >> self, part.get_payload(decode=True) elif maintype == 'multipart': @@ -315,9 +321,9 @@ class DecodedGenerator(Generator): pass else: print >> self, self._fmt % { - 'type' : part.get_type('[no MIME type]'), - 'maintype' : part.get_main_type('[no main MIME type]'), - 'subtype' : part.get_subtype('[no sub-MIME type]'), + 'type' : part.get_content_type(), + 'maintype' : part.get_content_maintype(), + 'subtype' : part.get_content_subtype(), 'filename' : part.get_filename('[no filename]'), 'description': part.get('Content-Description', '[no description]'), diff --git a/Lib/email/Header.py b/Lib/email/Header.py index 21acaf1..5e24afe 100644 --- a/Lib/email/Header.py +++ b/Lib/email/Header.py @@ -1,5 +1,6 @@ # Copyright (C) 2002-2004 Python Software Foundation -# Author: che@debian.org (Ben Gertzfield), barry@python.org (Barry Warsaw) +# Author: Ben Gertzfield, Barry Warsaw +# Contact: email-sig@python.org """Header encoding and decoding functionality.""" diff --git a/Lib/email/Iterators.py b/Lib/email/Iterators.py index af6095e..74a93c7 100644 --- a/Lib/email/Iterators.py +++ b/Lib/email/Iterators.py @@ -1,8 +1,8 @@ # Copyright (C) 2001-2004 Python Software Foundation -# Author: Barry Warsaw +# Author: Barry Warsaw +# Contact: email-sig@python.org -"""Various types of useful iterators and generators. 
-""" +"""Various types of useful iterators and generators.""" import sys from cStringIO import StringIO diff --git a/Lib/email/MIMEAudio.py b/Lib/email/MIMEAudio.py index dda7689..266ec4c 100644 --- a/Lib/email/MIMEAudio.py +++ b/Lib/email/MIMEAudio.py @@ -1,7 +1,8 @@ +# Copyright (C) 2001-2004 Python Software Foundation # Author: Anthony Baxter +# Contact: email-sig@python.org -"""Class representing audio/* type MIME documents. -""" +"""Class representing audio/* type MIME documents.""" import sndhdr from cStringIO import StringIO @@ -65,7 +66,7 @@ class MIMEAudio(MIMENonMultipart): if _subtype is None: _subtype = _whatsnd(_audiodata) if _subtype is None: - raise TypeError, 'Could not find audio MIME subtype' + raise TypeError('Could not find audio MIME subtype') MIMENonMultipart.__init__(self, 'audio', _subtype, **_params) self.set_payload(_audiodata) _encoder(self) diff --git a/Lib/email/MIMEBase.py b/Lib/email/MIMEBase.py index 7485d85..88691f8 100644 --- a/Lib/email/MIMEBase.py +++ b/Lib/email/MIMEBase.py @@ -1,8 +1,8 @@ -# Copyright (C) 2001,2002 Python Software Foundation -# Author: barry@zope.com (Barry Warsaw) +# Copyright (C) 2001-2004 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org -"""Base class for MIME specializations. -""" +"""Base class for MIME specializations.""" from email import Message diff --git a/Lib/email/MIMEImage.py b/Lib/email/MIMEImage.py index 5306e53..a658067 100644 --- a/Lib/email/MIMEImage.py +++ b/Lib/email/MIMEImage.py @@ -1,8 +1,8 @@ -# Copyright (C) 2001,2002 Python Software Foundation -# Author: barry@zope.com (Barry Warsaw) +# Copyright (C) 2001-2004 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org -"""Class representing image/* type MIME documents. 
-""" +"""Class representing image/* type MIME documents.""" import imghdr @@ -39,7 +39,7 @@ class MIMEImage(MIMENonMultipart): if _subtype is None: _subtype = imghdr.what(None, _imagedata) if _subtype is None: - raise TypeError, 'Could not guess image MIME subtype' + raise TypeError('Could not guess image MIME subtype') MIMENonMultipart.__init__(self, 'image', _subtype, **_params) self.set_payload(_imagedata) _encoder(self) diff --git a/Lib/email/MIMEMessage.py b/Lib/email/MIMEMessage.py index 2042dd9..3021934 100644 --- a/Lib/email/MIMEMessage.py +++ b/Lib/email/MIMEMessage.py @@ -1,8 +1,8 @@ -# Copyright (C) 2001,2002 Python Software Foundation -# Author: barry@zope.com (Barry Warsaw) +# Copyright (C) 2001-2004 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org -"""Class representing message/* MIME documents. -""" +"""Class representing message/* MIME documents.""" from email import Message from email.MIMENonMultipart import MIMENonMultipart @@ -24,7 +24,7 @@ class MIMEMessage(MIMENonMultipart): """ MIMENonMultipart.__init__(self, 'message', _subtype) if not isinstance(_msg, Message.Message): - raise TypeError, 'Argument is not an instance of Message' + raise TypeError('Argument is not an instance of Message') # It's convenient to use this base class method. We need to do it # this way or we'll get an exception Message.Message.attach(self, _msg) diff --git a/Lib/email/MIMEMultipart.py b/Lib/email/MIMEMultipart.py index ea6ae0c..9072a64 100644 --- a/Lib/email/MIMEMultipart.py +++ b/Lib/email/MIMEMultipart.py @@ -1,8 +1,8 @@ # Copyright (C) 2002-2004 Python Software Foundation -# Author: barry@python.org (Barry Warsaw) +# Author: Barry Warsaw +# Contact: email-sig@python.org -"""Base class for MIME multipart/* type messages. 
-""" +"""Base class for MIME multipart/* type messages.""" from email import MIMEBase diff --git a/Lib/email/MIMENonMultipart.py b/Lib/email/MIMENonMultipart.py index 1b3bcfd..4195d2a 100644 --- a/Lib/email/MIMENonMultipart.py +++ b/Lib/email/MIMENonMultipart.py @@ -1,8 +1,8 @@ -# Copyright (C) 2002 Python Software Foundation -# Author: barry@zope.com (Barry Warsaw) +# Copyright (C) 2002-2004 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org -"""Base class for MIME type messages that are not multipart. -""" +"""Base class for MIME type messages that are not multipart.""" from email import Errors from email import MIMEBase diff --git a/Lib/email/MIMEText.py b/Lib/email/MIMEText.py index d049ad9..5ef1876 100644 --- a/Lib/email/MIMEText.py +++ b/Lib/email/MIMEText.py @@ -1,10 +1,9 @@ -# Copyright (C) 2001,2002 Python Software Foundation -# Author: barry@zope.com (Barry Warsaw) +# Copyright (C) 2001-2004 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org -"""Class representing text/* type MIME documents. -""" +"""Class representing text/* type MIME documents.""" -import warnings from email.MIMENonMultipart import MIMENonMultipart from email.Encoders import encode_7or8bit @@ -13,8 +12,7 @@ from email.Encoders import encode_7or8bit class MIMEText(MIMENonMultipart): """Class for generating text/* type MIME documents.""" - def __init__(self, _text, _subtype='plain', _charset='us-ascii', - _encoder=None): + def __init__(self, _text, _subtype='plain', _charset='us-ascii'): """Create a text/* type MIME document. _text is the string for this message object. @@ -24,22 +22,7 @@ class MIMEText(MIMENonMultipart): _charset is the character set parameter added to the Content-Type header. This defaults to "us-ascii". Note that as a side-effect, the Content-Transfer-Encoding header will also be set. - - The use of the _encoder is deprecated. 
The encoding of the payload, - and the setting of the character set parameter now happens implicitly - based on the _charset argument. If _encoder is supplied, then a - DeprecationWarning is used, and the _encoder functionality may - override any header settings indicated by _charset. This is probably - not what you want. """ MIMENonMultipart.__init__(self, 'text', _subtype, **{'charset': _charset}) self.set_payload(_text, _charset) - if _encoder is not None: - warnings.warn('_encoder argument is obsolete.', - DeprecationWarning, 2) - # Because set_payload() with a _charset will set its own - # Content-Transfer-Encoding header, we need to delete the - # existing one or will end up with two of them. :( - del self['content-transfer-encoding'] - _encoder(self) diff --git a/Lib/email/Message.py b/Lib/email/Message.py index 2245f9b..d23a26f 100644 --- a/Lib/email/Message.py +++ b/Lib/email/Message.py @@ -1,5 +1,6 @@ # Copyright (C) 2001-2004 Python Software Foundation -# Author: barry@python.org (Barry Warsaw) +# Author: Barry Warsaw +# Contact: email-sig@python.org """Basic message object for the email package object model.""" @@ -69,6 +70,10 @@ def _parseparam(s): def _unquotevalue(value): + # This is different than Utils.collapse_rfc2231_value() because it doesn't + # try to convert the value to a unicode. Message.get_param() and + # Message.get_params() are both currently defined to return the tuple in + # the face of RFC 2231 parameters. 
if isinstance(value, tuple): return value[0], value[1], Utils.unquote(value[2]) else: @@ -98,6 +103,7 @@ class Message: self._charset = None # Defaults for multipart messages self.preamble = self.epilogue = None + self.defects = [] # Default content type self._default_type = 'text/plain' @@ -124,9 +130,7 @@ class Message: def is_multipart(self): """Return True if the message consists of multiple parts.""" - if isinstance(self._payload, list): - return True - return False + return isinstance(self._payload, list) # # Unix From_ line @@ -140,26 +144,6 @@ class Message: # # Payload manipulation. # - def add_payload(self, payload): - """Add the given payload to the current payload. - - If the current payload is empty, then the current payload will be made - a scalar, set to the given value. - - Note: This method is deprecated. Use .attach() instead. - """ - warnings.warn('add_payload() is deprecated, use attach() instead.', - DeprecationWarning, 2) - if self._payload is None: - self._payload = payload - elif isinstance(self._payload, list): - self._payload.append(payload) - elif self.get_main_type() not in (None, 'multipart'): - raise Errors.MultipartConversionError( - 'Message main content type must be "multipart" or missing') - else: - self._payload = [self._payload, payload] - def attach(self, payload): """Add the given payload to the current payload. @@ -195,7 +179,7 @@ class Message: if i is None: payload = self._payload elif not isinstance(self._payload, list): - raise TypeError, 'Expected list, got %s' % type(self._payload) + raise TypeError('Expected list, got %s' % type(self._payload)) else: payload = self._payload[i] if decode: @@ -254,7 +238,7 @@ class Message: if isinstance(charset, str): charset = Charset.Charset(charset) if not isinstance(charset, Charset.Charset): - raise TypeError, charset + raise TypeError(charset) # BAW: should we accept strings that can serve as arguments to the # Charset constructor? 
self._charset = charset @@ -267,9 +251,9 @@ class Message: self.set_param('charset', charset.get_output_charset()) if not self.has_key('Content-Transfer-Encoding'): cte = charset.get_body_encoding() - if callable(cte): + try: cte(self) - else: + except TypeError: self.add_header('Content-Transfer-Encoding', cte) def get_charset(self): @@ -290,7 +274,7 @@ class Message: Return None if the header is missing instead of raising an exception. Note that if the header appeared multiple times, exactly which - occurrance gets returned is undefined. Use getall() to get all + occurrance gets returned is undefined. Use get_all() to get all the values matching a header field name. """ return self.get(name) @@ -320,7 +304,7 @@ class Message: def has_key(self, name): """Return true if the message contains the header.""" - missing = [] + missing = object() return self.get(name, missing) is not missing def keys(self): @@ -422,11 +406,10 @@ class Message: self._headers[i] = (k, _value) break else: - raise KeyError, _name + raise KeyError(_name) # - # These methods are silently deprecated in favor of get_content_type() and - # friends (see below). They will be noisily deprecated in email 3.0. + # Deprecated methods. These will be removed in email 3.1. # def get_type(self, failobj=None): @@ -436,7 +419,9 @@ class Message: string of the form `maintype/subtype'. If there was no Content-Type header in the message, failobj is returned (defaults to None). 
""" - missing = [] + warnings.warn('get_type() deprecated; use get_content_type()', + DeprecationWarning, 2) + missing = object() value = self.get('content-type', missing) if value is missing: return failobj @@ -444,7 +429,9 @@ class Message: def get_main_type(self, failobj=None): """Return the message's main content type if present.""" - missing = [] + warnings.warn('get_main_type() deprecated; use get_content_maintype()', + DeprecationWarning, 2) + missing = object() ctype = self.get_type(missing) if ctype is missing: return failobj @@ -454,7 +441,9 @@ class Message: def get_subtype(self, failobj=None): """Return the message's content subtype if present.""" - missing = [] + warnings.warn('get_subtype() deprecated; use get_content_subtype()', + DeprecationWarning, 2) + missing = object() ctype = self.get_type(missing) if ctype is missing: return failobj @@ -479,7 +468,7 @@ class Message: appears inside a multipart/digest container, in which case it would be message/rfc822. """ - missing = [] + missing = object() value = self.get('content-type', missing) if value is missing: # This should have no parameters @@ -529,7 +518,7 @@ class Message: def _get_params_preserve(self, failobj, header): # Like get_params() but preserves the quoting of values. BAW: # should this be part of the public interface? - missing = [] + missing = object() value = self.get(header, missing) if value is missing: return failobj @@ -560,7 +549,7 @@ class Message: header. Optional header is the header to search instead of Content-Type. If unquote is True, the value is unquoted. """ - missing = [] + missing = object() params = self._get_params_preserve(missing, header) if params is missing: return failobj @@ -713,17 +702,11 @@ class Message: The filename is extracted from the Content-Disposition header's `filename' parameter, and it is unquoted. 
""" - missing = [] + missing = object() filename = self.get_param('filename', missing, 'content-disposition') if filename is missing: return failobj - if isinstance(filename, tuple): - # It's an RFC 2231 encoded parameter - newvalue = _unquotevalue(filename) - return unicode(newvalue[2], newvalue[0] or 'us-ascii') - else: - newvalue = _unquotevalue(filename.strip()) - return newvalue + return Utils.collapse_rfc2231_value(filename).strip() def get_boundary(self, failobj=None): """Return the boundary associated with the payload if present. @@ -731,15 +714,11 @@ class Message: The boundary is extracted from the Content-Type header's `boundary' parameter, and it is unquoted. """ - missing = [] + missing = object() boundary = self.get_param('boundary', missing) if boundary is missing: return failobj - if isinstance(boundary, tuple): - # RFC 2231 encoded, so decode. It better end up as ascii - charset = boundary[0] or 'us-ascii' - return unicode(boundary[2], charset).encode('us-ascii') - return _unquotevalue(boundary.strip()) + return Utils.collapse_rfc2231_value(boundary).strip() def set_boundary(self, boundary): """Set the boundary parameter in Content-Type to 'boundary'. @@ -751,7 +730,7 @@ class Message: HeaderParseError is raised if the message has no Content-Type header. """ - missing = [] + missing = object() params = self._get_params_preserve(missing, 'content-type') if params is missing: # There was no Content-Type header, and we don't know what type @@ -793,7 +772,7 @@ class Message: Content-Type header, or if that header has no charset parameter, failobj is returned. 
""" - missing = [] + missing = object() charset = self.get_param('charset', missing) if charset is missing: return failobj diff --git a/Lib/email/Parser.py b/Lib/email/Parser.py index 8c5661d..0c05224 100644 --- a/Lib/email/Parser.py +++ b/Lib/email/Parser.py @@ -4,17 +4,15 @@ """A parser of RFC 2822 and MIME email messages.""" -import re +import warnings from cStringIO import StringIO from email.FeedParser import FeedParser from email.Message import Message -NLCRE = re.compile('\r\n|\r|\n') - class Parser: - def __init__(self, _class=Message, strict=False): + def __init__(self, *args, **kws): """Parser of RFC 2822 and MIME email messages. Creates an in-memory object tree representing the email message, which @@ -29,14 +27,28 @@ class Parser: _class is the class to instantiate for new message objects when they must be created. This class must have a constructor that can take zero arguments. Default is Message.Message. - - Optional strict tells the parser to be strictly RFC compliant or to be - more forgiving in parsing of ill-formatted MIME documents. When - non-strict mode is used, the parser will try to make up for missing or - erroneous boundaries and other peculiarities seen in the wild. - Default is non-strict parsing. """ - self._class = _class + if len(args) >= 1: + if '_class' in kws: + raise TypeError("Multiple values for keyword arg '_class'") + kws['_class'] = args[0] + if len(args) == 2: + if 'strict' in kws: + raise TypeError("Multiple values for keyword arg 'strict'") + kws['strict'] = args[1] + if len(args) > 2: + raise TypeError('Too many arguments') + if '_class' in kws: + self._class = kws['_class'] + del kws['_class'] + else: + self._class = Message + if 'strict' in kws: + warnings.warn("'strict' argument is deprecated (and ignored)", + DeprecationWarning, 2) + del kws['strict'] + if kws: + raise TypeError('Unexpected keyword arguments') def parse(self, fp, headersonly=False): """Create a message structure from the data in a file. 
diff --git a/Lib/email/Utils.py b/Lib/email/Utils.py index 3a4bbc8..e786d26 100644 --- a/Lib/email/Utils.py +++ b/Lib/email/Utils.py @@ -1,5 +1,6 @@ # Copyright (C) 2001-2004 Python Software Foundation -# Author: barry@python.org (Barry Warsaw) +# Author: Barry Warsaw +# Contact: email-sig@python.org """Miscellaneous utilities.""" @@ -80,12 +81,6 @@ def formataddr(pair): return '%s%s%s <%s>' % (quotes, name, quotes, address) return address -# For backwards compatibility -def dump_address_pair(pair): - warnings.warn('Use email.Utils.formataddr() instead', - DeprecationWarning, 2) - return formataddr(pair) - def getaddresses(fieldvalues): @@ -107,46 +102,6 @@ ecre = re.compile(r''' ''', re.VERBOSE | re.IGNORECASE) -def decode(s): - """Return a decoded string according to RFC 2047, as a unicode string. - - NOTE: This function is deprecated. Use Header.decode_header() instead. - """ - warnings.warn('Use Header.decode_header() instead.', DeprecationWarning, 2) - # Intra-package import here to avoid circular import problems. - from email.Header import decode_header - L = decode_header(s) - if not isinstance(L, list): - # s wasn't decoded - return s - - rtn = [] - for atom, charset in L: - if charset is None: - rtn.append(atom) - else: - # Convert the string to Unicode using the given encoding. Leave - # Unicode conversion errors to strict. - rtn.append(unicode(atom, charset)) - # Now that we've decoded everything, we just need to join all the parts - # together into the final string. 
- return UEMPTYSTRING.join(rtn) - - - -def encode(s, charset='iso-8859-1', encoding='q'): - """Encode a string according to RFC 2047.""" - warnings.warn('Use Header.Header.encode() instead.', DeprecationWarning, 2) - encoding = encoding.lower() - if encoding == 'q': - estr = _qencode(s) - elif encoding == 'b': - estr = _bencode(s) - else: - raise ValueError, 'Illegal encoding code: ' + encoding - return '=?%s?%s?%s?=' % (charset.lower(), encoding, estr) - - def formatdate(timeval=None, localtime=False): """Returns a date string as specified by RFC 2822, e.g.: @@ -179,7 +134,7 @@ def formatdate(timeval=None, localtime=False): sign = '-' else: sign = '+' - zone = '%s%02d%02d' % (sign, hours, minutes / 60) + zone = '%s%02d%02d' % (sign, hours, minutes // 60) else: now = time.gmtime(timeval) # Timezone offset is always -0000 @@ -314,3 +269,16 @@ def decode_params(params): new_params.append( (name, (charset, language, '"%s"' % quote(value)))) return new_params + +def collapse_rfc2231_value(value, errors='replace', + fallback_charset='us-ascii'): + if isinstance(value, tuple): + rawval = unquote(value[2]) + charset = value[0] or 'us-ascii' + try: + return unicode(rawval, charset, errors) + except LookupError: + # XXX charset is unknown to Python. + return unicode(rawval, fallback_charset, errors) + else: + return unquote(value) diff --git a/Lib/email/__init__.py b/Lib/email/__init__.py index 74b9b73..8a46fec 100644 --- a/Lib/email/__init__.py +++ b/Lib/email/__init__.py @@ -1,5 +1,6 @@ # Copyright (C) 2001-2004 Python Software Foundation -# Author: barry@python.org (Barry Warsaw) +# Author: Barry Warsaw +# Contact: email-sig@python.org """A package for parsing, handling, and generating email messages.""" @@ -33,25 +34,19 @@ __all__ = [ # Some convenience routines. Don't import Parser and Message as side-effects # of importing email since those cascadingly import most of the rest of the # email package. 
-def message_from_string(s, _class=None, strict=False): +def message_from_string(s, *args, **kws): """Parse a string into a Message object model. Optional _class and strict are passed to the Parser constructor. """ from email.Parser import Parser - if _class is None: - from email.Message import Message - _class = Message - return Parser(_class, strict=strict).parsestr(s) + return Parser(*args, **kws).parsestr(s) -def message_from_file(fp, _class=None, strict=False): +def message_from_file(fp, *args, **kws): """Read a file and parse its contents into a Message object model. Optional _class and strict are passed to the Parser constructor. """ from email.Parser import Parser - if _class is None: - from email.Message import Message - _class = Message - return Parser(_class, strict=strict).parse(fp) + return Parser(*args, **kws).parse(fp) diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py index 4a31aff..f6efcd5 100644 --- a/Lib/email/_parseaddr.py +++ b/Lib/email/_parseaddr.py @@ -1,4 +1,5 @@ # Copyright (C) 2002-2004 Python Software Foundation +# Contact: email-sig@python.org """Email address parsing code. @@ -115,7 +116,7 @@ def parsedate_tz(data): tzoffset = -tzoffset else: tzsign = 1 - tzoffset = tzsign * ( (tzoffset/100)*3600 + (tzoffset % 100)*60) + tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60) tuple = (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset) return tuple diff --git a/Lib/email/base64MIME.py b/Lib/email/base64MIME.py index af85949..6ed1d53 100644 --- a/Lib/email/base64MIME.py +++ b/Lib/email/base64MIME.py @@ -1,5 +1,6 @@ -# Copyright (C) 2002 Python Software Foundation -# Author: che@debian.org (Ben Gertzfield) +# Copyright (C) 2002-2004 Python Software Foundation +# Author: Ben Gertzfield +# Contact: email-sig@python.org """Base64 content transfer encoding per RFCs 2045-2047. 
diff --git a/Lib/email/quopriMIME.py b/Lib/email/quopriMIME.py index 2802bc2..a9b5d49 100644 --- a/Lib/email/quopriMIME.py +++ b/Lib/email/quopriMIME.py @@ -1,5 +1,6 @@ # Copyright (C) 2001-2004 Python Software Foundation -# Author: che@debian.org (Ben Gertzfield) +# Author: Ben Gertzfield +# Contact: email-sig@python.org """Quoted-printable content transfer encoding per RFCs 2045-2047. @@ -43,12 +44,12 @@ bqre = re.compile(r'[^ !-<>-~\t]') # Helpers def header_quopri_check(c): """Return True if the character should be escaped with header quopri.""" - return hqre.match(c) and True + return bool(hqre.match(c)) def body_quopri_check(c): """Return True if the character should be escaped with body quopri.""" - return bqre.match(c) and True + return bool(bqre.match(c)) def header_quopri_len(s): diff --git a/Lib/email/test/__init__.py b/Lib/email/test/__init__.py index b8a7774..e69de29 100644 --- a/Lib/email/test/__init__.py +++ b/Lib/email/test/__init__.py @@ -1,2 +0,0 @@ -# Copyright (C) 2002 Python Software Foundation -# Author: barry@zope.com (Barry Warsaw) diff --git a/Lib/email/test/data/msg_41.txt b/Lib/email/test/data/msg_41.txt new file mode 100644 index 0000000..76cdd1c --- /dev/null +++ b/Lib/email/test/data/msg_41.txt @@ -0,0 +1,8 @@ +From: "Allison Dunlap" +To: yyy@example.com +Subject: 64423 +Date: Sun, 11 Jul 2004 16:09:27 -0300 +MIME-Version: 1.0 +Content-Type: multipart/alternative; + +Blah blah blah diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index d079b9e..a55ef5c 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -1,4 +1,5 @@ # Copyright (C) 2001-2004 Python Software Foundation +# Contact: email-sig@python.org # email package unit tests import os @@ -51,25 +52,20 @@ def openfile(filename, mode='r'): # Base test class class TestEmailBase(unittest.TestCase): - if hasattr(difflib, 'ndiff'): - # Python 2.2 and beyond - def ndiffAssertEqual(self, first, second): - """Like failUnlessEqual except use 
ndiff for readable output.""" - if first <> second: - sfirst = str(first) - ssecond = str(second) - diff = difflib.ndiff(sfirst.splitlines(), ssecond.splitlines()) - fp = StringIO() - print >> fp, NL, NL.join(diff) - raise self.failureException, fp.getvalue() - else: - # Python 2.1 - ndiffAssertEqual = unittest.TestCase.assertEqual - - def _msgobj(self, filename, strict=False): + def ndiffAssertEqual(self, first, second): + """Like failUnlessEqual except use ndiff for readable output.""" + if first <> second: + sfirst = str(first) + ssecond = str(second) + diff = difflib.ndiff(sfirst.splitlines(), ssecond.splitlines()) + fp = StringIO() + print >> fp, NL, NL.join(diff) + raise self.failureException, fp.getvalue() + + def _msgobj(self, filename): fp = openfile(findfile(filename)) try: - msg = email.message_from_file(fp, strict=strict) + msg = email.message_from_file(fp) finally: fp.close() return msg @@ -493,44 +489,12 @@ class TestMessageAPI(TestEmailBase): # Test the email.Encoders module class TestEncoders(unittest.TestCase): - def test_encode_noop(self): - eq = self.assertEqual - msg = MIMEText('hello world', _encoder=Encoders.encode_noop) - eq(msg.get_payload(), 'hello world') - - def test_encode_7bit(self): - eq = self.assertEqual - msg = MIMEText('hello world', _encoder=Encoders.encode_7or8bit) - eq(msg.get_payload(), 'hello world') - eq(msg['content-transfer-encoding'], '7bit') - msg = MIMEText('hello \x7f world', _encoder=Encoders.encode_7or8bit) - eq(msg.get_payload(), 'hello \x7f world') - eq(msg['content-transfer-encoding'], '7bit') - - def test_encode_8bit(self): - eq = self.assertEqual - msg = MIMEText('hello \x80 world', _encoder=Encoders.encode_7or8bit) - eq(msg.get_payload(), 'hello \x80 world') - eq(msg['content-transfer-encoding'], '8bit') - def test_encode_empty_payload(self): eq = self.assertEqual msg = Message() msg.set_charset('us-ascii') eq(msg['content-transfer-encoding'], '7bit') - def test_encode_base64(self): - eq = self.assertEqual - msg 
= MIMEText('hello world', _encoder=Encoders.encode_base64) - eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=') - eq(msg['content-transfer-encoding'], 'base64') - - def test_encode_quoted_printable(self): - eq = self.assertEqual - msg = MIMEText('hello world', _encoder=Encoders.encode_quopri) - eq(msg.get_payload(), 'hello=20world') - eq(msg['content-transfer-encoding'], 'quoted-printable') - def test_default_cte(self): eq = self.assertEqual msg = MIMEText('hello world') @@ -932,16 +896,6 @@ class TestMIMEAudio(unittest.TestCase): au = MIMEAudio(self._audiodata, 'fish') self.assertEqual(im.get_type(), 'audio/fish') - def test_custom_encoder(self): - eq = self.assertEqual - def encoder(msg): - orig = msg.get_payload() - msg.set_payload(0) - msg['Content-Transfer-Encoding'] = 'broken64' - au = MIMEAudio(self._audiodata, _encoder=encoder) - eq(au.get_payload(), 0) - eq(au['content-transfer-encoding'], 'broken64') - def test_add_header(self): eq = self.assertEqual unless = self.failUnless @@ -985,16 +939,6 @@ class TestMIMEImage(unittest.TestCase): im = MIMEImage(self._imgdata, 'fish') self.assertEqual(im.get_type(), 'image/fish') - def test_custom_encoder(self): - eq = self.assertEqual - def encoder(msg): - orig = msg.get_payload() - msg.set_payload(0) - msg['Content-Transfer-Encoding'] = 'broken64' - im = MIMEImage(self._imgdata, _encoder=encoder) - eq(im.get_payload(), 0) - eq(im['content-transfer-encoding'], 'broken64') - def test_add_header(self): eq = self.assertEqual unless = self.failUnless @@ -1396,8 +1340,8 @@ class TestNonConformant(TestEmailBase): eq = self.assertEqual msg = self._msgobj('msg_14.txt') eq(msg.get_type(), 'text') - eq(msg.get_main_type(), None) - eq(msg.get_subtype(), None) + eq(msg.get_content_maintype(), 'text') + eq(msg.get_content_subtype(), 'plain') def test_same_boundary_inner_outer(self): unless = self.failUnless @@ -1406,14 +1350,17 @@ class TestNonConformant(TestEmailBase): inner = msg.get_payload(0) unless(hasattr(inner, 'defects')) 
self.assertEqual(len(inner.defects), 1) - unless(isinstance(inner.defects[0], Errors.StartBoundaryNotFound)) + unless(isinstance(inner.defects[0], + Errors.StartBoundaryNotFoundDefect)) def test_multipart_no_boundary(self): unless = self.failUnless msg = self._msgobj('msg_25.txt') unless(isinstance(msg.get_payload(), str)) - self.assertEqual(len(msg.defects), 1) - unless(isinstance(msg.defects[0], Errors.NoBoundaryInMultipart)) + self.assertEqual(len(msg.defects), 2) + unless(isinstance(msg.defects[0], Errors.NoBoundaryInMultipartDefect)) + unless(isinstance(msg.defects[1], + Errors.MultipartInvariantViolationDefect)) def test_invalid_content_type(self): eq = self.assertEqual @@ -1464,40 +1411,19 @@ Subject: here's something interesting counter to RFC 2822, there's no separating newline here """) + def test_lying_multipart(self): + unless = self.failUnless + msg = self._msgobj('msg_41.txt') + unless(hasattr(msg, 'defects')) + self.assertEqual(len(msg.defects), 2) + unless(isinstance(msg.defects[0], Errors.NoBoundaryInMultipartDefect)) + unless(isinstance(msg.defects[1], + Errors.MultipartInvariantViolationDefect)) + # Test RFC 2047 header encoding and decoding class TestRFC2047(unittest.TestCase): - def test_iso_8859_1(self): - eq = self.assertEqual - s = '=?iso-8859-1?q?this=20is=20some=20text?=' - eq(Utils.decode(s), 'this is some text') - s = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=' - eq(Utils.decode(s), u'Keld J\xf8rn Simonsen') - s = '=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=' \ - '=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=' - eq(Utils.decode(s), 'If you can read this you understand the example.') - s = '=?iso-8859-8?b?7eXs+SDv4SDp7Oj08A==?=' - eq(Utils.decode(s), - u'\u05dd\u05d5\u05dc\u05e9 \u05df\u05d1 \u05d9\u05dc\u05d8\u05e4\u05e0') - s = '=?iso-8859-1?q?this=20is?= =?iso-8859-1?q?some=20text?=' - eq(Utils.decode(s), u'this issome text') - s = '=?iso-8859-1?q?this=20is_?= =?iso-8859-1?q?some=20text?=' - eq(Utils.decode(s), u'this is some 
text') - - def test_encode_header(self): - eq = self.assertEqual - s = 'this is some text' - eq(Utils.encode(s), '=?iso-8859-1?q?this=20is=20some=20text?=') - s = 'Keld_J\xf8rn_Simonsen' - eq(Utils.encode(s), '=?iso-8859-1?q?Keld_J=F8rn_Simonsen?=') - s1 = 'If you can read this yo' - s2 = 'u understand the example.' - eq(Utils.encode(s1, encoding='b'), - '=?iso-8859-1?b?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=') - eq(Utils.encode(s2, charset='iso-8859-2', encoding='b'), - '=?iso-8859-2?b?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=') - def test_rfc2047_multiline(self): eq = self.assertEqual s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz @@ -1517,10 +1443,7 @@ class TestRFC2047(unittest.TestCase): s = '=?ISO-8859-1?Q?Andr=E9?= Pirard ' dh = decode_header(s) eq(dh, [('Andr\xe9', 'iso-8859-1'), ('Pirard ', None)]) - # Python 2.1's unicode() builtin doesn't call the object's - # __unicode__() method. Use the following alternative instead. - #hu = unicode(make_header(dh)).encode('latin-1') - hu = make_header(dh).__unicode__().encode('latin-1') + hu = unicode(make_header(dh)).encode('latin-1') eq(hu, 'Andr\xe9 Pirard ') def test_whitespace_eater_unicode_2(self): @@ -1870,8 +1793,8 @@ class TestIdempotent(TestEmailBase): eq = self.assertEquals msg, text = self._msgobj('msg_01.txt') eq(msg.get_type(), 'text/plain') - eq(msg.get_main_type(), 'text') - eq(msg.get_subtype(), 'plain') + eq(msg.get_content_maintype(), 'text') + eq(msg.get_content_subtype(), 'plain') eq(msg.get_params()[1], ('charset', 'us-ascii')) eq(msg.get_param('charset'), 'us-ascii') eq(msg.preamble, None) @@ -2712,11 +2635,7 @@ class TestHeader(TestEmailBase): eq(decode_header(enc), [(g_head, "iso-8859-1"), (cz_head, "iso-8859-2"), (utf8_head, "utf-8")]) - # Test for conversion to unicode. BAW: Python 2.1 doesn't support the - # __unicode__() protocol, so do things this way for compatibility. 
- ustr = h.__unicode__() - # For Python 2.2 and beyond - #ustr = unicode(h) + ustr = unicode(h) eq(ustr.encode('utf-8'), 'Die Mieter treten hier ein werden mit einem Foerderband ' 'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen ' @@ -2956,6 +2875,15 @@ Content-Type: text/plain; self.assertEqual(msg.get_content_charset(), 'this is even more ***fun*** is it not.pdf') + def test_rfc2231_unknown_encoding(self): + m = """\ +Content-Transfer-Encoding: 8bit +Content-Disposition: inline; filename*0=X-UNKNOWN''myfile.txt + +""" + msg = email.message_from_string(m) + self.assertEqual(msg.get_filename(), 'myfile.txt') + def _testclasses(): diff --git a/Misc/NEWS b/Misc/NEWS index 448ef29..cf7a321 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -34,6 +34,19 @@ Extension modules Library ------- +- Updates for the email package: + + All deprecated APIs that in email 2.x issued warnings have been removed: + _encoder argument to the MIMEText constructor, Message.add_payload(), + Utils.dump_address_pair(), Utils.decode(), Utils.encode() + + New deprecations: Generator.__call__(), Message.get_type(), + Message.get_main_type(), Message.get_subtype(), the 'strict' argument to + the Parser constructor. These will be removed in email 3.1. + + Support for Python earlier than 2.3 has been removed (see PEP 291). + + All defect classes have been renamed to end in 'Defect'. + + Some FeedParser fixes; also a MultipartInvariantViolationDefect will be + added to messages that claim to be multipart but really aren't. + + Updates to documentation. + - re's findall() and finditer() functions now take an optional flags argument just like the compile(), search(), and match() functions. Also, documented the previously existing start and stop parameters for the findall() and -- cgit v0.12