diff options
25 files changed, 2065 insertions, 1657 deletions
diff --git a/Doc/includes/email-alternative-new-api.py b/Doc/includes/email-alternative-new-api.py deleted file mode 100644 index 321f727..0000000 --- a/Doc/includes/email-alternative-new-api.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python3 - -import smtplib - -from email.message import EmailMessage -from email.headerregistry import Address -from email.utils import make_msgid - -# Create the base text message. -msg = EmailMessage() -msg['Subject'] = "Ayons asperges pour le déjeuner" -msg['From'] = Address("Pepé Le Pew", "pepe", "example.com") -msg['To'] = (Address("Penelope Pussycat", "penelope", "example.com"), - Address("Fabrette Pussycat", "fabrette", "example.com")) -msg.set_content("""\ -Salut! - -Cela ressemble à un excellent recipie[1] déjeuner. - -[1] http://www.yummly.com/recipe/Roasted-Asparagus-Epicurious-203718 - ---Pepé -""") - -# Add the html version. This converts the message into a multipart/alternative -# container, with the original text message as the first part and the new html -# message as the second part. -asparagus_cid = make_msgid() -msg.add_alternative("""\ -<html> - <head></head> - <body> - <p>Salut!<\p> - <p>Cela ressemble à un excellent - <a href="http://www.yummly.com/recipe/Roasted-Asparagus-Epicurious-203718> - recipie - </a> déjeuner. - </p> - <img src="cid:{asparagus_cid}" \> - </body> -</html> -""".format(asparagus_cid=asparagus_cid[1:-1]), subtype='html') -# note that we needed to peel the <> off the msgid for use in the html. - -# Now add the related image to the html part. -with open("roasted-asparagus.jpg", 'rb') as img: - msg.get_payload()[1].add_related(img.read(), 'image', 'jpeg', - cid=asparagus_cid) - -# Make a local copy of what we are going to send. -with open('outgoing.msg', 'wb') as f: - f.write(bytes(msg)) - -# Send the message via local SMTP server. 
-with smtplib.SMTP('localhost') as s: - s.send_message(msg) diff --git a/Doc/includes/email-alternative.py b/Doc/includes/email-alternative.py index 85070f3..321f727 100755..100644 --- a/Doc/includes/email-alternative.py +++ b/Doc/includes/email-alternative.py @@ -2,47 +2,55 @@ import smtplib -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText - -# me == my email address -# you == recipient's email address -me = "my@email.com" -you = "your@email.com" - -# Create message container - the correct MIME type is multipart/alternative. -msg = MIMEMultipart('alternative') -msg['Subject'] = "Link" -msg['From'] = me -msg['To'] = you - -# Create the body of the message (a plain-text and an HTML version). -text = "Hi!\nHow are you?\nHere is the link you wanted:\nhttps://www.python.org" -html = """\ +from email.message import EmailMessage +from email.headerregistry import Address +from email.utils import make_msgid + +# Create the base text message. +msg = EmailMessage() +msg['Subject'] = "Ayons asperges pour le déjeuner" +msg['From'] = Address("Pepé Le Pew", "pepe", "example.com") +msg['To'] = (Address("Penelope Pussycat", "penelope", "example.com"), + Address("Fabrette Pussycat", "fabrette", "example.com")) +msg.set_content("""\ +Salut! + +Cela ressemble à un excellent recipie[1] déjeuner. + +[1] http://www.yummly.com/recipe/Roasted-Asparagus-Epicurious-203718 + +--Pepé +""") + +# Add the html version. This converts the message into a multipart/alternative +# container, with the original text message as the first part and the new html +# message as the second part. +asparagus_cid = make_msgid() +msg.add_alternative("""\ <html> <head></head> <body> - <p>Hi!<br> - How are you?<br> - Here is the <a href="https://www.python.org">link</a> you wanted. + <p>Salut!<\p> + <p>Cela ressemble à un excellent + <a href="http://www.yummly.com/recipe/Roasted-Asparagus-Epicurious-203718> + recipie + </a> déjeuner. 
</p> + <img src="cid:{asparagus_cid}" \> </body> </html> -""" +""".format(asparagus_cid=asparagus_cid[1:-1]), subtype='html') +# note that we needed to peel the <> off the msgid for use in the html. -# Record the MIME types of both parts - text/plain and text/html. -part1 = MIMEText(text, 'plain') -part2 = MIMEText(html, 'html') +# Now add the related image to the html part. +with open("roasted-asparagus.jpg", 'rb') as img: + msg.get_payload()[1].add_related(img.read(), 'image', 'jpeg', + cid=asparagus_cid) -# Attach parts into message container. -# According to RFC 2046, the last part of a multipart message, in this case -# the HTML message, is best and preferred. -msg.attach(part1) -msg.attach(part2) +# Make a local copy of what we are going to send. +with open('outgoing.msg', 'wb') as f: + f.write(bytes(msg)) # Send the message via local SMTP server. -s = smtplib.SMTP('localhost') -# sendmail function takes 3 arguments: sender's address, recipient's address -# and message to send - here it is sent as one string. -s.sendmail(me, you, msg.as_string()) -s.quit() +with smtplib.SMTP('localhost') as s: + s.send_message(msg) diff --git a/Doc/includes/email-dir.py b/Doc/includes/email-dir.py index 3c7c770..0dcfbfb 100644 --- a/Doc/includes/email-dir.py +++ b/Doc/includes/email-dir.py @@ -3,22 +3,14 @@ """Send the contents of a directory as a MIME message.""" import os -import sys import smtplib # For guessing MIME type based on file name extension import mimetypes from argparse import ArgumentParser -from email import encoders -from email.message import Message -from email.mime.audio import MIMEAudio -from email.mime.base import MIMEBase -from email.mime.image import MIMEImage -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText - -COMMASPACE = ', ' +from email.message import EmailMessage +from email.policy import SMTP def main(): @@ -47,12 +39,12 @@ must be running an SMTP server. 
directory = args.directory if not directory: directory = '.' - # Create the enclosing (outer) message - outer = MIMEMultipart() - outer['Subject'] = 'Contents of directory %s' % os.path.abspath(directory) - outer['To'] = COMMASPACE.join(args.recipients) - outer['From'] = args.sender - outer.preamble = 'You will not see this in a MIME-aware mail reader.\n' + # Create the message + msg = EmailMessage() + msg['Subject'] = 'Contents of directory %s' % os.path.abspath(directory) + msg['To'] = ', '.join(args.recipients) + msg['From'] = args.sender + msg.preamble = 'You will not see this in a MIME-aware mail reader.\n' for filename in os.listdir(directory): path = os.path.join(directory, filename) @@ -67,33 +59,18 @@ must be running an SMTP server. # use a generic bag-of-bits type. ctype = 'application/octet-stream' maintype, subtype = ctype.split('/', 1) - if maintype == 'text': - with open(path) as fp: - # Note: we should handle calculating the charset - msg = MIMEText(fp.read(), _subtype=subtype) - elif maintype == 'image': - with open(path, 'rb') as fp: - msg = MIMEImage(fp.read(), _subtype=subtype) - elif maintype == 'audio': - with open(path, 'rb') as fp: - msg = MIMEAudio(fp.read(), _subtype=subtype) - else: - with open(path, 'rb') as fp: - msg = MIMEBase(maintype, subtype) - msg.set_payload(fp.read()) - # Encode the payload using Base64 - encoders.encode_base64(msg) - # Set the filename parameter - msg.add_header('Content-Disposition', 'attachment', filename=filename) - outer.attach(msg) + with open(path, 'rb') as fp: + msg.add_attachment(fp.read(), + maintype=maintype, + subtype=subtype, + filename=filename) # Now send or store the message - composed = outer.as_string() if args.output: - with open(args.output, 'w') as fp: - fp.write(composed) + with open(args.output, 'wb') as fp: + fp.write(msg.as_bytes(policy=SMTP)) else: with smtplib.SMTP('localhost') as s: - s.sendmail(args.sender, args.recipients, composed) + s.send_message(msg) if __name__ == '__main__': 
diff --git a/Doc/includes/email-headers.py b/Doc/includes/email-headers.py index 89c8f3a..2c42145 100644 --- a/Doc/includes/email-headers.py +++ b/Doc/includes/email-headers.py @@ -1,18 +1,24 @@ # Import the email modules we'll need -from email.parser import Parser +from email.parser import BytesParser, Parser +from email.policy import default # If the e-mail headers are in a file, uncomment these two lines: -# with open(messagefile) as fp: -# headers = Parser().parse(fp) +# with open(messagefile, 'rb') as fp: +# headers = BytesParser(policy=default).parse(fp) -# Or for parsing headers in a string, use: -headers = Parser().parsestr('From: <user@example.com>\n' +# Or for parsing headers in a string (this is an uncommon operation), use: +headers = Parser(policy=default).parsestr( + 'From: Foo Bar <user@example.com>\n' 'To: <someone_else@example.com>\n' 'Subject: Test message\n' '\n' 'Body would go here\n') # Now the header items can be accessed as a dictionary: -print('To: %s' % headers['to']) -print('From: %s' % headers['from']) -print('Subject: %s' % headers['subject']) +print('To: {}'.format(headers['to'])) +print('From: {}'.format(headers['from'])) +print('Subject: {}'.format(headers['subject'])) + +# You can also access the parts of the addresses: +print('Recipient username: {}'.format(headers['to'].addresses[0].username)) +print('Sender name: {}'.format(headers['from'].addresses[0].display_name)) diff --git a/Doc/includes/email-mime.py b/Doc/includes/email-mime.py index 61d0830..c610242 100644 --- a/Doc/includes/email-mime.py +++ b/Doc/includes/email-mime.py @@ -1,30 +1,29 @@ # Import smtplib for the actual sending function import smtplib -# Here are the email package modules we'll need -from email.mime.image import MIMEImage -from email.mime.multipart import MIMEMultipart +# And imghdr to find the types of our images +import imghdr -COMMASPACE = ', ' +# Here are the email package modules we'll need +from email.message import EmailMessage -# Create the 
container (outer) email message. -msg = MIMEMultipart() +# Create the container email message. +msg = EmailMessage() msg['Subject'] = 'Our family reunion' # me == the sender's email address # family = the list of all recipients' email addresses msg['From'] = me -msg['To'] = COMMASPACE.join(family) +msg['To'] = ', '.join(family) msg.preamble = 'Our family reunion' -# Assume we know that the image files are all in PNG format +# Open the files in binary mode. Use imghdr to figure out the +# MIME subtype for each specific image. for file in pngfiles: - # Open the files in binary mode. Let the MIMEImage class automatically - # guess the specific image type. with open(file, 'rb') as fp: - img = MIMEImage(fp.read()) - msg.attach(img) + img_data = fp.read() + msg.add_attachment(img_data, maintype='image', + subtype=imghdr.what(None, img_data)) # Send the email via our own SMTP server. -s = smtplib.SMTP('localhost') -s.send_message(msg) -s.quit() +with smtplib.SMTP('localhost') as s: + s.send_message(msg) diff --git a/Doc/includes/email-read-alternative-new-api.py b/Doc/includes/email-read-alternative.py index 3f5ab24..3f5ab24 100644 --- a/Doc/includes/email-read-alternative-new-api.py +++ b/Doc/includes/email-read-alternative.py diff --git a/Doc/includes/email-simple.py b/Doc/includes/email-simple.py index b9b8b41..f69ef40 100644 --- a/Doc/includes/email-simple.py +++ b/Doc/includes/email-simple.py @@ -2,13 +2,13 @@ import smtplib # Import the email modules we'll need -from email.mime.text import MIMEText +from email.message import EmailMessage -# Open a plain text file for reading. For this example, assume that -# the text file contains only ASCII characters. +# Open the plain text file whose name is in textfile for reading. 
with open(textfile) as fp: # Create a text/plain message - msg = MIMEText(fp.read()) + msg = EmailMessage() + msg.set_content(fp.read()) # me == the sender's email address # you == the recipient's email address diff --git a/Doc/includes/email-unpack.py b/Doc/includes/email-unpack.py index 574a0b6..e0a7f01 100644 --- a/Doc/includes/email-unpack.py +++ b/Doc/includes/email-unpack.py @@ -3,11 +3,11 @@ """Unpack a MIME message into a directory of files.""" import os -import sys import email -import errno import mimetypes +from email.policy import default + from argparse import ArgumentParser @@ -22,8 +22,8 @@ Unpack a MIME message into a directory of files. parser.add_argument('msgfile') args = parser.parse_args() - with open(args.msgfile) as fp: - msg = email.message_from_file(fp) + with open(args.msgfile, 'rb') as fp: + msg = email.message_from_binary_file(fp, policy=default) try: os.mkdir(args.directory) diff --git a/Doc/library/email.charset.rst b/Doc/library/email.charset.rst index 161d86a..053463f 100644 --- a/Doc/library/email.charset.rst +++ b/Doc/library/email.charset.rst @@ -8,6 +8,11 @@ -------------- +This module is part of the legacy (``Compat32``) email API. In the new +API only the aliases table is used. + +The remaining text in this section is the original documentation of the module. + This module provides a class :class:`Charset` for representing character sets and character set conversions in email messages, as well as a character set registry and several convenience methods for manipulating this registry. diff --git a/Doc/library/email.compat32-message.rst b/Doc/library/email.compat32-message.rst new file mode 100644 index 0000000..2c65079 --- /dev/null +++ b/Doc/library/email.compat32-message.rst @@ -0,0 +1,754 @@ +.. 
_compat32_message: + +:mod:`email.message.Message`: Representing an email message using the :data:`~email.policy.compat32` API +-------------------------------------------------------------------------------------------------------- + +.. module:: email.message + :synopsis: The base class representing email messages in a fashion + backward compatible with python3.2 + + +The :class:`Message` class is very similar to the +:class:`~email.message.EmailMessage` class, without the methods added by that +class, and with the default behavior of certain other methods being slightly +different. We also document here some methods that, while supported by the +:class:`~email.message.EmailMessage` class, are not recommended unless you are +dealing with legacy code. + +The philosophy and structure of the two classes is otherwise the same. + +This document describes the behavior under the default (for :class:`Message`) +policy :attr:`~email.policy.Compat32`. If you are going to use another policy, +you should be using the :class:`~email.message.EmailMessage` class instead. + +An email message consists of *headers* and a *payload*. Headers must be +:rfc:`5322` style names and values, where the field name and value are +separated by a colon. The colon is not part of either the field name or the +field value. The payload may be a simple text message, or a binary object, or +a structured sequence of sub-messages each with their own set of headers and +their own payload. The latter type of payload is indicated by the message +having a MIME type such as :mimetype:`multipart/\*` or +:mimetype:`message/rfc822`. + +The conceptual model provided by a :class:`Message` object is that of an +ordered dictionary of headers with additional methods for accessing both +specialized information from the headers, for accessing the payload, for +generating a serialized version of the message, and for recursively walking over +the object tree. 
Note that duplicate headers are supported but special methods +must be used to access them. + +The :class:`Message` pseudo-dictionary is indexed by the header names, which +must be ASCII values. The values of the dictionary are strings that are +supposed to contain only ASCII characters; there is some special handling for +non-ASCII input, but it doesn't always produce the correct results. Headers +are stored and returned in case-preserving form, but field names are matched +case-insensitively. There may also be a single envelope header, also known as +the *Unix-From* header or the ``From_`` header. The *payload* is either a +string or bytes, in the case of simple message objects, or a list of +:class:`Message` objects, for MIME container documents (e.g. +:mimetype:`multipart/\*` and :mimetype:`message/rfc822`). + +Here are the methods of the :class:`Message` class: + + +.. class:: Message(policy=compat32) + + If *policy* is specified (it must be an instance of a :mod:`~email.policy` + class) use the rules it specifies to update and serialize the representation + of the message. If *policy* is not set, use the :class:`compat32 + <email.policy.Compat32>` policy, which maintains backward compatibility with + the Python 3.2 version of the email package. For more information see the + :mod:`~email.policy` documentation. + + .. versionchanged:: 3.3 The *policy* keyword argument was added. + + + .. method:: as_string(unixfrom=False, maxheaderlen=0, policy=None) + + Return the entire message flattened as a string. When optional *unixfrom* + is true, the envelope header is included in the returned string. + *unixfrom* defaults to ``False``. For backward compatibility reasons, + *maxheaderlen* defaults to ``0``, so if you want a different value you + must override it explicitly (the value specified for *max_line_length* in + the policy will be ignored by this method). The *policy* argument may be + used to override the default policy obtained from the message instance. 
+ This can be used to control some of the formatting produced by the + method, since the specified *policy* will be passed to the ``Generator``. + + Flattening the message may trigger changes to the :class:`Message` if + defaults need to be filled in to complete the transformation to a string + (for example, MIME boundaries may be generated or modified). + + Note that this method is provided as a convenience and may not always + format the message the way you want. For example, by default it does + not do the mangling of lines that begin with ``From`` that is + required by the unix mbox format. For more flexibility, instantiate a + :class:`~email.generator.Generator` instance and use its + :meth:`~email.generator.Generator.flatten` method directly. For example:: + + from io import StringIO + from email.generator import Generator + fp = StringIO() + g = Generator(fp, mangle_from_=True, maxheaderlen=60) + g.flatten(msg) + text = fp.getvalue() + + If the message object contains binary data that is not encoded according + to RFC standards, the non-compliant data will be replaced by unicode + "unknown character" code points. (See also :meth:`.as_bytes` and + :class:`~email.generator.BytesGenerator`.) + + .. versionchanged:: 3.4 the *policy* keyword argument was added. + + + .. method:: __str__() + + Equivalent to :meth:`.as_string()`. Allows ``str(msg)`` to produce a + string containing the formatted message. + + + .. method:: as_bytes(unixfrom=False, policy=None) + + Return the entire message flattened as a bytes object. When optional + *unixfrom* is true, the envelope header is included in the returned + string. *unixfrom* defaults to ``False``. The *policy* argument may be + used to override the default policy obtained from the message instance. + This can be used to control some of the formatting produced by the + method, since the specified *policy* will be passed to the + ``BytesGenerator``. 
+ + Flattening the message may trigger changes to the :class:`Message` if + defaults need to be filled in to complete the transformation to a string + (for example, MIME boundaries may be generated or modified). + + Note that this method is provided as a convenience and may not always + format the message the way you want. For example, by default it does + not do the mangling of lines that begin with ``From`` that is + required by the unix mbox format. For more flexibility, instantiate a + :class:`~email.generator.BytesGenerator` instance and use its + :meth:`~email.generator.BytesGenerator.flatten` method directly. + For example:: + + from io import BytesIO + from email.generator import BytesGenerator + fp = BytesIO() + g = BytesGenerator(fp, mangle_from_=True, maxheaderlen=60) + g.flatten(msg) + text = fp.getvalue() + + .. versionadded:: 3.4 + + + .. method:: __bytes__() + + Equivalent to :meth:`.as_bytes()`. Allows ``bytes(msg)`` to produce a + bytes object containing the formatted message. + + .. versionadded:: 3.4 + + + .. method:: is_multipart() + + Return ``True`` if the message's payload is a list of sub-\ + :class:`Message` objects, otherwise return ``False``. When + :meth:`is_multipart` returns ``False``, the payload should be a string + object (which might be a CTE encoded binary payload). (Note that + :meth:`is_multipart` returning ``True`` does not necessarily mean that + "msg.get_content_maintype() == 'multipart'" will return ``True``. + For example, ``is_multipart`` will return ``True`` when the + :class:`Message` is of type ``message/rfc822``.) + + + .. method:: set_unixfrom(unixfrom) + + Set the message's envelope header to *unixfrom*, which should be a string. + + + .. method:: get_unixfrom() + + Return the message's envelope header. Defaults to ``None`` if the + envelope header was never set. + + + .. 
method:: attach(payload) + + Add the given *payload* to the current payload, which must be ``None`` or + a list of :class:`Message` objects before the call. After the call, the + payload will always be a list of :class:`Message` objects. If you want to + set the payload to a scalar object (e.g. a string), use + :meth:`set_payload` instead. + + This is a legacy method. On the + :class:`~email.emailmessage.EmailMessage` class its functionality is + replaced by :meth:`~email.message.EmailMessage.set_content` and the + related ``make`` and ``add`` methods. + + + .. method:: get_payload(i=None, decode=False) + + Return the current payload, which will be a list of + :class:`Message` objects when :meth:`is_multipart` is ``True``, or a + string when :meth:`is_multipart` is ``False``. If the payload is a list + and you mutate the list object, you modify the message's payload in place. + + With optional argument *i*, :meth:`get_payload` will return the *i*-th + element of the payload, counting from zero, if :meth:`is_multipart` is + ``True``. An :exc:`IndexError` will be raised if *i* is less than 0 or + greater than or equal to the number of items in the payload. If the + payload is a string (i.e. :meth:`is_multipart` is ``False``) and *i* is + given, a :exc:`TypeError` is raised. + + Optional *decode* is a flag indicating whether the payload should be + decoded or not, according to the :mailheader:`Content-Transfer-Encoding` + header. When ``True`` and the message is not a multipart, the payload will + be decoded if this header's value is ``quoted-printable`` or ``base64``. + If some other encoding is used, or :mailheader:`Content-Transfer-Encoding` + header is missing, the payload is + returned as-is (undecoded). In all cases the returned value is binary + data. If the message is a multipart and the *decode* flag is ``True``, + then ``None`` is returned. 
If the payload is base64 and it was not + perfectly formed (missing padding, characters outside the base64 + alphabet), then an appropriate defect will be added to the message's + defect property (:class:`~email.errors.InvalidBase64PaddingDefect` or + :class:`~email.errors.InvalidBase64CharactersDefect`, respectively). + + When *decode* is ``False`` (the default) the body is returned as a string + without decoding the :mailheader:`Content-Transfer-Encoding`. However, + for a :mailheader:`Content-Transfer-Encoding` of 8bit, an attempt is made + to decode the original bytes using the ``charset`` specified by the + :mailheader:`Content-Type` header, using the ``replace`` error handler. + If no ``charset`` is specified, or if the ``charset`` given is not + recognized by the email package, the body is decoded using the default + ASCII charset. + + This is a legacy method. On the + :class:`~email.emailmessage.EmailMessage` class its functionality is + replaced by :meth:`~email.message.EmailMessage.get_content` and + :meth:`~email.message.EmailMessage.iter_parts`. + + + .. method:: set_payload(payload, charset=None) + + Set the entire message object's payload to *payload*. It is the client's + responsibility to ensure the payload invariants. Optional *charset* sets + the message's default character set; see :meth:`set_charset` for details. + + This is a legacy method. On the + :class:`~email.emailmessage.EmailMessage` class its functionality is + replaced by :meth:`~email.message.EmailMessage.set_content`. + + + .. method:: set_charset(charset) + + Set the character set of the payload to *charset*, which can either be a + :class:`~email.charset.Charset` instance (see :mod:`email.charset`), a + string naming a character set, or ``None``. If it is a string, it will + be converted to a :class:`~email.charset.Charset` instance. 
If *charset* + is ``None``, the ``charset`` parameter will be removed from the + :mailheader:`Content-Type` header (the message will not be otherwise + modified). Anything else will generate a :exc:`TypeError`. + + If there is no existing :mailheader:`MIME-Version` header one will be + added. If there is no existing :mailheader:`Content-Type` header, one + will be added with a value of :mimetype:`text/plain`. Whether the + :mailheader:`Content-Type` header already exists or not, its ``charset`` + parameter will be set to *charset.output_charset*. If + *charset.input_charset* and *charset.output_charset* differ, the payload + will be re-encoded to the *output_charset*. If there is no existing + :mailheader:`Content-Transfer-Encoding` header, then the payload will be + transfer-encoded, if needed, using the specified + :class:`~email.charset.Charset`, and a header with the appropriate value + will be added. If a :mailheader:`Content-Transfer-Encoding` header + already exists, the payload is assumed to already be correctly encoded + using that :mailheader:`Content-Transfer-Encoding` and is not modified. + + This is a legacy method. On the + :class:`~email.emailmessage.EmailMessage` class its functionality is + replaced by the *charset* parameter of the + :meth:`email.emailmessage.EmailMessage.set_content` method. + + + .. method:: get_charset() + + Return the :class:`~email.charset.Charset` instance associated with the + message's payload. + + This is a legacy method. On the + :class:`~email.emailmessage.EmailMessage` class it always returns + ``None``. + + + The following methods implement a mapping-like interface for accessing the + message's :rfc:`2822` headers. Note that there are some semantic differences + between these methods and a normal mapping (i.e. dictionary) interface. For + example, in a dictionary there are no duplicate keys, but here there may be + duplicate message headers. 
Also, in dictionaries there is no guaranteed + order to the keys returned by :meth:`keys`, but in a :class:`Message` object, + headers are always returned in the order they appeared in the original + message, or were added to the message later. Any header deleted and then + re-added is always appended to the end of the header list. + + These semantic differences are intentional and are biased toward maximal + convenience. + + Note that in all cases, any envelope header present in the message is not + included in the mapping interface. + + In a model generated from bytes, any header values that (in contravention of + the RFCs) contain non-ASCII bytes will, when retrieved through this + interface, be represented as :class:`~email.header.Header` objects with + a charset of ``unknown-8bit``. + + + .. method:: __len__() + + Return the total number of headers, including duplicates. + + + .. method:: __contains__(name) + + Return true if the message object has a field named *name*. Matching is + done case-insensitively and *name* should not include the trailing colon. + Used for the ``in`` operator, e.g.:: + + if 'message-id' in myMessage: + print('Message-ID:', myMessage['message-id']) + + + .. method:: __getitem__(name) + + Return the value of the named header field. *name* should not include the + colon field separator. If the header is missing, ``None`` is returned; a + :exc:`KeyError` is never raised. + + Note that if the named field appears more than once in the message's + headers, exactly which of those field values will be returned is + undefined. Use the :meth:`get_all` method to get the values of all the + extant named headers. + + + .. method:: __setitem__(name, val) + + Add a header to the message with field name *name* and value *val*. The + field is appended to the end of the message's existing fields. + + Note that this does *not* overwrite or delete any existing header with the same + name. 
If you want to ensure that the new header is the only one present in the + message with field name *name*, delete the field first, e.g.:: + + del msg['subject'] + msg['subject'] = 'Python roolz!' + + + .. method:: __delitem__(name) + + Delete all occurrences of the field with name *name* from the message's + headers. No exception is raised if the named field isn't present in the + headers. + + + .. method:: keys() + + Return a list of all the message's header field names. + + + .. method:: values() + + Return a list of all the message's field values. + + + .. method:: items() + + Return a list of 2-tuples containing all the message's field headers and + values. + + + .. method:: get(name, failobj=None) + + Return the value of the named header field. This is identical to + :meth:`__getitem__` except that optional *failobj* is returned if the + named header is missing (defaults to ``None``). + + Here are some additional useful methods: + + + .. method:: get_all(name, failobj=None) + + Return a list of all the values for the field named *name*. If there are + no such named headers in the message, *failobj* is returned (defaults to + ``None``). + + + .. method:: add_header(_name, _value, **_params) + + Extended header setting. This method is similar to :meth:`__setitem__` + except that additional header parameters can be provided as keyword + arguments. *_name* is the header field to add and *_value* is the + *primary* value for the header. + + For each item in the keyword argument dictionary *_params*, the key is + taken as the parameter name, with underscores converted to dashes (since + dashes are illegal in Python identifiers). Normally, the parameter will + be added as ``key="value"`` unless the value is ``None``, in which case + only the key will be added. 
If the value contains non-ASCII characters, + it can be specified as a three tuple in the format + ``(CHARSET, LANGUAGE, VALUE)``, where ``CHARSET`` is a string naming the + charset to be used to encode the value, ``LANGUAGE`` can usually be set + to ``None`` or the empty string (see :rfc:`2231` for other possibilities), + and ``VALUE`` is the string value containing non-ASCII code points. If + a three tuple is not passed and the value contains non-ASCII characters, + it is automatically encoded in :rfc:`2231` format using a ``CHARSET`` + of ``utf-8`` and a ``LANGUAGE`` of ``None``. + + Here's an example:: + + msg.add_header('Content-Disposition', 'attachment', filename='bud.gif') + + This will add a header that looks like :: + + Content-Disposition: attachment; filename="bud.gif" + + An example with non-ASCII characters:: + + msg.add_header('Content-Disposition', 'attachment', + filename=('iso-8859-1', '', 'Fußballer.ppt')) + + Which produces :: + + Content-Disposition: attachment; filename*="iso-8859-1''Fu%DFballer.ppt" + + + .. method:: replace_header(_name, _value) + + Replace a header. Replace the first header found in the message that + matches *_name*, retaining header order and field name case. If no + matching header was found, a :exc:`KeyError` is raised. + + + .. method:: get_content_type() + + Return the message's content type. The returned string is coerced to + lower case of the form :mimetype:`maintype/subtype`. If there was no + :mailheader:`Content-Type` header in the message the default type as given + by :meth:`get_default_type` will be returned. Since according to + :rfc:`2045`, messages always have a default type, :meth:`get_content_type` + will always return a value. + + :rfc:`2045` defines a message's default type to be :mimetype:`text/plain` + unless it appears inside a :mimetype:`multipart/digest` container, in + which case it would be :mimetype:`message/rfc822`. 
If the + :mailheader:`Content-Type` header has an invalid type specification, + :rfc:`2045` mandates that the default type be :mimetype:`text/plain`. + + + .. method:: get_content_maintype() + + Return the message's main content type. This is the :mimetype:`maintype` + part of the string returned by :meth:`get_content_type`. + + + .. method:: get_content_subtype() + + Return the message's sub-content type. This is the :mimetype:`subtype` + part of the string returned by :meth:`get_content_type`. + + + .. method:: get_default_type() + + Return the default content type. Most messages have a default content + type of :mimetype:`text/plain`, except for messages that are subparts of + :mimetype:`multipart/digest` containers. Such subparts have a default + content type of :mimetype:`message/rfc822`. + + + .. method:: set_default_type(ctype) + + Set the default content type. *ctype* should either be + :mimetype:`text/plain` or :mimetype:`message/rfc822`, although this is not + enforced. The default content type is not stored in the + :mailheader:`Content-Type` header. + + + .. method:: get_params(failobj=None, header='content-type', unquote=True) + + Return the message's :mailheader:`Content-Type` parameters, as a list. + The elements of the returned list are 2-tuples of key/value pairs, as + split on the ``'='`` sign. The left hand side of the ``'='`` is the key, + while the right hand side is the value. If there is no ``'='`` sign in + the parameter the value is the empty string, otherwise the value is as + described in :meth:`get_param` and is unquoted if optional *unquote* is + ``True`` (the default). + + Optional *failobj* is the object to return if there is no + :mailheader:`Content-Type` header. Optional *header* is the header to + search instead of :mailheader:`Content-Type`. + + This is a legacy method. 
On the + :class:`~email.message.EmailMessage` class its functionality is + replaced by the *params* property of the individual header objects + returned by the header access methods. + + + .. method:: get_param(param, failobj=None, header='content-type', unquote=True) + + Return the value of the :mailheader:`Content-Type` header's parameter + *param* as a string. If the message has no :mailheader:`Content-Type` + header or if there is no such parameter, then *failobj* is returned + (defaults to ``None``). + + Optional *header* if given, specifies the message header to use instead of + :mailheader:`Content-Type`. + + Parameter keys are always compared case insensitively. The return value + can either be a string, or a 3-tuple if the parameter was :rfc:`2231` + encoded. When it's a 3-tuple, the elements of the value are of the form + ``(CHARSET, LANGUAGE, VALUE)``. Note that both ``CHARSET`` and + ``LANGUAGE`` can be ``None``, in which case you should consider ``VALUE`` + to be encoded in the ``us-ascii`` charset. You can usually ignore + ``LANGUAGE``. + + If your application doesn't care whether the parameter was encoded as in + :rfc:`2231`, you can collapse the parameter value by calling + :func:`email.utils.collapse_rfc2231_value`, passing in the return value + from :meth:`get_param`. This will return a suitably decoded Unicode + string when the value is a tuple, or the original string unquoted if it + isn't. For example:: + + rawparam = msg.get_param('foo') + param = email.utils.collapse_rfc2231_value(rawparam) + + In any case, the parameter value (either the returned string, or the + ``VALUE`` item in the 3-tuple) is always unquoted, unless *unquote* is set + to ``False``. + + This is a legacy method. On the + :class:`~email.message.EmailMessage` class its functionality is + replaced by the *params* property of the individual header objects + returned by the header access methods. + + + .. 
method:: set_param(param, value, header='Content-Type', requote=True, \ + charset=None, language='', replace=False) + + Set a parameter in the :mailheader:`Content-Type` header. If the + parameter already exists in the header, its value will be replaced with + *value*. If the :mailheader:`Content-Type` header has not yet been defined + for this message, it will be set to :mimetype:`text/plain` and the new + parameter value will be appended as per :rfc:`2045`. + + Optional *header* specifies an alternative header to + :mailheader:`Content-Type`, and all parameters will be quoted as necessary + unless optional *requote* is ``False`` (the default is ``True``). + + If optional *charset* is specified, the parameter will be encoded + according to :rfc:`2231`. Optional *language* specifies the RFC 2231 + language, defaulting to the empty string. Both *charset* and *language* + should be strings. + + If *replace* is ``False`` (the default) the header is moved to the + end of the list of headers. If *replace* is ``True``, the header + will be updated in place. + + .. versionchanged:: 3.4 ``replace`` keyword was added. + + + .. method:: del_param(param, header='content-type', requote=True) + + Remove the given parameter completely from the :mailheader:`Content-Type` + header. The header will be re-written in place without the parameter or + its value. All values will be quoted as necessary unless *requote* is + ``False`` (the default is ``True``). Optional *header* specifies an + alternative to :mailheader:`Content-Type`. + + + .. method:: set_type(type, header='Content-Type', requote=True) + + Set the main type and subtype for the :mailheader:`Content-Type` + header. *type* must be a string in the form :mimetype:`maintype/subtype`, + otherwise a :exc:`ValueError` is raised. + + This method replaces the :mailheader:`Content-Type` header, keeping all + the parameters in place. 
If *requote* is ``False``, this leaves the + existing header's quoting as is, otherwise the parameters will be quoted + (the default). + + An alternative header can be specified in the *header* argument. When the + :mailheader:`Content-Type` header is set a :mailheader:`MIME-Version` + header is also added. + + This is a legacy method. On the + :class:`~email.message.EmailMessage` class its functionality is + replaced by the ``make_`` and ``add_`` methods. + + + .. method:: get_filename(failobj=None) + + Return the value of the ``filename`` parameter of the + :mailheader:`Content-Disposition` header of the message. If the header + does not have a ``filename`` parameter, this method falls back to looking + for the ``name`` parameter on the :mailheader:`Content-Type` header. If + neither is found, or the header is missing, then *failobj* is returned. + The returned string will always be unquoted as per + :func:`email.utils.unquote`. + + + .. method:: get_boundary(failobj=None) + + Return the value of the ``boundary`` parameter of the + :mailheader:`Content-Type` header of the message, or *failobj* if either + the header is missing, or has no ``boundary`` parameter. The returned + string will always be unquoted as per :func:`email.utils.unquote`. + + + .. method:: set_boundary(boundary) + + Set the ``boundary`` parameter of the :mailheader:`Content-Type` header to + *boundary*. :meth:`set_boundary` will always quote *boundary* if + necessary. A :exc:`~email.errors.HeaderParseError` is raised if the + message object has no :mailheader:`Content-Type` header. + + Note that using this method is subtly different than deleting the old + :mailheader:`Content-Type` header and adding a new one with the new + boundary via :meth:`add_header`, because :meth:`set_boundary` preserves + the order of the :mailheader:`Content-Type` header in the list of + headers. 
However, it does *not* preserve any continuation lines which may + have been present in the original :mailheader:`Content-Type` header. + + + .. method:: get_content_charset(failobj=None) + + Return the ``charset`` parameter of the :mailheader:`Content-Type` header, + coerced to lower case. If there is no :mailheader:`Content-Type` header, or if + that header has no ``charset`` parameter, *failobj* is returned. + + Note that this method differs from :meth:`get_charset` which returns the + :class:`~email.charset.Charset` instance for the default encoding of the message body. + + + .. method:: get_charsets(failobj=None) + + Return a list containing the character set names in the message. If the + message is a :mimetype:`multipart`, then the list will contain one element + for each subpart in the payload, otherwise, it will be a list of length 1. + + Each item in the list will be a string which is the value of the + ``charset`` parameter in the :mailheader:`Content-Type` header for the + represented subpart. However, if the subpart has no + :mailheader:`Content-Type` header, no ``charset`` parameter, or is not of + the :mimetype:`text` main MIME type, then that item in the returned list + will be *failobj*. + + + .. method:: get_content_disposition() + + Return the lowercased value (without parameters) of the message's + :mailheader:`Content-Disposition` header if it has one, or ``None``. The + possible values for this method are *inline*, *attachment* or ``None`` + if the message follows :rfc:`2183`. + + .. versionadded:: 3.5 + + .. method:: walk() + + The :meth:`walk` method is an all-purpose generator which can be used to + iterate over all the parts and subparts of a message object tree, in + depth-first traversal order. You will typically use :meth:`walk` as the + iterator in a ``for`` loop; each iteration returns the next subpart. + + Here's an example that prints the MIME type of every part of a multipart + message structure: + + .. 
testsetup:: + + >>> from email import message_from_binary_file + >>> with open('Lib/test/test_email/data/msg_16.txt', 'rb') as f: + ... msg = message_from_binary_file(f) + >>> from email.iterators import _structure + + .. doctest:: + + >>> for part in msg.walk(): + ... print(part.get_content_type()) + multipart/report + text/plain + message/delivery-status + text/plain + text/plain + message/rfc822 + text/plain + + ``walk`` iterates over the subparts of any part where + :meth:`is_multipart` returns ``True``, even though + ``msg.get_content_maintype() == 'multipart'`` may return ``False``. We + can see this in our example by making use of the ``_structure`` debug + helper function: + + .. doctest:: + + >>> for part in msg.walk(): + ... print(part.get_content_maintype() == 'multipart', + ... part.is_multipart()) + True True + False False + False True + False False + False False + False True + False False + >>> _structure(msg) + multipart/report + text/plain + message/delivery-status + text/plain + text/plain + message/rfc822 + text/plain + + Here the ``message`` parts are not ``multiparts``, but they do contain + subparts. ``is_multipart()`` returns ``True`` and ``walk`` descends + into the subparts. + + + :class:`Message` objects can also optionally contain two instance attributes, + which can be used when generating the plain text of a MIME message. + + + .. attribute:: preamble + + The format of a MIME document allows for some text between the blank line + following the headers, and the first multipart boundary string. Normally, + this text is never visible in a MIME-aware mail reader because it falls + outside the standard MIME armor. However, when viewing the raw text of + the message, or when viewing the message in a non-MIME aware reader, this + text can become visible. + + The *preamble* attribute contains this leading extra-armor text for MIME + documents. 
When the :class:`~email.parser.Parser` discovers some text + after the headers but before the first boundary string, it assigns this + text to the message's *preamble* attribute. When the + :class:`~email.generator.Generator` is writing out the plain text + representation of a MIME message, and it finds the + message has a *preamble* attribute, it will write this text in the area + between the headers and the first boundary. See :mod:`email.parser` and + :mod:`email.generator` for details. + + Note that if the message object has no preamble, the *preamble* attribute + will be ``None``. + + + .. attribute:: epilogue + + The *epilogue* attribute acts the same way as the *preamble* attribute, + except that it contains text that appears between the last boundary and + the end of the message. + + You do not need to set the epilogue to the empty string in order for the + :class:`~email.generator.Generator` to print a newline at the end of the + file. + + + .. attribute:: defects + + The *defects* attribute contains a list of all the problems found when + parsing this message. See :mod:`email.errors` for a detailed description + of the possible parsing defects. diff --git a/Doc/library/email.contentmanager.rst b/Doc/library/email.contentmanager.rst index a9c078b..c1b103e 100644 --- a/Doc/library/email.contentmanager.rst +++ b/Doc/library/email.contentmanager.rst @@ -7,251 +7,14 @@ .. moduleauthor:: R. David Murray <rdmurray@bitdance.com> .. sectionauthor:: R. David Murray <rdmurray@bitdance.com> -.. versionadded:: 3.4 - as a :term:`provisional module <provisional package>`. - **Source code:** :source:`Lib/email/contentmanager.py` -.. note:: - - The contentmanager module has been included in the standard library on a - :term:`provisional basis <provisional package>`. Backwards incompatible - changes (up to and including removal of the module) may occur if deemed - necessary by the core developers. 
- --------------- - -The :mod:`~email.message` module provides a class that can represent an -arbitrary email message. That basic message model has a useful and flexible -API, but it provides only a lower-level API for interacting with the generic -parts of a message (the headers, generic header parameters, and the payload, -which may be a list of sub-parts). This module provides classes and tools -that provide an enhanced and extensible API for dealing with various specific -types of content, including the ability to retrieve the content of the message -as a specialized object type rather than as a simple bytes object. The module -automatically takes care of the RFC-specified MIME details (required headers -and parameters, etc.) for the certain common content types content properties, -and support for additional types can be added by an application using the -extension mechanisms. - -This module defines the eponymous "Content Manager" classes. The base -:class:`.ContentManager` class defines an API for registering content -management functions which extract data from ``Message`` objects or insert data -and headers into ``Message`` objects, thus providing a way of converting -between ``Message`` objects containing data and other representations of that -data (Python data types, specialized Python objects, external files, etc). The -module also defines one concrete content manager: :data:`raw_data_manager` -converts between MIME content types and ``str`` or ``bytes`` data. It also -provides a convenient API for managing the MIME parameters when inserting -content into ``Message``\ s. It also handles inserting and extracting -``Message`` objects when dealing with the ``message/rfc822`` content type. - -Another part of the enhanced interface is subclasses of -:class:`~email.message.Message` that provide new convenience API functions, -including convenience methods for calling the Content Managers derived from -this module. - -.. 
note:: - - Although :class:`.EmailMessage` and :class:`.MIMEPart` are currently - documented in this module because of the provisional nature of the code, the - implementation lives in the :mod:`email.message` module. - -.. currentmodule:: email.message - -.. class:: EmailMessage(policy=default) - - If *policy* is specified (it must be an instance of a :mod:`~email.policy` - class) use the rules it specifies to udpate and serialize the representation - of the message. If *policy* is not set, use the - :class:`~email.policy.default` policy, which follows the rules of the email - RFCs except for line endings (instead of the RFC mandated ``\r\n``, it uses - the Python standard ``\n`` line endings). For more information see the - :mod:`~email.policy` documentation. - - This class is a subclass of :class:`~email.message.Message`. It adds - the following methods: - - - .. method:: is_attachment - - Return ``True`` if there is a :mailheader:`Content-Disposition` header - and its (case insensitive) value is ``attachment``, ``False`` otherwise. - - .. versionchanged:: 3.4.2 - is_attachment is now a method instead of a property, for consistency - with :meth:`~email.message.Message.is_multipart`. - - - .. method:: get_body(preferencelist=('related', 'html', 'plain')) - - Return the MIME part that is the best candidate to be the "body" of the - message. - - *preferencelist* must be a sequence of strings from the set ``related``, - ``html``, and ``plain``, and indicates the order of preference for the - content type of the part returned. - - Start looking for candidate matches with the object on which the - ``get_body`` method is called. - - If ``related`` is not included in *preferencelist*, consider the root - part (or subpart of the root part) of any related encountered as a - candidate if the (sub-)part matches a preference. 
- - When encountering a ``multipart/related``, check the ``start`` parameter - and if a part with a matching :mailheader:`Content-ID` is found, consider - only it when looking for candidate matches. Otherwise consider only the - first (default root) part of the ``multipart/related``. - - If a part has a :mailheader:`Content-Disposition` header, only consider - the part a candidate match if the value of the header is ``inline``. - - If none of the candidates matches any of the preferences in - *preferneclist*, return ``None``. - - Notes: (1) For most applications the only *preferencelist* combinations - that really make sense are ``('plain',)``, ``('html', 'plain')``, and the - default, ``('related', 'html', 'plain')``. (2) Because matching starts - with the object on which ``get_body`` is called, calling ``get_body`` on - a ``multipart/related`` will return the object itself unless - *preferencelist* has a non-default value. (3) Messages (or message parts) - that do not specify a :mailheader:`Content-Type` or whose - :mailheader:`Content-Type` header is invalid will be treated as if they - are of type ``text/plain``, which may occasionally cause ``get_body`` to - return unexpected results. - - - .. method:: iter_attachments() - - Return an iterator over all of the parts of the message that are not - candidate "body" parts. That is, skip the first occurrence of each of - ``text/plain``, ``text/html``, ``multipart/related``, or - ``multipart/alternative`` (unless they are explicitly marked as - attachments via :mailheader:`Content-Disposition: attachment`), and - return all remaining parts. When applied directly to a - ``multipart/related``, return an iterator over the all the related parts - except the root part (ie: the part pointed to by the ``start`` parameter, - or the first part if there is no ``start`` parameter or the ``start`` - parameter doesn't match the :mailheader:`Content-ID` of any of the - parts). 
When applied directly to a ``multipart/alternative`` or a - non-``multipart``, return an empty iterator. - - - .. method:: iter_parts() - - Return an iterator over all of the immediate sub-parts of the message, - which will be empty for a non-``multipart``. (See also - :meth:`~email.message.walk`.) - - - .. method:: get_content(*args, content_manager=None, **kw) - - Call the ``get_content`` method of the *content_manager*, passing self - as the message object, and passing along any other arguments or keywords - as additional arguments. If *content_manager* is not specified, use - the ``content_manager`` specified by the current :mod:`~email.policy`. - - - .. method:: set_content(*args, content_manager=None, **kw) - - Call the ``set_content`` method of the *content_manager*, passing self - as the message object, and passing along any other arguments or keywords - as additional arguments. If *content_manager* is not specified, use - the ``content_manager`` specified by the current :mod:`~email.policy`. - - - .. method:: make_related(boundary=None) - - Convert a non-``multipart`` message into a ``multipart/related`` message, - moving any existing :mailheader:`Content-` headers and payload into a - (new) first part of the ``multipart``. If *boundary* is specified, use - it as the boundary string in the multipart, otherwise leave the boundary - to be automatically created when it is needed (for example, when the - message is serialized). - - - .. method:: make_alternative(boundary=None) - - Convert a non-``multipart`` or a ``multipart/related`` into a - ``multipart/alternative``, moving any existing :mailheader:`Content-` - headers and payload into a (new) first part of the ``multipart``. If - *boundary* is specified, use it as the boundary string in the multipart, - otherwise leave the boundary to be automatically created when it is - needed (for example, when the message is serialized). - - - .. 
method:: make_mixed(boundary=None) - - Convert a non-``multipart``, a ``multipart/related``, or a - ``multipart-alternative`` into a ``multipart/mixed``, moving any existing - :mailheader:`Content-` headers and payload into a (new) first part of the - ``multipart``. If *boundary* is specified, use it as the boundary string - in the multipart, otherwise leave the boundary to be automatically - created when it is needed (for example, when the message is serialized). - - - .. method:: add_related(*args, content_manager=None, **kw) - - If the message is a ``multipart/related``, create a new message - object, pass all of the arguments to its :meth:`set_content` method, - and :meth:`~email.message.Message.attach` it to the ``multipart``. If - the message is a non-``multipart``, call :meth:`make_related` and then - proceed as above. If the message is any other type of ``multipart``, - raise a :exc:`TypeError`. If *content_manager* is not specified, use - the ``content_manager`` specified by the current :mod:`~email.policy`. - If the added part has no :mailheader:`Content-Disposition` header, - add one with the value ``inline``. - - - .. method:: add_alternative(*args, content_manager=None, **kw) - - If the message is a ``multipart/alternative``, create a new message - object, pass all of the arguments to its :meth:`set_content` method, and - :meth:`~email.message.Message.attach` it to the ``multipart``. If the - message is a non-``multipart`` or ``multipart/related``, call - :meth:`make_alternative` and then proceed as above. If the message is - any other type of ``multipart``, raise a :exc:`TypeError`. If - *content_manager* is not specified, use the ``content_manager`` specified - by the current :mod:`~email.policy`. - - - .. 
method:: add_attachment(*args, content_manager=None, **kw) - - If the message is a ``multipart/mixed``, create a new message object, - pass all of the arguments to its :meth:`set_content` method, and - :meth:`~email.message.Message.attach` it to the ``multipart``. If the - message is a non-``multipart``, ``multipart/related``, or - ``multipart/alternative``, call :meth:`make_mixed` and then proceed as - above. If *content_manager* is not specified, use the ``content_manager`` - specified by the current :mod:`~email.policy`. If the added part - has no :mailheader:`Content-Disposition` header, add one with the value - ``attachment``. This method can be used both for explicit attachments - (:mailheader:`Content-Disposition: attachment` and ``inline`` attachments - (:mailheader:`Content-Disposition: inline`), by passing appropriate - options to the ``content_manager``. - - - .. method:: clear() - - Remove the payload and all of the headers. - - - .. method:: clear_content() - - Remove the payload and all of the :exc:`Content-` headers, leaving - all other headers intact and in their original order. - - -.. class:: MIMEPart(policy=default) +------------ - This class represents a subpart of a MIME message. It is identical to - :class:`EmailMessage`, except that no :mailheader:`MIME-Version` headers are - added when :meth:`~EmailMessage.set_content` is called, since sub-parts do - not need their own :mailheader:`MIME-Version` headers. +.. versionadded:: 3.4 as a :term:`provisional module <provisional package>`. +.. versionchanged:: 3.6 provisional status removed. -.. currentmodule:: email.contentmanager .. 
class:: ContentManager() @@ -362,7 +125,7 @@ Currently the email package provides only one concrete content manager, set_content(msg, <'bytes'>, maintype, subtype, cte="base64", \ disposition=None, filename=None, cid=None, \ params=None, headers=None) - set_content(msg, <'Message'>, cte=None, \ + set_content(msg, <'EmailMessage'>, cte=None, \ disposition=None, filename=None, cid=None, \ params=None, headers=None) set_content(msg, <'list'>, subtype='mixed', \ @@ -378,14 +141,14 @@ Currently the email package provides only one concrete content manager, subtype to *subtype* if it is specified, or ``plain`` if it is not. * For ``bytes``, use the specified *maintype* and *subtype*, or raise a :exc:`TypeError` if they are not specified. - * For :class:`~email.message.Message` objects, set the maintype to - ``message``, and set the subtype to *subtype* if it is specified - or ``rfc822`` if it is not. If *subtype* is ``partial``, raise an - error (``bytes`` objects must be used to construct - ``message/partial`` parts). + * For :class:`~email.message.EmailMessage` objects, set the maintype + to ``message``, and set the subtype to *subtype* if it is + specified or ``rfc822`` if it is not. If *subtype* is + ``partial``, raise an error (``bytes`` objects must be used to + construct ``message/partial`` parts). * For *<'list'>*, which should be a list of - :class:`~email.message.Message` objects, set the ``maintype`` to - ``multipart``, and the ``subtype`` to *subtype* if it is + :class:`~email.message.EmailMessage` objects, set the ``maintype`` + to ``multipart``, and the ``subtype`` to *subtype* if it is specified, and ``mixed`` if it is not. If the message parts in the *<'list'>* have :mailheader:`MIME-Version` headers, remove them. 
@@ -397,32 +160,35 @@ Currently the email package provides only one concrete content manager, If *cte* is set, encode the payload using the specified content transfer encoding, and set the :mailheader:`Content-Transfer-Encoding` header to - that value. For ``str`` objects, if it is not set use heuristics to - determine the most compact encoding. Possible values for *cte* are - ``quoted-printable``, ``base64``, ``7bit``, ``8bit``, and ``binary``. - If the input cannot be encoded in the specified encoding (eg: ``7bit``), - raise a :exc:`ValueError`. For :class:`~email.message.Message`, per - :rfc:`2046`, raise an error if a *cte* of ``quoted-printable`` or - ``base64`` is requested for *subtype* ``rfc822``, and for any *cte* - other than ``7bit`` for *subtype* ``external-body``. For - ``message/rfc822``, use ``8bit`` if *cte* is not specified. For all - other values of *subtype*, use ``7bit``. + that value. Possible values for *cte* are ``quoted-printable``, + ``base64``, ``7bit``, ``8bit``, and ``binary``. If the input cannot be + encoded in the specified encoding (for example, specifying a *cte* of + ``7bit`` for an input that contains non-ASCII values), raise a + :exc:`ValueError`. + + * For ``str`` objects, if *cte* is not set use heuristics to + determine the most compact encoding. + * For :class:`~email.message.EmailMessage`, per :rfc:`2046`, raise + an error if a *cte* of ``quoted-printable`` or ``base64`` is + requested for *subtype* ``rfc822``, and for any *cte* other than + ``7bit`` for *subtype* ``external-body``. For + ``message/rfc822``, use ``8bit`` if *cte* is not specified. For + all other values of *subtype*, use ``7bit``. .. note:: A *cte* of ``binary`` does not actually work correctly yet. - The ``Message`` object as modified by ``set_content`` is correct, but - :class:`~email.generator.BytesGenerator` does not serialize it - correctly. 
+ The ``EmailMessage`` object as modified by ``set_content`` is + correct, but :class:`~email.generator.BytesGenerator` does not + serialize it correctly. If *disposition* is set, use it as the value of the :mailheader:`Content-Disposition` header. If not specified, and *filename* is specified, add the header with the value ``attachment``. - If it is not specified and *filename* is also not specified, do not add - the header. The only valid values for *disposition* are ``attachment`` - and ``inline``. + If *disposition* is not specified and *filename* is also not specified, + do not add the header. The only valid values for *disposition* are + ``attachment`` and ``inline``. If *filename* is specified, use it as the value of the ``filename`` - parameter of the :mailheader:`Content-Disposition` header. There is no - default. + parameter of the :mailheader:`Content-Disposition` header. If *cid* is specified, add a :mailheader:`Content-ID` header with *cid* as its value. diff --git a/Doc/library/email.encoders.rst b/Doc/library/email.encoders.rst index 9d7f9bf..e24ac7b 100644 --- a/Doc/library/email.encoders.rst +++ b/Doc/library/email.encoders.rst @@ -8,6 +8,12 @@ -------------- +This module is part of the legacy (``Compat32``) email API. In the +new API the functionality is provided by the *cte* parameter of +the :meth:`~email.message.EmailMessage.set_content` method. + +The remaining text in this section is the original documentation of the module. + When creating :class:`~email.message.Message` objects from scratch, you often need to encode the payloads for transport through compliant mail servers. This is especially true for :mimetype:`image/\*` and :mimetype:`text/\*` type messages diff --git a/Doc/library/email.errors.rst b/Doc/library/email.errors.rst index 8470783..2d0d192 100644 --- a/Doc/library/email.errors.rst +++ b/Doc/library/email.errors.rst @@ -20,33 +20,27 @@ The following exception classes are defined in the :mod:`email.errors` module: .. 
exception:: MessageParseError() - This is the base class for exceptions raised by the :class:`~email.parser.Parser` - class. It is derived from :exc:`MessageError`. + This is the base class for exceptions raised by the + :class:`~email.parser.Parser` class. It is derived from + :exc:`MessageError`. This class is also used internally by the parser used + by :mod:`~email.headerregistry`. .. exception:: HeaderParseError() - Raised under some error conditions when parsing the :rfc:`2822` headers of a - message, this class is derived from :exc:`MessageParseError`. It can be raised - from the :meth:`Parser.parse <email.parser.Parser.parse>` or - :meth:`Parser.parsestr <email.parser.Parser.parsestr>` methods. - - Situations where it can be raised include finding an envelope header after the - first :rfc:`2822` header of the message, finding a continuation line before the - first :rfc:`2822` header is found, or finding a line in the headers which is - neither a header or a continuation line. + Raised under some error conditions when parsing the :rfc:`5322` headers of a + message, this class is derived from :exc:`MessageParseError`. The + :meth:`~email.message.EmailMessage.set_boundary` method will raise this + error if the content type is unknown when the method is called. + :class:`~email.header.Header` may raise this error for certain base64 + decoding errors, and when an attempt is made to create a header that appears + to contain an embedded header (that is, there is what is supposed to be a + continuation line that has no leading whitespace and looks like a header). .. exception:: BoundaryError() - Raised under some error conditions when parsing the :rfc:`2822` headers of a - message, this class is derived from :exc:`MessageParseError`. It can be raised - from the :meth:`Parser.parse <email.parser.Parser.parse>` or - :meth:`Parser.parsestr <email.parser.Parser.parsestr>` methods. 
- - Situations where it can be raised include not being able to find the starting or - terminating boundary in a :mimetype:`multipart/\*` message when strict parsing - is used. + Deprecated and no longer used. .. exception:: MultipartConversionError() @@ -64,14 +58,14 @@ The following exception classes are defined in the :mod:`email.errors` module: :class:`~email.mime.nonmultipart.MIMENonMultipart` (e.g. :class:`~email.mime.image.MIMEImage`). -Here's the list of the defects that the :class:`~email.parser.FeedParser` + +Here is the list of the defects that the :class:`~email.parser.FeedParser` can find while parsing messages. Note that the defects are added to the message where the problem was found, so for example, if a message nested inside a :mimetype:`multipart/alternative` had a malformed header, that nested message object would have a defect, but the containing messages would not. -All defect classes are subclassed from :class:`email.errors.MessageDefect`, but -this class is *not* an exception! +All defect classes are subclassed from :class:`email.errors.MessageDefect`. * :class:`NoBoundaryInMultipartDefect` -- A message claimed to be a multipart, but had no :mimetype:`boundary` parameter. diff --git a/Doc/library/email-examples.rst b/Doc/library/email.examples.rst index ad93b5c..84e9aee 100644 --- a/Doc/library/email-examples.rst +++ b/Doc/library/email.examples.rst @@ -6,13 +6,14 @@ Here are a few examples of how to use the :mod:`email` package to read, write, and send simple email messages, as well as more complex MIME messages. -First, let's see how to create and send a simple text message: +First, let's see how to create and send a simple text message (both the +text content and the addresses may contain unicode characters): .. 
literalinclude:: ../includes/email-simple.py -And parsing RFC822 headers can easily be done by the parse(filename) or -parsestr(message_as_string) methods of the Parser() class: +Parsing RFC822 headers can easily be done by using the classes +from the :mod:`~email.parser` module: .. literalinclude:: ../includes/email-headers.py @@ -34,30 +35,19 @@ above, into a directory of files: .. literalinclude:: ../includes/email-unpack.py + Here's an example of how to create an HTML message with an alternative plain -text version: [2]_ +text version. To make things a bit more interesting, we include a related +image in the html part, and we save a copy of what we are going to send to +disk, as well as sending it. .. literalinclude:: ../includes/email-alternative.py -.. _email-contentmanager-api-examples: - -Examples using the Provisional API -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Here is a reworking of the last example using the provisional API. To make -things a bit more interesting, we include a related image in the html part, and -we save a copy of what we are going to send to disk, as well as sending it. - -This example also shows how easy it is to include non-ASCII, and simplifies the -sending of the message using the :meth:`.send_message` method of the -:mod:`smtplib` module. - -.. literalinclude:: ../includes/email-alternative-new-api.py - -If we were instead sent the message from the last example, here is one -way we could process it: +If we were sent the message from the last example, here is one way we could +process it: -.. literalinclude:: ../includes/email-read-alternative-new-api.py +.. literalinclude:: ../includes/email-read-alternative.py Up to the prompt, the output from the above is: @@ -75,4 +65,3 @@ Up to the prompt, the output from the above is: .. rubric:: Footnotes .. [1] Thanks to Matthew Dixon Cowles for the original inspiration and examples. -.. [2] Contributed by Martin Matejek. 
diff --git a/Doc/library/email.generator.rst b/Doc/library/email.generator.rst index d596ed8..c1d94ca 100644 --- a/Doc/library/email.generator.rst +++ b/Doc/library/email.generator.rst @@ -8,210 +8,243 @@ -------------- -One of the most common tasks is to generate the flat text of the email message -represented by a message object structure. You will need to do this if you want -to send your message via the :mod:`smtplib` module or the :mod:`nntplib` module, -or print the message on the console. Taking a message object structure and -producing a flat text document is the job of the :class:`Generator` class. - -Again, as with the :mod:`email.parser` module, you aren't limited to the -functionality of the bundled generator; you could write one from scratch -yourself. However the bundled generator knows how to generate most email in a -standards-compliant way, should handle MIME and non-MIME email messages just -fine, and is designed so that the transformation from flat text, to a message -structure via the :class:`~email.parser.Parser` class, and back to flat text, -is idempotent (the input is identical to the output) [#]_. On the other hand, -using the Generator on a :class:`~email.message.Message` constructed by program -may result in changes to the :class:`~email.message.Message` object as defaults -are filled in. - -:class:`bytes` output can be generated using the :class:`BytesGenerator` class. -If the message object structure contains non-ASCII bytes, this generator's -:meth:`~BytesGenerator.flatten` method will emit the original bytes. Parsing a -binary message and then flattening it with :class:`BytesGenerator` should be -idempotent for standards compliant messages. - -Here are the public methods of the :class:`Generator` class, imported from the -:mod:`email.generator` module: - - -.. 
class:: Generator(outfp, mangle_from_=True, maxheaderlen=78, *, policy=None) - - The constructor for the :class:`Generator` class takes a :term:`file-like object` - called *outfp* for an argument. *outfp* must support the :meth:`write` method - and be usable as the output file for the :func:`print` function. - - Optional *mangle_from_* is a flag that, when ``True``, puts a ``>`` character in - front of any line in the body that starts exactly as ``From``, i.e. ``From`` - followed by a space at the beginning of the line. This is the only guaranteed - portable way to avoid having such lines be mistaken for a Unix mailbox format - envelope header separator (see `WHY THE CONTENT-LENGTH FORMAT IS BAD - <https://www.jwz.org/doc/content-length.html>`_ for details). *mangle_from_* - defaults to ``True``, but you might want to set this to ``False`` if you are not - writing Unix mailbox format files. - - Optional *maxheaderlen* specifies the longest length for a non-continued header. - When a header line is longer than *maxheaderlen* (in characters, with tabs - expanded to 8 spaces), the header will be split as defined in the - :class:`~email.header.Header` class. Set to zero to disable header wrapping. - The default is 78, as recommended (but not required) by :rfc:`2822`. - - The *policy* keyword specifies a :mod:`~email.policy` object that controls a - number of aspects of the generator's operation. If no *policy* is specified, - then the *policy* attached to the message object passed to :attr:`flatten` - is used. +One of the most common tasks is to generate the flat (serialized) version of +the email message represented by a message object structure. You will need to +do this if you want to send your message via :meth:`smtplib.SMTP.sendmail` or +the :mod:`nntplib` module, or print the message on the console. Taking a +message object structure and producing a serialized representation is the job +of the generator classes. 
+ +As with the :mod:`email.parser` module, you aren't limited to the functionality +of the bundled generator; you could write one from scratch yourself. However +the bundled generator knows how to generate most email in a standards-compliant +way, should handle MIME and non-MIME email messages just fine, and is designed +so that the bytes-oriented parsing and generation operations are inverses, +assuming the same non-transforming :mod:`~email.policy` is used for both. That +is, parsing the serialized byte stream via the +:class:`~email.parser.BytesParser` class and then regenerating the serialized +byte stream using :class:`BytesGenerator` should produce output identical to +the input [#]_. (On the other hand, using the generator on an +:class:`~email.message.EmailMessage` constructed by program may result in +changes to the :class:`~email.message.EmailMessage` object as defaults are +filled in.) + +The :class:`Generator` class can be used to flatten a message into a text (as +opposed to binary) serialized representation, but since Unicode cannot +represent binary data directly, the message is of necessity transformed into +something that contains only ASCII characters, using the standard email RFC +Content Transfer Encoding techniques for encoding email messages for transport +over channels that are not "8 bit clean". + + +.. class:: BytesGenerator(outfp, mangle_from_=None, maxheaderlen=None, *, \ + policy=None) - .. versionchanged:: 3.3 Added the *policy* keyword. + Return a :class:`BytesGenerator` object that will write any message provided + to the :meth:`flatten` method, or any surrogateescape encoded text provided + to the :meth:`write` method, to the :term:`file-like object` *outfp*. + *outfp* must support a ``write`` method that accepts binary data. 
+ + If optional *mangle_from_* is ``True``, put a ``>`` character in front of + any line in the body that starts with the exact string ``"From "``, that is + ``From`` followed by a space at the beginning of a line. *mangle_from_* + defaults to the value of the :attr:`~email.policy.Policy.mangle_from_` + setting of the *policy* (which is ``True`` for the + :data:`~email.policy.compat32` policy and ``False`` for all others). + *mangle_from_* is intended for use when messages are stored in unix mbox + format (see :mod:`mailbox` and `WHY THE CONTENT-LENGTH FORMAT IS BAD + <http://www.jwz.org/doc/content-length.html>`_). + + If *maxheaderlen* is not ``None``, refold any header lines that are longer + than *maxheaderlen*, or if ``0``, do not rewrap any headers. If + *maxheaderlen* is ``None`` (the default), wrap headers and other message + lines according to the *policy* settings. + + If *policy* is specified, use that policy to control message generation. If + *policy* is ``None`` (the default), use the policy associated with the + :class:`~email.message.Message` or :class:`~email.message.EmailMessage` + object passed to ``flatten`` to control the message generation. See + :mod:`email.policy` for details on what *policy* controls. - The other public :class:`Generator` methods are: + .. versionadded:: 3.2 + .. versionchanged:: 3.3 Added the *policy* keyword. - .. method:: flatten(msg, unixfrom=False, linesep=None) + .. versionchanged:: 3.6 The default behavior of the *mangle_from_* + and *maxheaderlen* parameters is to follow the policy. - Print the textual representation of the message object structure rooted at - *msg* to the output file specified when the :class:`Generator` instance - was created. Subparts are visited depth-first and the resulting text will - be properly MIME encoded. - Optional *unixfrom* is a flag that forces the printing of the envelope - header delimiter before the first :rfc:`2822` header of the root message - object. 
If the root object has no envelope header, a standard one is - crafted. By default, this is set to ``False`` to inhibit the printing of - the envelope delimiter. + .. method:: flatten(msg, unixfrom=False, linesep=None) + Print the textual representation of the message object structure rooted + at *msg* to the output file specified when the :class:`BytesGenerator` + instance was created. + + If the :mod:`~email.policy` option :attr:`~email.policy.Policy.cte_type` + is ``8bit`` (the default), copy any headers in the original parsed + message that have not been modified to the output with any bytes with the + high bit set reproduced as in the original, and preserve the non-ASCII + :mailheader:`Content-Transfer-Encoding` of any body parts that have them. + If ``cte_type`` is ``7bit``, convert the bytes with the high bit set as + needed using an ASCII-compatible :mailheader:`Content-Transfer-Encoding`. + That is, transform parts with non-ASCII + :mailheader:`Content-Transfer-Encoding` + (:mailheader:`Content-Transfer-Encoding: 8bit`) to an ASCII compatible + :mailheader:`Content-Transfer-Encoding`, and encode RFC-invalid non-ASCII + bytes in headers using the MIME ``unknown-8bit`` character set, thus + rendering them RFC-compliant. + + .. XXX: There should be an option that just does the RFC + compliance transformation on headers but leaves CTE 8bit parts alone. + + If *unixfrom* is ``True``, print the envelope header delimiter used by + the Unix mailbox format (see :mod:`mailbox`) before the first of the + :rfc:`5322` headers of the root message object. If the root object has + no envelope header, craft a standard one. The default is ``False``. Note that for subparts, no envelope header is ever printed. - Optional *linesep* specifies the line separator character used to - terminate lines in the output. If specified it overrides the value - specified by the *msg*\'s or ``Generator``\'s ``policy``. 
+ If *linesep* is not ``None``, use it as the separator character between + all the lines of the flattened message. If *linesep* is ``None`` (the + default), use the value specified in the *policy*. - Because strings cannot represent non-ASCII bytes, if the policy that - applies when ``flatten`` is run has :attr:`~email.policy.Policy.cte_type` - set to ``8bit``, ``Generator`` will operate as if it were set to - ``7bit``. This means that messages parsed with a Bytes parser that have - a :mailheader:`Content-Transfer-Encoding` of ``8bit`` will be converted - to a use a ``7bit`` Content-Transfer-Encoding. Non-ASCII bytes in the - headers will be :rfc:`2047` encoded with a charset of ``unknown-8bit``. + .. XXX: flatten should take a *policy* keyword. - .. versionchanged:: 3.2 - Added support for re-encoding ``8bit`` message bodies, and the - *linesep* argument. .. method:: clone(fp) - Return an independent clone of this :class:`Generator` instance with the - exact same options. - - .. method:: write(s) - - Write the string *s* to the underlying file object, i.e. *outfp* passed to - :class:`Generator`'s constructor. This provides just enough file-like API - for :class:`Generator` instances to be used in the :func:`print` function. + Return an independent clone of this :class:`BytesGenerator` instance with + the exact same option settings, and *fp* as the new *outfp*. -As a convenience, see the :class:`~email.message.Message` methods -:meth:`~email.message.Message.as_string` and ``str(aMessage)``, a.k.a. -:meth:`~email.message.Message.__str__`, which simplify the generation of a -formatted string representation of a message object. For more detail, see -:mod:`email.message`. -.. class:: BytesGenerator(outfp, mangle_from_=True, maxheaderlen=78, *, \ - policy=None) + .. method:: write(s) - The constructor for the :class:`BytesGenerator` class takes a binary - :term:`file-like object` called *outfp* for an argument. 
*outfp* must - support a :meth:`write` method that accepts binary data. + Encode *s* using the ``ASCII`` codec and the ``surrogateescape`` error + handler, and pass it to the *write* method of the *outfp* passed to the + :class:`BytesGenerator`'s constructor. - Optional *mangle_from_* is a flag that, when ``True``, puts a ``>`` - character in front of any line in the body that starts exactly as ``From``, - i.e. ``From`` followed by a space at the beginning of the line. This is the - only guaranteed portable way to avoid having such lines be mistaken for a - Unix mailbox format envelope header separator (see `WHY THE CONTENT-LENGTH - FORMAT IS BAD <https://www.jwz.org/doc/content-length.html>`_ for details). - *mangle_from_* defaults to ``True``, but you might want to set this to - ``False`` if you are not writing Unix mailbox format files. - Optional *maxheaderlen* specifies the longest length for a non-continued - header. When a header line is longer than *maxheaderlen* (in characters, - with tabs expanded to 8 spaces), the header will be split as defined in the - :class:`~email.header.Header` class. Set to zero to disable header - wrapping. The default is 78, as recommended (but not required) by - :rfc:`2822`. +As a convenience, :class:`~email.message.EmailMessage` provides the methods +:meth:`~email.message.EmailMessage.as_bytes` and ``bytes(aMessage)`` (a.k.a. +:meth:`~email.message.EmailMessage.__bytes__`), which simplify the generation of +a serialized binary representation of a message object. For more detail, see +:mod:`email.message`. - The *policy* keyword specifies a :mod:`~email.policy` object that controls a - number of aspects of the generator's operation. If no *policy* is specified, - then the *policy* attached to the message object passed to :attr:`flatten` - is used. 
+Because strings cannot represent binary data, the :class:`Generator` class must +convert any binary data in any message it flattens to an ASCII compatible +format, by converting them to an ASCII compatible +:mailheader:`Content-Transfer-Encoding`. Using the terminology of the email +RFCs, you can think of this as :class:`Generator` serializing to an I/O stream +that is not "8 bit clean". In other words, most applications will want +to be using :class:`BytesGenerator`, and not :class:`Generator`. + +.. class:: Generator(outfp, mangle_from_=None, maxheaderlen=None, *, \ + policy=None) + + Return a :class:`Generator` object that will write any message provided + to the :meth:`flatten` method, or any text provided to the :meth:`write` + method, to the :term:`file-like object` *outfp*. *outfp* must support a + ``write`` method that accepts string data. + + If optional *mangle_from_* is ``True``, put a ``>`` character in front of + any line in the body that starts with the exact string ``"From "``, that is + ``From`` followed by a space at the beginning of a line. *mangle_from_* + defaults to the value of the :attr:`~email.policy.Policy.mangle_from_` + setting of the *policy* (which is ``True`` for the + :data:`~email.policy.compat32` policy and ``False`` for all others). + *mangle_from_* is intended for use when messages are stored in unix mbox + format (see :mod:`mailbox` and `WHY THE CONTENT-LENGTH FORMAT IS BAD + <http://www.jwz.org/doc/content-length.html>`_). + + If *maxheaderlen* is not ``None``, refold any header lines that are longer + than *maxheaderlen*, or if ``0``, do not rewrap any headers. If + *maxheaderlen* is ``None`` (the default), wrap headers and other message + lines according to the *policy* settings. + + If *policy* is specified, use that policy to control message generation. 
If + *policy* is ``None`` (the default), use the policy associated with the + :class:`~email.message.Message` or :class:`~email.message.EmailMessage` + object passed to ``flatten`` to control the message generation. See + :mod:`email.policy` for details on what *policy* controls. .. versionchanged:: 3.3 Added the *policy* keyword. - The other public :class:`BytesGenerator` methods are: + .. versionchanged:: 3.6 The default behavior of the *mangle_from_* + and *maxheaderlen* parameters is to follow the policy. .. method:: flatten(msg, unixfrom=False, linesep=None) Print the textual representation of the message object structure rooted - at *msg* to the output file specified when the :class:`BytesGenerator` - instance was created. Subparts are visited depth-first and the resulting - text will be properly MIME encoded. If the :mod:`~email.policy` option - :attr:`~email.policy.Policy.cte_type` is ``8bit`` (the default), - then any bytes with the high bit set in the original parsed message that - have not been modified will be copied faithfully to the output. If - ``cte_type`` is ``7bit``, the bytes will be converted as needed - using an ASCII-compatible Content-Transfer-Encoding. In particular, - RFC-invalid non-ASCII bytes in headers will be encoded using the MIME - ``unknown-8bit`` character set, thus rendering them RFC-compliant. - - .. XXX: There should be a complementary option that just does the RFC - compliance transformation but leaves CTE 8bit parts alone. - - Messages parsed with a Bytes parser that have a - :mailheader:`Content-Transfer-Encoding` of 8bit will be reconstructed - as 8bit if they have not been modified. - - Optional *unixfrom* is a flag that forces the printing of the envelope - header delimiter before the first :rfc:`2822` header of the root message - object. If the root object has no envelope header, a standard one is - crafted. By default, this is set to ``False`` to inhibit the printing of - the envelope delimiter. 
- + at *msg* to the output file specified when the :class:`Generator` + instance was created. + + If the :mod:`~email.policy` option :attr:`~email.policy.Policy.cte_type` + is ``8bit``, generate the message as if the option were set to ``7bit``. + (This is required because strings cannot represent non-ASCII bytes.) + Convert any bytes with the high bit set as needed using an + ASCII-compatible :mailheader:`Content-Transfer-Encoding`. That is, + transform parts with non-ASCII :mailheader:`Content-Transfer-Encoding` + (:mailheader:`Content-Transfer-Encoding: 8bit`) to an ASCII compatible + :mailheader:`Content-Transfer-Encoding`, and encode RFC-invalid non-ASCII + bytes in headers using the MIME ``unknown-8bit`` character set, thus + rendering them RFC-compliant. + + If *unixfrom* is ``True``, print the envelope header delimiter used by + the Unix mailbox format (see :mod:`mailbox`) before the first of the + :rfc:`5322` headers of the root message object. If the root object has + no envelope header, craft a standard one. The default is ``False``. Note that for subparts, no envelope header is ever printed. - Optional *linesep* specifies the line separator character used to - terminate lines in the output. If specified it overrides the value - specified by the ``Generator``\ or *msg*\ 's ``policy``. + If *linesep* is not ``None``, use it as the separator character between + all the lines of the flattened message. If *linesep* is ``None`` (the + default), use the value specified in the *policy*. + + .. XXX: flatten should take a *policy* keyword. + + .. versionchanged:: 3.2 + Added support for re-encoding ``8bit`` message bodies, and the + *linesep* argument. + .. method:: clone(fp) - Return an independent clone of this :class:`BytesGenerator` instance with - the exact same options. + Return an independent clone of this :class:`Generator` instance with the + exact same options, and *fp* as the new *outfp*. + .. 
method:: write(s) - Write the string *s* to the underlying file object. *s* is encoded using - the ``ASCII`` codec and written to the *write* method of the *outfp* - *outfp* passed to the :class:`BytesGenerator`'s constructor. This - provides just enough file-like API for :class:`BytesGenerator` instances - to be used in the :func:`print` function. + Write *s* to the *write* method of the *outfp* passed to the + :class:`Generator`'s constructor. This provides just enough file-like + API for :class:`Generator` instances to be used in the :func:`print` + function. - .. versionadded:: 3.2 -The :mod:`email.generator` module also provides a derived class, called -:class:`DecodedGenerator` which is like the :class:`Generator` base class, -except that non-\ :mimetype:`text` parts are substituted with a format string -representing the part. +As a convenience, :class:`~email.message.EmailMessage` provides the methods +:meth:`~email.message.EmailMessage.as_string` and ``str(aMessage)`` (a.k.a. +:meth:`~email.message.EmailMessage.__str__`), which simplify the generation of +a formatted string representation of a message object. For more detail, see +:mod:`email.message`. + +The :mod:`email.generator` module also provides a derived class, +:class:`DecodedGenerator`, which is like the :class:`Generator` base class, +except that non-\ :mimetype:`text` parts are not serialized, but are instead +represented in the output stream by a string derived from a template filled +in with information about the part. -.. class:: DecodedGenerator(outfp, mangle_from_=True, maxheaderlen=78, fmt=None) +.. class:: DecodedGenerator(outfp, mangle_from_=None, maxheaderlen=78, fmt=None) - This class, derived from :class:`Generator` walks through all the subparts of a - message. If the subpart is of main type :mimetype:`text`, then it prints the - decoded payload of the subpart. Optional *_mangle_from_* and *maxheaderlen* are - as with the :class:`Generator` base class. 
+ Act like :class:`Generator`, except that for any subpart of the message + passed to :meth:`Generator.flatten`, if the subpart is of main type + :mimetype:`text`, print the decoded payload of the subpart, and if the main + type is not :mimetype:`text`, instead of printing it fill in the string + *fmt* using information from the part and print the resulting + filled-in string. - If the subpart is not of main type :mimetype:`text`, optional *fmt* is a format - string that is used instead of the message payload. *fmt* is expanded with the - following keywords, ``%(keyword)s`` format: + To fill in *fmt*, execute ``fmt % part_info``, where ``part_info`` + is a dictionary composed of the following keys and values: * ``type`` -- Full MIME type of the non-\ :mimetype:`text` part @@ -225,15 +258,22 @@ representing the part. * ``encoding`` -- Content transfer encoding of the non-\ :mimetype:`text` part - The default value for *fmt* is ``None``, meaning :: + If *fmt* is ``None``, use the following default *fmt*: + + "[Non-text (%(type)s) part of message omitted, filename %(filename)s]" - [Non-text (%(type)s) part of message omitted, filename %(filename)s] + Optional *mangle_from_* and *maxheaderlen* are as with the + :class:`Generator` base class, except that the default value for + *maxheaderlen* is ``78`` (the RFC standard default header length). .. rubric:: Footnotes -.. [#] This statement assumes that you use the appropriate setting for the - ``unixfrom`` argument, and that you set maxheaderlen=0 (which will - preserve whatever the input line lengths were). It is also not strictly - true, since in many cases runs of whitespace in headers are collapsed - into single blanks. The latter is a bug that will eventually be fixed. +.. 
[#] This statement assumes that you use the appropriate setting for + ``unixfrom``, and that there are no :mod:`policy` settings calling for + automatic adjustments (for example, + :attr:`~email.policy.Policy.refold_source` must be ``none``, which is + *not* the default). It is also not 100% true, since if the message + does not conform to the RFC standards occasionally information about the + exact original text is lost during parsing error recovery. It is a goal + to fix these latter edge cases when possible. diff --git a/Doc/library/email.header.rst b/Doc/library/email.header.rst index e94837c..07152c2 100644 --- a/Doc/library/email.header.rst +++ b/Doc/library/email.header.rst @@ -8,6 +8,14 @@ -------------- +This module is part of the legacy (``Compat32``) email API. In the current API +encoding and decoding of headers is handled transparently by the +dictionary-like API of the :class:`~email.message.EmailMessage` class. In +addition to uses in legacy code, this module can be useful in applications that +need to completely control the character sets used when encoding headers. + +The remaining text in this section is the original documentation of the module. + :rfc:`2822` is the base standard that describes the format of email messages. It derives from the older :rfc:`822` standard which came into widespread use at a time when most email was composed of ASCII characters only. :rfc:`2822` is a diff --git a/Doc/library/email.headerregistry.rst b/Doc/library/email.headerregistry.rst index 0707bd8..feec497 100644 --- a/Doc/library/email.headerregistry.rst +++ b/Doc/library/email.headerregistry.rst @@ -7,19 +7,13 @@ .. moduleauthor:: R. David Murray <rdmurray@bitdance.com> .. sectionauthor:: R. David Murray <rdmurray@bitdance.com> -.. versionadded:: 3.3 - as a :term:`provisional module <provisional package>`. - **Source code:** :source:`Lib/email/headerregistry.py` -.. 
note:: +-------------- - The headerregistry module has been included in the standard library on a - :term:`provisional basis <provisional package>`. Backwards incompatible - changes (up to and including removal of the module) may occur if deemed - necessary by the core developers. +.. versionadded:: 3.3 as a :term:`provisional module <provisional package>`. --------------- +.. versionchanged:: 3.6 provisional status removed. Headers are represented by customized subclasses of :class:`str`. The particular class used to represent a given header is determined by the @@ -86,10 +80,11 @@ headers. .. method:: fold(*, policy) Return a string containing :attr:`~email.policy.Policy.linesep` - characters as required to correctly fold the header according - to *policy*. A :attr:`~email.policy.Policy.cte_type` of - ``8bit`` will be treated as if it were ``7bit``, since strings - may not contain binary data. + characters as required to correctly fold the header according to + *policy*. A :attr:`~email.policy.Policy.cte_type` of ``8bit`` will be + treated as if it were ``7bit``, since headers may not contain arbitrary + binary data. If :attr:`~email.policy.EmailPolicy.utf8` is ``False``, + non-ASCII data will be :rfc:`2047` encoded. ``BaseHeader`` by itself cannot be used to create a header object. It @@ -106,7 +101,7 @@ headers. values for at least the keys ``decoded`` and ``defects``. ``decoded`` should be the string value for the header (that is, the header value fully decoded to unicode). The parse method should assume that *string* may - contain transport encoded parts, but should correctly handle all valid + contain content-transfer-encoded parts, but should correctly handle all valid unicode characters as well so that it can parse un-encoded header values. ``BaseHeader``'s ``__new__`` then creates the header instance, and calls its @@ -135,11 +130,10 @@ headers. mechanism for encoding non-ASCII text as ASCII characters within a header value. 
When a *value* containing encoded words is passed to the constructor, the ``UnstructuredHeader`` parser converts such encoded words - back in to the original unicode, following the :rfc:`2047` rules for - unstructured text. The parser uses heuristics to attempt to decode certain - non-compliant encoded words. Defects are registered in such cases, as well - as defects for issues such as invalid characters within the encoded words or - the non-encoded text. + into unicode, following the :rfc:`2047` rules for unstructured text. The + parser uses heuristics to attempt to decode certain non-compliant encoded + words. Defects are registered in such cases, as well as defects for issues + such as invalid characters within the encoded words or the non-encoded text. This header type provides no additional attributes. @@ -213,15 +207,16 @@ headers. the list of addresses is "flattened" into a one dimensional list). The ``decoded`` value of the header will have all encoded words decoded to - unicode. :class:`~encodings.idna` encoded domain names are also decoded to unicode. The - ``decoded`` value is set by :attr:`~str.join`\ ing the :class:`str` value of - the elements of the ``groups`` attribute with ``', '``. + unicode. :class:`~encodings.idna` encoded domain names are also decoded to + unicode. The ``decoded`` value is set by :attr:`~str.join`\ ing the + :class:`str` value of the elements of the ``groups`` attribute with ``', + '``. A list of :class:`.Address` and :class:`.Group` objects in any combination may be used to set the value of an address header. ``Group`` objects whose ``display_name`` is ``None`` will be interpreted as single addresses, which allows an address list to be copied with groups intact by using the list - obtained ``groups`` attribute of the source header. + obtained from the ``groups`` attribute of the source header. .. class:: SingleAddressHeader @@ -267,7 +262,7 @@ variant, :attr:`~.BaseHeader.max_count` is set to 1. .. 
class:: ParameterizedMIMEHeader - MOME headers all start with the prefix 'Content-'. Each specific header has + MIME headers all start with the prefix 'Content-'. Each specific header has a certain value, described under the class for that header. Some can also take a list of supplemental parameters, which have a common format. This class serves as a base for all the MIME headers that take parameters. diff --git a/Doc/library/email.message.rst b/Doc/library/email.message.rst index 2907975..c888673 100644 --- a/Doc/library/email.message.rst +++ b/Doc/library/email.message.rst @@ -3,91 +3,108 @@ .. module:: email.message :synopsis: The base class representing email messages. +.. moduleauthor:: R. David Murray <rdmurray@bitdance.com> +.. sectionauthor:: R. David Murray <rdmurray@bitdance.com>, + Barry A. Warsaw <barry@python.org> **Source code:** :source:`Lib/email/message.py` -------------- -The central class in the :mod:`email` package is the :class:`Message` class, -imported from the :mod:`email.message` module. It is the base class for the -:mod:`email` object model. :class:`Message` provides the core functionality for -setting and querying header fields, and for accessing message bodies. - -Conceptually, a :class:`Message` object consists of *headers* and *payloads*. -Headers are :rfc:`2822` style field names and values where the field name and -value are separated by a colon. The colon is not part of either the field name -or the field value. - -Headers are stored and returned in case-preserving form but are matched -case-insensitively. There may also be a single envelope header, also known as -the *Unix-From* header or the ``From_`` header. The payload is either a string -in the case of simple message objects or a list of :class:`Message` objects for -MIME container documents (e.g. :mimetype:`multipart/\*` and -:mimetype:`message/rfc822`). 
- -:class:`Message` objects provide a mapping style interface for accessing the -message headers, and an explicit interface for accessing both the headers and -the payload. It provides convenience methods for generating a flat text -representation of the message object tree, for accessing commonly used header -parameters, and for recursively walking over the object tree. - -Here are the methods of the :class:`Message` class: - - -.. class:: Message(policy=compat32) - - If *policy* is specified (it must be an instance of a :mod:`~email.policy` - class) use the rules it specifies to update and serialize the representation - of the message. If *policy* is not set, use the :class:`compat32 - <email.policy.Compat32>` policy, which maintains backward compatibility with - the Python 3.2 version of the email package. For more information see the +.. versionadded:: 3.4 + the classes documented here were added :term:`provisionally <provisional + package>`. + +.. versionchanged:: 3.6 + provisional status removed, docs for legacy message class moved + to :ref:`compat32_message`. + +The central class in the :mod:`email` package is the :class:`EmailMessage` +class, imported from the :mod:`email.message` module. It is the base class for +the :mod:`email` object model. :class:`EmailMessage` provides the core +functionality for setting and querying header fields, for accessing message +bodies, and for creating or modifying structured messages. + +An email message consists of *headers* and a *payload* (which is also referred +to as the *content*). Headers are :rfc:`5322` or :rfc:`6532` style field names +and values, where the field name and value are separated by a colon. The colon +is not part of either the field name or the field value. The payload may be a +simple text message, or a binary object, or a structured sequence of +sub-messages each with their own set of headers and their own payload.
The +latter type of payload is indicated by the message having a MIME type such as +:mimetype:`multipart/\*` or :mimetype:`message/rfc822`. + +The conceptual model provided by an :class:`EmailMessage` object is that of an +ordered dictionary of headers coupled with a *payload* that represents the +:rfc:`5322` body of the message, which might be a list of sub-``EmailMessage`` +objects. In addition to the normal dictionary methods for accessing the header +names and values, there are methods for accessing specialized information from +the headers (for example the MIME content type), for operating on the payload, +for generating a serialized version of the message, and for recursively walking +over the object tree. + +The :class:`EmailMessage` dictionary-like interface is indexed by the header +names, which must be ASCII values. The values of the dictionary are strings +with some extra methods. Headers are stored and returned in case-preserving +form, but field names are matched case-insensitively. Unlike a real dict, +there is an ordering to the keys, and there can be duplicate keys. Additional +methods are provided for working with headers that have duplicate keys. + +The *payload* is either a string or bytes object, in the case of simple message +objects, or a list of :class:`EmailMessage` objects, for MIME container +documents such as :mimetype:`multipart/\*` and :mimetype:`message/rfc822` +message objects. + + +.. class:: EmailMessage(policy=default) + + If *policy* is specified use the rules it specifies to update and serialize + the representation of the message. If *policy* is not set, use the + :class:`~email.policy.default` policy, which follows the rules of the email + RFCs except for line endings (instead of the RFC mandated ``\r\n``, it uses + the Python standard ``\n`` line endings). For more information see the :mod:`~email.policy` documentation. - .. versionchanged:: 3.3 The *policy* keyword argument was added. - - - ..
method:: as_string(unixfrom=False, maxheaderlen=0, policy=None) - - Return the entire message flattened as a string. When optional *unixfrom* - is true, the envelope header is included in the returned string. - *unixfrom* defaults to ``False``. For backward compabitility reasons, - *maxheaderlen* defaults to ``0``, so if you want a different value you - must override it explicitly (the value specified for *max_line_length* in - the policy will be ignored by this method). The *policy* argument may be - used to override the default policy obtained from the message instance. - This can be used to control some of the formatting produced by the - method, since the specified *policy* will be passed to the ``Generator``. - - Flattening the message may trigger changes to the :class:`Message` if - defaults need to be filled in to complete the transformation to a string - (for example, MIME boundaries may be generated or modified). - - Note that this method is provided as a convenience and may not always - format the message the way you want. For example, by default it does - not do the mangling of lines that begin with ``From`` that is - required by the unix mbox format. For more flexibility, instantiate a - :class:`~email.generator.Generator` instance and use its - :meth:`~email.generator.Generator.flatten` method directly. For example:: + .. method:: as_string(unixfrom=False, maxheaderlen=None, policy=None) - from io import StringIO - from email.generator import Generator - fp = StringIO() - g = Generator(fp, mangle_from_=True, maxheaderlen=60) - g.flatten(msg) - text = fp.getvalue() - - If the message object contains binary data that is not encoded according - to RFC standards, the non-compliant data will be replaced by unicode - "unknown character" code points. (See also :meth:`.as_bytes` and - :class:`~email.generator.BytesGenerator`.) - - .. versionchanged:: 3.4 the *policy* keyword argument was added. + Return the entire message flattened as a string. 
When optional + *unixfrom* is true, the envelope header is included in the returned + string. *unixfrom* defaults to ``False``. For backward compatibility + with the base :class:`~email.message.Message` class *maxheaderlen* is + accepted, but defaults to ``None``, which means that by default the line + length is controlled by the + :attr:`~email.policy.EmailPolicy.max_line_length` of the policy. The + *policy* argument may be used to override the default policy obtained + from the message instance. This can be used to control some of the + formatting produced by the method, since the specified *policy* will be + passed to the :class:`~email.generator.Generator`. + + Flattening the message may trigger changes to the :class:`EmailMessage` + if defaults need to be filled in to complete the transformation to a + string (for example, MIME boundaries may be generated or modified). + + Note that this method is provided as a convenience and may not be the + most useful way to serialize messages in your application, especially if + you are dealing with multiple messages. See + :class:`email.generator.Generator` for a more flexible API for + serializing messages. Note also that this method is restricted to + producing messages serialized as "7 bit clean" when + :attr:`~email.policy.EmailPolicy.utf8` is ``False``, which is the default. + + .. versionchanged:: 3.6 the default behavior when *maxheaderlen* + is not specified was changed from defaulting to 0 to defaulting + to the value of *max_line_length* from the policy. .. method:: __str__() - Equivalent to :meth:`.as_string()`. Allows ``str(msg)`` to produce a - string containing the formatted message. + Equivalent to ``as_string(policy=self.policy.clone(utf8=True))``. Allows + ``str(msg)`` to produce a string containing the serialized message in a + readable format. + + ..
versionchanged:: 3.4 the method was changed to use ``utf8=True``, + thus producing an :rfc:`6531`-like message representation, instead of + being a direct alias for :meth:`as_string`. .. method:: as_bytes(unixfrom=False, policy=None) @@ -98,52 +115,42 @@ Here are the methods of the :class:`Message` class: used to override the default policy obtained from the message instance. This can be used to control some of the formatting produced by the method, since the specified *policy* will be passed to the - ``BytesGenerator``. + :class:`~email.generator.BytesGenerator`. - Flattening the message may trigger changes to the :class:`Message` if - defaults need to be filled in to complete the transformation to a string - (for example, MIME boundaries may be generated or modified). + Flattening the message may trigger changes to the :class:`EmailMessage` + if defaults need to be filled in to complete the transformation to a + string (for example, MIME boundaries may be generated or modified). - Note that this method is provided as a convenience and may not always - format the message the way you want. For example, by default it does - not do the mangling of lines that begin with ``From`` that is - required by the unix mbox format. For more flexibility, instantiate a - :class:`~email.generator.BytesGenerator` instance and use its - :meth:`~email.generator.BytesGenerator.flatten` method directly. - For example:: - - from io import BytesIO - from email.generator import BytesGenerator - fp = BytesIO() - g = BytesGenerator(fp, mangle_from_=True, maxheaderlen=60) - g.flatten(msg) - text = fp.getvalue() - - .. versionadded:: 3.4 + Note that this method is provided as a convenience and may not be the + most useful way to serialize messages in your application, especially if + you are dealing with multiple messages. See + :class:`email.generator.BytesGenerator` for a more flexible API for + serializing messages. .. method:: __bytes__() Equivalent to :meth:`.as_bytes()`. 
Allows ``bytes(msg)`` to produce a - bytes object containing the formatted message. - - .. versionadded:: 3.4 + bytes object containing the serialized message. .. method:: is_multipart() Return ``True`` if the message's payload is a list of sub-\ - :class:`Message` objects, otherwise return ``False``. When + :class:`EmailMessage` objects, otherwise return ``False``. When :meth:`is_multipart` returns ``False``, the payload should be a string - object. (Note that :meth:`is_multipart` returning ``True`` does not - necessarily mean that "msg.get_content_maintype() == 'multipart'" will - return the ``True``. For example, ``is_multipart`` will return ``True`` - when the :class:`Message` is of type ``message/rfc822``.) + object (which might be a CTE encoded binary payload). Note that + :meth:`is_multipart` returning ``True`` does not necessarily mean that + "msg.get_content_maintype() == 'multipart'" will return the ``True``. + For example, ``is_multipart`` will return ``True`` when the + :class:`EmailMessage` is of type ``message/rfc822``. .. method:: set_unixfrom(unixfrom) - Set the message's envelope header to *unixfrom*, which should be a string. + Set the message's envelope header to *unixfrom*, which should be a + string. (See :class:`~mailbox.mboxMessage` for a brief description of + this header.) .. method:: get_unixfrom() @@ -152,109 +159,23 @@ Here are the methods of the :class:`Message` class: envelope header was never set. - .. method:: attach(payload) - - Add the given *payload* to the current payload, which must be ``None`` or - a list of :class:`Message` objects before the call. After the call, the - payload will always be a list of :class:`Message` objects. If you want to - set the payload to a scalar object (e.g. a string), use - :meth:`set_payload` instead. - - - .. 
method:: get_payload(i=None, decode=False) - - Return the current payload, which will be a list of - :class:`Message` objects when :meth:`is_multipart` is ``True``, or a - string when :meth:`is_multipart` is ``False``. If the payload is a list - and you mutate the list object, you modify the message's payload in place. - - With optional argument *i*, :meth:`get_payload` will return the *i*-th - element of the payload, counting from zero, if :meth:`is_multipart` is - ``True``. An :exc:`IndexError` will be raised if *i* is less than 0 or - greater than or equal to the number of items in the payload. If the - payload is a string (i.e. :meth:`is_multipart` is ``False``) and *i* is - given, a :exc:`TypeError` is raised. - - Optional *decode* is a flag indicating whether the payload should be - decoded or not, according to the :mailheader:`Content-Transfer-Encoding` - header. When ``True`` and the message is not a multipart, the payload will - be decoded if this header's value is ``quoted-printable`` or ``base64``. - If some other encoding is used, or :mailheader:`Content-Transfer-Encoding` - header is missing, the payload is - returned as-is (undecoded). In all cases the returned value is binary - data. If the message is a multipart and the *decode* flag is ``True``, - then ``None`` is returned. If the payload is base64 and it was not - perfectly formed (missing padding, characters outside the base64 - alphabet), then an appropriate defect will be added to the message's - defect property (:class:`~email.errors.InvalidBase64PaddingDefect` or - :class:`~email.errors.InvalidBase64CharactersDefect`, respectively). - - When *decode* is ``False`` (the default) the body is returned as a string - without decoding the :mailheader:`Content-Transfer-Encoding`. 
However, - for a :mailheader:`Content-Transfer-Encoding` of 8bit, an attempt is made - to decode the original bytes using the ``charset`` specified by the - :mailheader:`Content-Type` header, using the ``replace`` error handler. - If no ``charset`` is specified, or if the ``charset`` given is not - recognized by the email package, the body is decoded using the default - ASCII charset. - - - .. method:: set_payload(payload, charset=None) - - Set the entire message object's payload to *payload*. It is the client's - responsibility to ensure the payload invariants. Optional *charset* sets - the message's default character set; see :meth:`set_charset` for details. - - .. method:: set_charset(charset) - - Set the character set of the payload to *charset*, which can either be a - :class:`~email.charset.Charset` instance (see :mod:`email.charset`), a - string naming a character set, or ``None``. If it is a string, it will - be converted to a :class:`~email.charset.Charset` instance. If *charset* - is ``None``, the ``charset`` parameter will be removed from the - :mailheader:`Content-Type` header (the message will not be otherwise - modified). Anything else will generate a :exc:`TypeError`. - - If there is no existing :mailheader:`MIME-Version` header one will be - added. If there is no existing :mailheader:`Content-Type` header, one - will be added with a value of :mimetype:`text/plain`. Whether the - :mailheader:`Content-Type` header already exists or not, its ``charset`` - parameter will be set to *charset.output_charset*. If - *charset.input_charset* and *charset.output_charset* differ, the payload - will be re-encoded to the *output_charset*. If there is no existing - :mailheader:`Content-Transfer-Encoding` header, then the payload will be - transfer-encoded, if needed, using the specified - :class:`~email.charset.Charset`, and a header with the appropriate value - will be added. 
If a :mailheader:`Content-Transfer-Encoding` header - already exists, the payload is assumed to already be correctly encoded - using that :mailheader:`Content-Transfer-Encoding` and is not modified. - - .. method:: get_charset() - - Return the :class:`~email.charset.Charset` instance associated with the - message's payload. - - The following methods implement a mapping-like interface for accessing the - message's :rfc:`2822` headers. Note that there are some semantic differences + The following methods implement the mapping-like interface for accessing the + message's headers. Note that there are some semantic differences between these methods and a normal mapping (i.e. dictionary) interface. For example, in a dictionary there are no duplicate keys, but here there may be duplicate message headers. Also, in dictionaries there is no guaranteed - order to the keys returned by :meth:`keys`, but in a :class:`Message` object, - headers are always returned in the order they appeared in the original - message, or were added to the message later. Any header deleted and then - re-added are always appended to the end of the header list. + order to the keys returned by :meth:`keys`, but in an :class:`EmailMessage` + object, headers are always returned in the order they appeared in the + original message, or in which they were added to the message later. Any + header deleted and then re-added is always appended to the end of the + header list. - These semantic differences are intentional and are biased toward maximal - convenience. + These semantic differences are intentional and are biased toward + convenience in the most common use cases. Note that in all cases, any envelope header present in the message is not included in the mapping interface. 
- In a model generated from bytes, any header values that (in contravention of - the RFCs) contain non-ASCII bytes will, when retrieved through this - interface, be represented as :class:`~email.header.Header` objects with - a charset of `unknown-8bit`. - .. method:: __len__() @@ -264,8 +185,8 @@ Here are the methods of the :class:`Message` class: .. method:: __contains__(name) Return true if the message object has a field named *name*. Matching is - done case-insensitively and *name* should not include the trailing colon. - Used for the ``in`` operator, e.g.:: + done without regard to case and *name* does not include the trailing + colon. Used for the ``in`` operator. For example:: if 'message-id' in myMessage: print('Message-ID:', myMessage['message-id']) @@ -273,20 +194,23 @@ Here are the methods of the :class:`Message` class: .. method:: __getitem__(name) - Return the value of the named header field. *name* should not include the + Return the value of the named header field. *name* does not include the colon field separator. If the header is missing, ``None`` is returned; a :exc:`KeyError` is never raised. Note that if the named field appears more than once in the message's headers, exactly which of those field values will be returned is undefined. Use the :meth:`get_all` method to get the values of all the - extant named headers. + extant headers named *name*. + + Using the standard (non-``compat32``) policies, the returned value is an + instance of a subclass of :class:`email.headerregistry.BaseHeader`. .. method:: __setitem__(name, val) Add a header to the message with field name *name* and value *val*. The - field is appended to the end of the message's existing fields. + field is appended to the end of the message's existing headers. Note that this does *not* overwrite or delete any existing header with the same name. 
If you want to ensure that the new header is the only one present in the @@ -295,6 +219,13 @@ Here are the methods of the :class:`Message` class: del msg['subject'] msg['subject'] = 'Python roolz!' + If the :mod:`policy <email.policy>` defines certain headers to be unique (as the standard + policies do), this method may raise a :exc:`ValueError` when an attempt + is made to assign a value to such a header when one already exists. This + behavior is intentional for consistency's sake, but do not depend on it + as we may choose to make such assignments do an automatic deletion of the + existing header in the future. + .. method:: __delitem__(name) @@ -323,9 +254,10 @@ Here are the methods of the :class:`Message` class: Return the value of the named header field. This is identical to :meth:`__getitem__` except that optional *failobj* is returned if the - named header is missing (defaults to ``None``). + named header is missing (*failobj* defaults to ``None``). + - Here are some additional useful methods: + Here are some additional useful header related methods: .. method:: get_all(name, failobj=None) @@ -346,17 +278,19 @@ Here are the methods of the :class:`Message` class: taken as the parameter name, with underscores converted to dashes (since dashes are illegal in Python identifiers). Normally, the parameter will be added as ``key="value"`` unless the value is ``None``, in which case - only the key will be added. If the value contains non-ASCII characters, - it can be specified as a three tuple in the format - ``(CHARSET, LANGUAGE, VALUE)``, where ``CHARSET`` is a string naming the - charset to be used to encode the value, ``LANGUAGE`` can usually be set - to ``None`` or the empty string (see :rfc:`2231` for other possibilities), - and ``VALUE`` is the string value containing non-ASCII code points.
If - a three tuple is not passed and the value contains non-ASCII characters, - it is automatically encoded in :rfc:`2231` format using a ``CHARSET`` - of ``utf-8`` and a ``LANGUAGE`` of ``None``. - - Here's an example:: + only the key will be added. + + If the value contains non-ASCII characters, the charset and language may + be explicitly controlled by specifying the value as a three tuple in the + format ``(CHARSET, LANGUAGE, VALUE)``, where ``CHARSET`` is a string + naming the charset to be used to encode the value, ``LANGUAGE`` can + usually be set to ``None`` or the empty string (see :rfc:`2231` for other + possibilities), and ``VALUE`` is the string value containing non-ASCII + code points. If a three tuple is not passed and the value contains + non-ASCII characters, it is automatically encoded in :rfc:`2231` format + using a ``CHARSET`` of ``utf-8`` and a ``LANGUAGE`` of ``None``. + + Here is an example:: msg.add_header('Content-Disposition', 'attachment', filename='bud.gif') @@ -364,37 +298,35 @@ Here are the methods of the :class:`Message` class: Content-Disposition: attachment; filename="bud.gif" - An example with non-ASCII characters:: + An example of the extended interface with non-ASCII characters:: msg.add_header('Content-Disposition', 'attachment', filename=('iso-8859-1', '', 'Fußballer.ppt')) - Which produces :: - - Content-Disposition: attachment; filename*="iso-8859-1''Fu%DFballer.ppt" - .. method:: replace_header(_name, _value) Replace a header. Replace the first header found in the message that - matches *_name*, retaining header order and field name case. If no - matching header was found, a :exc:`KeyError` is raised. + matches *_name*, retaining header order and field name case of the + original header. If no matching header is found, raise a + :exc:`KeyError`. .. method:: get_content_type() - Return the message's content type. The returned string is coerced to - lower case of the form :mimetype:`maintype/subtype`.
If there was no - :mailheader:`Content-Type` header in the message the default type as given - by :meth:`get_default_type` will be returned. Since according to - :rfc:`2045`, messages always have a default type, :meth:`get_content_type` - will always return a value. + Return the message's content type, coerced to lower case of the form + :mimetype:`maintype/subtype`. If there is no :mailheader:`Content-Type` + header in the message return the value returned by + :meth:`get_default_type`. If the :mailheader:`Content-Type` header is + invalid, return ``text/plain``. - :rfc:`2045` defines a message's default type to be :mimetype:`text/plain` - unless it appears inside a :mimetype:`multipart/digest` container, in - which case it would be :mimetype:`message/rfc822`. If the - :mailheader:`Content-Type` header has an invalid type specification, - :rfc:`2045` mandates that the default type be :mimetype:`text/plain`. + (According to :rfc:`2045`, messages always have a default type, + :meth:`get_content_type` will always return a value. :rfc:`2045` defines + a message's default type to be :mimetype:`text/plain` unless it appears + inside a :mimetype:`multipart/digest` container, in which case it would + be :mimetype:`message/rfc822`. If the :mailheader:`Content-Type` header + has an invalid type specification, :rfc:`2045` mandates that the default + type be :mimetype:`text/plain`.) .. method:: get_content_maintype() @@ -420,81 +352,41 @@ Here are the methods of the :class:`Message` class: .. method:: set_default_type(ctype) Set the default content type. *ctype* should either be - :mimetype:`text/plain` or :mimetype:`message/rfc822`, although this is not - enforced. The default content type is not stored in the - :mailheader:`Content-Type` header. - - - .. method:: get_params(failobj=None, header='content-type', unquote=True) - - Return the message's :mailheader:`Content-Type` parameters, as a list. 
- The elements of the returned list are 2-tuples of key/value pairs, as - split on the ``'='`` sign. The left hand side of the ``'='`` is the key, - while the right hand side is the value. If there is no ``'='`` sign in - the parameter the value is the empty string, otherwise the value is as - described in :meth:`get_param` and is unquoted if optional *unquote* is - ``True`` (the default). - - Optional *failobj* is the object to return if there is no - :mailheader:`Content-Type` header. Optional *header* is the header to - search instead of :mailheader:`Content-Type`. - - - .. method:: get_param(param, failobj=None, header='content-type', unquote=True) - - Return the value of the :mailheader:`Content-Type` header's parameter - *param* as a string. If the message has no :mailheader:`Content-Type` - header or if there is no such parameter, then *failobj* is returned - (defaults to ``None``). - - Optional *header* if given, specifies the message header to use instead of - :mailheader:`Content-Type`. - - Parameter keys are always compared case insensitively. The return value - can either be a string, or a 3-tuple if the parameter was :rfc:`2231` - encoded. When it's a 3-tuple, the elements of the value are of the form - ``(CHARSET, LANGUAGE, VALUE)``. Note that both ``CHARSET`` and - ``LANGUAGE`` can be ``None``, in which case you should consider ``VALUE`` - to be encoded in the ``us-ascii`` charset. You can usually ignore - ``LANGUAGE``. - - If your application doesn't care whether the parameter was encoded as in - :rfc:`2231`, you can collapse the parameter value by calling - :func:`email.utils.collapse_rfc2231_value`, passing in the return value - from :meth:`get_param`. This will return a suitably decoded Unicode - string when the value is a tuple, or the original string unquoted if it - isn't. 
For example:: - - rawparam = msg.get_param('foo') - param = email.utils.collapse_rfc2231_value(rawparam) - - In any case, the parameter value (either the returned string, or the - ``VALUE`` item in the 3-tuple) is always unquoted, unless *unquote* is set - to ``False``. + :mimetype:`text/plain` or :mimetype:`message/rfc822`, although this is + not enforced. The default content type is not stored in the + :mailheader:`Content-Type` header, so it only affects the return value of + the ``get_content_type`` methods when no :mailheader:`Content-Type` + header is present in the message. .. method:: set_param(param, value, header='Content-Type', requote=True, \ charset=None, language='', replace=False) Set a parameter in the :mailheader:`Content-Type` header. If the - parameter already exists in the header, its value will be replaced with - *value*. If the :mailheader:`Content-Type` header as not yet been defined - for this message, it will be set to :mimetype:`text/plain` and the new - parameter value will be appended as per :rfc:`2045`. - - Optional *header* specifies an alternative header to - :mailheader:`Content-Type`, and all parameters will be quoted as necessary - unless optional *requote* is ``False`` (the default is ``True``). - - If optional *charset* is specified, the parameter will be encoded - according to :rfc:`2231`. Optional *language* specifies the RFC 2231 - language, defaulting to the empty string. Both *charset* and *language* - should be strings. + parameter already exists in the header, replace its value with *value*. + When *header* is ``Content-Type`` (the default) and the header does not + yet exist in the message, add it, set its value to + :mimetype:`text/plain`, and append the new parameter value. Optional + *header* specifies an alternative header to :mailheader:`Content-Type`. + + If the value contains non-ASCII characters, the charset and language may + be explicitly specified using the optional *charset* and *language* + parameters.
Optional *language* specifies the :rfc:`2231` language, + defaulting to the empty string. Both *charset* and *language* should be + strings. The default is to use the ``utf8`` *charset* and ``None`` for + the *language*. If *replace* is ``False`` (the default) the header is moved to the end of the list of headers. If *replace* is ``True``, the header will be updated in place. + Use of the *requote* parameter with :class:`EmailMessage` objects is + deprecated. + + Note that existing parameter values of headers may be accessed through + the :attr:`~email.headerregistry.BaseHeader.params` attribute of the + header value (for example, ``msg['Content-Type'].params['charset']``). + .. versionchanged:: 3.4 ``replace`` keyword was added. @@ -502,25 +394,11 @@ Here are the methods of the :class:`Message` class: Remove the given parameter completely from the :mailheader:`Content-Type` header. The header will be re-written in place without the parameter or - its value. All values will be quoted as necessary unless *requote* is - ``False`` (the default is ``True``). Optional *header* specifies an - alternative to :mailheader:`Content-Type`. - - - .. method:: set_type(type, header='Content-Type', requote=True) - - Set the main type and subtype for the :mailheader:`Content-Type` - header. *type* must be a string in the form :mimetype:`maintype/subtype`, - otherwise a :exc:`ValueError` is raised. - - This method replaces the :mailheader:`Content-Type` header, keeping all - the parameters in place. If *requote* is ``False``, this leaves the - existing header's quoting as is, otherwise the parameters will be quoted - (the default). + its value. Optional *header* specifies an alternative to + :mailheader:`Content-Type`. - An alternative header can be specified in the *header* argument. When the - :mailheader:`Content-Type` header is set a :mailheader:`MIME-Version` - header is also added. + Use of the *requote* parameter with :class:`EmailMessage` objects is + deprecated. ..
method:: get_filename(failobj=None) @@ -549,12 +427,11 @@ Here are the methods of the :class:`Message` class: necessary. A :exc:`~email.errors.HeaderParseError` is raised if the message object has no :mailheader:`Content-Type` header. - Note that using this method is subtly different than deleting the old + Note that using this method is subtly different from deleting the old :mailheader:`Content-Type` header and adding a new one with the new boundary via :meth:`add_header`, because :meth:`set_boundary` preserves the order of the :mailheader:`Content-Type` header in the list of - headers. However, it does *not* preserve any continuation lines which may - have been present in the original :mailheader:`Content-Type` header. + headers. .. method:: get_content_charset(failobj=None) @@ -563,9 +440,6 @@ Here are the methods of the :class:`Message` class: coerced to lower case. If there is no :mailheader:`Content-Type` header, or if that header has no ``charset`` parameter, *failobj* is returned. - Note that this method differs from :meth:`get_charset` which returns the - :class:`~email.charset.Charset` instance for the default encoding of the message body. - .. method:: get_charsets(failobj=None) @@ -575,10 +449,19 @@ Here are the methods of the :class:`Message` class: Each item in the list will be a string which is the value of the ``charset`` parameter in the :mailheader:`Content-Type` header for the - represented subpart. However, if the subpart has no - :mailheader:`Content-Type` header, no ``charset`` parameter, or is not of - the :mimetype:`text` main MIME type, then that item in the returned list - will be *failobj*. + represented subpart. If the subpart has no :mailheader:`Content-Type` + header, no ``charset`` parameter, or is not of the :mimetype:`text` main + MIME type, then that item in the returned list will be *failobj*. + + + .. 
method:: is_attachment + + Return ``True`` if there is a :mailheader:`Content-Disposition` header + and its (case insensitive) value is ``attachment``, ``False`` otherwise. + + .. versionchanged:: 3.4.2 + is_attachment is now a method instead of a property, for consistency + with :meth:`~email.message.Message.is_multipart`. .. method:: get_content_disposition() @@ -590,6 +473,11 @@ Here are the methods of the :class:`Message` class: .. versionadded:: 3.5 + + The following methods relate to interrogating and manipulating the content + (payload) of the message. + + .. method:: walk() The :meth:`walk` method is an all-purpose generator which can be used to @@ -651,8 +539,169 @@ Here are the methods of the :class:`Message` class: into the subparts. - :class:`Message` objects can also optionally contain two instance attributes, - which can be used when generating the plain text of a MIME message. + .. method:: get_body(preferencelist=('related', 'html', 'plain')) + + Return the MIME part that is the best candidate to be the "body" of the + message. + + *preferencelist* must be a sequence of strings from the set ``related``, + ``html``, and ``plain``, and indicates the order of preference for the + content type of the part returned. + + Start looking for candidate matches with the object on which the + ``get_body`` method is called. + + If ``related`` is not included in *preferencelist*, consider the root + part (or subpart of the root part) of any related encountered as a + candidate if the (sub-)part matches a preference. + + When encountering a ``multipart/related``, check the ``start`` parameter + and if a part with a matching :mailheader:`Content-ID` is found, consider + only it when looking for candidate matches. Otherwise consider only the + first (default root) part of the ``multipart/related``. + + If a part has a :mailheader:`Content-Disposition` header, only consider + the part a candidate match if the value of the header is ``inline``. 
+ + If none of the candidates matches any of the preferences in + *preferencelist*, return ``None``. + + Notes: (1) For most applications the only *preferencelist* combinations + that really make sense are ``('plain',)``, ``('html', 'plain')``, and the + default ``('related', 'html', 'plain')``. (2) Because matching starts + with the object on which ``get_body`` is called, calling ``get_body`` on + a ``multipart/related`` will return the object itself unless + *preferencelist* has a non-default value. (3) Messages (or message parts) + that do not specify a :mailheader:`Content-Type` or whose + :mailheader:`Content-Type` header is invalid will be treated as if they + are of type ``text/plain``, which may occasionally cause ``get_body`` to + return unexpected results. + + + .. method:: iter_attachments() + + Return an iterator over all of the immediate sub-parts of the message + that are not candidate "body" parts. That is, skip the first occurrence + of each of ``text/plain``, ``text/html``, ``multipart/related``, or + ``multipart/alternative`` (unless they are explicitly marked as + attachments via :mailheader:`Content-Disposition: attachment`), and + return all remaining parts. When applied directly to a + ``multipart/related``, return an iterator over all the related parts + except the root part (i.e., the part pointed to by the ``start`` parameter, + or the first part if there is no ``start`` parameter or the ``start`` + parameter doesn't match the :mailheader:`Content-ID` of any of the + parts). When applied directly to a ``multipart/alternative`` or a + non-``multipart``, return an empty iterator. + + + .. method:: iter_parts() + + Return an iterator over all of the immediate sub-parts of the message, + which will be empty for a non-``multipart``. (See also + :meth:`~email.message.EmailMessage.walk`.) + + + .. 
method:: get_content(*args, content_manager=None, **kw) + + Call the :meth:`~email.contentmanager.ContentManager.get_content` method + of the *content_manager*, passing self as the message object, and passing + along any other arguments or keywords as additional arguments. If + *content_manager* is not specified, use the ``content_manager`` specified + by the current :mod:`~email.policy`. + + + .. method:: set_content(*args, content_manager=None, **kw) + + Call the :meth:`~email.contentmanager.ContentManager.set_content` method + of the *content_manager*, passing self as the message object, and passing + along any other arguments or keywords as additional arguments. If + *content_manager* is not specified, use the ``content_manager`` specified + by the current :mod:`~email.policy`. + + + .. method:: make_related(boundary=None) + + Convert a non-``multipart`` message into a ``multipart/related`` message, + moving any existing :mailheader:`Content-` headers and payload into a + (new) first part of the ``multipart``. If *boundary* is specified, use + it as the boundary string in the multipart, otherwise leave the boundary + to be automatically created when it is needed (for example, when the + message is serialized). + + + .. method:: make_alternative(boundary=None) + + Convert a non-``multipart`` or a ``multipart/related`` into a + ``multipart/alternative``, moving any existing :mailheader:`Content-` + headers and payload into a (new) first part of the ``multipart``. If + *boundary* is specified, use it as the boundary string in the multipart, + otherwise leave the boundary to be automatically created when it is + needed (for example, when the message is serialized). + + + .. method:: make_mixed(boundary=None) + + Convert a non-``multipart``, a ``multipart/related``, or a + ``multipart/alternative`` into a ``multipart/mixed``, moving any existing + :mailheader:`Content-` headers and payload into a (new) first part of the + ``multipart``.
If *boundary* is specified, use it as the boundary string + in the multipart, otherwise leave the boundary to be automatically + created when it is needed (for example, when the message is serialized). + + + .. method:: add_related(*args, content_manager=None, **kw) + + If the message is a ``multipart/related``, create a new message + object, pass all of the arguments to its :meth:`set_content` method, + and :meth:`~email.message.Message.attach` it to the ``multipart``. If + the message is a non-``multipart``, call :meth:`make_related` and then + proceed as above. If the message is any other type of ``multipart``, + raise a :exc:`TypeError`. If *content_manager* is not specified, use + the ``content_manager`` specified by the current :mod:`~email.policy`. + If the added part has no :mailheader:`Content-Disposition` header, + add one with the value ``inline``. + + + .. method:: add_alternative(*args, content_manager=None, **kw) + + If the message is a ``multipart/alternative``, create a new message + object, pass all of the arguments to its :meth:`set_content` method, and + :meth:`~email.message.Message.attach` it to the ``multipart``. If the + message is a non-``multipart`` or ``multipart/related``, call + :meth:`make_alternative` and then proceed as above. If the message is + any other type of ``multipart``, raise a :exc:`TypeError`. If + *content_manager* is not specified, use the ``content_manager`` specified + by the current :mod:`~email.policy`. + + + .. method:: add_attachment(*args, content_manager=None, **kw) + + If the message is a ``multipart/mixed``, create a new message object, + pass all of the arguments to its :meth:`set_content` method, and + :meth:`~email.message.Message.attach` it to the ``multipart``. If the + message is a non-``multipart``, ``multipart/related``, or + ``multipart/alternative``, call :meth:`make_mixed` and then proceed as + above. 
If *content_manager* is not specified, use the ``content_manager`` + specified by the current :mod:`~email.policy`. If the added part + has no :mailheader:`Content-Disposition` header, add one with the value + ``attachment``. This method can be used both for explicit attachments + (:mailheader:`Content-Disposition: attachment`) and ``inline`` attachments + (:mailheader:`Content-Disposition: inline`), by passing appropriate + options to the ``content_manager``. + + + .. method:: clear() + + Remove the payload and all of the headers. + + + .. method:: clear_content() + + Remove the payload and all of the :mailheader:`Content-` headers, leaving + all other headers intact and in their original order. + + + :class:`EmailMessage` objects have the following instance attributes: .. attribute:: preamble @@ -682,11 +731,8 @@ Here are the methods of the :class:`Message` class: The *epilogue* attribute acts the same way as the *preamble* attribute, except that it contains text that appears between the last boundary and - the end of the message. - - You do not need to set the epilogue to the empty string in order for the - :class:`~email.generator.Generator` to print a newline at the end of the - file. + the end of the message. As with the :attr:`~EmailMessage.preamble`, + if there is no epilogue text this attribute will be ``None``. .. attribute:: defects @@ -694,3 +740,11 @@ Here are the methods of the :class:`Message` class: The *defects* attribute contains a list of all the problems found when parsing this message. See :mod:`email.errors` for a detailed description of the possible parsing defects. + + +.. class:: MIMEPart(policy=default) + + This class represents a subpart of a MIME message. It is identical to + :class:`EmailMessage`, except that no :mailheader:`MIME-Version` headers are + added when :meth:`~EmailMessage.set_content` is called, since sub-parts do + not need their own :mailheader:`MIME-Version` headers.
diff --git a/Doc/library/email.mime.rst b/Doc/library/email.mime.rst index 165011d..d9dae9f 100644 --- a/Doc/library/email.mime.rst +++ b/Doc/library/email.mime.rst @@ -8,6 +8,11 @@ -------------- +This module is part of the legacy (``Compat32``) email API. Its functionality +is partially replaced by the :mod:`~email.contentmanager` in the new API, but +in certain applications these classes may still be useful, even in non-legacy +code. + Ordinarily, you get a message object structure by passing a file or some text to a parser, which parses the text and returns the root message object. However you can also build a complete message structure from scratch, or even individual diff --git a/Doc/library/email.parser.rst b/Doc/library/email.parser.rst index b8eb7c5..4dbad49 100644 --- a/Doc/library/email.parser.rst +++ b/Doc/library/email.parser.rst @@ -8,210 +8,219 @@ -------------- -Message object structures can be created in one of two ways: they can be created -from whole cloth by instantiating :class:`~email.message.Message` objects and -stringing them together via :meth:`~email.message.Message.attach` and -:meth:`~email.message.Message.set_payload` calls, or they -can be created by parsing a flat text representation of the email message. +Message object structures can be created in one of two ways: they can be +created from whole cloth by creating an :class:`~email.message.EmailMessage` +object, adding headers using the dictionary interface, and adding payload(s) +using :meth:`~email.message.EmailMessage.set_content` and related methods, or +they can be created by parsing a serialized representation of the email +message. The :mod:`email` package provides a standard parser that understands most email -document structures, including MIME documents. You can pass the parser a string -or a file object, and the parser will return to you the root -:class:`~email.message.Message` instance of the object structure. 
For simple, -non-MIME messages the payload of this root object will likely be a string -containing the text of the message. For MIME messages, the root object will -return ``True`` from its :meth:`~email.message.Message.is_multipart` method, and -the subparts can be accessed via the :meth:`~email.message.Message.get_payload` -and :meth:`~email.message.Message.walk` methods. - -There are actually two parser interfaces available for use, the classic -:class:`Parser` API and the incremental :class:`FeedParser` API. The classic -:class:`Parser` API is fine if you have the entire text of the message in memory -as a string, or if the entire message lives in a file on the file system. -:class:`FeedParser` is more appropriate for when you're reading the message from -a stream which might block waiting for more input (e.g. reading an email message -from a socket). The :class:`FeedParser` can consume and parse the message -incrementally, and only returns the root object when you close the parser [#]_. +document structures, including MIME documents. You can pass the parser a +bytes, string or file object, and the parser will return to you the root +:class:`~email.message.EmailMessage` instance of the object structure. For +simple, non-MIME messages the payload of this root object will likely be a +string containing the text of the message. For MIME messages, the root object +will return ``True`` from its :meth:`~email.message.EmailMessage.is_multipart` +method, and the subparts can be accessed via the payload manipulation methods, +such as :meth:`~email.message.EmailMessage.get_body`, +:meth:`~email.message.EmailMessage.iter_parts`, and +:meth:`~email.message.EmailMessage.walk`. + +There are actually two parser interfaces available for use, the :class:`Parser` +API and the incremental :class:`FeedParser` API. The :class:`Parser` API is +most useful if you have the entire text of the message in memory, or if the +entire message lives in a file on the file system. 
:class:`FeedParser` is more +appropriate when you are reading the message from a stream which might block +waiting for more input (such as reading an email message from a socket). The +:class:`FeedParser` can consume and parse the message incrementally, and only +returns the root object when you close the parser. Note that the parser can be extended in limited ways, and of course you can -implement your own parser completely from scratch. There is no magical -connection between the :mod:`email` package's bundled parser and the -:class:`~email.message.Message` class, so your custom parser can create message -object trees any way it finds necessary. +implement your own parser completely from scratch. All of the logic that +connects the :mod:`email` package's bundled parser and the +:class:`~email.message.EmailMessage` class is embodied in the :mod:`policy` +class, so a custom parser can create message object trees any way it finds +necessary by implementing custom versions of the appropriate :mod:`policy` +methods. FeedParser API ^^^^^^^^^^^^^^ -The :class:`FeedParser`, imported from the :mod:`email.feedparser` module, -provides an API that is conducive to incremental parsing of email messages, such -as would be necessary when reading the text of an email message from a source -that can block (e.g. a socket). The :class:`FeedParser` can of course be used -to parse an email message fully contained in a string or a file, but the classic -:class:`Parser` API may be more convenient for such use cases. The semantics -and results of the two parser APIs are identical. - -The :class:`FeedParser`'s API is simple; you create an instance, feed it a bunch -of text until there's no more to feed it, then close the parser to retrieve the -root message object. The :class:`FeedParser` is extremely accurate when parsing -standards-compliant messages, and it does a very good job of parsing -non-compliant messages, providing information about how a message was deemed -broken. 
It will populate a message object's *defects* attribute with a list of -any problems it found in a message. See the :mod:`email.errors` module for the +The :class:`BytesFeedParser`, imported from the :mod:`email.feedparser` module, +provides an API that is conducive to incremental parsing of email messages, +such as would be necessary when reading the text of an email message from a +source that can block (such as a socket). The :class:`BytesFeedParser` can of +course be used to parse an email message fully contained in a :term:`bytes-like +object`, string, or file, but the :class:`BytesParser` API may be more +convenient for such use cases. The semantics and results of the two parser +APIs are identical. + +The :class:`BytesFeedParser`'s API is simple; you create an instance, feed it a +bunch of bytes until there's no more to feed it, then close the parser to +retrieve the root message object. The :class:`BytesFeedParser` is extremely +accurate when parsing standards-compliant messages, and it does a very good job +of parsing non-compliant messages, providing information about how a message +was deemed broken. It will populate a message object's +:attr:`~email.message.EmailMessage.defects` attribute with a list of any +problems it found in a message. See the :mod:`email.errors` module for the list of defects that it can find. -Here is the API for the :class:`FeedParser`: +Here is the API for the :class:`BytesFeedParser`: -.. class:: FeedParser(_factory=email.message.Message, *, policy=policy.compat32) +.. class:: BytesFeedParser(_factory=None, *, policy=policy.compat32) - Create a :class:`FeedParser` instance. Optional *_factory* is a no-argument - callable that will be called whenever a new message object is needed. It - defaults to the :class:`email.message.Message` class. + Create a :class:`BytesFeedParser` instance. Optional *_factory* is a + no-argument callable; if not specified determine the default based on the + *policy*. 
Call *_factory* whenever a new message object is needed. - If *policy* is specified (it must be an instance of a :mod:`~email.policy` - class) use the rules it specifies to update the representation of the - message. If *policy* is not set, use the :class:`compat32 - <email.policy.Compat32>` policy, which maintains backward compatibility with - the Python 3.2 version of the email package. For more information see the + If *policy* is specified use the rules it specifies to update the + representation of the message. If *policy* is not set, use the + :class:`compat32 <email.policy.Compat32>` policy, which maintains backward + compatibility with the Python 3.2 version of the email package and provides + :class:`~email.message.Message` as the default factory. All other policies + provide :class:`~email.message.EmailMessage` as the default *_factory*. For + more information on what else *policy* controls, see the :mod:`~email.policy` documentation. + Note: **The policy keyword should always be specified**; The default will + change to :data:`email.policy.default` in a future version of Python. + + .. versionadded:: 3.2 + .. versionchanged:: 3.3 Added the *policy* keyword. + .. method:: feed(data) - Feed the :class:`FeedParser` some more data. *data* should be a string - containing one or more lines. The lines can be partial and the - :class:`FeedParser` will stitch such partial lines together properly. The - lines in the string can have any of the common three line endings, - carriage return, newline, or carriage return and newline (they can even be - mixed). + Feed the parser some more data. *data* should be a :term:`bytes-like + object` containing one or more lines. The lines can be partial and the + parser will stitch such partial lines together properly. The lines can + have any of the three common line endings: carriage return, newline, or + carriage return and newline (they can even be mixed). + .. 
method:: close() - Closing a :class:`FeedParser` completes the parsing of all previously fed - data, and returns the root message object. It is undefined what happens - if you feed more data to a closed :class:`FeedParser`. + Complete the parsing of all previously fed data and return the root + message object. It is undefined what happens if :meth:`~feed` is called + after this method has been called. -.. class:: BytesFeedParser(_factory=email.message.Message) +.. class:: FeedParser(_factory=None, *, policy=policy.compat32) - Works exactly like :class:`FeedParser` except that the input to the - :meth:`~FeedParser.feed` method must be bytes and not string. + Works like :class:`BytesFeedParser` except that the input to the + :meth:`~BytesFeedParser.feed` method must be a string. This is of limited + utility, since the only way for such a message to be valid is for it to + contain only ASCII text or, if :attr:`~email.policy.Policy.utf8` is + ``True``, no binary attachments. - .. versionadded:: 3.2 + .. versionchanged:: 3.3 Added the *policy* keyword. -Parser class API -^^^^^^^^^^^^^^^^ +Parser API +^^^^^^^^^^ -The :class:`Parser` class, imported from the :mod:`email.parser` module, +The :class:`BytesParser` class, imported from the :mod:`email.parser` module, provides an API that can be used to parse a message when the complete contents -of the message are available in a string or file. The :mod:`email.parser` -module also provides header-only parsers, called :class:`HeaderParser` and -:class:`BytesHeaderParser`, which can be used if you're only interested in the -headers of the message. :class:`HeaderParser` and :class:`BytesHeaderParser` +of the message are available in a :term:`bytes-like object` or file. The +:mod:`email.parser` module also provides :class:`Parser` for parsing strings, +and header-only parsers, :class:`BytesHeaderParser` and +:class:`HeaderParser`, which can be used if you're only interested in the +headers of the message. 
:class:`BytesHeaderParser` and :class:`HeaderParser` can be much faster in these situations, since they do not attempt to parse the -message body, instead setting the payload to the raw body as a string. They -have the same API as the :class:`Parser` and :class:`BytesParser` classes. +message body, instead setting the payload to the raw body. -.. versionadded:: 3.3 - The BytesHeaderParser class. +.. class:: BytesParser(_class=None, *, policy=policy.compat32) -.. class:: Parser(_class=email.message.Message, *, policy=policy.compat32) + Create a :class:`BytesParser` instance. The *_class* and *policy* + arguments have the same meaning and semantics as the *_factory* + and *policy* arguments of :class:`BytesFeedParser`. - The constructor for the :class:`Parser` class takes an optional argument - *_class*. This must be a callable factory (such as a function or a class), and - it is used whenever a sub-message object needs to be created. It defaults to - :class:`~email.message.Message` (see :mod:`email.message`). The factory will - be called without arguments. - - If *policy* is specified (it must be an instance of a :mod:`~email.policy` - class) use the rules it specifies to update the representation of the - message. If *policy* is not set, use the :class:`compat32 - <email.policy.Compat32>` policy, which maintains backward compatibility with - the Python 3.2 version of the email package. For more information see the - :mod:`~email.policy` documentation. + Note: **The policy keyword should always be specified**; The default will + change to :data:`email.policy.default` in a future version of Python. .. versionchanged:: 3.3 Removed the *strict* argument that was deprecated in 2.4. Added the *policy* keyword. - The other public :class:`Parser` methods are: - .. method:: parse(fp, headersonly=False) - Read all the data from the file-like object *fp*, parse the resulting - text, and return the root message object.
*fp* must support both the - :meth:`~io.TextIOBase.readline` and the :meth:`~io.TextIOBase.read` - methods on file-like objects. + Read all the data from the binary file-like object *fp*, parse the + resulting bytes, and return the message object. *fp* must support + both the :meth:`~io.IOBase.readline` and the :meth:`~io.IOBase.read` + methods. - The text contained in *fp* must be formatted as a block of :rfc:`2822` + The bytes contained in *fp* must be formatted as a block of :rfc:`5322` + (or, if :attr:`~email.policy.Policy.utf8` is ``True``, :rfc:`6532`) style headers and header continuation lines, optionally preceded by an envelope header. The header block is terminated either by the end of the data or by a blank line. Following the header block is the body of the - message (which may contain MIME-encoded subparts). + message (which may contain MIME-encoded subparts, including subparts + with a :mailheader:`Content-Transfer-Encoding` of ``8bit``). Optional *headersonly* is a flag specifying whether to stop parsing after reading the headers or not. The default is ``False``, meaning it parses the entire contents of the file. - .. method:: parsestr(text, headersonly=False) - Similar to the :meth:`parse` method, except it takes a string object - instead of a file-like object. Calling this method on a string is exactly - equivalent to wrapping *text* in a :class:`~io.StringIO` instance first and - calling :meth:`parse`. + .. method:: parsebytes(bytes, headersonly=False) + + Similar to the :meth:`parse` method, except it takes a :term:`bytes-like + object` instead of a file-like object. Calling this method on a + :term:`bytes-like object` is equivalent to wrapping *bytes* in a + :class:`~io.BytesIO` instance first and calling :meth:`parse`. Optional *headersonly* is as with the :meth:`parse` method. + .. versionadded:: 3.2 -.. 
class:: BytesParser(_class=email.message.Message, *, policy=policy.compat32) - This class is exactly parallel to :class:`Parser`, but handles bytes input. - The *_class* and *strict* arguments are interpreted in the same way as for - the :class:`Parser` constructor. +.. class:: BytesHeaderParser(_class=None, *, policy=policy.compat32) + + Exactly like :class:`BytesParser`, except that *headersonly* + defaults to ``True``. + + .. versionadded:: 3.3 - If *policy* is specified (it must be an instance of a :mod:`~email.policy` - class) use the rules it specifies to update the representation of the - message. If *policy* is not set, use the :class:`compat32 - <email.policy.Compat32>` policy, which maintains backward compatibility with - the Python 3.2 version of the email package. For more information see the - :mod:`~email.policy` documentation. + +.. class:: Parser(_class=None, *, policy=policy.compat32) + + This class is parallel to :class:`BytesParser`, but handles string input. .. versionchanged:: 3.3 Removed the *strict* argument. Added the *policy* keyword. + .. method:: parse(fp, headersonly=False) - Read all the data from the binary file-like object *fp*, parse the - resulting bytes, and return the message object. *fp* must support - both the :meth:`~io.IOBase.readline` and the :meth:`~io.IOBase.read` - methods on file-like objects. + Read all the data from the text-mode file-like object *fp*, parse the + resulting text, and return the root message object. *fp* must support + both the :meth:`~io.TextIOBase.readline` and the + :meth:`~io.TextIOBase.read` methods on file-like objects. - The bytes contained in *fp* must be formatted as a block of :rfc:`2822` - style headers and header continuation lines, optionally preceded by an - envelope header. The header block is terminated either by the end of the - data or by a blank line. 
Following the header block is the body of the - message (which may contain MIME-encoded subparts, including subparts - with a :mailheader:`Content-Transfer-Encoding` of ``8bit``. + Other than the text mode requirement, this method operates like + :meth:`BytesParser.parse`. - Optional *headersonly* is a flag specifying whether to stop parsing after - reading the headers or not. The default is ``False``, meaning it parses - the entire contents of the file. - .. method:: parsebytes(text, headersonly=False) + .. method:: parsestr(text, headersonly=False) - Similar to the :meth:`parse` method, except it takes a :term:`bytes-like - object` instead of a file-like object. Calling this method is equivalent - to wrapping *text* in a :class:`~io.BytesIO` instance first and calling - :meth:`parse`. + Similar to the :meth:`parse` method, except it takes a string object + instead of a file-like object. Calling this method on a string is + equivalent to wrapping *text* in a :class:`~io.StringIO` instance first + and calling :meth:`parse`. Optional *headersonly* is as with the :meth:`parse` method. - .. versionadded:: 3.2 + +.. class:: HeaderParser(_class=None, *, policy=policy.compat32) + + Exactly like :class:`Parser`, except that *headersonly* + defaults to ``True``. Since creating a message object structure from a string or a file object is such @@ -220,55 +229,60 @@ in the top-level :mod:`email` package namespace. .. currentmodule:: email -.. function:: message_from_string(s, _class=email.message.Message, *, \ - policy=policy.compat32) - Return a message object structure from a string. This is exactly equivalent to - ``Parser().parsestr(s)``. *_class* and *policy* are interpreted as - with the :class:`~email.parser.Parser` class constructor. +.. function:: message_from_bytes(s, _class=None, *, \ + policy=policy.compat32) + Return a message object structure from a :term:`bytes-like object`. This is + equivalent to ``BytesParser().parsebytes(s)``. 
Optional *_class* and + *policy* are interpreted as with the :class:`~email.parser.BytesParser` class + constructor. + + .. versionadded:: 3.2 .. versionchanged:: 3.3 Removed the *strict* argument. Added the *policy* keyword. -.. function:: message_from_bytes(s, _class=email.message.Message, *, \ - policy=policy.compat32) - Return a message object structure from a :term:`bytes-like object`. This is exactly - equivalent to ``BytesParser().parsebytes(s)``. Optional *_class* and - *strict* are interpreted as with the :class:`~email.parser.Parser` class +.. function:: message_from_binary_file(fp, _class=None, *, \ + policy=policy.compat32) + + Return a message object structure tree from an open binary :term:`file + object`. This is equivalent to ``BytesParser().parse(fp)``. *_class* and + *policy* are interpreted as with the :class:`~email.parser.BytesParser` class constructor. .. versionadded:: 3.2 .. versionchanged:: 3.3 Removed the *strict* argument. Added the *policy* keyword. -.. function:: message_from_file(fp, _class=email.message.Message, *, \ - policy=policy.compat32) - Return a message object structure tree from an open :term:`file object`. - This is exactly equivalent to ``Parser().parse(fp)``. *_class* - and *policy* are interpreted as with the :class:`~email.parser.Parser` class - constructor. +.. function:: message_from_string(s, _class=None, *, \ + policy=policy.compat32) + + Return a message object structure from a string. This is equivalent to + ``Parser().parsestr(s)``. *_class* and *policy* are interpreted as + with the :class:`~email.parser.Parser` class constructor. .. versionchanged:: 3.3 Removed the *strict* argument. Added the *policy* keyword. -.. function:: message_from_binary_file(fp, _class=email.message.Message, *, \ - policy=policy.compat32) - Return a message object structure tree from an open binary :term:`file - object`. This is exactly equivalent to ``BytesParser().parse(fp)``.
- *_class* and *policy* are interpreted as with the - :class:`~email.parser.Parser` class constructor. +.. function:: message_from_file(fp, _class=None, *, \ + policy=policy.compat32) + + Return a message object structure tree from an open :term:`file object`. + This is equivalent to ``Parser().parse(fp)``. *_class* and *policy* are + interpreted as with the :class:`~email.parser.Parser` class constructor. - .. versionadded:: 3.2 .. versionchanged:: 3.3 Removed the *strict* argument. Added the *policy* keyword. -Here's an example of how you might use this at an interactive Python prompt:: + +Here's an example of how you might use :func:`message_from_bytes` at an +interactive Python prompt:: >>> import email - >>> msg = email.message_from_string(myString) # doctest: +SKIP + >>> msg = email.message_from_bytes(myBytes) # doctest: +SKIP Additional notes @@ -278,35 +292,27 @@ Here are some notes on the parsing semantics: * Most non-\ :mimetype:`multipart` type messages are parsed as a single message object with a string payload. These objects will return ``False`` for - :meth:`~email.message.Message.is_multipart`. Their - :meth:`~email.message.Message.get_payload` method will return a string object. + :meth:`~email.message.EmailMessage.is_multipart`, and + :meth:`~email.message.EmailMessage.iter_parts` will yield an empty list. * All :mimetype:`multipart` type messages will be parsed as a container message object with a list of sub-message objects for their payload. The outer container message will return ``True`` for - :meth:`~email.message.Message.is_multipart` and their - :meth:`~email.message.Message.get_payload` method will return the list of - :class:`~email.message.Message` subparts. + :meth:`~email.message.EmailMessage.is_multipart`, and + :meth:`~email.message.EmailMessage.iter_parts` will yield a list of subparts. -* Most messages with a content type of :mimetype:`message/\*` (e.g. 
- :mimetype:`message/delivery-status` and :mimetype:`message/rfc822`) will also be - parsed as container object containing a list payload of length 1. Their - :meth:`~email.message.Message.is_multipart` method will return ``True``. - The single element in the list payload will be a sub-message object. +* Most messages with a content type of :mimetype:`message/\*` (such as + :mimetype:`message/delivery-status` and :mimetype:`message/rfc822`) will also + be parsed as container object containing a list payload of length 1. Their + :meth:`~email.message.EmailMessage.is_multipart` method will return ``True``. + The single element yielded by :meth:`~email.message.EmailMessage.iter_parts` + will be a sub-message object. -* Some non-standards compliant messages may not be internally consistent about +* Some non-standards-compliant messages may not be internally consistent about their :mimetype:`multipart`\ -edness. Such messages may have a :mailheader:`Content-Type` header of type :mimetype:`multipart`, but their - :meth:`~email.message.Message.is_multipart` method may return ``False``. + :meth:`~email.message.EmailMessage.is_multipart` method may return ``False``. If such messages were parsed with the :class:`~email.parser.FeedParser`, they will have an instance of the :class:`~email.errors.MultipartInvariantViolationDefect` class in their *defects* attribute list. See :mod:`email.errors` for details. - -.. rubric:: Footnotes - -.. [#] As of email package version 3.0, introduced in Python 2.4, the classic - :class:`~email.parser.Parser` was re-implemented in terms of the - :class:`~email.parser.FeedParser`, so the semantics and results are - identical between the two parsers. - diff --git a/Doc/library/email.policy.rst b/Doc/library/email.policy.rst index 2a6047d..0d6c27a 100644 --- a/Doc/library/email.policy.rst +++ b/Doc/library/email.policy.rst @@ -18,9 +18,12 @@ described by the various email and MIME RFCs. 
However, the general format of email messages (a block of header fields each consisting of a name followed by a colon followed by a value, the whole block followed by a blank line and an arbitrary 'body'), is a format that has found utility outside of the realm of -email. Some of these uses conform fairly closely to the main RFCs, some do -not. And even when working with email, there are times when it is desirable to -break strict compliance with the RFCs. +email. Some of these uses conform fairly closely to the main email RFCs, some +do not. Even when working with email, there are times when it is desirable to +break strict compliance with the RFCs, such as generating emails that +interoperate with email servers that do not themselves follow the standards, or +that implement extensions you want to use in ways that violate the +standards. Policy objects give the email package the flexibility to handle all these disparate use cases. @@ -31,27 +34,40 @@ control the behavior of various components of the email package during use. email package to alter the default behavior. The settable values and their defaults are described below. -There is a default policy used by all classes in the email package. This -policy is named :class:`Compat32`, with a corresponding pre-defined instance -named :const:`compat32`. It provides for complete backward compatibility (in -some cases, including bug compatibility) with the pre-Python3.3 version of the -email package. +There is a default policy used by all classes in the email package. For all of +the :mod:`~email.parser` classes and the related convenience functions, and for +the :class:`~email.message.Message` class, this is the :class:`Compat32` +policy, via its corresponding pre-defined instance :const:`compat32`. This +policy provides for complete backward compatibility (in some cases, including +bug compatibility) with the pre-Python3.3 version of the email package. 
+ +The default value for the *policy* keyword to +:class:`~email.message.EmailMessage` is the :class:`EmailPolicy` policy, via +its pre-defined instance :data:`~default`. + +When a :class:`~email.message.Message` or :class:`~email.message.EmailMessage` +object is created, it acquires a policy. If the message is created by a +:mod:`~email.parser`, a policy passed to the parser will be the policy used by +the message it creates. If the message is created by the program, then the +policy can be specified when it is created. When a message is passed to a +:mod:`~email.generator`, the generator uses the policy from the message by +default, but you can also pass a specific policy to the generator that will +override the one stored on the message object. + +The default value for the *policy* keyword for the :mod:`email.parser` classes +and the parser convenience functions **will be changing** in a future version of +Python. Therefore you should **always specify explicitly which policy you want +to use** when calling any of the classes and functions described in the +:mod:`~email.parser` module. The first part of this documentation covers the features of :class:`Policy`, an -:term:`abstract base class` that defines the features that are common to all +:term:`abstract base class` that defines the features that are common to all policy objects, including :const:`compat32`. This includes certain hook methods that are called internally by the email package, which a custom policy -could override to obtain different behavior. - -When a :class:`~email.message.Message` object is created, it acquires a policy. -By default this will be :const:`compat32`, but a different policy can be -specified. If the ``Message`` is created by a :mod:`~email.parser`, a policy -passed to the parser will be the policy used by the ``Message`` it creates. If -the ``Message`` is created by the program, then the policy can be specified -when it is created.
When a ``Message`` is passed to a :mod:`~email.generator`, -the generator uses the policy from the ``Message`` by default, but you can also -pass a specific policy to the generator that will override the one stored on -the ``Message`` object. +could override to obtain different behavior. The second part describes the +concrete classes :class:`EmailPolicy` and :class:`Compat32`, which implement +the hooks that provide the standard behavior and the backward compatible +behavior and features, respectively. :class:`Policy` instances are immutable, but they can be cloned, accepting the same keyword arguments as the class constructor and returning a new @@ -147,6 +163,7 @@ added matters. To illustrate:: This class defines the following properties, and thus values for the following may be passed in the constructor of any policy class: + .. attribute:: max_line_length The maximum length of any line in the serialized output, not counting the @@ -154,12 +171,14 @@ added matters. To illustrate:: ``0`` or :const:`None` indicates that no line wrapping should be done at all. + .. attribute:: linesep The string to be used to terminate lines in serialized output. The default is ``\n`` because that's the internal end-of-line discipline used by Python, though ``\r\n`` is required by the RFCs. + .. attribute:: cte_type Controls the type of Content Transfer Encodings that may be or are @@ -174,8 +193,8 @@ added matters. To illustrate:: ``8bit`` data is not constrained to be 7 bit clean. Data in headers is still required to be ASCII-only and so will be encoded (see - 'binary_fold' below for an exception), but body parts may use - the ``8bit`` CTE. + :meth:`fold_binary` and :attr:`~EmailPolicy.utf8` below for + exceptions), but body parts may use the ``8bit`` CTE. ======== =============================================================== A ``cte_type`` value of ``8bit`` only works with ``BytesGenerator``, not @@ -183,6 +202,7 @@ added matters. 
To illustrate:: ``Generator`` is operating under a policy that specifies ``cte_type=8bit``, it will act as if ``cte_type`` is ``7bit``. + .. attribute:: raise_on_defect If :const:`True`, any defects encountered will be raised as errors. If @@ -190,7 +210,6 @@ added matters. To illustrate:: :meth:`register_defect` method. - .. attribute:: mangle_from\_ If :const:`True`, lines starting with *"From "* in the body are @@ -201,19 +220,23 @@ added matters. To illustrate:: .. versionadded:: 3.5 The *mangle_from_* parameter. + The following :class:`Policy` method is intended to be called by code using the email library to create policy instances with custom settings: + .. method:: clone(**kw) Return a new :class:`Policy` instance whose attributes have the same values as the current instance, except where those attributes are given new values by the keyword arguments. + The remaining :class:`Policy` methods are called by the email package code, and are not intended to be called by an application using the email package. A custom policy must implement all of these methods. + .. method:: handle_defect(obj, defect) Handle a *defect* found on *obj*. When the email package calls this @@ -224,6 +247,7 @@ added matters. To illustrate:: it is ``True``, *defect* is raised as an exception. If it is ``False`` (the default), *obj* and *defect* are passed to :meth:`register_defect`. + .. method:: register_defect(obj, defect) Register a *defect* on *obj*. In the email package, *defect* will always @@ -236,14 +260,16 @@ added matters. To illustrate:: custom ``Message`` objects) should also provide such an attribute, otherwise defects in parsed messages will raise unexpected errors. + .. method:: header_max_count(name) Return the maximum allowed number of headers named *name*. - Called when a header is added to a :class:`~email.message.Message` - object. 
If the returned value is not ``0`` or ``None``, and there are - already a number of headers with the name *name* equal to the value - returned, a :exc:`ValueError` is raised. + Called when a header is added to an :class:`~email.message.EmailMessage` + or :class:`~email.message.Message` object. If the returned value is not + ``0`` or ``None``, and there are already a number of headers with the + name *name* greater than or equal to the value returned, a + :exc:`ValueError` is raised. Because the default behavior of ``Message.__setitem__`` is to append the value to the list of headers, it is easy to create duplicate headers @@ -255,6 +281,7 @@ added matters. To illustrate:: The default implementation returns ``None`` for all header names. + .. method:: header_source_parse(sourcelines) The email package calls this method with a list of strings, each string @@ -274,6 +301,7 @@ added matters. To illustrate:: There is no default implementation + .. method:: header_store_parse(name, value) The email package calls this method with the name and value provided by @@ -289,6 +317,7 @@ added matters. To illustrate:: There is no default implementation + .. method:: header_fetch_parse(name, value) The email package calls this method with the *name* and *value* currently @@ -304,6 +333,7 @@ added matters. To illustrate:: There is no default implementation + .. method:: fold(name, value) The email package calls this method with the *name* and *value* currently @@ -316,6 +346,7 @@ added matters. To illustrate:: *value* may contain surrogateescaped binary data. There should be no surrogateescaped binary data in the string returned by the method. + .. method:: fold_binary(name, value) The same as :meth:`fold`, except that the returned value should be a @@ -325,73 +356,6 @@ added matters. To illustrate:: converted back into binary data in the returned bytes object. -.. class:: Compat32(**kw) - - This concrete :class:`Policy` is the backward compatibility policy.
It - replicates the behavior of the email package in Python 3.2. The - :mod:`~email.policy` module also defines an instance of this class, - :const:`compat32`, that is used as the default policy. Thus the default - behavior of the email package is to maintain compatibility with Python 3.2. - - The following attributes have values that are different from the - :class:`Policy` default: - - .. attribute:: mangle_from_ - - The default is ``True``. - - The class provides the following concrete implementations of the - abstract methods of :class:`Policy`: - - .. method:: header_source_parse(sourcelines) - - The name is parsed as everything up to the '``:``' and returned - unmodified. The value is determined by stripping leading whitespace off - the remainder of the first line, joining all subsequent lines together, - and stripping any trailing carriage return or linefeed characters. - - .. method:: header_store_parse(name, value) - - The name and value are returned unmodified. - - .. method:: header_fetch_parse(name, value) - - If the value contains binary data, it is converted into a - :class:`~email.header.Header` object using the ``unknown-8bit`` charset. - Otherwise it is returned unmodified. - - .. method:: fold(name, value) - - Headers are folded using the :class:`~email.header.Header` folding - algorithm, which preserves existing line breaks in the value, and wraps - each resulting line to the ``max_line_length``. Non-ASCII binary data are - CTE encoded using the ``unknown-8bit`` charset. - - .. method:: fold_binary(name, value) - - Headers are folded using the :class:`~email.header.Header` folding - algorithm, which preserves existing line breaks in the value, and wraps - each resulting line to the ``max_line_length``. If ``cte_type`` is - ``7bit``, non-ascii binary data is CTE encoded using the ``unknown-8bit`` - charset. Otherwise the original source header is used, with its existing - line breaks and any (RFC invalid) binary data it may contain. 
- - -An instance of :class:`Compat32` is provided as a module constant: - -.. data:: compat32 - - An instance of :class:`Compat32`, providing backward compatibility with the - behavior of the email package in Python 3.2. - - -.. note:: - - The documentation below describes new policies that are included in the - standard library on a :term:`provisional basis <provisional package>`. - Backwards incompatible changes (up to and including removal of the feature) - may occur if deemed necessary by the core developers. - .. class:: EmailPolicy(**kw) @@ -407,6 +371,11 @@ An instance of :class:`Compat32` is provided as a module constant: In addition to the settable attributes listed above that apply to all policies, this policy adds the following additional attributes: + .. versionadded:: 3.3 as a :term:`provisional feature <provisional package>`. + + .. versionchanged:: 3.6 provisional status removed. + + .. attribute:: utf8 If ``False``, follow :rfc:`5322`, supporting non-ASCII characters in @@ -415,13 +384,14 @@ An instance of :class:`Compat32` is provided as a module constant: formatted in this way may be passed to SMTP servers that support the ``SMTPUTF8`` extension (:rfc:`6531`). + .. attribute:: refold_source If the value for a header in the ``Message`` object originated from a :mod:`~email.parser` (as opposed to being set by a program), this attribute indicates whether or not a generator should refold that value - when transforming the message back into stream form. The possible values - are: + when transforming the message back into serialized form. The possible + values are: ======== =============================================================== ``none`` all source values use original folding @@ -434,23 +404,24 @@ An instance of :class:`Compat32` is provided as a module constant: The default is ``long``. + .. 
attribute:: header_factory A callable that takes two arguments, ``name`` and ``value``, where ``name`` is a header field name and ``value`` is an unfolded header field value, and returns a string subclass that represents that header. A default ``header_factory`` (see :mod:`~email.headerregistry`) is provided - that understands some of the :RFC:`5322` header field types. (Currently - address fields and date fields have special treatment, while all other - fields are treated as unstructured. This list will be completed before - the extension is marked stable.) + that supports custom parsing for the various address and date :RFC:`5322` + header field types, and the major MIME header field types. Support for + additional custom parsing will be added in the future. + .. attribute:: content_manager An object with at least two methods: get_content and set_content. When - the :meth:`~email.message.Message.get_content` or - :meth:`~email.message.Message.set_content` method of a - :class:`~email.message.Message` object is called, it calls the + the :meth:`~email.message.EmailMessage.get_content` or + :meth:`~email.message.EmailMessage.set_content` method of an + :class:`~email.message.EmailMessage` object is called, it calls the corresponding method of this object, passing it the message object as its first argument, and any arguments or keywords that were passed to it as additional arguments. By default ``content_manager`` is set to @@ -462,16 +433,22 @@ An instance of :class:`Compat32` is provided as a module constant: The class provides the following concrete implementations of the abstract methods of :class:`Policy`: + .. method:: header_max_count(name) Returns the value of the :attr:`~email.headerregistry.BaseHeader.max_count` attribute of the specialized class used to represent the header with the given name. + .. method:: header_source_parse(sourcelines) - The implementation of this method is the same as that for the - :class:`Compat32` policy.
+ + The name is parsed as everything up to the '``:``' and returned + unmodified. The value is determined by stripping leading whitespace off + the remainder of the first line, joining all subsequent lines together, + and stripping any trailing carriage return or linefeed characters. + .. method:: header_store_parse(name, value) @@ -482,6 +459,7 @@ An instance of :class:`Compat32` is provided as a module constant: the value. In this case a ``ValueError`` is raised if the input value contains CR or LF characters. + .. method:: header_fetch_parse(name, value) If the value has a ``name`` attribute, it is returned unmodified. @@ -490,6 +468,7 @@ An instance of :class:`Compat32` is provided as a module constant: header object is returned. Any surrogateescaped bytes get turned into the unicode unknown-character glyph. + .. method:: fold(name, value) Header folding is controlled by the :attr:`refold_source` policy setting. @@ -508,6 +487,7 @@ An instance of :class:`Compat32` is provided as a module constant: regardless of the ``refold_source`` setting, which causes the binary data to be CTE encoded using the ``unknown-8bit`` charset. + .. method:: fold_binary(name, value) The same as :meth:`fold` if :attr:`~Policy.cte_type` is ``7bit``, except @@ -519,23 +499,27 @@ An instance of :class:`Compat32` is provided as a module constant: ``refold_source`` setting, since there is no way to know whether the binary data consists of single byte characters or multibyte characters. + The following instances of :class:`EmailPolicy` provide defaults suitable for specific application domains. Note that in the future the behavior of these instances (in particular the ``HTTP`` instance) may be adjusted to conform even more closely to the RFCs relevant to their domains. + .. data:: default An instance of ``EmailPolicy`` with all defaults unchanged. This policy uses the standard Python ``\n`` line endings rather than the RFC-correct ``\r\n``. + .. 
data:: SMTP Suitable for serializing messages in conformance with the email RFCs. Like ``default``, but with ``linesep`` set to ``\r\n``, which is RFC compliant. + .. data:: SMTPUTF8 The same as ``SMTP`` except that :attr:`~EmailPolicy.utf8` is ``True``. @@ -544,11 +528,13 @@ more closely to the RFCs relevant to their domains. sender or recipient addresses have non-ASCII characters (the :meth:`smtplib.SMTP.send_message` method handles this automatically). + .. data:: HTTP Suitable for serializing headers for use in HTTP traffic. Like ``SMTP`` except that ``max_line_length`` is set to ``None`` (unlimited). + .. data:: strict Convenience instance. The same as ``default`` except that @@ -557,6 +543,7 @@ more closely to the RFCs relevant to their domains. somepolicy + policy.strict + With all of these :class:`EmailPolicies <.EmailPolicy>`, the effective API of the email package is changed from the Python 3.2 API in the following ways: @@ -573,7 +560,7 @@ the email package is changed from the Python 3.2 API in the following ways: and allowed. From the application view, this means that any header obtained through the -:class:`~email.message.Message` is a header object with extra +:class:`~email.message.EmailMessage` is a header object with extra attributes, whose string value is the fully decoded unicode value of the header. Likewise, a header may be assigned a new value, or a new header created, using a unicode string, and the policy will take care of converting @@ -581,3 +568,69 @@ the unicode string into the correct RFC encoded form. The header objects and their attributes are described in :mod:`~email.headerregistry`. + + + +.. class:: Compat32(**kw) + + This concrete :class:`Policy` is the backward compatibility policy. It + replicates the behavior of the email package in Python 3.2. The + :mod:`~email.policy` module also defines an instance of this class, + :const:`compat32`, that is used as the default policy.
Thus the default + behavior of the email package is to maintain compatibility with Python 3.2. + + The following attributes have values that are different from the + :class:`Policy` default: + + + .. attribute:: mangle_from_ + + The default is ``True``. + + + The class provides the following concrete implementations of the + abstract methods of :class:`Policy`: + + + .. method:: header_source_parse(sourcelines) + + The name is parsed as everything up to the '``:``' and returned + unmodified. The value is determined by stripping leading whitespace off + the remainder of the first line, joining all subsequent lines together, + and stripping any trailing carriage return or linefeed characters. + + + .. method:: header_store_parse(name, value) + + The name and value are returned unmodified. + + + .. method:: header_fetch_parse(name, value) + + If the value contains binary data, it is converted into a + :class:`~email.header.Header` object using the ``unknown-8bit`` charset. + Otherwise it is returned unmodified. + + + .. method:: fold(name, value) + + Headers are folded using the :class:`~email.header.Header` folding + algorithm, which preserves existing line breaks in the value, and wraps + each resulting line to the ``max_line_length``. Non-ASCII binary data are + CTE encoded using the ``unknown-8bit`` charset. + + + .. method:: fold_binary(name, value) + + Headers are folded using the :class:`~email.header.Header` folding + algorithm, which preserves existing line breaks in the value, and wraps + each resulting line to the ``max_line_length``. If ``cte_type`` is + ``7bit``, non-ascii binary data is CTE encoded using the ``unknown-8bit`` + charset. Otherwise the original source header is used, with its existing + line breaks and any (RFC invalid) binary data it may contain. + + +.. data:: compat32 + + An instance of :class:`Compat32`, providing backward compatibility with the + behavior of the email package in Python 3.2. 
diff --git a/Doc/library/email.rst b/Doc/library/email.rst index e8bb02b..01bd380 100644 --- a/Doc/library/email.rst +++ b/Doc/library/email.rst @@ -3,50 +3,99 @@ .. module:: email :synopsis: Package supporting the parsing, manipulating, and generating - email messages, including MIME documents. - -.. moduleauthor:: Barry A. Warsaw <barry@python.org> -.. sectionauthor:: Barry A. Warsaw <barry@python.org> -.. Copyright (C) 2001-2010 Python Software Foundation + email messages. +.. moduleauthor:: Barry A. Warsaw <barry@python.org>, + R. David Murray <rdmurray@bitdance.com> +.. sectionauthor:: R. David Murray <rdmurray@bitdance.com> **Source code:** :source:`Lib/email/__init__.py` -------------- -The :mod:`email` package is a library for managing email messages, including -MIME and other :rfc:`2822`\ -based message documents. It is specifically *not* -designed to do any sending of email messages to SMTP (:rfc:`2821`), NNTP, or -other servers; those are functions of modules such as :mod:`smtplib` and -:mod:`nntplib`. The :mod:`email` package attempts to be as RFC-compliant as -possible, supporting in addition to :rfc:`2822`, such MIME-related RFCs as -:rfc:`2045`, :rfc:`2046`, :rfc:`2047`, and :rfc:`2231`. - -The primary distinguishing feature of the :mod:`email` package is that it splits -the parsing and generating of email messages from the internal *object model* -representation of email. Applications using the :mod:`email` package deal -primarily with objects; you can add sub-objects to messages, remove sub-objects -from messages, completely re-arrange the contents, etc. There is a separate -parser and a separate generator which handles the transformation from flat text -to the object model, and then back to flat text again. There are also handy -subclasses for some common MIME object types, and a few miscellaneous utilities -that help with such common tasks as extracting and parsing message field values, -creating RFC-compliant dates, etc. 
+The :mod:`email` package is a library for managing email messages. It is +specifically *not* designed to do any sending of email messages to SMTP +(:rfc:`2821`), NNTP, or other servers; those are functions of modules such as +:mod:`smtplib` and :mod:`nntplib`. The :mod:`email` package attempts to be as +RFC-compliant as possible, supporting :rfc:`5322` and :rfc:`6532`, as well as +such MIME-related RFCs as :rfc:`2045`, :rfc:`2046`, :rfc:`2047`, :rfc:`2183`, +and :rfc:`2231`. + +The overall structure of the email package can be divided into three major +components, plus a fourth component that controls the behavior of the other +components. + +The central component of the package is an "object model" that represents email +messages. An application interacts with the package primarily through the +object model interface defined in the :mod:`~email.message` sub-module. The +application can use this API to ask questions about an existing email, to +construct a new email, or to add or remove email subcomponents that themselves +use the same object model interface. That is, following the nature of email +messages and their MIME subcomponents, the email object model is a tree +structure of objects that all provide the :class:`~email.message.EmailMessage` +API. + +The other two major components of the package are the :mod:`~email.parser` and +the :mod:`~email.generator`. The parser takes the serialized version of an +email message (a stream of bytes) and converts it into a tree of +:class:`~email.message.EmailMessage` objects. The generator takes an +:class:`~email.message.EmailMessage` and turns it back into a serialized byte +stream. (The parser and generator also handle streams of text characters, but +this usage is discouraged as it is too easy to end up with messages that are +not valid in one way or another.) + +The control component is the :mod:`~email.policy` module.
Every +:class:`~email.message.EmailMessage`, every :mod:`~email.generator`, and every +:mod:`~email.parser` has an associated :mod:`~email.policy` object that +controls its behavior. Usually an application only needs to specify the policy +when an :class:`~email.message.EmailMessage` is created, either by directly +instantiating an :class:`~email.message.EmailMessage` to create a new email, +or by parsing an input stream using a :mod:`~email.parser`. But the policy can +be changed when the message is serialized using a :mod:`~email.generator`. +This allows, for example, a generic email message to be parsed from disk, but +to serialize it using standard SMTP settings when sending it to an email +server. + +The email package does its best to hide the details of the various governing +RFCs from the application. Conceptually the application should be able to +treat the email message as a structured tree of unicode text and binary +attachments, without having to worry about how these are represented when +serialized. In practice, however, it is often necessary to be aware of at +least some of the rules governing MIME messages and their structure, +specifically the names and nature of the MIME "content types" and how they +identify multipart documents. For the most part this knowledge should only be +required for more complex applications, and even then it should only be the +high level structure in question, and not the details of how those structures +are represented. Since MIME content types are used widely in modern internet +software (not just email), this will be a familiar concept to many programmers. The following sections describe the functionality of the :mod:`email` package. 
-The ordering follows a progression that should be common in applications: an -email message is read as flat text from a file or other source, the text is -parsed to produce the object structure of the email message, this structure is -manipulated, and finally, the object tree is rendered back into flat text. - -It is perfectly feasible to create the object structure out of whole cloth --- -i.e. completely from scratch. From there, a similar progression can be taken as -above. - -Also included are detailed specifications of all the classes and modules that -the :mod:`email` package provides, the exception classes you might encounter -while using the :mod:`email` package, some auxiliary utilities, and a few -examples. For users of the older :mod:`mimelib` package, or previous versions -of the :mod:`email` package, a section on differences and porting is provided. +We start with the :mod:`~email.message` object model, which is the primary +interface an application will use, and follow that with the +:mod:`~email.parser` and :mod:`~email.generator` components. Then we cover the +:mod:`~email.policy` controls, which completes the treatment of the main +components of the library. + +The next three sections cover the exceptions the package may raise and the +defects (non-compliance with the RFCs) that the :mod:`~email.parser` may +detect. Then we cover the :mod:`~email.headerregistry` and the +:mod:`~email.contentmanager` sub-components, which provide tools for doing more +detailed manipulation of headers and payloads, respectively. Both of these +components contain features relevant to consuming and producing non-trivial +messages, but also document their extensibility APIs, which will be of interest +to advanced applications. + +Following those is a set of examples of using the fundamental parts of the APIs +covered in the preceding sections. + +The foregoing represents the modern (unicode friendly) API of the email package.
+The remaining sections, starting with the :class:`~email.message.Message` +class, cover the legacy :data:`~email.policy.compat32` API that deals much more +directly with the details of how email messages are represented. The +:data:`~email.policy.compat32` API does *not* hide the details of the RFCs from +the application, but for applications that need to operate at that level, it +can be a useful tool. This documentation is also relevant for applications that +are still using the :data:`~email.policy.compat32` API for backward +compatibility reasons. Contents of the :mod:`email` package documentation: @@ -56,335 +105,39 @@ Contents of the :mod:`email` package documentation: email.parser.rst email.generator.rst email.policy.rst + + email.errors.rst email.headerregistry.rst email.contentmanager.rst + + email.examples.rst + + email.compat32-message.rst email.mime.rst email.header.rst email.charset.rst email.encoders.rst - email.errors.rst email.util.rst email.iterators.rst - email-examples.rst .. seealso:: Module :mod:`smtplib` - SMTP protocol client - - Module :mod:`nntplib` - NNTP protocol client - - -.. _email-pkg-history: - -Package History ---------------- - -This table describes the release history of the email package, corresponding to -the version of Python that the package was released with. For purposes of this -document, when you see a note about change or added versions, these refer to the -Python version the change was made in, *not* the email package version. This -table also describes the Python compatibility of each version of the package.
- -+---------------+------------------------------+-----------------------+ -| email version | distributed with | compatible with | -+===============+==============================+=======================+ -| :const:`1.x` | Python 2.2.0 to Python 2.2.1 | *no longer supported* | -+---------------+------------------------------+-----------------------+ -| :const:`2.5` | Python 2.2.2+ and Python 2.3 | Python 2.1 to 2.5 | -+---------------+------------------------------+-----------------------+ -| :const:`3.0` | Python 2.4 and Python 2.5 | Python 2.3 to 2.6 | -+---------------+------------------------------+-----------------------+ -| :const:`4.0` | Python 2.5 to Python 2.7 | Python 2.3 to 2.7 | -+---------------+------------------------------+-----------------------+ -| :const:`5.0` | Python 3.0 and Python 3.1 | Python 3.0 to 3.2 | -+---------------+------------------------------+-----------------------+ -| :const:`5.1` | Python 3.2 | Python 3.2 | -+---------------+------------------------------+-----------------------+ - -After Version 5.1 (Python 3.2), the email package no longer has a version that -is separate from the Python version. (See the :ref:`whatsnew-index` documents -for the respective Python versions for details on changes.) - -Here are the major differences between :mod:`email` version 5.1 and -version 5.0: - -* It is once again possible to parse messages containing non-ASCII bytes, - and to reproduce such messages if the data containing the non-ASCII - bytes is not modified. - -* New functions :func:`message_from_bytes` and :func:`message_from_binary_file`, - and new classes :class:`~email.parser.BytesFeedParser` and - :class:`~email.parser.BytesParser` allow binary message data to be parsed - into model objects. 
- -* Given bytes input to the model, :meth:`~email.message.Message.get_payload` - will by default decode a message body that has a - :mailheader:`Content-Transfer-Encoding` of ``8bit`` using the charset - specified in the MIME headers and return the resulting string. - -* Given bytes input to the model, :class:`~email.generator.Generator` will - convert message bodies that have a :mailheader:`Content-Transfer-Encoding` of - 8bit to instead have a 7bit Content-Transfer-Encoding. - -* New class :class:`~email.generator.BytesGenerator` produces bytes - as output, preserving any unchanged non-ASCII data that was - present in the input used to build the model, including message bodies - with a :mailheader:`Content-Transfer-Encoding` of 8bit. - -Here are the major differences between :mod:`email` version 5.0 and version 4: - -* All operations are on unicode strings. Text inputs must be strings, - text outputs are strings. Outputs are limited to the ASCII character - set and so can be encoded to ASCII for transmission. Inputs are also - limited to ASCII; this is an acknowledged limitation of email 5.0 and - means it can only be used to parse email that is 7bit clean. - -Here are the major differences between :mod:`email` version 4 and version 3: - -* All modules have been renamed according to :pep:`8` standards. For example, - the version 3 module :mod:`email.Message` was renamed to :mod:`email.message` in - version 4. - -* A new subpackage :mod:`email.mime` was added and all the version 3 - :mod:`email.MIME\*` modules were renamed and situated into the :mod:`email.mime` - subpackage. For example, the version 3 module :mod:`email.MIMEText` was renamed - to :mod:`email.mime.text`. - - *Note that the version 3 names will continue to work until Python 2.6*. - -* The :mod:`email.mime.application` module was added, which contains the - :class:`~email.mime.application.MIMEApplication` class. - -* Methods that were deprecated in version 3 have been removed. 
These include - :meth:`Generator.__call__`, :meth:`Message.get_type`, - :meth:`Message.get_main_type`, :meth:`Message.get_subtype`. - -* Fixes have been added for :rfc:`2231` support which can change some of the - return types for :func:`Message.get_param <email.message.Message.get_param>` - and friends. Under some - circumstances, values which used to return a 3-tuple now return simple strings - (specifically, if all extended parameter segments were unencoded, there is no - language and charset designation expected, so the return type is now a simple - string). Also, %-decoding used to be done for both encoded and unencoded - segments; this decoding is now done only for encoded segments. - -Here are the major differences between :mod:`email` version 3 and version 2: - -* The :class:`~email.parser.FeedParser` class was introduced, and the - :class:`~email.parser.Parser` class was implemented in terms of the - :class:`~email.parser.FeedParser`. All parsing therefore is - non-strict, and parsing will make a best effort never to raise an exception. - Problems found while parsing messages are stored in the message's *defect* - attribute. - -* All aspects of the API which raised :exc:`DeprecationWarning`\ s in version 2 - have been removed. These include the *_encoder* argument to the - :class:`~email.mime.text.MIMEText` constructor, the - :meth:`Message.add_payload` method, the :func:`Utils.dump_address_pair` - function, and the functions :func:`Utils.decode` and :func:`Utils.encode`. - -* New :exc:`DeprecationWarning`\ s have been added to: - :meth:`Generator.__call__`, :meth:`Message.get_type`, - :meth:`Message.get_main_type`, :meth:`Message.get_subtype`, and the *strict* - argument to the :class:`~email.parser.Parser` class. These are expected to - be removed in future versions. - -* Support for Pythons earlier than 2.3 has been removed. 
- -Here are the differences between :mod:`email` version 2 and version 1: - -* The :mod:`email.Header` and :mod:`email.Charset` modules have been added. - -* The pickle format for :class:`~email.message.Message` instances has changed. - Since this was never (and still isn't) formally defined, this isn't - considered a backward incompatibility. However if your application pickles - and unpickles :class:`~email.message.Message` instances, be aware that in - :mod:`email` version 2, :class:`~email.message.Message` instances now have - private variables *_charset* and *_default_type*. - -* Several methods in the :class:`~email.message.Message` class have been - deprecated, or their signatures changed. Also, many new methods have been - added. See the documentation for the :class:`~email.message.Message` class - for details. The changes should be completely backward compatible. - -* The object structure has changed in the face of :mimetype:`message/rfc822` - content types. In :mod:`email` version 1, such a type would be represented - by a scalar payload, i.e. the container message's - :meth:`~email.message.Message.is_multipart` returned false, - :meth:`~email.message.Message.get_payload` was not a list object, but a - single :class:`~email.message.Message` instance. + SMTP (Simple Mail Transfer Protocol) client - This structure was inconsistent with the rest of the package, so the object - representation for :mimetype:`message/rfc822` content types was changed. In - :mod:`email` version 2, the container *does* return ``True`` from - :meth:`~email.message.Message.is_multipart`, and - :meth:`~email.message.Message.get_payload` returns a list containing a single - :class:`~email.message.Message` item. + Module :mod:`poplib` + POP (Post Office Protocol) client - Note that this is one place that backward compatibility could not be - completely maintained. However, if you're already testing the return type of - :meth:`~email.message.Message.get_payload`, you should be fine. 
You just need - to make sure your code doesn't do a :meth:`~email.message.Message.set_payload` - with a :class:`~email.message.Message` instance on a container with a content - type of :mimetype:`message/rfc822`. + Module :mod:`imaplib` + IMAP (Internet Message Access Protocol) client -* The :class:`~email.parser.Parser` constructor's *strict* argument was added, - and its :meth:`~email.parser.Parser.parse` and - :meth:`~email.parser.Parser.parsestr` methods grew a *headersonly* argument. - The *strict* flag was also added to functions :func:`email.message_from_file` - and :func:`email.message_from_string`. - -* :meth:`Generator.__call__` is deprecated; use :meth:`Generator.flatten - <email.generator.Generator.flatten>` instead. The - :class:`~email.generator.Generator` class has also grown the - :meth:`~email.generator.Generator.clone` method. - -* The :class:`~email.generator.DecodedGenerator` class in the - :mod:`email.generator` module was added. - -* The intermediate base classes - :class:`~email.mime.nonmultipart.MIMENonMultipart` and - :class:`~email.mime.multipart.MIMEMultipart` have been added, and interposed - in the class hierarchy for most of the other MIME-related derived classes. - -* The *_encoder* argument to the :class:`~email.mime.text.MIMEText` constructor - has been deprecated. Encoding now happens implicitly based on the - *_charset* argument. - -* The following functions in the :mod:`email.Utils` module have been deprecated: - :func:`dump_address_pairs`, :func:`decode`, and :func:`encode`. The following - functions have been added to the module: :func:`make_msgid`, - :func:`decode_rfc2231`, :func:`encode_rfc2231`, and :func:`decode_params`. - -* The non-public function :func:`email.Iterators._structure` was added. - - -Differences from :mod:`mimelib` -------------------------------- - -The :mod:`email` package was originally prototyped as a separate library called -`mimelib <http://mimelib.sourceforge.net/>`_. 
Changes have been made so that method names -are more consistent, and some methods or modules have either been added or -removed. The semantics of some of the methods have also changed. For the most -part, any functionality available in :mod:`mimelib` is still available in the -:mod:`email` package, albeit often in a different way. Backward compatibility -between the :mod:`mimelib` package and the :mod:`email` package was not a -priority. - -Here is a brief description of the differences between the :mod:`mimelib` and -the :mod:`email` packages, along with hints on how to port your applications. - -Of course, the most visible difference between the two packages is that the -package name has been changed to :mod:`email`. In addition, the top-level -package has the following differences: - -* :func:`messageFromString` has been renamed to :func:`message_from_string`. - -* :func:`messageFromFile` has been renamed to :func:`message_from_file`. - -The :class:`~email.message.Message` class has the following differences: - -* The method :meth:`asString` was renamed to - :meth:`~email.message.Message.as_string`. - -* The method :meth:`ismultipart` was renamed to - :meth:`~email.message.Message.is_multipart`. - -* The :meth:`~email.message.Message.get_payload` method has grown a *decode* - optional argument. - -* The method :meth:`getall` was renamed to - :meth:`~email.message.Message.get_all`. - -* The method :meth:`addheader` was renamed to - :meth:`~email.message.Message.add_header`. - -* The method :meth:`gettype` was renamed to :meth:`get_type`. - -* The method :meth:`getmaintype` was renamed to :meth:`get_main_type`. - -* The method :meth:`getsubtype` was renamed to :meth:`get_subtype`. - -* The method :meth:`getparams` was renamed to - :meth:`~email.message.Message.get_params`. 
Also, whereas :meth:`getparams` - returned a list of strings, :meth:`~email.message.Message.get_params` returns - a list of 2-tuples, effectively the key/value pairs of the parameters, split - on the ``'='`` sign. - -* The method :meth:`getparam` was renamed to - :meth:`~email.message.Message.get_param`. - -* The method :meth:`getcharsets` was renamed to - :meth:`~email.message.Message.get_charsets`. - -* The method :meth:`getfilename` was renamed to - :meth:`~email.message.Message.get_filename`. - -* The method :meth:`getboundary` was renamed to - :meth:`~email.message.Message.get_boundary`. - -* The method :meth:`setboundary` was renamed to - :meth:`~email.message.Message.set_boundary`. - -* The method :meth:`getdecodedpayload` was removed. To get similar - functionality, pass the value 1 to the *decode* flag of the - :meth:`~email.message.Message.get_payload` method. - -* The method :meth:`getpayloadastext` was removed. Similar functionality is - supported by the :class:`~email.generator.DecodedGenerator` class in the - :mod:`email.generator` module. - -* The method :meth:`getbodyastext` was removed. You can get similar - functionality by creating an iterator with - :func:`~email.iterators.typed_subpart_iterator` in the :mod:`email.iterators` - module. - -The :class:`~email.parser.Parser` class has no differences in its public -interface. It does have some additional smarts to recognize -:mimetype:`message/delivery-status` type messages, which it represents as a -:class:`~email.message.Message` instance containing separate -:class:`~email.message.Message` subparts for each header block in the delivery -status notification [#]_. - -The :class:`~email.generator.Generator` class has no differences in its public -interface. There is a new class in the :mod:`email.generator` module though, -called :class:`~email.generator.DecodedGenerator` which provides most of the -functionality previously available in the :meth:`Message.getpayloadastext` -method. 
- -The following modules and classes have been changed: - -* The :class:`~email.mime.base.MIMEBase` class constructor arguments *_major* - and *_minor* have changed to *_maintype* and *_subtype* respectively. - -* The ``Image`` class/module has been renamed to ``MIMEImage``. The *_minor* - argument has been renamed to *_subtype*. - -* The ``Text`` class/module has been renamed to ``MIMEText``. The *_minor* - argument has been renamed to *_subtype*. - -* The ``MessageRFC822`` class/module has been renamed to ``MIMEMessage``. Note - that an earlier version of :mod:`mimelib` called this class/module ``RFC822``, - but that clashed with the Python standard library module :mod:`rfc822` on some - case-insensitive file systems. - - Also, the :class:`~email.mime.message.MIMEMessage` class now represents any - kind of MIME message - with main type :mimetype:`message`. It takes an optional argument *_subtype* - which is used to set the MIME subtype. *_subtype* defaults to - :mimetype:`rfc822`. - -:mod:`mimelib` provided some utility functions in its :mod:`address` and -:mod:`date` modules. All of these functions have been moved to the -:mod:`email.utils` module. - -The ``MsgReader`` class/module has been removed. Its functionality is most -closely supported in the :func:`~email.iterators.body_line_iterator` function -in the :mod:`email.iterators` module. + Module :mod:`nntplib` + NNTP (Network News Transfer Protocol) client -.. rubric:: Footnotes + Module :mod:`mailbox` + Tools for creating, reading, and managing collections of messages on disk + using a variety of standard formats. -.. [#] Delivery Status Notifications (DSN) are defined in :rfc:`1894`. 
+ Module :mod:`smtpd` + SMTP server framework (primarily useful for testing) diff --git a/Doc/library/email.util.rst b/Doc/library/email.util.rst index 5cff746..63fae2a 100644 --- a/Doc/library/email.util.rst +++ b/Doc/library/email.util.rst @@ -8,7 +8,43 @@ -------------- -There are several useful utilities provided in the :mod:`email.utils` module: +There are a couple of useful utilities provided in the :mod:`email.utils` +module: + +.. function:: localtime(dt=None, isdst=-1) + + Return local time as an aware datetime object. If called without + arguments, return current time. Otherwise the *dt* argument should be a + :class:`~datetime.datetime` instance, and it is converted to the local time + zone according to the system time zone database. If *dt* is naive (that + is, ``dt.tzinfo`` is ``None``), it is assumed to be in local time. In this + case, a positive or zero value for *isdst* causes ``localtime`` to presume + initially that summer time (for example, Daylight Saving Time) is or is not + (respectively) in effect for the specified time. A negative value for + *isdst* causes ``localtime`` to attempt to divine whether summer time + is in effect for the specified time. + + .. versionadded:: 3.3 + + +.. function:: make_msgid(idstring=None, domain=None) + + Returns a string suitable for an :rfc:`2822`\ -compliant + :mailheader:`Message-ID` header. Optional *idstring*, if given, is a string + used to strengthen the uniqueness of the message id. Optional *domain*, if + given, provides the portion of the msgid after the '@'. The default is the + local hostname. It is not normally necessary to override this default, but + doing so may be useful in certain cases, such as constructing a distributed + system that uses a consistent domain name across multiple hosts. + + .. versionchanged:: 3.2 + Added the *domain* keyword. + + +The remaining functions are part of the legacy (``Compat32``) email API. 
There +is no need to directly use these with the new API, since the parsing and +formatting they provide is done automatically by the header parsing machinery +of the new API. .. function:: quote(str) @@ -141,36 +177,6 @@ There are several useful utilities provided in the :mod:`email.utils` module: .. versionadded:: 3.3 -.. function:: localtime(dt=None) - - Return local time as an aware datetime object. If called without - arguments, return current time. Otherwise *dt* argument should be a - :class:`~datetime.datetime` instance, and it is converted to the local time - zone according to the system time zone database. If *dt* is naive (that - is, ``dt.tzinfo`` is ``None``), it is assumed to be in local time. In this - case, a positive or zero value for *isdst* causes ``localtime`` to presume - initially that summer time (for example, Daylight Saving Time) is or is not - (respectively) in effect for the specified time. A negative value for - *isdst* causes the ``localtime`` to attempt to divine whether summer time - is in effect for the specified time. - - .. versionadded:: 3.3 - - -.. function:: make_msgid(idstring=None, domain=None) - - Returns a string suitable for an :rfc:`2822`\ -compliant - :mailheader:`Message-ID` header. Optional *idstring* if given, is a string - used to strengthen the uniqueness of the message id. Optional *domain* if - given provides the portion of the msgid after the '@'. The default is the - local hostname. It is not normally necessary to override this default, but - may be useful certain cases, such as a constructing distributed system that - uses a consistent domain name across multiple hosts. - - .. versionchanged:: 3.2 - Added the *domain* keyword. - - .. function:: decode_rfc2231(s) Decode the string *s* according to :rfc:`2231`. 
diff --git a/Lib/email/message.py b/Lib/email/message.py index 4b04283..c07da43 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -951,6 +951,26 @@ class MIMEPart(Message): policy = default Message.__init__(self, policy) + + def as_string(self, unixfrom=False, maxheaderlen=None, policy=None): + """Return the entire formatted message as a string. + + Optional 'unixfrom', when true, means include the Unix From_ envelope + header. maxheaderlen is retained for backward compatibility with the + base Message class, but defaults to None, meaning that the policy value + for max_line_length controls the header maximum length. 'policy' is + passed to the Generator instance used to serialize the message; if it + is not specified the policy associated with the message instance is + used. + """ + policy = self.policy if policy is None else policy + if maxheaderlen is None: + maxheaderlen = policy.max_line_length + return super().as_string(maxheaderlen=maxheaderlen, policy=policy) + + def __str__(self): + return self.as_string(policy=self.policy.clone(utf8=True)) + def is_attachment(self): c_d = self.get('content-disposition') return False if c_d is None else c_d.content_disposition == 'attachment' diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py index 4345162..f3a57df 100644 --- a/Lib/test/test_email/test_message.py +++ b/Lib/test/test_email/test_message.py @@ -764,6 +764,26 @@ class TestEmailMessage(TestEmailMessageBase, TestEmailBase): m.set_content(content_manager=cm) self.assertEqual(m['MIME-Version'], '1.0') + def test_as_string_uses_max_header_length_by_default(self): + m = self._str_msg('Subject: long line' + ' ab'*50 + '\n\n') + self.assertEqual(len(m.as_string().strip().splitlines()), 3) + + def test_as_string_allows_maxheaderlen(self): + m = self._str_msg('Subject: long line' + ' ab'*50 + '\n\n') + self.assertEqual(len(m.as_string(maxheaderlen=0).strip().splitlines()), + 1) + 
self.assertEqual(len(m.as_string(maxheaderlen=34).strip().splitlines()), + 6) + + def test_str_defaults_to_policy_max_line_length(self): + m = self._str_msg('Subject: long line' + ' ab'*50 + '\n\n') + self.assertEqual(len(str(m).strip().splitlines()), 3) + + def test_str_defaults_to_utf8(self): + m = EmailMessage() + m['Subject'] = 'unicöde' + self.assertEqual(str(m), 'Subject: unicöde\n\n') + class TestMIMEPart(TestEmailMessageBase, TestEmailBase): # Doing the full test run here may seem a bit redundant, since the two |