16 files changed, 679 insertions, 157 deletions
diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py
index 9e0cc75..5eaab36 100644
--- a/Lib/email/_encoded_words.py
+++ b/Lib/email/_encoded_words.py
@@ -152,7 +152,7 @@ def decode(ew):
     then from the resulting bytes into unicode using the specified charset.  If
     the cte-decoded string does not successfully decode using the specified
     character set, a defect is added to the defects list and the unknown octets
-    are replaced by the unicode 'unknown' character \uFDFF.
+    are replaced by the unicode 'unknown' character \\uFDFF.
 
     The specified charset and language are returned.  The default for language,
     which is rarely if ever encountered, is the empty string.
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 0369e01..a9bdf44 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -70,7 +70,8 @@ XXX: provide complete list of token types.
 import re
 import urllib   # For urllib.parse.unquote
 from string import hexdigits
-from collections import namedtuple, OrderedDict
+from collections import OrderedDict
+from operator import itemgetter
 from email import _encoded_words as _ew
 from email import errors
 from email import utils
@@ -368,8 +369,7 @@ class TokenList(list):
                 yield (indent + '    !! invalid element in token '
                                         'list: {!r}'.format(token))
             else:
-                for line in token._pp(indent+'    '):
-                    yield line
+                yield from token._pp(indent+'    ')
         if self.defects:
             extra = ' Defects: {}'.format(self.defects)
         else:
@@ -1099,15 +1099,34 @@ class MimeParameters(TokenList):
                 params[name] = []
             params[name].append((token.section_number, token))
         for name, parts in params.items():
-            parts = sorted(parts)
-            # XXX: there might be more recovery we could do here if, for
-            # example, this is really a case of a duplicate attribute name.
+            parts = sorted(parts, key=itemgetter(0))
+            first_param = parts[0][1]
+            charset = first_param.charset
+            # Our arbitrary error recovery is to ignore duplicate parameters,
+            # to use appearance order if there are duplicate rfc 2231 parts,
+            # and to ignore gaps.  This mimics the error recovery of get_param.
+            if not first_param.extended and len(parts) > 1:
+                if parts[1][0] == 0:
+                    parts[1][1].defects.append(errors.InvalidHeaderDefect(
+                        'duplicate parameter name; duplicate(s) ignored'))
+                    parts = parts[:1]
+                # Else assume the *0* was missing...note that this is different
+                # from get_param, but we registered a defect for this earlier.
             value_parts = []
-            charset = parts[0][1].charset
-            for i, (section_number, param) in enumerate(parts):
+            i = 0
+            for section_number, param in parts:
                 if section_number != i:
-                    param.defects.append(errors.InvalidHeaderDefect(
-                        "inconsistent multipart parameter numbering"))
+                    # We could get fancier here and look for a complete
+                    # duplicate extended parameter and ignore the second one
+                    # seen.  But we're not doing that.  The old code didn't.
+                    if not param.extended:
+                        param.defects.append(errors.InvalidHeaderDefect(
+                            'duplicate parameter name; duplicate ignored'))
+                        continue
+                    else:
+                        param.defects.append(errors.InvalidHeaderDefect(
+                            "inconsistent RFC2231 parameter numbering"))
+                i += 1
                 value = param.param_value
                 if param.extended:
                     try:
@@ -1315,24 +1334,22 @@ RouteComponentMarker = ValueTerminal('@', 'route-component-marker')
 # Parser
 #
 
-"""Parse strings according to RFC822/2047/2822/5322 rules.
-
-This is a stateless parser.  Each get_XXX function accepts a string and
-returns either a Terminal or a TokenList representing the RFC object named
-by the method and a string containing the remaining unparsed characters
-from the input.  Thus a parser method consumes the next syntactic construct
-of a given type and returns a token representing the construct plus the
-unparsed remainder of the input string.
-
-For example, if the first element of a structured header is a 'phrase',
-then:
-
-    phrase, value = get_phrase(value)
-
-returns the complete phrase from the start of the string value, plus any
-characters left in the string after the phrase is removed.
-
-"""
+# Parse strings according to RFC822/2047/2822/5322 rules.
+#
+# This is a stateless parser.  Each get_XXX function accepts a string and
+# returns either a Terminal or a TokenList representing the RFC object named
+# by the method and a string containing the remaining unparsed characters
+# from the input.  Thus a parser method consumes the next syntactic construct
+# of a given type and returns a token representing the construct plus the
+# unparsed remainder of the input string.
+#
+# For example, if the first element of a structured header is a 'phrase',
+# then:
+#
+#     phrase, value = get_phrase(value)
+#
+# returns the complete phrase from the start of the string value, plus any
+# characters left in the string after the phrase is removed.
 
 _wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split
 _non_atom_end_matcher = re.compile(r"[^{}]+".format(
@@ -2900,7 +2917,7 @@ def parse_content_disposition_header(value):
     try:
         token, value = get_token(value)
     except errors.HeaderParseError:
-        ctype.defects.append(errors.InvalidHeaderDefect(
+        disp_header.defects.append(errors.InvalidHeaderDefect(
             "Expected content disposition but found {!r}".format(value)))
         _find_mime_parameters(disp_header, value)
         return disp_header
@@ -2931,8 +2948,8 @@ def parse_content_transfer_encoding_header(value):
     try:
         token, value = get_token(value)
     except errors.HeaderParseError:
-        ctype.defects.append(errors.InvalidHeaderDefect(
-            "Expected content trnasfer encoding but found {!r}".format(value)))
+        cte_header.defects.append(errors.InvalidHeaderDefect(
+            "Expected content transfer encoding but found {!r}".format(value)))
     else:
         cte_header.append(token)
         cte_header.cte = token.value.strip().lower()
diff --git a/Lib/email/contentmanager.py b/Lib/email/contentmanager.py
new file mode 100644
index 0000000..d363652
--- /dev/null
+++ b/Lib/email/contentmanager.py
@@ -0,0 +1,249 @@
+import binascii
+import email.charset
+import email.message
+import email.errors
+from email import quoprimime
+
+class ContentManager:
+
+    def __init__(self):
+        self.get_handlers = {}
+        self.set_handlers = {}
+
+    def add_get_handler(self, key, handler):
+        self.get_handlers[key] = handler
+
+    def get_content(self, msg, *args, **kw):
+        content_type = msg.get_content_type()
+        if content_type in self.get_handlers:
+            return self.get_handlers[content_type](msg, *args, **kw)
+        maintype = msg.get_content_maintype()
+        if maintype in self.get_handlers:
+            return self.get_handlers[maintype](msg, *args, **kw)
+        if '' in self.get_handlers:
+            return self.get_handlers[''](msg, *args, **kw)
+        raise KeyError(content_type)
+
+    def add_set_handler(self, typekey, handler):
+        self.set_handlers[typekey] = handler
+
+    def set_content(self, msg, obj, *args, **kw):
+        if msg.get_content_maintype() == 'multipart':
+            # XXX: is this error a good idea or not?  We can remove it later,
+            # but we can't add it later, so do it for now.
+            raise TypeError("set_content not valid on multipart")
+        handler = self._find_set_handler(msg, obj)
+        msg.clear_content()
+        handler(msg, obj, *args, **kw)
+
+    def _find_set_handler(self, msg, obj):
+        full_path_for_error = None
+        for typ in type(obj).__mro__:
+            if typ in self.set_handlers:
+                return self.set_handlers[typ]
+            qname = typ.__qualname__
+            modname = getattr(typ, '__module__', '')
+            full_path = '.'.join((modname, qname)) if modname else qname
+            if full_path_for_error is None:
+                full_path_for_error = full_path
+            if full_path in self.set_handlers:
+                return self.set_handlers[full_path]
+            if qname in self.set_handlers:
+                return self.set_handlers[qname]
+            name = typ.__name__
+            if name in self.set_handlers:
+                return self.set_handlers[name]
+        if None in self.set_handlers:
+            return self.set_handlers[None]
+        raise KeyError(full_path_for_error)
+
+
+raw_data_manager = ContentManager()
+
+
+def get_text_content(msg, errors='replace'):
+    content = msg.get_payload(decode=True)
+    charset = msg.get_param('charset', 'ASCII')
+    return content.decode(charset, errors=errors)
+raw_data_manager.add_get_handler('text', get_text_content)
+
+
+def get_non_text_content(msg):
+    return msg.get_payload(decode=True)
+for maintype in 'audio image video application'.split():
+    raw_data_manager.add_get_handler(maintype, get_non_text_content)
+
+
+def get_message_content(msg):
+    return msg.get_payload(0)
+for subtype in 'rfc822 external-body'.split():
+    raw_data_manager.add_get_handler('message/'+subtype, get_message_content)
+
+
+def get_and_fixup_unknown_message_content(msg):
+    # If we don't understand a message subtype, we are supposed to treat it as
+    # if it were application/octet-stream, per
+    # tools.ietf.org/html/rfc2046#section-5.2.4.  Feedparser doesn't do that,
+    # so do our best to fix things up.  Note that it is *not* appropriate to
+    # model message/partial content as Message objects, so they are handled
+    # here as well.  (How to reassemble them is out of scope for this comment :)
+    return bytes(msg.get_payload(0))
+raw_data_manager.add_get_handler('message',
+                                 get_and_fixup_unknown_message_content)
+
+
+def _prepare_set(msg, maintype, subtype, headers):
+    msg['Content-Type'] = '/'.join((maintype, subtype))
+    if headers:
+        if not hasattr(headers[0], 'name'):
+            mp = msg.policy
+            headers = [mp.header_factory(*mp.header_source_parse([header]))
+                       for header in headers]
+        try:
+            for header in headers:
+                if header.defects:
+                    raise header.defects[0]
+                msg[header.name] = header
+        except email.errors.HeaderDefect as exc:
+            raise ValueError("Invalid header: {}".format(
+                                header.fold(policy=msg.policy))) from exc
+
+
+def _finalize_set(msg, disposition, filename, cid, params):
+    if disposition is None and filename is not None:
+        disposition = 'attachment'
+    if disposition is not None:
+        msg['Content-Disposition'] = disposition
+    if filename is not None:
+        msg.set_param('filename',
+                      filename,
+                      header='Content-Disposition',
+                      replace=True)
+    if cid is not None:
+        msg['Content-ID'] = cid
+    if params is not None:
+        for key, value in params.items():
+            msg.set_param(key, value)
+
+
+# XXX: This is a cleaned-up version of base64mime.body_encode.  It would
+# be nice to drop both this and quoprimime.body_encode in favor of
+# enhanced binascii routines that accepted a max_line_length parameter.
+def _encode_base64(data, max_line_length):
+    encoded_lines = []
+    unencoded_bytes_per_line = max_line_length * 3 // 4
+    for i in range(0, len(data), unencoded_bytes_per_line):
+        thisline = data[i:i+unencoded_bytes_per_line]
+        encoded_lines.append(binascii.b2a_base64(thisline).decode('ascii'))
+    return ''.join(encoded_lines)
+
+
+def _encode_text(string, charset, cte, policy):
+    lines = string.encode(charset).splitlines()
+    linesep = policy.linesep.encode('ascii')
+    def embeded_body(lines): return linesep.join(lines) + linesep
+    def normal_body(lines): return b'\n'.join(lines) + b'\n'
+    if cte==None:
+        # Use heuristics to decide on the "best" encoding.
+        try:
+            return '7bit', normal_body(lines).decode('ascii')
+        except UnicodeDecodeError:
+            pass
+        if (policy.cte_type == '8bit' and
+                max(len(x) for x in lines) <= policy.max_line_length):
+            return '8bit', normal_body(lines).decode('ascii', 'surrogateescape')
+        sniff = embeded_body(lines[:10])
+        sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'),
+                                          policy.max_line_length)
+        sniff_base64 = binascii.b2a_base64(sniff)
+        # This is a little unfair to qp; it includes lineseps, base64 doesn't.
+        if len(sniff_qp) > len(sniff_base64):
+            cte = 'base64'
+        else:
+            cte = 'quoted-printable'
+            if len(lines) <= 10:
+                return cte, sniff_qp
+    if cte == '7bit':
+        data = normal_body(lines).decode('ascii')
+    elif cte == '8bit':
+        data = normal_body(lines).decode('ascii', 'surrogateescape')
+    elif cte == 'quoted-printable':
+        data = quoprimime.body_encode(normal_body(lines).decode('latin-1'),
+                                      policy.max_line_length)
+    elif cte == 'base64':
+        data = _encode_base64(embeded_body(lines), policy.max_line_length)
+    else:
+        raise ValueError("Unknown content transfer encoding {}".format(cte))
+    return cte, data
+
+
+def set_text_content(msg, string, subtype="plain", charset='utf-8', cte=None,
+                     disposition=None, filename=None, cid=None,
+                     params=None, headers=None):
+    _prepare_set(msg, 'text', subtype, headers)
+    cte, payload = _encode_text(string, charset, cte, msg.policy)
+    msg.set_payload(payload)
+    msg.set_param('charset',
+                  email.charset.ALIASES.get(charset, charset),
+                  replace=True)
+    msg['Content-Transfer-Encoding'] = cte
+    _finalize_set(msg, disposition, filename, cid, params)
+raw_data_manager.add_set_handler(str, set_text_content)
+
+
+def set_message_content(msg, message, subtype="rfc822", cte=None,
+                       disposition=None, filename=None, cid=None,
+                       params=None, headers=None):
+    if subtype == 'partial':
+        raise ValueError("message/partial is not supported for Message objects")
+    if subtype == 'rfc822':
+        if cte not in (None, '7bit', '8bit', 'binary'):
+            # http://tools.ietf.org/html/rfc2046#section-5.2.1 mandate.
+            raise ValueError(
+                "message/rfc822 parts do not support cte={}".format(cte))
+        # 8bit will get coerced on serialization if policy.cte_type='7bit'.  We
+        # may end up claiming 8bit when it isn't needed, but the only negative
+        # result of that should be a gateway that needs to coerce to 7bit
+        # having to look through the whole embedded message to discover whether
+        # or not it actually has to do anything.
+        cte = '8bit' if cte is None else cte
+    elif subtype == 'external-body':
+        if cte not in (None, '7bit'):
+            # http://tools.ietf.org/html/rfc2046#section-5.2.3 mandate.
+            raise ValueError(
+                "message/external-body parts do not support cte={}".format(cte))
+        cte = '7bit'
+    elif cte is None:
+        # http://tools.ietf.org/html/rfc2046#section-5.2.4 says all future
+        # subtypes should be restricted to 7bit, so assume that.
+        cte = '7bit'
+    _prepare_set(msg, 'message', subtype, headers)
+    msg.set_payload([message])
+    msg['Content-Transfer-Encoding'] = cte
+    _finalize_set(msg, disposition, filename, cid, params)
+raw_data_manager.add_set_handler(email.message.Message, set_message_content)
+
+
+def set_bytes_content(msg, data, maintype, subtype, cte='base64',
+                     disposition=None, filename=None, cid=None,
+                     params=None, headers=None):
+    _prepare_set(msg, maintype, subtype, headers)
+    if cte == 'base64':
+        data = _encode_base64(data, max_line_length=msg.policy.max_line_length)
+    elif cte == 'quoted-printable':
+        # XXX: quoprimime.body_encode won't encode newline characters in data,
+        # so we can't use it.  This means max_line_length is ignored.  Another
+        # bug to fix later.  (Note: encoders.quopri is broken on line ends.)
+        data = binascii.b2a_qp(data, istext=False, header=False, quotetabs=True)
+        data = data.decode('ascii')
+    elif cte == '7bit':
+        # Make sure it really is only ASCII.  The early warning here seems
+        # worth the overhead...if you care write your own content manager :).
+        data.encode('ascii')
+    elif cte in ('8bit', 'binary'):
+        data = data.decode('ascii', 'surrogateescape')
+    msg.set_payload(data)
+    msg['Content-Transfer-Encoding'] = cte
+    _finalize_set(msg, disposition, filename, cid, params)
+for typ in (bytes, bytearray, memoryview):
+    raw_data_manager.add_set_handler(typ, set_bytes_content)
diff --git a/Lib/email/encoders.py b/Lib/email/encoders.py
index f9657f0..0a66acb 100644
--- a/Lib/email/encoders.py
+++ b/Lib/email/encoders.py
@@ -54,21 +54,12 @@ def encode_7or8bit(msg):
         # There's no payload.  For backwards compatibility we use 7bit
         msg['Content-Transfer-Encoding'] = '7bit'
         return
-    # We play a trick to make this go fast.  If encoding/decode to ASCII
-    # succeeds, we know the data must be 7bit, otherwise treat it as 8bit.
+    # We play a trick to make this go fast.  If decoding from ASCII succeeds,
+    # we know the data must be 7bit, otherwise treat it as 8bit.
     try:
-        if isinstance(orig, str):
-            orig.encode('ascii')
-        else:
-            orig.decode('ascii')
+        orig.decode('ascii')
     except UnicodeError:
-        charset = msg.get_charset()
-        output_cset = charset and charset.output_charset
-        # iso-2022-* is non-ASCII but encodes to a 7-bit representation
-        if output_cset and output_cset.lower().startswith('iso-2022-'):
-            msg['Content-Transfer-Encoding'] = '7bit'
-        else:
-            msg['Content-Transfer-Encoding'] = '8bit'
+        msg['Content-Transfer-Encoding'] = '8bit'
     else:
         msg['Content-Transfer-Encoding'] = '7bit'
 
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
index ea41e95..c95b27f 100644
--- a/Lib/email/feedparser.py
+++ b/Lib/email/feedparser.py
@@ -33,7 +33,7 @@ NLCRE_eol = re.compile('(\r\n|\r|\n)\Z')
 NLCRE_crack = re.compile('(\r\n|\r|\n)')
 # RFC 2822 $3.6.8 Optional fields.  ftext is %d33-57 / %d59-126, Any character
 # except controls, SP, and ":".
-headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])')
+headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])')
 EMPTYSTRING = ''
 NL = '\n'
 
@@ -50,8 +50,8 @@ class BufferedSubFile(object):
     simple abstraction -- it parses until EOF closes the current message.
     """
     def __init__(self):
-        # The last partial line pushed into this object.
-        self._partial = ''
+        # Chunks of the last partial line pushed into this object.
+        self._partial = []
         # The list of full, pushed lines, in reverse order
         self._lines = []
         # The stack of false-EOF checking predicates.
@@ -67,8 +67,8 @@ class BufferedSubFile(object):
 
     def close(self):
         # Don't forget any trailing partial line.
-        self._lines.append(self._partial)
-        self._partial = ''
+        self.pushlines(''.join(self._partial).splitlines(True))
+        self._partial = []
         self._closed = True
 
     def readline(self):
@@ -96,26 +96,27 @@ class BufferedSubFile(object):
 
     def push(self, data):
         """Push some new data into this object."""
-        # Handle any previous leftovers
-        data, self._partial = self._partial + data, ''
-        # Crack into lines, but preserve the newlines on the end of each
-        parts = NLCRE_crack.split(data)
-        # The *ahem* interesting behaviour of re.split when supplied grouping
-        # parentheses is that the last element of the resulting list is the
-        # data after the final RE.  In the case of a NL/CR terminated string,
-        # this is the empty string.
-        self._partial = parts.pop()
-        #GAN 29Mar09  bugs 1555570, 1721862  Confusion at 8K boundary ending with \r:
-        # is there a \n to follow later?
-        if not self._partial and parts and parts[-1].endswith('\r'):
-            self._partial = parts.pop(-2)+parts.pop()
-        # parts is a list of strings, alternating between the line contents
-        # and the eol character(s).  Gather up a list of lines after
-        # re-attaching the newlines.
-        lines = []
-        for i in range(len(parts) // 2):
-            lines.append(parts[i*2] + parts[i*2+1])
-        self.pushlines(lines)
+        # Crack into lines, but preserve the linesep characters on the end of each
+        parts = data.splitlines(True)
+
+        if not parts or not parts[0].endswith(('\n', '\r')):
+            # No new complete lines, so just accumulate partials
+            self._partial += parts
+            return
+
+        if self._partial:
+            # If there are previous leftovers, complete them now
+            self._partial.append(parts[0])
+            parts[0:1] = ''.join(self._partial).splitlines(True)
+            del self._partial[:]
+
+        # If the last element of the list does not end in a newline, then treat
+        # it as a partial line.  We only check for '\n' here because a line
+        # ending with '\r' might be a line that was split in the middle of a
+        # '\r\n' sequence (see bugs 1555570 and 1721862).
+        if not parts[-1].endswith('\n'):
+            self._partial = [parts.pop()]
+        self.pushlines(parts)
 
     def pushlines(self, lines):
         # Reverse and insert at the front of the lines.
@@ -135,7 +136,7 @@ class BufferedSubFile(object):
 class FeedParser:
     """A feed-style parser of email."""
 
-    def __init__(self, _factory=message.Message, *, policy=compat32):
+    def __init__(self, _factory=None, *, policy=compat32):
         """_factory is called with no arguments to create a new message obj
 
         The policy keyword specifies a policy object that controls a number of
@@ -143,14 +144,23 @@ class FeedParser:
         backward compatibility.
 
         """
-        self._factory = _factory
         self.policy = policy
-        try:
-            _factory(policy=self.policy)
-            self._factory_kwds = lambda: {'policy': self.policy}
-        except TypeError:
-            # Assume this is an old-style factory
-            self._factory_kwds = lambda: {}
+        self._factory_kwds = lambda: {'policy': self.policy}
+        if _factory is None:
+            # What this should be:
+            #self._factory = policy.default_message_factory
+            # but, because we are post 3.4 feature freeze, fix with temp hack:
+            if self.policy is compat32:
+                self._factory = message.Message
+            else:
+                self._factory = message.EmailMessage
+        else:
+            self._factory = _factory
+            try:
+                _factory(policy=self.policy)
+            except TypeError:
+                # Assume this is an old-style factory
+                self._factory_kwds = lambda: {}
         self._input = BufferedSubFile()
         self._msgstack = []
         self._parse = self._parsegen().__next__
@@ -501,6 +511,15 @@ class FeedParser:
             # There will always be a colon, because if there wasn't the part of
             # the parser that calls us would have started parsing the body.
             i = line.find(':')
+
+            # If the colon is on the start of the line the header is clearly
+            # malformed, but we might be able to salvage the rest of the
+            # message. Track the error but keep going.
+            if i == 0:
+                defect = errors.InvalidHeaderDefect("Missing header name.")
+                self._cur.defects.append(defect)
+                continue
+
             assert i>0, "_parse_headers fed line with no : and no leading WS"
             lastheader = line[:i]
             lastvalue = [line]
diff --git a/Lib/email/generator.py b/Lib/email/generator.py
index e4a86d4..4735721 100644
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -10,14 +10,10 @@ import re
 import sys
 import time
 import random
-import warnings
 
 from copy import deepcopy
 from io import StringIO, BytesIO
-from email._policybase import compat32
-from email.header import Header
 from email.utils import _has_surrogates
-import email.charset as _charset
 
 UNDERSCORE = '_'
 NL = '\n'  # XXX: no longer used by the code below.
@@ -55,8 +51,9 @@ class Generator:
         by RFC 2822.
 
         The policy keyword specifies a policy object that controls a number of
-        aspects of the generator's operation.  The default policy maintains
-        backward compatibility.
+        aspects of the generator's operation.  If no policy is specified,
+        the policy associated with the Message object passed to the
+        flatten method is used.
 
         """
         self._fp = outfp
@@ -80,7 +77,9 @@ class Generator:
         Note that for subobjects, no From_ line is printed.
 
         linesep specifies the characters used to indicate a new line in
-        the output.  The default value is determined by the policy.
+        the output.  The default value is determined by the policy specified
+        when the Generator instance was created or, if none was specified,
+        from the policy associated with the msg.
 
         """
         # We use the _XXX constants for operating on data that comes directly
diff --git a/Lib/email/header.py b/Lib/email/header.py
index 5bd0638..9c89589 100644
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -100,7 +100,6 @@ def decode_header(header):
                 words.append((encoded, encoding, charset))
     # Now loop over words and remove words that consist of whitespace
     # between two encoded strings.
-    import sys
     droplist = []
     for n, w in enumerate(words):
         if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace():
@@ -362,7 +361,6 @@ class Header:
         for string, charset in self._chunks:
             if hasspace is not None:
                 hasspace = string and self._nonctext(string[0])
-                import sys
                 if lastcs not in (None, 'us-ascii'):
                     if not hasspace or charset not in (None, 'us-ascii'):
                         formatter.add_transition()
diff --git a/Lib/email/headerregistry.py b/Lib/email/headerregistry.py
index 1fae950..911a2af 100644
--- a/Lib/email/headerregistry.py
+++ b/Lib/email/headerregistry.py
@@ -7,6 +7,7 @@ Eventually HeaderRegistry will be a public API, but it isn't yet,
 and will probably change some before that happens.
 
 """
+from types import MappingProxyType
 
 from email import utils
 from email import errors
@@ -454,7 +455,7 @@ class ParameterizedMIMEHeader:
 
     @property
     def params(self):
-        return self._params.copy()
+        return MappingProxyType(self._params)
 
 
 class ContentTypeHeader(ParameterizedMIMEHeader):
diff --git a/Lib/email/iterators.py b/Lib/email/iterators.py
index 3adc4a0..b5502ee 100644
--- a/Lib/email/iterators.py
+++ b/Lib/email/iterators.py
@@ -26,8 +26,7 @@ def walk(self):
     yield self
     if self.is_multipart():
         for subpart in self.get_payload():
-            for subsubpart in subpart.walk():
-                yield subsubpart
+            yield from subpart.walk()
 
 
 
@@ -40,8 +39,7 @@ def body_line_iterator(msg, decode=False):
     for subpart in msg.walk():
         payload = subpart.get_payload(decode=decode)
         if isinstance(payload, str):
-            for line in StringIO(payload):
-                yield line
+            yield from StringIO(payload)
 
 
 def typed_subpart_iterator(msg, maintype='text', subtype=None):
diff --git a/Lib/email/message.py b/Lib/email/message.py
index afe350c..2f37dbb 100644
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -8,8 +8,8 @@ __all__ = ['Message']
 
 import re
 import uu
-import base64
-import binascii
+import quopri
+import warnings
 from io import BytesIO, StringIO
 
 # Intrapackage imports
@@ -132,22 +132,50 @@ class Message:
 
     def __str__(self):
         """Return the entire formatted message as a string.
-        This includes the headers, body, and envelope header.
         """
         return self.as_string()
 
-    def as_string(self, unixfrom=False, maxheaderlen=0):
+    def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
         """Return the entire formatted message as a string.
-        Optional `unixfrom' when True, means include the Unix From_ envelope
-        header.
 
-        This is a convenience method and may not generate the message exactly
-        as you intend.  For more flexibility, use the flatten() method of a
-        Generator instance.
+        Optional 'unixfrom', when true, means include the Unix From_ envelope
+        header.  For backward compatibility reasons, if maxheaderlen is
+        not specified it defaults to 0, so you must override it explicitly
+        if you want a different maxheaderlen.  'policy' is passed to the
+        Generator instance used to serialize the mesasge; if it is not
+        specified the policy associated with the message instance is used.
+
+        If the message object contains binary data that is not encoded
+        according to RFC standards, the non-compliant data will be replaced by
+        unicode "unknown character" code points.
         """
         from email.generator import Generator
+        policy = self.policy if policy is None else policy
         fp = StringIO()
-        g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
+        g = Generator(fp,
+                      mangle_from_=False,
+                      maxheaderlen=maxheaderlen,
+                      policy=policy)
+        g.flatten(self, unixfrom=unixfrom)
+        return fp.getvalue()
+
+    def __bytes__(self):
+        """Return the entire formatted message as a bytes object.
+        """
+        return self.as_bytes()
+
+    def as_bytes(self, unixfrom=False, policy=None):
+        """Return the entire formatted message as a bytes object.
+
+        Optional 'unixfrom', when true, means include the Unix From_ envelope
+        header.  'policy' is passed to the BytesGenerator instance used to
+        serialize the message; if not specified the policy associated with
+        the message instance is used.
+        """
+        from email.generator import BytesGenerator
+        policy = self.policy if policy is None else policy
+        fp = BytesIO()
+        g = BytesGenerator(fp, mangle_from_=False, policy=policy)
         g.flatten(self, unixfrom=unixfrom)
         return fp.getvalue()
 
@@ -177,7 +205,11 @@ class Message:
         if self._payload is None:
             self._payload = [payload]
         else:
-            self._payload.append(payload)
+            try:
+                self._payload.append(payload)
+            except AttributeError:
+                raise TypeError("Attach is not valid on a message with a"
+                                " non-multipart payload")
 
     def get_payload(self, i=None, decode=False):
         """Return a reference to the payload.
@@ -241,14 +273,14 @@ class Message:
                     bpayload = payload.encode('ascii')
                 except UnicodeError:
                     # This won't happen for RFC compliant messages (messages
-                    # containing only ASCII codepoints in the unicode input).
+                    # containing only ASCII code points in the unicode input).
                     # If it does happen, turn the string into bytes in a way
                     # guaranteed not to fail.
                     bpayload = payload.encode('raw-unicode-escape')
         if not decode:
             return payload
         if cte == 'quoted-printable':
-            return utils._qdecode(bpayload)
+            return quopri.decodestring(bpayload)
         elif cte == 'base64':
             # XXX: this is a bit of a hack; decode_b should probably be factored
             # out somewhere, but I haven't figured out where yet.
@@ -668,7 +700,7 @@ class Message:
         return failobj
 
     def set_param(self, param, value, header='Content-Type', requote=True,
-                  charset=None, language=''):
+                  charset=None, language='', replace=False):
         """Set a parameter in the Content-Type header.
 
         If the parameter already exists in the header, its value will be
@@ -712,8 +744,11 @@ class Message:
                 else:
                     ctype = SEMISPACE.join([ctype, append_param])
         if ctype != self.get(header):
-            del self[header]
-            self[header] = ctype
+            if replace:
+                self.replace_header(header, ctype)
+            else:
+                del self[header]
+                self[header] = ctype
 
     def del_param(self, param, header='content-type', requote=True):
         """Remove the given parameter completely from the Content-Type header.
@@ -894,3 +929,219 @@ class Message:
 
     # I.e. def walk(self): ...
     from email.iterators import walk
+
+# XXX Support for temporary deprecation hack for is_attachment property.
+class _IsAttachment:
+    def __init__(self, value):
+        self.value = value
+    def __call__(self):
+        return self.value
+    def __bool__(self):
+        warnings.warn("is_attachment will be a method, not a property, in 3.5",
+                      DeprecationWarning,
+                      stacklevel=3)
+        return self.value
+
+class MIMEPart(Message):
+
+    def __init__(self, policy=None):
+        if policy is None:
+            from email.policy import default
+            policy = default
+        Message.__init__(self, policy)
+
+    @property
+    def is_attachment(self):
+        c_d = self.get('content-disposition')
+        result = False if c_d is None else c_d.content_disposition == 'attachment'
+        # XXX transitional hack to raise deprecation if not called.
+        return _IsAttachment(result)
+
+    def _find_body(self, part, preferencelist):
+        if part.is_attachment():
+            return
+        maintype, subtype = part.get_content_type().split('/')
+        if maintype == 'text':
+            if subtype in preferencelist:
+                yield (preferencelist.index(subtype), part)
+            return
+        if maintype != 'multipart':
+            return
+        if subtype != 'related':
+            for subpart in part.iter_parts():
+                yield from self._find_body(subpart, preferencelist)
+            return
+        if 'related' in preferencelist:
+            yield (preferencelist.index('related'), part)
+        candidate = None
+        start = part.get_param('start')
+        if start:
+            for subpart in part.iter_parts():
+                if subpart['content-id'] == start:
+                    candidate = subpart
+                    break
+        if candidate is None:
+            subparts = part.get_payload()
+            candidate = subparts[0] if subparts else None
+        if candidate is not None:
+            yield from self._find_body(candidate, preferencelist)
+
+    def get_body(self, preferencelist=('related', 'html', 'plain')):
+        """Return best candidate mime part for display as 'body' of message.
+
+        Do a depth first search, starting with self, looking for the first part
+        matching each of the items in preferencelist, and return the part
+        corresponding to the first item that has a match, or None if no items
+        have a match.  If 'related' is not included in preferencelist, consider
+        the root part of any multipart/related encountered as a candidate
+        match.  Ignore parts with 'Content-Disposition: attachment'.
+        """
+        best_prio = len(preferencelist)
+        body = None
+        for prio, part in self._find_body(self, preferencelist):
+            if prio < best_prio:
+                best_prio = prio
+                body = part
+                if prio == 0:
+                    break
+        return body
+
+    _body_types = {('text', 'plain'),
+                   ('text', 'html'),
+                   ('multipart', 'related'),
+                   ('multipart', 'alternative')}
+    def iter_attachments(self):
+        """Return an iterator over the non-main parts of a multipart.
+
+        Skip the first of each occurrence of text/plain, text/html,
+        multipart/related, or multipart/alternative in the multipart (unless
+        they have a 'Content-Disposition: attachment' header) and include all
+        remaining subparts in the returned iterator.  When applied to a
+        multipart/related, return all parts except the root part.  Return an
+        empty iterator when applied to a multipart/alternative or a
+        non-multipart.
+        """
+        maintype, subtype = self.get_content_type().split('/')
+        if maintype != 'multipart' or subtype == 'alternative':
+            return
+        parts = self.get_payload()
+        if maintype == 'multipart' and subtype == 'related':
+            # For related, we treat everything but the root as an attachment.
+            # The root may be indicated by 'start'; if there's no start or we
+            # can't find the named start, treat the first subpart as the root.
+            start = self.get_param('start')
+            if start:
+                found = False
+                attachments = []
+                for part in parts:
+                    if part.get('content-id') == start:
+                        found = True
+                    else:
+                        attachments.append(part)
+                if found:
+                    yield from attachments
+                    return
+            parts.pop(0)
+            yield from parts
+            return
+        # Otherwise we more or less invert the remaining logic in get_body.
+        # This only really works in edge cases (ex: non-text relateds or
+        # alternatives) if the sending agent sets content-disposition.
+        seen = []   # Only skip the first example of each candidate type.
+        for part in parts:
+            maintype, subtype = part.get_content_type().split('/')
+            if ((maintype, subtype) in self._body_types and
+                    not part.is_attachment() and subtype not in seen):
+                seen.append(subtype)
+                continue
+            yield part
+
+    def iter_parts(self):
+        """Return an iterator over all immediate subparts of a multipart.
+
+        Return an empty iterator for a non-multipart.
+        """
+        if self.get_content_maintype() == 'multipart':
+            yield from self.get_payload()
+
+    def get_content(self, *args, content_manager=None, **kw):
+        if content_manager is None:
+            content_manager = self.policy.content_manager
+        return content_manager.get_content(self, *args, **kw)
+
+    def set_content(self, *args, content_manager=None, **kw):
+        if content_manager is None:
+            content_manager = self.policy.content_manager
+        content_manager.set_content(self, *args, **kw)
+
+    def _make_multipart(self, subtype, disallowed_subtypes, boundary):
+        if self.get_content_maintype() == 'multipart':
+            existing_subtype = self.get_content_subtype()
+            disallowed_subtypes = disallowed_subtypes + (subtype,)
+            if existing_subtype in disallowed_subtypes:
+                raise ValueError("Cannot convert {} to {}".format(
+                    existing_subtype, subtype))
+        keep_headers = []
+        part_headers = []
+        for name, value in self._headers:
+            if name.lower().startswith('content-'):
+                part_headers.append((name, value))
+            else:
+                keep_headers.append((name, value))
+        if part_headers:
+            # There is existing content, move it to the first subpart.
+            part = type(self)(policy=self.policy)
+            part._headers = part_headers
+            part._payload = self._payload
+            self._payload = [part]
+        else:
+            self._payload = []
+        self._headers = keep_headers
+        self['Content-Type'] = 'multipart/' + subtype
+        if boundary is not None:
+            self.set_param('boundary', boundary)
+
+    def make_related(self, boundary=None):
+        self._make_multipart('related', ('alternative', 'mixed'), boundary)
+
+    def make_alternative(self, boundary=None):
+        self._make_multipart('alternative', ('mixed',), boundary)
+
+    def make_mixed(self, boundary=None):
+        self._make_multipart('mixed', (), boundary)
+
+    def _add_multipart(self, _subtype, *args, _disp=None, **kw):
+        if (self.get_content_maintype() != 'multipart' or
+                self.get_content_subtype() != _subtype):
+            getattr(self, 'make_' + _subtype)()
+        part = type(self)(policy=self.policy)
+        part.set_content(*args, **kw)
+        if _disp and 'content-disposition' not in part:
+            part['Content-Disposition'] = _disp
+        self.attach(part)
+
+    def add_related(self, *args, **kw):
+        self._add_multipart('related', *args, _disp='inline', **kw)
+
+    def add_alternative(self, *args, **kw):
+        self._add_multipart('alternative', *args, **kw)
+
+    def add_attachment(self, *args, **kw):
+        self._add_multipart('mixed', *args, _disp='attachment', **kw)
+
+    def clear(self):
+        self._headers = []
+        self._payload = None
+
+    def clear_content(self):
+        self._headers = [(n, v) for n, v in self._headers
+                         if not n.lower().startswith('content-')]
+        self._payload = None
+
+
+class EmailMessage(MIMEPart):
+
+    def set_content(self, *args, **kw):
+        super().set_content(*args, **kw)
+        if 'MIME-Version' not in self:
+            self['MIME-Version'] = '1.0'
diff --git a/Lib/email/mime/nonmultipart.py b/Lib/email/mime/nonmultipart.py
index fc3b9eb..e1f5196 100644
--- a/Lib/email/mime/nonmultipart.py
+++ b/Lib/email/mime/nonmultipart.py
@@ -12,7 +12,7 @@ from email.mime.base import MIMEBase
 
 
 class MIMENonMultipart(MIMEBase):
-    """Base class for MIME multipart/* type messages."""
+    """Base class for MIME non-multipart type messages."""
 
     def attach(self, payload):
         # The public API prohibits attaching multiple subparts to MIMEBase
diff --git a/Lib/email/mime/text.py b/Lib/email/mime/text.py
index 3b5b09f..ec18b85 100644
--- a/Lib/email/mime/text.py
+++ b/Lib/email/mime/text.py
@@ -6,7 +6,6 @@
 
 __all__ = ['MIMEText']
 
-from email.encoders import encode_7or8bit
 from email.mime.nonmultipart import MIMENonMultipart
 
 
diff --git a/Lib/email/parser.py b/Lib/email/parser.py
index 752bf35..8c9bc9e 100644
--- a/Lib/email/parser.py
+++ b/Lib/email/parser.py
@@ -4,19 +4,18 @@
 
 """A parser of RFC 2822 and MIME email messages."""
 
-__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser']
+__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
+           'FeedParser', 'BytesFeedParser']
 
-import warnings
 from io import StringIO, TextIOWrapper
 
 from email.feedparser import FeedParser, BytesFeedParser
-from email.message import Message
 from email._policybase import compat32
 
 
 
 class Parser:
-    def __init__(self, _class=Message, *, policy=compat32):
+    def __init__(self, _class=None, *, policy=compat32):
         """Parser of RFC 2822 and MIME email messages.
 
         Creates an in-memory object tree representing the email message, which
@@ -107,8 +106,10 @@ class BytesParser:
         meaning it parses the entire contents of the file.
         """
         fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
-        with fp:
+        try:
             return self.parser.parse(fp, headersonly)
+        finally:
+            fp.detach()
 
 
     def parsebytes(self, text, headersonly=False):
diff --git a/Lib/email/policy.py b/Lib/email/policy.py
index 38e88af..f0b20f4 100644
--- a/Lib/email/policy.py
+++ b/Lib/email/policy.py
@@ -5,6 +5,7 @@ code that adds all the email6 features.
 from email._policybase import Policy, Compat32, compat32, _extend_docstrings
 from email.utils import _has_surrogates
 from email.headerregistry import HeaderRegistry as HeaderRegistry
+from email.contentmanager import raw_data_manager
 
 __all__ = [
     'Compat32',
@@ -58,10 +59,22 @@ class EmailPolicy(Policy):
                            special treatment, while all other fields are
                            treated as unstructured.  This list will be
                            completed before the extension is marked stable.)
+
+    content_manager     -- an object with at least two methods: get_content
+                           and set_content.  When the get_content or
+                           set_content method of a Message object is called,
+                           it calls the corresponding method of this object,
+                           passing it the message object as its first argument,
+                           and any arguments or keywords that were passed to
+                           it as additional arguments.  The default
+                           content_manager is
+                           :data:`~email.contentmanager.raw_data_manager`.
+
     """
 
     refold_source = 'long'
     header_factory = HeaderRegistry()
+    content_manager = raw_data_manager
 
     def __init__(self, **kw):
         # Ensure that each new instance gets a unique header factory
diff --git a/Lib/email/quoprimime.py b/Lib/email/quoprimime.py
index 30bf916..c1fe2b4 100644
--- a/Lib/email/quoprimime.py
+++ b/Lib/email/quoprimime.py
@@ -40,7 +40,6 @@ __all__ = [
     ]
 
 import re
-import io
 
 from string import ascii_letters, digits, hexdigits
 
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index f76c21e..5080d81 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -25,13 +25,10 @@ __all__ = [
 import os
 import re
 import time
-import base64
 import random
 import socket
 import datetime
 import urllib.parse
-import warnings
-from io import StringIO
 
 from email._parseaddr import quote
 from email._parseaddr import AddressList as _AddressList
@@ -39,10 +36,7 @@ from email._parseaddr import mktime_tz
 
 from email._parseaddr import parsedate, parsedate_tz, _parsedate_tz
 
-from quopri import decodestring as _qdecode
-
 # Intrapackage imports
-from email.encoders import _bencode, _qencode
 from email.charset import Charset
 
 COMMASPACE = ', '
@@ -54,17 +48,27 @@ TICK = "'"
 specialsre = re.compile(r'[][\\()<>@,:;".]')
 escapesre = re.compile(r'[\\"]')
 
-# How to figure out if we are processing strings that come from a byte
-# source with undecodable characters.
-_has_surrogates = re.compile(
-    '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search
+def _has_surrogates(s):
+    """Return True if s contains surrogate-escaped binary data."""
+    # This check is based on the fact that unless there are surrogates, utf8
+    # (Python's default encoding) can encode any string.  This is the fastest
+    # way to check for surrogates, see issue 11454 for timings.
+    try:
+        s.encode()
+        return False
+    except UnicodeEncodeError:
+        return True
 
 # How to deal with a string containing bytes before handing it to the
 # application through the 'normal' interface.
 def _sanitize(string):
-    # Turn any escaped bytes into unicode 'unknown' char.
-    original_bytes = string.encode('ascii', 'surrogateescape')
-    return original_bytes.decode('ascii', 'replace')
+    # Turn any escaped bytes into unicode 'unknown' char.  If the escaped
+    # bytes happen to be utf-8 they will instead get decoded, even if they
+    # were invalid in the charset the source was supposed to be in.  This
+    # seems like it is not a bad thing; a defect was still registered.
+    original_bytes = string.encode('utf-8', 'surrogateescape')
+    return original_bytes.decode('utf-8', 'replace')
+
 
 
 # Helpers
@@ -151,30 +155,14 @@ def formatdate(timeval=None, localtime=False, usegmt=False):
     # 2822 requires that day and month names be the English abbreviations.
     if timeval is None:
         timeval = time.time()
-    if localtime:
-        now = time.localtime(timeval)
-        # Calculate timezone offset, based on whether the local zone has
-        # daylight savings time, and whether DST is in effect.
-        if time.daylight and now[-1]:
-            offset = time.altzone
-        else:
-            offset = time.timezone
-        hours, minutes = divmod(abs(offset), 3600)
-        # Remember offset is in seconds west of UTC, but the timezone is in
-        # minutes east of UTC, so the signs differ.
-        if offset > 0:
-            sign = '-'
-        else:
-            sign = '+'
-        zone = '%s%02d%02d' % (sign, hours, minutes // 60)
+    if localtime or usegmt:
+        dt = datetime.datetime.fromtimestamp(timeval, datetime.timezone.utc)
     else:
-        now = time.gmtime(timeval)
-        # Timezone offset is always -0000
-        if usegmt:
-            zone = 'GMT'
-        else:
-            zone = '-0000'
-    return _format_timetuple_and_zone(now, zone)
+        dt = datetime.datetime.utcfromtimestamp(timeval)
+    if localtime:
+        dt = dt.astimezone()
+        usegmt = False
+    return format_datetime(dt, usegmt)
 
 def format_datetime(dt, usegmt=False):
     """Turn a datetime into a date string as specified in RFC 2822.
@@ -198,24 +186,23 @@ def format_datetime(dt, usegmt=False):
 def make_msgid(idstring=None, domain=None):
     """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
 
-    <20020201195627.33539.96671@nightshade.la.mastaler.com>
+    <142480216486.20800.16526388040877946887@nightshade.la.mastaler.com>
 
     Optional idstring if given is a string used to strengthen the
     uniqueness of the message id.  Optional domain if given provides the
     portion of the message id after the '@'.  It defaults to the locally
     defined hostname.
     """
-    timeval = time.time()
-    utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
+    timeval = int(time.time()*100)
     pid = os.getpid()
-    randint = random.randrange(100000)
+    randint = random.getrandbits(64)
     if idstring is None:
         idstring = ''
     else:
         idstring = '.' + idstring
     if domain is None:
         domain = socket.getfqdn()
-    msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, domain)
+    msgid = '<%d.%d.%d%s@%s>' % (timeval, pid, randint, idstring, domain)
     return msgid