1 files changed, 74 insertions, 52 deletions
diff --git a/Lib/email/generator.py b/Lib/email/generator.py
index ab37e94..e4a86d4 100644
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -12,9 +12,12 @@ import time
 import random
 import warnings
 
+from copy import deepcopy
 from io import StringIO, BytesIO
+from email._policybase import compat32
 from email.header import Header
-from email.message import _has_surrogates
+from email.utils import _has_surrogates
+import email.charset as _charset
 
 UNDERSCORE = '_'
 NL = '\n'  # XXX: no longer used by the code below.
@@ -33,7 +36,8 @@ class Generator:
     # Public interface
     #
 
-    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
+    def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, *,
+                 policy=None):
         """Create the generator for message flattening.
 
         outfp is the output file-like object for writing the message to.  It
@@ -49,16 +53,22 @@ class Generator:
         defined in the Header class.  Set maxheaderlen to zero to disable
         header wrapping.  The default is 78, as recommended (but not required)
         by RFC 2822.
+
+        The policy keyword specifies a policy object that controls aspects of
+        the generator's operation.  If omitted, flatten uses the message's own
+        policy.  The default policy maintains backward compatibility.
+
         """
         self._fp = outfp
         self._mangle_from_ = mangle_from_
-        self._maxheaderlen = maxheaderlen
+        self.maxheaderlen = maxheaderlen
+        self.policy = policy
 
     def write(self, s):
         # Just delegate to the file object
         self._fp.write(s)
 
-    def flatten(self, msg, unixfrom=False, linesep='\n'):
+    def flatten(self, msg, unixfrom=False, linesep=None):
         r"""Print the message object tree rooted at msg to the output file
         specified when the Generator instance was created.
 
@@ -70,29 +80,47 @@ class Generator:
         Note that for subobjects, no From_ line is printed.
 
         linesep specifies the characters used to indicate a new line in
-        the output.  The default value is the most useful for typical
-        Python applications, but it can be set to \r\n to produce RFC-compliant
-        line separators when needed.
+        the output.  The default value is determined by the policy.
 
         """
         # We use the _XXX constants for operating on data that comes directly
         # from the msg, and _encoded_XXX constants for operating on data that
         # has already been converted (to bytes in the BytesGenerator) and
         # inserted into a temporary buffer.
-        self._NL = linesep
-        self._encoded_NL = self._encode(linesep)
+        policy = msg.policy if self.policy is None else self.policy
+        if linesep is not None:
+            policy = policy.clone(linesep=linesep)
+        if self.maxheaderlen is not None:
+            policy = policy.clone(max_line_length=self.maxheaderlen)
+        self._NL = policy.linesep
+        self._encoded_NL = self._encode(self._NL)
         self._EMPTY = ''
         self._encoded_EMTPY = self._encode('')
-        if unixfrom:
-            ufrom = msg.get_unixfrom()
-            if not ufrom:
-                ufrom = 'From nobody ' + time.ctime(time.time())
-            self.write(ufrom + self._NL)
-        self._write(msg)
+        # Because we use clone (below) when we recursively process message
+        # subparts, and because clone uses the computed policy (not None),
+        # submessages will automatically get set to the computed policy when
+        # they are processed by this code.
+        old_gen_policy = self.policy
+        old_msg_policy = msg.policy
+        try:
+            self.policy = policy
+            msg.policy = policy
+            if unixfrom:
+                ufrom = msg.get_unixfrom()
+                if not ufrom:
+                    ufrom = 'From nobody ' + time.ctime(time.time())
+                self.write(ufrom + self._NL)
+            self._write(msg)
+        finally:
+            self.policy = old_gen_policy
+            msg.policy = old_msg_policy
 
     def clone(self, fp):
         """Clone this generator with the exact same options."""
-        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)
+        return self.__class__(fp,
+                              self._mangle_from_,
+                              None, # Use policy setting, which we've adjusted
+                              policy=self.policy)
 
     #
     # Protected interface - undocumented ;/
@@ -146,10 +174,18 @@ class Generator:
         # necessary.
         oldfp = self._fp
         try:
+            self._munge_cte = None
             self._fp = sfp = self._new_buffer()
             self._dispatch(msg)
         finally:
             self._fp = oldfp
+            munge_cte = self._munge_cte
+            del self._munge_cte
+        # If the CTE was munged on a copy, copy msg and set the saved headers.
+        if munge_cte:
+            msg = deepcopy(msg)
+            msg.replace_header('content-transfer-encoding', munge_cte[0])
+            msg.replace_header('content-type', munge_cte[1])
         # Write the headers.  First we see if the message object wants to
         # handle that itself.  If not, we'll do it generically.
         meth = getattr(msg, '_write_headers', None)
@@ -180,16 +216,8 @@ class Generator:
     #
 
     def _write_headers(self, msg):
-        for h, v in msg.items():
-            self.write('%s: ' % h)
-            if isinstance(v, Header):
-                self.write(v.encode(
-                    maxlinelen=self._maxheaderlen, linesep=self._NL)+self._NL)
-            else:
-                # Header's got lots of smarts, so use it.
-                header = Header(v, maxlinelen=self._maxheaderlen,
-                                header_name=h)
-                self.write(header.encode(linesep=self._NL)+self._NL)
+        for h, v in msg.raw_items():
+            self.write(self.policy.fold(h, v))
         # A blank line always separates headers from body
         self.write(self._NL)
 
@@ -206,9 +234,14 @@ class Generator:
         if _has_surrogates(msg._payload):
             charset = msg.get_param('charset')
             if charset is not None:
+                # XXX: This copy stuff is an ugly hack to avoid modifying the
+                # existing message.
+                msg = deepcopy(msg)
                 del msg['content-transfer-encoding']
                 msg.set_payload(payload, charset)
                 payload = msg.get_payload()
+                self._munge_cte = (msg['content-transfer-encoding'],
+                                   msg['content-type'])
         if self._mangle_from_:
             payload = fcre.sub('>From ', payload)
         self._write_lines(payload)
@@ -266,9 +299,8 @@ class Generator:
             # body-part
             self._fp.write(body_part)
         # close-delimiter transport-padding
-        self.write(self._NL + '--' + boundary + '--')
+        self.write(self._NL + '--' + boundary + '--' + self._NL)
         if msg.epilogue is not None:
-            self.write(self._NL)
             if self._mangle_from_:
                 epilogue = fcre.sub('>From ', msg.epilogue)
             else:
@@ -279,12 +311,12 @@ class Generator:
         # The contents of signed parts has to stay unmodified in order to keep
         # the signature intact per RFC1847 2.1, so we disable header wrapping.
         # RDM: This isn't enough to completely preserve the part, but it helps.
-        old_maxheaderlen = self._maxheaderlen
+        p = self.policy
+        self.policy = p.clone(max_line_length=0)
         try:
-            self._maxheaderlen = 0
             self._handle_multipart(msg)
         finally:
-            self._maxheaderlen = old_maxheaderlen
+            self.policy = p
 
     def _handle_message_delivery_status(self, msg):
         # We can't just write the headers directly to self's file object
@@ -319,16 +351,18 @@ class Generator:
         # message/rfc822.  Such messages are generated by, for example,
         # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
         # in that case we just emit the string body.
-        payload = msg.get_payload()
+        payload = msg._payload
         if isinstance(payload, list):
             g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
             payload = s.getvalue()
+        else:
+            payload = self._encode(payload)
         self._fp.write(payload)
 
     # This used to be a module level function; we use a classmethod for this
     # and _compile_re so we can continue to provide the module level function
     # for backward compatibility by doing
-    #   _make_boudary = Generator._make_boundary
+    #   _make_boundary = Generator._make_boundary
     # at the end of the module.  It *is* internal, so we could drop that...
     @classmethod
     def _make_boundary(cls, text=None):
@@ -358,7 +392,10 @@ class BytesGenerator(Generator):
 
     Functionally identical to the base Generator except that the output is
     bytes and not string.  When surrogates were used in the input to encode
-    bytes, these are decoded back to bytes for output.
+    bytes, these are decoded back to bytes for output.  If the policy has
+    cte_type set to 7bit, then the message is transformed such that the
+    non-ASCII bytes are properly content transfer encoded, using the charset
+    unknown-8bit.
 
     The outfp object must accept bytes in its write method.
     """
@@ -379,23 +416,8 @@ class BytesGenerator(Generator):
     def _write_headers(self, msg):
         # This is almost the same as the string version, except for handling
         # strings with 8bit bytes.
-        for h, v in msg._headers:
-            self.write('%s: ' % h)
-            if isinstance(v, Header):
-                self.write(v.encode(maxlinelen=self._maxheaderlen)+self._NL)
-            elif _has_surrogates(v):
-                # If we have raw 8bit data in a byte string, we have no idea
-                # what the encoding is.  There is no safe way to split this
-                # string.  If it's ascii-subset, then we could do a normal
-                # ascii split, but if it's multibyte then we could break the
-                # string.  There's no way to know so the least harm seems to
-                # be to not split the string and risk it being too long.
-                self.write(v+NL)
-            else:
-                # Header's got lots of smarts and this string is safe...
-                header = Header(v, maxlinelen=self._maxheaderlen,
-                                header_name=h)
-                self.write(header.encode(linesep=self._NL)+self._NL)
+        for h, v in msg.raw_items():
+            self._fp.write(self.policy.fold_binary(h, v))
         # A blank line always separates headers from body
         self.write(self._NL)
 
@@ -404,7 +426,7 @@ class BytesGenerator(Generator):
         # just write it back out.
         if msg._payload is None:
             return
-        if _has_surrogates(msg._payload):
+        if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit':
             if self._mangle_from_:
                 msg._payload = fcre.sub(">From ", msg._payload)
             self._write_lines(msg._payload)