#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements in control over the parser/generator without requiring any additional complexity in the parser/generator API. Patch reviewed by Éric Araujo and Barry Warsaw.
author: R David Murray <rdmurray@bitdance.com> 2011-04-18 17:59:37 (GMT)
committer: R David Murray <rdmurray@bitdance.com> 2011-04-18 17:59:37 (GMT)
commit: 3edd22ac950d3a2bcc1ad2e5a83554970aef3369 (patch)
tree: b4661afc1be45e0d072c1c83ab354b2362f05afb /Lib/email/generator.py
parent: ce16be91dc68597b0c5bfc7b4b1c5136fe5697a6 (diff)
download: cpython-3edd22ac950d3a2bcc1ad2e5a83554970aef3369.zip
cpython-3edd22ac950d3a2bcc1ad2e5a83554970aef3369.tar.gz
cpython-3edd22ac950d3a2bcc1ad2e5a83554970aef3369.tar.bz2
1 file changed, 37 insertions, 25 deletions
diff --git a/Lib/email/generator.py b/Lib/email/generator.py
index fdd34e4..d8b8fa9 100644
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -13,8 +13,10 @@ import random
 import warnings
 
 from io import StringIO, BytesIO
+from email import policy
 from email.header import Header
 from email.message import _has_surrogates
+import email.charset as _charset
 
 UNDERSCORE = '_'
 NL = '\n'  # XXX: no longer used by the code below.
@@ -33,7 +35,8 @@ class Generator:
     # Public interface
     #
 
-    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
+    def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, *,
+                 policy=policy.default):
         """Create the generator for message flattening.
 
         outfp is the output file-like object for writing the message to.  It
@@ -49,16 +52,23 @@ class Generator:
         defined in the Header class.  Set maxheaderlen to zero to disable
         header wrapping.  The default is 78, as recommended (but not required)
         by RFC 2822.
+
+        The policy keyword specifies a policy object that controls a number of
+        aspects of the generator's operation.  The default policy maintains
+        backward compatibility.
+
         """
         self._fp = outfp
         self._mangle_from_ = mangle_from_
-        self._maxheaderlen = maxheaderlen
+        self._maxheaderlen = (maxheaderlen if maxheaderlen is not None else
+                                 policy.max_line_length)
+        self.policy = policy
 
     def write(self, s):
         # Just delegate to the file object
         self._fp.write(s)
 
-    def flatten(self, msg, unixfrom=False, linesep='\n'):
+    def flatten(self, msg, unixfrom=False, linesep=None):
         r"""Print the message object tree rooted at msg to the output file
         specified when the Generator instance was created.
 
@@ -70,17 +80,15 @@ class Generator:
         Note that for subobjects, no From_ line is printed.
 
         linesep specifies the characters used to indicate a new line in
-        the output.  The default value is the most useful for typical
-        Python applications, but it can be set to \r\n to produce RFC-compliant
-        line separators when needed.
+        the output.  The default value is determined by the policy.
 
         """
         # We use the _XXX constants for operating on data that comes directly
         # from the msg, and _encoded_XXX constants for operating on data that
         # has already been converted (to bytes in the BytesGenerator) and
         # inserted into a temporary buffer.
-        self._NL = linesep
-        self._encoded_NL = self._encode(linesep)
+        self._NL = linesep if linesep is not None else self.policy.linesep
+        self._encoded_NL = self._encode(self._NL)
         self._EMPTY = ''
         self._encoded_EMTPY = self._encode('')
         if unixfrom:
@@ -338,7 +346,10 @@ class BytesGenerator(Generator):
 
     Functionally identical to the base Generator except that the output is
     bytes and not string.  When surrogates were used in the input to encode
-    bytes, these are decoded back to bytes for output.
+    bytes, these are decoded back to bytes for output.  If the policy has
+    must_be_7bit set true, then the message is transformed such that the
+    non-ASCII bytes are properly content transfer encoded, using the
+    charset unknown-8bit.
 
     The outfp object must accept bytes in its write method.
     """
@@ -361,21 +372,22 @@ class BytesGenerator(Generator):
         # strings with 8bit bytes.
         for h, v in msg._headers:
             self.write('%s: ' % h)
-            if isinstance(v, Header):
-                self.write(v.encode(maxlinelen=self._maxheaderlen)+NL)
-            elif _has_surrogates(v):
-                # If we have raw 8bit data in a byte string, we have no idea
-                # what the encoding is.  There is no safe way to split this
-                # string.  If it's ascii-subset, then we could do a normal
-                # ascii split, but if it's multibyte then we could break the
-                # string.  There's no way to know so the least harm seems to
-                # be to not split the string and risk it being too long.
-                self.write(v+NL)
-            else:
-                # Header's got lots of smarts and this string is safe...
-                header = Header(v, maxlinelen=self._maxheaderlen,
-                                header_name=h)
-                self.write(header.encode(linesep=self._NL)+self._NL)
+            if isinstance(v, str):
+                if _has_surrogates(v):
+                    if not self.policy.must_be_7bit:
+                        # If we have raw 8bit data in a byte string, we have no idea
+                        # what the encoding is.  There is no safe way to split this
+                        # string.  If it's ascii-subset, then we could do a normal
+                        # ascii split, but if it's multibyte then we could break the
+                        # string.  There's no way to know so the least harm seems to
+                        # be to not split the string and risk it being too long.
+                        self.write(v+NL)
+                        continue
+                    h = Header(v, charset=_charset.UNKNOWN8BIT, header_name=h)
+                else:
+                    h = Header(v, header_name=h)
+            self.write(h.encode(linesep=self._NL,
+                                maxlinelen=self._maxheaderlen)+self._NL)
         # A blank line always separates headers from body
         self.write(self._NL)
 
@@ -384,7 +396,7 @@ class BytesGenerator(Generator):
         # just write it back out.
         if msg._payload is None:
             return
-        if _has_surrogates(msg._payload):
+        if _has_surrogates(msg._payload) and not self.policy.must_be_7bit:
             self.write(msg._payload)
         else:
             super(BytesGenerator,self)._handle_text(msg)
author	R David Murray <rdmurray@bitdance.com>	2011-04-18 17:59:37 (GMT)
committer	R David Murray <rdmurray@bitdance.com>	2011-04-18 17:59:37 (GMT)
commit	3edd22ac950d3a2bcc1ad2e5a83554970aef3369 (patch)
tree	b4661afc1be45e0d072c1c83ab354b2362f05afb /Lib/email/generator.py
parent	ce16be91dc68597b0c5bfc7b4b1c5136fe5697a6 (diff)
download	cpython-3edd22ac950d3a2bcc1ad2e5a83554970aef3369.zip cpython-3edd22ac950d3a2bcc1ad2e5a83554970aef3369.tar.gz cpython-3edd22ac950d3a2bcc1ad2e5a83554970aef3369.tar.bz2