diff options
Diffstat (limited to 'Lib/email/generator.py')
-rw-r--r-- | Lib/email/generator.py | 107 |
1 file changed, 58 insertions, 49 deletions
diff --git a/Lib/email/generator.py b/Lib/email/generator.py index ab37e94..24f2abf 100644 --- a/Lib/email/generator.py +++ b/Lib/email/generator.py @@ -13,8 +13,10 @@ import random import warnings from io import StringIO, BytesIO +from email._policybase import compat32 from email.header import Header -from email.message import _has_surrogates +from email.utils import _has_surrogates +import email.charset as _charset UNDERSCORE = '_' NL = '\n' # XXX: no longer used by the code below. @@ -33,7 +35,8 @@ class Generator: # Public interface # - def __init__(self, outfp, mangle_from_=True, maxheaderlen=78): + def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, *, + policy=None): """Create the generator for message flattening. outfp is the output file-like object for writing the message to. It @@ -49,16 +52,22 @@ class Generator: defined in the Header class. Set maxheaderlen to zero to disable header wrapping. The default is 78, as recommended (but not required) by RFC 2822. + + The policy keyword specifies a policy object that controls a number of + aspects of the generator's operation. The default policy maintains + backward compatibility. + """ self._fp = outfp self._mangle_from_ = mangle_from_ - self._maxheaderlen = maxheaderlen + self.maxheaderlen = maxheaderlen + self.policy = policy def write(self, s): # Just delegate to the file object self._fp.write(s) - def flatten(self, msg, unixfrom=False, linesep='\n'): + def flatten(self, msg, unixfrom=False, linesep=None): r"""Print the message object tree rooted at msg to the output file specified when the Generator instance was created. @@ -70,29 +79,47 @@ class Generator: Note that for subobjects, no From_ line is printed. linesep specifies the characters used to indicate a new line in - the output. The default value is the most useful for typical - Python applications, but it can be set to \r\n to produce RFC-compliant - line separators when needed. + the output. 
The default value is determined by the policy. """ # We use the _XXX constants for operating on data that comes directly # from the msg, and _encoded_XXX constants for operating on data that # has already been converted (to bytes in the BytesGenerator) and # inserted into a temporary buffer. - self._NL = linesep - self._encoded_NL = self._encode(linesep) + policy = msg.policy if self.policy is None else self.policy + if linesep is not None: + policy = policy.clone(linesep=linesep) + if self.maxheaderlen is not None: + policy = policy.clone(max_line_length=self.maxheaderlen) + self._NL = policy.linesep + self._encoded_NL = self._encode(self._NL) self._EMPTY = '' self._encoded_EMTPY = self._encode('') - if unixfrom: - ufrom = msg.get_unixfrom() - if not ufrom: - ufrom = 'From nobody ' + time.ctime(time.time()) - self.write(ufrom + self._NL) - self._write(msg) + # Because we use clone (below) when we recursively process message + # subparts, and because clone uses the computed policy (not None), + # submessages will automatically get set to the computed policy when + # they are processed by this code. 
+ old_gen_policy = self.policy + old_msg_policy = msg.policy + try: + self.policy = policy + msg.policy = policy + if unixfrom: + ufrom = msg.get_unixfrom() + if not ufrom: + ufrom = 'From nobody ' + time.ctime(time.time()) + self.write(ufrom + self._NL) + self._write(msg) + finally: + self.policy = old_gen_policy + msg.policy = old_msg_policy def clone(self, fp): """Clone this generator with the exact same options.""" - return self.__class__(fp, self._mangle_from_, self._maxheaderlen) + return self.__class__(fp, + self._mangle_from_, + None, # Use policy setting, which we've adjusted + policy=self.policy) # # Protected interface - undocumented ;/ @@ -180,16 +207,8 @@ class Generator: # def _write_headers(self, msg): - for h, v in msg.items(): - self.write('%s: ' % h) - if isinstance(v, Header): - self.write(v.encode( - maxlinelen=self._maxheaderlen, linesep=self._NL)+self._NL) - else: - # Header's got lots of smarts, so use it. - header = Header(v, maxlinelen=self._maxheaderlen, - header_name=h) - self.write(header.encode(linesep=self._NL)+self._NL) + for h, v in msg.raw_items(): + self.write(self.policy.fold(h, v)) # A blank line always separates headers from body self.write(self._NL) @@ -279,12 +298,12 @@ class Generator: # The contents of signed parts has to stay unmodified in order to keep # the signature intact per RFC1847 2.1, so we disable header wrapping. # RDM: This isn't enough to completely preserve the part, but it helps. - old_maxheaderlen = self._maxheaderlen + p = self.policy + self.policy = p.clone(max_line_length=0) try: - self._maxheaderlen = 0 self._handle_multipart(msg) finally: - self._maxheaderlen = old_maxheaderlen + self.policy = p def _handle_message_delivery_status(self, msg): # We can't just write the headers directly to self's file object @@ -319,10 +338,12 @@ class Generator: # message/rfc822. Such messages are generated by, for example, # Groupwise when forwarding unadorned messages. (Issue 7970.) 
So # in that case we just emit the string body. - payload = msg.get_payload() + payload = msg._payload if isinstance(payload, list): g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL) payload = s.getvalue() + else: + payload = self._encode(payload) self._fp.write(payload) # This used to be a module level function; we use a classmethod for this @@ -358,7 +379,10 @@ class BytesGenerator(Generator): Functionally identical to the base Generator except that the output is bytes and not string. When surrogates were used in the input to encode - bytes, these are decoded back to bytes for output. + bytes, these are decoded back to bytes for output. If the policy has + cte_type set to 7bit, then the message is transformed such that the + non-ASCII bytes are properly content transfer encoded, using the charset + unknown-8bit. The outfp object must accept bytes in its write method. """ @@ -379,23 +403,8 @@ class BytesGenerator(Generator): def _write_headers(self, msg): # This is almost the same as the string version, except for handling # strings with 8bit bytes. - for h, v in msg._headers: - self.write('%s: ' % h) - if isinstance(v, Header): - self.write(v.encode(maxlinelen=self._maxheaderlen)+self._NL) - elif _has_surrogates(v): - # If we have raw 8bit data in a byte string, we have no idea - # what the encoding is. There is no safe way to split this - # string. If it's ascii-subset, then we could do a normal - # ascii split, but if it's multibyte then we could break the - # string. There's no way to know so the least harm seems to - # be to not split the string and risk it being too long. - self.write(v+NL) - else: - # Header's got lots of smarts and this string is safe... 
- header = Header(v, maxlinelen=self._maxheaderlen, - header_name=h) - self.write(header.encode(linesep=self._NL)+self._NL) + for h, v in msg.raw_items(): + self._fp.write(self.policy.fold_binary(h, v)) # A blank line always separates headers from body self.write(self._NL) @@ -404,7 +413,7 @@ class BytesGenerator(Generator): # just write it back out. if msg._payload is None: return - if _has_surrogates(msg._payload): + if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit': if self._mangle_from_: msg._payload = fcre.sub(">From ", msg._payload) self._write_lines(msg._payload) |