summary refs log tree commit diff stats
path: root/Lib/email
diff options
context:
space:
mode:
author R. David Murray <rdmurray@bitdance.com> 2010-10-23 22:19:56 (GMT)
committer R. David Murray <rdmurray@bitdance.com> 2010-10-23 22:19:56 (GMT)
commit 8451c4b6e044f83efc2298a79af58c3e56d946a2 (patch)
tree edaad1a89627de27ad30b465b7a416c468850653 /Lib/email
parent 29aad0005dd56634363dabd74cf6708c9a255b43 (diff)
download cpython-8451c4b6e044f83efc2298a79af58c3e56d946a2.zip
cpython-8451c4b6e044f83efc2298a79af58c3e56d946a2.tar.gz
cpython-8451c4b6e044f83efc2298a79af58c3e56d946a2.tar.bz2
#1349106: add linesep argument to generator.flatten and header.encode.
Diffstat (limited to 'Lib/email')
-rw-r--r-- Lib/email/generator.py 74
-rw-r--r-- Lib/email/header.py 16
-rw-r--r-- Lib/email/test/data/msg_26.txt 3
-rw-r--r-- Lib/email/test/test_email.py 24
4 files changed, 80 insertions, 37 deletions
diff --git a/Lib/email/generator.py b/Lib/email/generator.py
index 40b95c4..05019d9 100644
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -17,7 +17,7 @@ from email.header import Header
from email.message import _has_surrogates
UNDERSCORE = '_'
-NL = '\n'
+NL = '\n' # XXX: no longer used by the code below.
fcre = re.compile(r'^From ', re.MULTILINE)
@@ -58,7 +58,7 @@ class Generator:
# Just delegate to the file object
self._fp.write(s)
- def flatten(self, msg, unixfrom=False):
+ def flatten(self, msg, unixfrom=False, linesep='\n'):
"""Print the message object tree rooted at msg to the output file
specified when the Generator instance was created.
@@ -68,12 +68,23 @@ class Generator:
is False to inhibit the printing of any From_ delimiter.
Note that for subobjects, no From_ line is printed.
+
+ linesep specifies the characters used to indicate a new line in
+ the output.
"""
+ # We use the _XXX constants for operating on data that comes directly
+ # from the msg, and _encoded_XXX constants for operating on data that
+ # has already been converted (to bytes in the BytesGenerator) and
+ # inserted into a temporary buffer.
+ self._NL = linesep
+ self._encoded_NL = self._encode(linesep)
+ self._EMPTY = ''
+ self._encoded_EMPTY = self._encode('')
if unixfrom:
ufrom = msg.get_unixfrom()
if not ufrom:
ufrom = 'From nobody ' + time.ctime(time.time())
- self.write(ufrom + NL)
+ self.write(ufrom + self._NL)
self._write(msg)
def clone(self, fp):
@@ -93,20 +104,18 @@ class Generator:
# it has already transformed the input; but, since this whole thing is a
# hack anyway this seems good enough.
- # We use these class constants when we need to manipulate data that has
- # already been written to a buffer (ex: constructing a re to check the
- # boundary), and the module level NL constant when adding new output to a
- # buffer via self.write, because 'write' always takes strings.
- # Having write always take strings makes the code simpler, but there are
- # a few occasions when we need to write previously created data back
- # to the buffer or to a new buffer; for those cases we use self._fp.write.
- _NL = NL
- _EMPTY = ''
+ # Similarly, we have _XXX and _encoded_XXX attributes that are used on
+ # source and buffer data, respectively.
+ _encoded_EMPTY = ''
def _new_buffer(self):
# BytesGenerator overrides this to return BytesIO.
return StringIO()
+ def _encode(self, s):
+ # BytesGenerator overrides this to encode strings to bytes.
+ return s
+
def _write(self, msg):
# We can't write the headers yet because of the following scenario:
# say a multipart message includes the boundary string somewhere in
@@ -158,14 +167,15 @@ class Generator:
for h, v in msg.items():
self.write('%s: ' % h)
if isinstance(v, Header):
- self.write(v.encode(maxlinelen=self._maxheaderlen)+NL)
+ self.write(v.encode(
+ maxlinelen=self._maxheaderlen, linesep=self._NL)+self._NL)
else:
# Header's got lots of smarts, so use it.
header = Header(v, maxlinelen=self._maxheaderlen,
header_name=h)
- self.write(header.encode()+NL)
+ self.write(header.encode(linesep=self._NL)+self._NL)
# A blank line always separates headers from body
- self.write(NL)
+ self.write(self._NL)
#
# Handlers for writing types and subtypes
@@ -208,11 +218,11 @@ class Generator:
for part in subparts:
s = self._new_buffer()
g = self.clone(s)
- g.flatten(part, unixfrom=False)
+ g.flatten(part, unixfrom=False, linesep=self._NL)
msgtexts.append(s.getvalue())
# Now make sure the boundary we've selected doesn't appear in any of
# the message texts.
- alltext = self._NL.join(msgtexts)
+ alltext = self._encoded_NL.join(msgtexts)
# BAW: What about boundaries that are wrapped in double-quotes?
boundary = msg.get_boundary(failobj=self._make_boundary(alltext))
# If we had to calculate a new boundary because the body text
@@ -225,9 +235,9 @@ class Generator:
msg.set_boundary(boundary)
# If there's a preamble, write it out, with a trailing CRLF
if msg.preamble is not None:
- self.write(msg.preamble + NL)
+ self.write(msg.preamble + self._NL)
# dash-boundary transport-padding CRLF
- self.write('--' + boundary + NL)
+ self.write('--' + boundary + self._NL)
# body-part
if msgtexts:
self._fp.write(msgtexts.pop(0))
@@ -236,13 +246,13 @@ class Generator:
# --> CRLF body-part
for body_part in msgtexts:
# delimiter transport-padding CRLF
- self.write('\n--' + boundary + NL)
+ self.write(self._NL + '--' + boundary + self._NL)
# body-part
self._fp.write(body_part)
# close-delimiter transport-padding
- self.write('\n--' + boundary + '--')
+ self.write(self._NL + '--' + boundary + '--')
if msg.epilogue is not None:
- self.write(NL)
+ self.write(self._NL)
self.write(msg.epilogue)
def _handle_multipart_signed(self, msg):
@@ -266,16 +276,16 @@ class Generator:
g = self.clone(s)
g.flatten(part, unixfrom=False)
text = s.getvalue()
- lines = text.split(self._NL)
+ lines = text.split(self._encoded_NL)
# Strip off the unnecessary trailing empty line
- if lines and lines[-1] == self._EMPTY:
- blocks.append(self._NL.join(lines[:-1]))
+ if lines and lines[-1] == self._encoded_EMPTY:
+ blocks.append(self._encoded_NL.join(lines[:-1]))
else:
blocks.append(text)
# Now join all the blocks with an empty line. This has the lovely
# effect of separating each block with an empty line, but not adding
# an extra one after the last one.
- self._fp.write(self._NL.join(blocks))
+ self._fp.write(self._encoded_NL.join(blocks))
def _handle_message(self, msg):
s = self._new_buffer()
@@ -333,10 +343,9 @@ class BytesGenerator(Generator):
The outfp object must accept bytes in its write method.
"""
- # Bytes versions of these constants for use in manipulating data from
+ # Bytes versions of this constant for use in manipulating data from
# the BytesIO buffer.
- _NL = NL.encode('ascii')
- _EMPTY = b''
+ _encoded_EMPTY = b''
def write(self, s):
self._fp.write(s.encode('ascii', 'surrogateescape'))
@@ -344,6 +353,9 @@ class BytesGenerator(Generator):
def _new_buffer(self):
return BytesIO()
+ def _encode(self, s):
+ return s.encode('ascii')
+
def _write_headers(self, msg):
# This is almost the same as the string version, except for handling
# strings with 8bit bytes.
@@ -363,9 +375,9 @@ class BytesGenerator(Generator):
# Header's got lots of smarts and this string is safe...
header = Header(v, maxlinelen=self._maxheaderlen,
header_name=h)
- self.write(header.encode()+NL)
+ self.write(header.encode(linesep=self._NL)+self._NL)
# A blank line always separates headers from body
- self.write(NL)
+ self.write(self._NL)
def _handle_text(self, msg):
# If the string has surrogates the original source was bytes, so
diff --git a/Lib/email/header.py b/Lib/email/header.py
index 89c1391..88fa80f 100644
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -272,7 +272,7 @@ class Header:
output_string = input_bytes.decode(output_charset, errors)
self._chunks.append((output_string, charset))
- def encode(self, splitchars=';, \t', maxlinelen=None):
+ def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
"""Encode a message header into an RFC-compliant format.
There are many issues involved in converting a given string for use in
@@ -293,6 +293,11 @@ class Header:
Optional splitchars is a string containing characters to split long
ASCII lines on, in rough support of RFC 2822's `highest level
syntactic breaks'. This doesn't affect RFC 2047 encoded lines.
+
+ Optional linesep is a string to be used to separate the lines of
+ the value. The default value is the most useful for typical
+ Python applications, but it can be set to \r\n to produce RFC-compliant
+ line separators when needed.
"""
self._normalize()
if maxlinelen is None:
@@ -311,7 +316,7 @@ class Header:
if len(lines) > 1:
formatter.newline()
formatter.add_transition()
- return str(formatter)
+ return formatter._str(linesep)
def _normalize(self):
# Step 1: Normalize the chunks so that all runs of identical charsets
@@ -342,9 +347,12 @@ class _ValueFormatter:
self._lines = []
self._current_line = _Accumulator(headerlen)
- def __str__(self):
+ def _str(self, linesep):
self.newline()
- return NL.join(self._lines)
+ return linesep.join(self._lines)
+
+ def __str__(self):
+ return self._str(NL)
def newline(self):
end_of_line = self._current_line.pop()
diff --git a/Lib/email/test/data/msg_26.txt b/Lib/email/test/data/msg_26.txt
index 6c71bce..58efaa9 100644
--- a/Lib/email/test/data/msg_26.txt
+++ b/Lib/email/test/data/msg_26.txt
@@ -24,7 +24,8 @@ Simple email with attachment.
--1618492860--2051301190--113853680
-Content-Type: application/riscos; name="clock.bmp,69c"; type=BMP; load=&fff69c4b; exec=&355dd4d1; access=&03
+Content-Type: application/riscos; name="clock.bmp,69c"; type=BMP;
+ load=&fff69c4b; exec=&355dd4d1; access=&03
Content-Disposition: attachment; filename="clock.bmp"
Content-Transfer-Encoding: base64
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py
index e5e51c6..f40d770 100644
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -77,7 +77,7 @@ class TestMessageAPI(TestEmailBase):
eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
eq(msg.get_all('xx', 'n/a'), 'n/a')
- def test_getset_charset(self):
+ def TEst_getset_charset(self):
eq = self.assertEqual
msg = Message()
eq(msg.get_charset(), None)
@@ -2600,6 +2600,18 @@ Here's the message body
part2 = msg.get_payload(1)
eq(part2.get_content_type(), 'application/riscos')
+ def test_crlf_flatten(self):
+ # Using newline='\n' preserves the crlfs in this input file.
+ with openfile('msg_26.txt', newline='\n') as fp:
+ text = fp.read()
+ msg = email.message_from_string(text)
+ s = StringIO()
+ g = Generator(s)
+ g.flatten(msg, linesep='\r\n')
+ self.assertEqual(s.getvalue(), text)
+
+ maxDiff = None
+
def test_multipart_digest_with_extra_mime_headers(self):
eq = self.assertEqual
neq = self.ndiffAssertEqual
@@ -2931,6 +2943,16 @@ class Test8BitBytesHandling(unittest.TestCase):
m = bfp.close()
self.assertEqual(str(m), self.latin_bin_msg_as7bit)
+ def test_crlf_flatten(self):
+ with openfile('msg_26.txt', 'rb') as fp:
+ text = fp.read()
+ msg = email.message_from_bytes(text)
+ s = BytesIO()
+ g = email.generator.BytesGenerator(s)
+ g.flatten(msg, linesep='\r\n')
+ self.assertEqual(s.getvalue(), text)
+ maxDiff = None
+
class TestBytesGeneratorIdempotent(TestIdempotent):