From bb17d2b857d33e474f830c4a665f1fa412eb29f1 Mon Sep 17 00:00:00 2001 From: R David Murray Date: Fri, 9 Aug 2013 16:15:28 -0400 Subject: #18600: add policy to add_string, and as_bytes and __bytes__ methods. This was triggered by wanting to make the doctest in email.policy.rst pass; as_bytes and __bytes__ are clearly useful now that we have BytesGenerator. Also updated the Message docs to document the policy keyword that was added in 3.3. --- Doc/library/email.message.rst | 78 ++++++++++++++++++++++++++++++++++----- Doc/library/email.policy.rst | 3 +- Doc/whatsnew/3.4.rst | 20 ++++++++++ Lib/email/message.py | 44 ++++++++++++++++++---- Lib/test/test_email/test_email.py | 33 +++++++++++++++-- Misc/NEWS | 3 ++ 6 files changed, 160 insertions(+), 21 deletions(-) diff --git a/Doc/library/email.message.rst b/Doc/library/email.message.rst index 2c7be8b..7a74220 100644 --- a/Doc/library/email.message.rst +++ b/Doc/library/email.message.rst @@ -31,19 +31,32 @@ parameters, and for recursively walking over the object tree. Here are the methods of the :class:`Message` class: -.. class:: Message() +.. class:: Message(policy=compat32) - The constructor takes no arguments. + The *policy* argument determiens the :mod:`~email.policy` that will be used + to update the message model. The default value, :class:`compat32 + ` maintains backward compatibility with the + Python 3.2 version of the email package. For more information see the + :mod:`~email.policy` documentation. + .. versionchanged:: 3.3 The *policy* keyword argument was added. - .. method:: as_string(unixfrom=False, maxheaderlen=0) + + .. method:: as_string(unixfrom=False, maxheaderlen=0, policy=None) Return the entire message flattened as a string. When optional *unixfrom* - is ``True``, the envelope header is included in the returned string. - *unixfrom* defaults to ``False``. Flattening the message may trigger - changes to the :class:`Message` if defaults need to be filled in to - complete the transformation to a string (for example, MIME boundaries may - be generated or modified). + is true, the envelope header is included in the returned string. + *unixfrom* defaults to ``False``. For backward compabitility reasons, + *maxheaderlen* defaults to ``0``, so if you want a different value you + must override it explicitly (the value specified for *max_line_length* in + the policy will be ignored by this method). The *policy* argument may be + used to override the default policy obtained from the message instance. + This can be used to control some of the formatting produced by the + method, since the specified *policy* will be passed to the ``Generator``. + + Flattening the message may trigger changes to the :class:`Message` if + defaults need to be filled in to complete the transformation to a string + (for example, MIME boundaries may be generated or modified). Note that this method is provided as a convenience and may not always format the message the way you want. For example, by default it does @@ -59,10 +72,57 @@ Here are the methods of the :class:`Message` class: g.flatten(msg) text = fp.getvalue() + If the message object contains binary data that is not encoded according + to RFC standards, the non-compliant data will be replaced by unicode + "unknown character" code points. (See also :meth:`.as_bytes` and + :class:`~email.generator.BytesGenerator`.) + + .. versionchanged:: 3.4 the *policy* keyword argument was added. + .. method:: __str__() - Equivalent to ``as_string(unixfrom=True)``. + Equivalent to :meth:`.as_string()`. Allows ``str(msg)`` to produce a + string containing the formatted message. + + + .. method:: as_bytes(unixfrom=False, policy=None) + + Return the entire message flattened as a bytes object. When optional + *unixfrom* is true, the envelope header is included in the returned + string. *unixfrom* defaults to ``False``. The *policy* argument may be + used to override the default policy obtained from the message instance. + This can be used to control some of the formatting produced by the + method, since the specified *policy* will be passed to the + ``BytesGenerator``. + + Flattening the message may trigger changes to the :class:`Message` if + defaults need to be filled in to complete the transformation to a string + (for example, MIME boundaries may be generated or modified). + + Note that this method is provided as a convenience and may not always + format the message the way you want. For example, by default it does + not do the mangling of lines that begin with ``From`` that is + required by the unix mbox format. For more flexibility, instantiate a + :class:`~email.generator.BytesGenerator` instance and use its + :meth:`flatten` method directly. For example:: + + from io import BytesIO + from email.generator import BytesGenerator + fp = BytesIO() + g = BytesGenerator(fp, mangle_from_=True, maxheaderlen=60) + g.flatten(msg) + text = fp.getvalue() + + .. versionadded:: 3.4 + + + .. method:: __bytes__() + + Equivalent to :meth:`.as_bytes()`. Allows ``bytes(msg)`` to produce a + bytes object containing the formatted message. + + .. versionadded:: 3.4 .. method:: is_multipart() diff --git a/Doc/library/email.policy.rst b/Doc/library/email.policy.rst index 54ebb1c..93c5350 100644 --- a/Doc/library/email.policy.rst +++ b/Doc/library/email.policy.rst @@ -105,7 +105,8 @@ separators for the platform on which it is running:: >>> import os >>> with open('converted.txt', 'wb') as f: - ... f.write(msg.as_string(policy=msg.policy.clone(linesep=os.linesep))) + ... f.write(msg.as_bytes(policy=msg.policy.clone(linesep=os.linesep))) + 17 Policy objects can also be combined using the addition operator, producing a policy object whose settings are a combination of the non-default values of the diff --git a/Doc/whatsnew/3.4.rst b/Doc/whatsnew/3.4.rst index 80b4b01..6011f8e 100644 --- a/Doc/whatsnew/3.4.rst +++ b/Doc/whatsnew/3.4.rst @@ -195,6 +195,26 @@ The :meth:`~aifc.getparams` method now returns a namedtuple rather than a plain tuple. (Contributed by Claudiu Popa in :issue:`17818`.) +email +----- + +:meth:`~email.message.Message.as_string` now accepts a *policy* argument to +override the default policy of the message when generating a string +representation of it. This means that ``as_string`` can now be used in more +circumstances, instead of having to create and use a :mod:`~email.generator` in +order to pass formatting parameters to its ``flatten`` method. + +New method :meth:`~email.message.Message.as_bytes` added to produce a bytes +representation of the message in a fashion similar to how ``as_string`` +produces a string representation. It does not accept the *maxheaderlen* +argument, but does accept the *unixfrom* and *policy* arguments. The +:class:`~email.message.Message` :meth:`~email.message.Message.__bytes__` method +calls it, meaning that ``bytes(mymsg)`` will now produce the intuitive +result: a bytes object containing the fully formatted message. + +(Contributed by R. David Murray in :issue:`18600`.) + + functools --------- diff --git a/Lib/email/message.py b/Lib/email/message.py index 3feab52..b5f7b3a 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -132,22 +132,50 @@ class Message: def __str__(self): """Return the entire formatted message as a string. - This includes the headers, body, and envelope header. """ return self.as_string() - def as_string(self, unixfrom=False, maxheaderlen=0): + def as_string(self, unixfrom=False, maxheaderlen=0, policy=None): """Return the entire formatted message as a string. - Optional `unixfrom' when True, means include the Unix From_ envelope - header. - This is a convenience method and may not generate the message exactly - as you intend. For more flexibility, use the flatten() method of a - Generator instance. + Optional 'unixfrom', when true, means include the Unix From_ envelope + header. For backward compatibility reasons, if maxheaderlen is + not specified it defaults to 0, so you must override it explicitly + if you want a different maxheaderlen. 'policy' is passed to the + Generator instance used to serialize the mesasge; if it is not + specified the policy associated with the message instance is used. + + If the message object contains binary data that is not encoded + according to RFC standards, the non-compliant data will be replaced by + unicode "unknown character" code points. """ from email.generator import Generator + policy = self.policy if policy is None else policy fp = StringIO() - g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen) + g = Generator(fp, + mangle_from_=False, + maxheaderlen=maxheaderlen, + policy=policy) + g.flatten(self, unixfrom=unixfrom) + return fp.getvalue() + + def __bytes__(self): + """Return the entire formatted message as a bytes object. + """ + return self.as_bytes() + + def as_bytes(self, unixfrom=False, policy=None): + """Return the entire formatted message as a bytes object. + + Optional 'unixfrom', when true, means include the Unix From_ envelope + header. 'policy' is passed to the BytesGenerator instance used to + serialize the message; if not specified the policy associated with + the message instance is used. + """ + from email.generator import BytesGenerator + policy = self.policy if policy is None else policy + fp = BytesIO() + g = BytesGenerator(fp, mangle_from_=False, policy=policy) g.flatten(self, unixfrom=unixfrom) return fp.getvalue() diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 4c82952..3507b1e 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -249,15 +249,42 @@ class TestMessageAPI(TestEmailBase): self.assertTrue('TO' in msg) def test_as_string(self): - eq = self.ndiffAssertEqual msg = self._msgobj('msg_01.txt') with openfile('msg_01.txt') as fp: text = fp.read() - eq(text, str(msg)) + self.assertEqual(text, str(msg)) fullrepr = msg.as_string(unixfrom=True) lines = fullrepr.split('\n') self.assertTrue(lines[0].startswith('From ')) - eq(text, NL.join(lines[1:])) + self.assertEqual(text, NL.join(lines[1:])) + + def test_as_string_policy(self): + msg = self._msgobj('msg_01.txt') + newpolicy = msg.policy.clone(linesep='\r\n') + fullrepr = msg.as_string(policy=newpolicy) + s = StringIO() + g = Generator(s, policy=newpolicy) + g.flatten(msg) + self.assertEqual(fullrepr, s.getvalue()) + + def test_as_bytes(self): + msg = self._msgobj('msg_01.txt') + with openfile('msg_01.txt', 'rb') as fp: + data = fp.read() + self.assertEqual(data, bytes(msg)) + fullrepr = msg.as_bytes(unixfrom=True) + lines = fullrepr.split(b'\n') + self.assertTrue(lines[0].startswith(b'From ')) + self.assertEqual(data, b'\n'.join(lines[1:])) + + def test_as_bytes_policy(self): + msg = self._msgobj('msg_01.txt') + newpolicy = msg.policy.clone(linesep='\r\n') + fullrepr = msg.as_bytes(policy=newpolicy) + s = BytesIO() + g = BytesGenerator(s,policy=newpolicy) + g.flatten(msg) + self.assertEqual(fullrepr, s.getvalue()) # test_headerregistry.TestContentTypeHeader.bad_params def test_bad_param(self): diff --git a/Misc/NEWS b/Misc/NEWS index 261402e..b0e392f 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -22,6 +22,9 @@ Core and Builtins Library ------- +- Issue #18600: Added policy argument to email.message.Message.as_string, + and as_bytes and __bytes__ methods to Message. + - Issue #18671: Output more information when logging exceptions occur. - Issue #18621: Prevent the site module's patched builtins from keeping -- cgit v0.12