diff options
author | R David Murray <rdmurray@bitdance.com> | 2013-08-22 01:10:31 (GMT) |
---|---|---|
committer | R David Murray <rdmurray@bitdance.com> | 2013-08-22 01:10:31 (GMT) |
commit | 00ae435deef434f471e39bea3f3ab3a3e3cd90fe (patch) | |
tree | ec2589f471e6ee602b6bdf169c87db706ddd1a8c /Lib | |
parent | 0b169125caf768b2dce97537d10470b3fdee9ec9 (diff) | |
download | cpython-00ae435deef434f471e39bea3f3ab3a3e3cd90fe.zip cpython-00ae435deef434f471e39bea3f3ab3a3e3cd90fe.tar.gz cpython-00ae435deef434f471e39bea3f3ab3a3e3cd90fe.tar.bz2 |
#18324: set_payload now correctly handles binary input.
This also backs out the previous fixes for #14360, #1717, and #16564.
Those bugs were actually caused by the fact that set_payload didn't decode to
str, thus rendering the model inconsistent. This fix does mean the data
processed by the encoder functions goes through an extra encode/decode cycle,
but it means the model is always consistent. Future API updates will provide
a better way to encode payloads, which will bypass this minor de-optimization.
Tests by Vajrasky Kok.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/email/encoders.py | 20 | ||||
-rw-r--r-- | Lib/email/message.py | 2 | ||||
-rw-r--r-- | Lib/test/test_email/test_email.py | 36 |
3 files changed, 42 insertions, 16 deletions
diff --git a/Lib/email/encoders.py b/Lib/email/encoders.py index a0d062a..f9657f0 100644 --- a/Lib/email/encoders.py +++ b/Lib/email/encoders.py @@ -28,7 +28,7 @@ def encode_base64(msg): Also, add an appropriate Content-Transfer-Encoding header. """ - orig = msg.get_payload() + orig = msg.get_payload(decode=True) encdata = str(_bencode(orig), 'ascii') msg.set_payload(encdata) msg['Content-Transfer-Encoding'] = 'base64' @@ -40,20 +40,16 @@ def encode_quopri(msg): Also, add an appropriate Content-Transfer-Encoding header. """ - orig = msg.get_payload() - if isinstance(orig, str): - # If it is a string, the model data may have binary data encoded in via - # surrogateescape. Convert back to bytes so we can CTE encode it. - orig = orig.encode('ascii', 'surrogateescape') + orig = msg.get_payload(decode=True) encdata = _qencode(orig) - msg.set_payload(encdata.decode('ascii', 'surrogateescape')) + msg.set_payload(encdata) msg['Content-Transfer-Encoding'] = 'quoted-printable' def encode_7or8bit(msg): """Set the Content-Transfer-Encoding header to 7bit or 8bit.""" - orig = msg.get_payload() + orig = msg.get_payload(decode=True) if orig is None: # There's no payload. For backwards compatibility we use 7bit msg['Content-Transfer-Encoding'] = '7bit' @@ -75,16 +71,8 @@ def encode_7or8bit(msg): msg['Content-Transfer-Encoding'] = '8bit' else: msg['Content-Transfer-Encoding'] = '7bit' - if not isinstance(orig, str): - msg.set_payload(orig.decode('ascii', 'surrogateescape')) def encode_noop(msg): """Do nothing.""" - # Well, not quite *nothing*: in Python3 we have to turn bytes into a string - # in our internal surrogateescaped form in order to keep the model - # consistent. 
- orig = msg.get_payload() - if not isinstance(orig, str): - msg.set_payload(orig.decode('ascii', 'surrogateescape')) diff --git a/Lib/email/message.py b/Lib/email/message.py index 3feab52..5020a03 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -275,6 +275,8 @@ class Message: Optional charset sets the message's default character set. See set_charset() for details. """ + if isinstance(payload, bytes): + payload = payload.decode('ascii', 'surrogateescape') self._payload = payload if charset is not None: self.set_charset(charset) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 56be794..e11194b 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -593,6 +593,42 @@ class TestMessageAPI(TestEmailBase): "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt", msg['Content-Disposition']) + def test_binary_quopri_payload(self): + for charset in ('latin-1', 'ascii'): + msg = Message() + msg['content-type'] = 'text/plain; charset=%s' % charset + msg['content-transfer-encoding'] = 'quoted-printable' + msg.set_payload(b'foo=e6=96=87bar') + self.assertEqual( + msg.get_payload(decode=True), + b'foo\xe6\x96\x87bar', + 'get_payload returns wrong result with charset %s.' % charset) + + def test_binary_base64_payload(self): + for charset in ('latin-1', 'ascii'): + msg = Message() + msg['content-type'] = 'text/plain; charset=%s' % charset + msg['content-transfer-encoding'] = 'base64' + msg.set_payload(b'Zm9v5paHYmFy') + self.assertEqual( + msg.get_payload(decode=True), + b'foo\xe6\x96\x87bar', + 'get_payload returns wrong result with charset %s.' 
% charset) + + def test_binary_uuencode_payload(self): + for charset in ('latin-1', 'ascii'): + for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): + msg = Message() + msg['content-type'] = 'text/plain; charset=%s' % charset + msg['content-transfer-encoding'] = encoding + msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n") + self.assertEqual( + msg.get_payload(decode=True), + b'foo\xe6\x96\x87bar', + str(('get_payload returns wrong result ', + 'with charset {0} and encoding {1}.')).\ + format(charset, encoding)) + def test_add_header_with_name_only_param(self): msg = Message() msg.add_header('Content-Disposition', 'inline', foo_bar=None) |