From f1046ca8173380e2c320c56e1cdc911493371057 Mon Sep 17 00:00:00 2001 From: Florent Xicluna Date: Tue, 27 Jul 2010 21:20:15 +0000 Subject: Issue #4770: Restrict binascii module to accept only bytes (as specified). And fix the email package to encode to ASCII instead of ``raw-unicode-escape`` before ASCII-to-binary decoding. --- Doc/library/binascii.rst | 15 ++++++++++++++- Lib/email/base64mime.py | 4 ++-- Lib/email/message.py | 6 ++++-- Lib/test/test_binascii.py | 26 +++++++++++++------------- Lib/test/test_struct.py | 1 + Misc/NEWS | 4 ++++ Modules/binascii.c | 8 ++++---- 7 files changed, 42 insertions(+), 22 deletions(-) diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index 2f7851a..a34c843 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -18,6 +18,11 @@ use these functions directly but use wrapper modules like :mod:`uu`, low-level functions written in C for greater speed that are used by the higher-level modules. +.. note:: + + Encoding and decoding functions do not accept Unicode strings. Only bytestring + and bytearray objects can be processed. + The :mod:`binascii` module defines the following functions: @@ -54,6 +59,9 @@ The :mod:`binascii` module defines the following functions: data. More than one line may be passed at a time. If the optional argument *header* is present and true, underscores will be decoded as spaces. + .. versionchanged:: 3.2 + accept only bytestring or bytearray object as input. + .. function:: b2a_qp(data, quotetabs=False, istext=True, header=False) @@ -83,6 +91,9 @@ The :mod:`binascii` module defines the following functions: decompressed data, unless data input data ends in an orphaned repeat indicator, in which case the :exc:`Incomplete` exception is raised. + .. versionchanged:: 3.2 + accept only bytestring or bytearray object as input. + .. function:: rlecode_hqx(data) @@ -139,6 +150,9 @@ The :mod:`binascii` module defines the following functions: of hexadecimal digits (which can be upper or lower case), otherwise a :exc:`TypeError` is raised. + .. versionchanged:: 3.2 + accept only bytestring or bytearray object as input. + .. exception:: Error @@ -164,4 +178,3 @@ The :mod:`binascii` module defines the following functions: Module :mod:`quopri` Support for quoted-printable encoding used in MIME email messages. - diff --git a/Lib/email/base64mime.py b/Lib/email/base64mime.py index 6cbfdf6..f3bbac1 100644 --- a/Lib/email/base64mime.py +++ b/Lib/email/base64mime.py @@ -74,12 +74,12 @@ def header_encode(header_bytes, charset='iso-8859-1'): def body_encode(s, maxlinelen=76, eol=NL): - """Encode a string with base64. + r"""Encode a string with base64. Each line will be wrapped at, at most, maxlinelen characters (defaults to 76 characters). - Each line of encoded text will end with eol, which defaults to "\\n". Set + Each line of encoded text will end with eol, which defaults to "\n". Set this to "\r\n" if you will be using the result of this function directly in an email. """ diff --git a/Lib/email/message.py b/Lib/email/message.py index 27a577d..520d63d 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -198,17 +198,19 @@ class Message: return None cte = self.get('content-transfer-encoding', '').lower() if cte == 'quoted-printable': + if isinstance(payload, str): + payload = payload.encode('ascii') return utils._qdecode(payload) elif cte == 'base64': try: if isinstance(payload, str): - payload = payload.encode('raw-unicode-escape') + payload = payload.encode('ascii') return base64.b64decode(payload) except binascii.Error: # Incorrect padding pass elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): - in_file = BytesIO(payload.encode('raw-unicode-escape')) + in_file = BytesIO(payload.encode('ascii')) out_file = BytesIO() try: uu.decode(in_file, out_file, quiet=True) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 3c6d88f..1e9e888 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -55,7 +55,7 @@ class BinASCIITest(unittest.TestCase): "{!r} != {!r}".format(fb, fa, res, raw)) self.assertIsInstance(res, bytes) self.assertIsInstance(a, bytes) - self.assertLess(max(c for c in a), 128) + self.assertLess(max(a), 128) self.assertIsInstance(binascii.crc_hqx(raw, 0), int) self.assertIsInstance(binascii.crc32(raw), int) @@ -167,7 +167,7 @@ class BinASCIITest(unittest.TestCase): def test_qp(self): # A test for SF bug 534347 (segfaults without the proper fix) try: - binascii.a2b_qp("", **{1:1}) + binascii.a2b_qp(b"", **{1:1}) except TypeError: pass else: @@ -179,12 +179,10 @@ class BinASCIITest(unittest.TestCase): self.assertEqual(binascii.a2b_qp(b"=00\r\n=00"), b"\x00\r\n\x00") self.assertEqual( binascii.b2a_qp(b"\xff\r\n\xff\n\xff"), - b"=FF\r\n=FF\r\n=FF" - ) + b"=FF\r\n=FF\r\n=FF") self.assertEqual( binascii.b2a_qp(b"0"*75+b"\xff\r\n\xff\r\n\xff"), - b"0"*75+b"=\r\n=FF\r\n=FF\r\n=FF" - ) + b"0"*75+b"=\r\n=FF\r\n=FF\r\n=FF") self.assertEqual(binascii.b2a_qp(b'\0\n'), b'=00\n') self.assertEqual(binascii.b2a_qp(b'\0\n', quotetabs=True), b'=00\n') @@ -210,13 +208,15 @@ class BinASCIITest(unittest.TestCase): except Exception as err: self.fail("{}({!r}) raises {!r}".format(func, empty, err)) - def test_no_binary_strings(self): - # b2a_ must not accept strings - for f in (binascii.b2a_uu, binascii.b2a_base64, - binascii.b2a_hqx, binascii.b2a_qp, - binascii.hexlify, binascii.rlecode_hqx, - binascii.crc_hqx, binascii.crc32): - self.assertRaises(TypeError, f, "test") + def test_unicode_strings(self): + # Unicode strings are not accepted. + for func in all_functions: + try: + self.assertRaises(TypeError, getattr(binascii, func), "test") + except Exception as err: + self.fail('{}("test") raises {!r}'.format(func, err)) + # crc_hqx needs 2 arguments + self.assertRaises(TypeError, binascii.crc_hqx, "test", 0) class ArrayBinASCIITest(BinASCIITest): diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index b923f45..6ca35ca 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -213,6 +213,7 @@ class StructTest(unittest.TestCase): expected = '%x' % expected if len(expected) & 1: expected = "0" + expected + expected = expected.encode('ascii') expected = unhexlify(expected) expected = (b"\x00" * (self.bytesize - len(expected)) + expected) diff --git a/Misc/NEWS b/Misc/NEWS index 3f565f6..2437614 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -473,6 +473,10 @@ C-API Library ------- +- Issue #4770: Restrict binascii module to accept only bytes (as specified). + And fix the email package to encode to ASCII instead of + ``raw-unicode-escape`` before ASCII-to-binary decoding. + - Issue #9384: python -m tkinter will now display a simple demo applet. - The default size of the re module's compiled regular expression cache has diff --git a/Modules/binascii.c b/Modules/binascii.c index d21404b..23ce3f0 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -546,7 +546,7 @@ binascii_a2b_hqx(PyObject *self, PyObject *args) Py_ssize_t len; int done = 0; - if ( !PyArg_ParseTuple(args, "s*:a2b_hqx", &pascii) ) + if ( !PyArg_ParseTuple(args, "y*:a2b_hqx", &pascii) ) return NULL; ascii_data = pascii.buf; len = pascii.len; @@ -750,7 +750,7 @@ binascii_rledecode_hqx(PyObject *self, PyObject *args) PyObject *rv; Py_ssize_t in_len, out_len, out_len_left; - if ( !PyArg_ParseTuple(args, "s*:rledecode_hqx", &pin) ) + if ( !PyArg_ParseTuple(args, "y*:rledecode_hqx", &pin) ) return NULL; in_data = pin.buf; in_len = pin.len; @@ -1121,7 +1121,7 @@ binascii_unhexlify(PyObject *self, PyObject *args) char* retbuf; Py_ssize_t i, j; - if (!PyArg_ParseTuple(args, "s*:a2b_hex", &parg)) + if (!PyArg_ParseTuple(args, "y*:a2b_hex", &parg)) return NULL; argbuf = parg.buf; arglen = parg.len; @@ -1199,7 +1199,7 @@ binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs) static char *kwlist[] = {"data", "header", NULL}; int header = 0; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i", kwlist, &pdata, + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i", kwlist, &pdata, &header)) return NULL; data = pdata.buf; -- cgit v0.12