summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Antoine Pitrou <solipsis@pitrou.net> 2011-12-20 12:58:41 (GMT)
committer Antoine Pitrou <solipsis@pitrou.net> 2011-12-20 12:58:41 (GMT)
commit 08316769620c83e32a6913eee0a6555cd6e1efa9 (patch)
tree 4e5f419fe6e92ebfa05f0115c8925f83969acb93
parent 8691bff6db78a45fd89a385401ece64921867500 (diff)
download cpython-08316769620c83e32a6913eee0a6555cd6e1efa9.zip
cpython-08316769620c83e32a6913eee0a6555cd6e1efa9.tar.gz
cpython-08316769620c83e32a6913eee0a6555cd6e1efa9.tar.bz2
Issue #13637: "a2b" functions in the binascii module now accept ASCII-only unicode strings.
-rw-r--r-- Doc/library/binascii.rst 9
-rw-r--r-- Lib/test/test_binascii.py 34
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Modules/binascii.c 50
4 files changed, 85 insertions, 11 deletions
diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst
index 2aa3702..baf430d 100644
--- a/Doc/library/binascii.rst
+++ b/Doc/library/binascii.rst
@@ -20,8 +20,13 @@ higher-level modules.
.. note::
- Encoding and decoding functions do not accept Unicode strings. Only bytestring
- and bytearray objects can be processed.
+ ``a2b_*`` functions accept Unicode strings containing only ASCII characters.
+ Other functions only accept bytes and bytes-compatible objects (such as
+ bytearray objects and other objects implementing the buffer API).
+
+ .. versionchanged:: 3.3
+ ASCII-only Unicode strings are now accepted by the ``a2b_*`` functions.
+
The :mod:`binascii` module defines the following functions:
diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py
index 1e9e888..04d8f9d 100644
--- a/Lib/test/test_binascii.py
+++ b/Lib/test/test_binascii.py
@@ -208,9 +208,9 @@ class BinASCIITest(unittest.TestCase):
except Exception as err:
self.fail("{}({!r}) raises {!r}".format(func, empty, err))
- def test_unicode_strings(self):
- # Unicode strings are not accepted.
- for func in all_functions:
+ def test_unicode_b2a(self):
+ # Unicode strings are not accepted by b2a_* functions.
+ for func in set(all_functions) - set(a2b_functions) | {'rledecode_hqx'}:
try:
self.assertRaises(TypeError, getattr(binascii, func), "test")
except Exception as err:
@@ -218,6 +218,34 @@ class BinASCIITest(unittest.TestCase):
# crc_hqx needs 2 arguments
self.assertRaises(TypeError, binascii.crc_hqx, "test", 0)
+ def test_unicode_a2b(self):
+ # Unicode strings are accepted by a2b_* functions.
+ MAX_ALL = 45
+ raw = self.rawdata[:MAX_ALL]
+ for fa, fb in zip(a2b_functions, b2a_functions):
+ if fa == 'rledecode_hqx':
+ # Takes non-ASCII data
+ continue
+ a2b = getattr(binascii, fa)
+ b2a = getattr(binascii, fb)
+ try:
+ a = b2a(self.type2test(raw))
+ binary_res = a2b(a)
+ a = a.decode('ascii')
+ res = a2b(a)
+ except Exception as err:
+ self.fail("{}/{} conversion raises {!r}".format(fb, fa, err))
+ if fb == 'b2a_hqx':
+ # a2b_hqx returns a tuple
+ res, _ = res
+ binary_res, _ = binary_res
+ self.assertEqual(res, raw, "{}/{} conversion: "
+ "{!r} != {!r}".format(fb, fa, res, raw))
+ self.assertEqual(res, binary_res)
+ self.assertIsInstance(res, bytes)
+ # non-ASCII string
+ self.assertRaises(ValueError, a2b, "\x80")
+
class ArrayBinASCIITest(BinASCIITest):
def type2test(self, s):
diff --git a/Misc/NEWS b/Misc/NEWS
index 7f72133..6955258 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -419,6 +419,9 @@ Core and Builtins
Library
-------
+- Issue #13637: "a2b" functions in the binascii module now accept ASCII-only
+ unicode strings.
+
- Issue #13634: Add support for querying and disabling SSL compression.
- Issue #13627: Add support for SSL Elliptic Curve-based Diffie-Hellman
diff --git a/Modules/binascii.c b/Modules/binascii.c
index dc4fef5..ad5e1b1 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -183,6 +183,44 @@ static unsigned short crctab_hqx[256] = {
0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
+static int
+ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
+{
+ if (arg == NULL) {
+ PyBuffer_Release(buf);
+ return 1;
+ }
+ if (PyUnicode_Check(arg)) {
+ if (PyUnicode_READY(arg) < 0)
+ return 0;
+ if (!PyUnicode_IS_ASCII(arg)) {
+ PyErr_SetString(PyExc_ValueError,
+ "string argument should contain only ASCII characters");
+ return 0;
+ }
+ assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
+ buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
+ buf->len = PyUnicode_GET_LENGTH(arg);
+ buf->obj = NULL;
+ return 1;
+ }
+ if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
+ PyErr_Format(PyExc_TypeError,
+ "argument should be bytes, buffer or ASCII string, "
+ "not %R", Py_TYPE(arg));
+ return 0;
+ }
+ if (!PyBuffer_IsContiguous(buf, 'C')) {
+ PyErr_Format(PyExc_TypeError,
+ "argument should be a contiguous buffer, "
+ "not %R", Py_TYPE(arg));
+ PyBuffer_Release(buf);
+ return 0;
+ }
+ return Py_CLEANUP_SUPPORTED;
+}
+
+
PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
static PyObject *
@@ -196,7 +234,7 @@ binascii_a2b_uu(PyObject *self, PyObject *args)
PyObject *rv;
Py_ssize_t ascii_len, bin_len;
- if ( !PyArg_ParseTuple(args, "y*:a2b_uu", &pascii) )
+ if ( !PyArg_ParseTuple(args, "O&:a2b_uu", ascii_buffer_converter, &pascii) )
return NULL;
ascii_data = pascii.buf;
ascii_len = pascii.len;
@@ -370,7 +408,7 @@ binascii_a2b_base64(PyObject *self, PyObject *args)
Py_ssize_t ascii_len, bin_len;
int quad_pos = 0;
- if ( !PyArg_ParseTuple(args, "y*:a2b_base64", &pascii) )
+ if ( !PyArg_ParseTuple(args, "O&:a2b_base64", ascii_buffer_converter, &pascii) )
return NULL;
ascii_data = pascii.buf;
ascii_len = pascii.len;
@@ -546,7 +584,7 @@ binascii_a2b_hqx(PyObject *self, PyObject *args)
Py_ssize_t len;
int done = 0;
- if ( !PyArg_ParseTuple(args, "y*:a2b_hqx", &pascii) )
+ if ( !PyArg_ParseTuple(args, "O&:a2b_hqx", ascii_buffer_converter, &pascii) )
return NULL;
ascii_data = pascii.buf;
len = pascii.len;
@@ -1119,7 +1157,7 @@ binascii_unhexlify(PyObject *self, PyObject *args)
char* retbuf;
Py_ssize_t i, j;
- if (!PyArg_ParseTuple(args, "y*:a2b_hex", &parg))
+ if (!PyArg_ParseTuple(args, "O&:a2b_hex", ascii_buffer_converter, &parg))
return NULL;
argbuf = parg.buf;
arglen = parg.len;
@@ -1197,8 +1235,8 @@ binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
static char *kwlist[] = {"data", "header", NULL};
int header = 0;
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i", kwlist, &pdata,
- &header))
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|i:a2b_qp", kwlist,
+ ascii_buffer_converter, &pdata, &header))
return NULL;
data = pdata.buf;
datalen = pdata.len;