From 0e225aa09bb8059c333424d58beecd833b2d2b6c Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 22 May 2007 20:24:57 +0000 Subject: Make binascii use byte strings everywhere (in and out). --- Lib/test/test_binascii.py | 87 +++++++++++++++++----------------- Modules/binascii.c | 118 ++++++++++++++++++++++++++++------------------ 2 files changed, 116 insertions(+), 89 deletions(-) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index a4ee7f8..d1a4cfb 100755 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -7,10 +7,10 @@ import binascii class BinASCIITest(unittest.TestCase): # Create binary test data - data = "The quick brown fox jumps over the lazy dog.\r\n" + data = b"The quick brown fox jumps over the lazy dog.\r\n" # Be slow so we don't depend on other modules - data += "".join(map(chr, range(256))) - data += "\r\nHello world.\n" + data += bytes(range(256)) + data += b"\r\nHello world.\n" def test_exceptions(self): # Check module exceptions @@ -40,10 +40,10 @@ class BinASCIITest(unittest.TestCase): b = self.data[i:i+MAX_BASE64] a = binascii.b2a_base64(b) lines.append(a) - res = "" + res = bytes() for line in lines: b = binascii.a2b_base64(line) - res = res + b + res += b self.assertEqual(res, self.data) def test_base64invalid(self): @@ -56,24 +56,23 @@ class BinASCIITest(unittest.TestCase): a = binascii.b2a_base64(b) lines.append(a) - fillers = "" - valid = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/" + fillers = bytes() + valid = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/" for i in range(256): - c = chr(i) - if c not in valid: - fillers += c + if i not in valid: + fillers.append(i) def addnoise(line): noise = fillers ratio = len(line) // len(noise) - res = "" + res = bytes() while line and noise: if len(line) // len(noise) > ratio: c, line = line[0], line[1:] else: c, noise = noise[0], noise[1:] - res += c + res.append(c) return res + noise + line - res = "" + res = bytes() for line in map(addnoise, lines): b = binascii.a2b_base64(line) res += b @@ -81,7 +80,7 @@ class BinASCIITest(unittest.TestCase): # Test base64 with just invalid characters, which should return # empty strings. TBD: shouldn't it raise an exception instead ? - self.assertEqual(binascii.a2b_base64(fillers), '') + self.assertEqual(binascii.a2b_base64(fillers), b'') def test_uu(self): MAX_UU = 45 @@ -90,23 +89,23 @@ class BinASCIITest(unittest.TestCase): b = self.data[i:i+MAX_UU] a = binascii.b2a_uu(b) lines.append(a) - res = "" + res = bytes() for line in lines: b = binascii.a2b_uu(line) res += b self.assertEqual(res, self.data) - self.assertEqual(binascii.a2b_uu("\x7f"), "\x00"*31) - self.assertEqual(binascii.a2b_uu("\x80"), "\x00"*32) - self.assertEqual(binascii.a2b_uu("\xff"), "\x00"*31) - self.assertRaises(binascii.Error, binascii.a2b_uu, "\xff\x00") - self.assertRaises(binascii.Error, binascii.a2b_uu, "!!!!") + self.assertEqual(binascii.a2b_uu(b"\x7f"), b"\x00"*31) + self.assertEqual(binascii.a2b_uu(b"\x80"), b"\x00"*32) + self.assertEqual(binascii.a2b_uu(b"\xff"), b"\x00"*31) + self.assertRaises(binascii.Error, binascii.a2b_uu, b"\xff\x00") + self.assertRaises(binascii.Error, binascii.a2b_uu, b"!!!!") - self.assertRaises(binascii.Error, binascii.b2a_uu, 46*"!") + self.assertRaises(binascii.Error, binascii.b2a_uu, 46*b"!") def test_crc32(self): - crc = binascii.crc32("Test the CRC-32 of") - crc = binascii.crc32(" this string.", crc) + crc = binascii.crc32(b"Test the CRC-32 of") + crc = binascii.crc32(b" this string.", crc) self.assertEqual(crc, 1571220330) self.assertRaises(TypeError, binascii.crc32) @@ -115,16 +114,16 @@ class BinASCIITest(unittest.TestCase): def test_hex(self): # test hexlification - s = '{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000' + s = b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000' t = binascii.b2a_hex(s) u = binascii.a2b_hex(t) self.assertEqual(s, u) self.assertRaises(TypeError, binascii.a2b_hex, t[:-1]) - self.assertRaises(TypeError, binascii.a2b_hex, t[:-1] + 'q') + self.assertRaises(TypeError, binascii.a2b_hex, t[:-1] + b'q') # Verify the treatment of Unicode strings if test_support.have_unicode: - self.assertEqual(binascii.hexlify(str('a', 'ascii')), '61') + self.assertEqual(binascii.hexlify('a'), b'61') def test_qp(self): # A test for SF bug 534347 (segfaults without the proper fix) @@ -134,28 +133,29 @@ class BinASCIITest(unittest.TestCase): pass else: self.fail("binascii.a2b_qp(**{1:1}) didn't raise TypeError") - self.assertEqual(binascii.a2b_qp("= "), "= ") - self.assertEqual(binascii.a2b_qp("=="), "=") - self.assertEqual(binascii.a2b_qp("=AX"), "=AX") + self.assertEqual(binascii.a2b_qp(b"= "), b"= ") + self.assertEqual(binascii.a2b_qp(b"=="), b"=") + self.assertEqual(binascii.a2b_qp(b"=AX"), b"=AX") self.assertRaises(TypeError, binascii.b2a_qp, foo="bar") - self.assertEqual(binascii.a2b_qp("=00\r\n=00"), "\x00\r\n\x00") + self.assertEqual(binascii.a2b_qp(b"=00\r\n=00"), b"\x00\r\n\x00") self.assertEqual( - binascii.b2a_qp("\xff\r\n\xff\n\xff"), - "=FF\r\n=FF\r\n=FF" + binascii.b2a_qp(b"\xff\r\n\xff\n\xff"), + b"=FF\r\n=FF\r\n=FF" ) self.assertEqual( - binascii.b2a_qp("0"*75+"\xff\r\n\xff\r\n\xff"), - "0"*75+"=\r\n=FF\r\n=FF\r\n=FF" + binascii.b2a_qp(b"0"*75+b"\xff\r\n\xff\r\n\xff"), + b"0"*75+b"=\r\n=FF\r\n=FF\r\n=FF" ) - self.assertEqual(binascii.b2a_qp('\0\n'), '=00\n') - self.assertEqual(binascii.b2a_qp('\0\n', quotetabs=True), '=00\n') - self.assertEqual(binascii.b2a_qp('foo\tbar\t\n'), 'foo\tbar=09\n') - self.assertEqual(binascii.b2a_qp('foo\tbar\t\n', quotetabs=True), 'foo=09bar=09\n') + self.assertEqual(binascii.b2a_qp(b'\0\n'), b'=00\n') + self.assertEqual(binascii.b2a_qp(b'\0\n', quotetabs=True), b'=00\n') + self.assertEqual(binascii.b2a_qp(b'foo\tbar\t\n'), b'foo\tbar=09\n') + self.assertEqual(binascii.b2a_qp(b'foo\tbar\t\n', quotetabs=True), + b'foo=09bar=09\n') - self.assertEqual(binascii.b2a_qp('.'), '=2E') - self.assertEqual(binascii.b2a_qp('.\n'), '=2E\n') - self.assertEqual(binascii.b2a_qp('a.\n'), 'a.\n') + self.assertEqual(binascii.b2a_qp(b'.'), b'=2E') + self.assertEqual(binascii.b2a_qp(b'.\n'), b'=2E\n') + self.assertEqual(binascii.b2a_qp(b'a.\n'), b'a.\n') def test_empty_string(self): # A test for SF bug #1022953. Make sure SystemError is not raised. @@ -164,7 +164,10 @@ class BinASCIITest(unittest.TestCase): 'a2b_hqx', 'a2b_base64', 'rlecode_hqx', 'b2a_uu', 'rledecode_hqx']: f = getattr(binascii, n) - f('') + try: + f(b'') + except SystemError as err: + self.fail("%s(b'') raises SystemError: %s" % (n, err)) binascii.crc_hqx('', 0) def test_main(): diff --git a/Modules/binascii.c b/Modules/binascii.c index 00f950d..8db73e7 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -138,7 +138,7 @@ static char table_a2b_base64[] = { #define BASE64_PAD '=' /* Max binary chunk size; limited only by available memory */ -#define BASE64_MAXBIN (INT_MAX/2 - sizeof(PyStringObject) - 3) +#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2) static unsigned char table_b2a_base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; @@ -200,9 +200,9 @@ binascii_a2b_uu(PyObject *self, PyObject *args) ascii_len--; /* Allocate the buffer */ - if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL ) + if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL ) return NULL; - bin_data = (unsigned char *)PyString_AsString(rv); + bin_data = (unsigned char *)PyBytes_AS_STRING(rv); for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) { /* XXX is it really best to add NULs if there's no more data */ @@ -277,9 +277,9 @@ binascii_b2a_uu(PyObject *self, PyObject *args) } /* We're lazy and allocate to much (fixed up later) */ - if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2+2)) == NULL ) + if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len*2+2)) == NULL ) return NULL; - ascii_data = (unsigned char *)PyString_AsString(rv); + ascii_data = (unsigned char *)PyBytes_AS_STRING(rv); /* Store the length */ *ascii_data++ = ' ' + (bin_len & 077); @@ -301,8 +301,12 @@ binascii_b2a_uu(PyObject *self, PyObject *args) } *ascii_data++ = '\n'; /* Append a courtesy newline */ - _PyString_Resize(&rv, (ascii_data - - (unsigned char *)PyString_AsString(rv))); + if (PyBytes_Resize(rv, + (ascii_data - + (unsigned char *)PyBytes_AS_STRING(rv))) < 0) { + Py_DECREF(rv); + rv = NULL; + } return rv; } @@ -351,9 +355,9 @@ binascii_a2b_base64(PyObject *self, PyObject *args) bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */ /* Allocate the buffer */ - if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL ) + if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL ) return NULL; - bin_data = (unsigned char *)PyString_AsString(rv); + bin_data = (unsigned char *)PyBytes_AS_STRING(rv); bin_len = 0; for( ; ascii_len > 0; ascii_len--, ascii_data++) { @@ -412,13 +416,17 @@ binascii_a2b_base64(PyObject *self, PyObject *args) /* And set string size correctly. If the result string is empty ** (because the input was all invalid) return the shared empty - ** string instead; _PyString_Resize() won't do this for us. + ** string instead; PyBytes_Resize() won't do this for us. */ - if (bin_len > 0) - _PyString_Resize(&rv, bin_len); + if (bin_len > 0) { + if (PyBytes_Resize(rv, bin_len) < 0) { + Py_DECREF(rv); + rv = NULL; + } + } else { Py_DECREF(rv); - rv = PyString_FromString(""); + rv = PyBytes_FromStringAndSize("", 0); } return rv; } @@ -445,9 +453,9 @@ binascii_b2a_base64(PyObject *self, PyObject *args) /* We're lazy and allocate too much (fixed up later). "+3" leaves room for up to two pad characters and a trailing newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */ - if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL ) + if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL ) return NULL; - ascii_data = (unsigned char *)PyString_AsString(rv); + ascii_data = (unsigned char *)PyBytes_AS_STRING(rv); for( ; bin_len > 0 ; bin_len--, bin_data++ ) { /* Shift the data into our buffer */ @@ -471,8 +479,12 @@ binascii_b2a_base64(PyObject *self, PyObject *args) } *ascii_data++ = '\n'; /* Append a courtesy newline */ - _PyString_Resize(&rv, (ascii_data - - (unsigned char *)PyString_AsString(rv))); + if (PyBytes_Resize(rv, + (ascii_data - + (unsigned char *)PyBytes_AS_STRING(rv))) < 0) { + Py_DECREF(rv); + rv = NULL; + } return rv; } @@ -495,9 +507,9 @@ binascii_a2b_hqx(PyObject *self, PyObject *args) /* Allocate a string that is too big (fixed later) Add two to the initial length to prevent interning which would preclude subsequent resizing. */ - if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL ) + if ( (rv=PyBytes_FromStringAndSize(NULL, len+2)) == NULL ) return NULL; - bin_data = (unsigned char *)PyString_AsString(rv); + bin_data = (unsigned char *)PyBytes_AS_STRING(rv); for( ; len > 0 ; len--, ascii_data++ ) { /* Get the byte and look it up */ @@ -531,8 +543,12 @@ binascii_a2b_hqx(PyObject *self, PyObject *args) Py_DECREF(rv); return NULL; } - _PyString_Resize( - &rv, (bin_data - (unsigned char *)PyString_AsString(rv))); + if (PyBytes_Resize(rv, + (bin_data - + (unsigned char *)PyBytes_AS_STRING(rv))) < 0) { + Py_DECREF(rv); + rv = NULL; + } if (rv) { PyObject *rrv = Py_BuildValue("Oi", rv, done); Py_DECREF(rv); @@ -556,9 +572,9 @@ binascii_rlecode_hqx(PyObject *self, PyObject *args) return NULL; /* Worst case: output is twice as big as input (fixed later) */ - if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL ) + if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL ) return NULL; - out_data = (unsigned char *)PyString_AsString(rv); + out_data = (unsigned char *)PyBytes_AS_STRING(rv); for( in=0; in 0 ; len--, bin_data++ ) { /* Shift into our buffer, and output any 6bits ready */ @@ -624,8 +644,12 @@ binascii_b2a_hqx(PyObject *self, PyObject *args) leftchar <<= (6-leftbits); *ascii_data++ = table_b2a_hqx[leftchar & 0x3f]; } - _PyString_Resize(&rv, (ascii_data - - (unsigned char *)PyString_AsString(rv))); + if (PyBytes_Resize(rv, + (ascii_data - + (unsigned char *)PyBytes_AS_STRING(rv))) < 0) { + Py_DECREF(rv); + rv = NULL; + } return rv; } @@ -644,14 +668,14 @@ binascii_rledecode_hqx(PyObject *self, PyObject *args) /* Empty string is a special case */ if ( in_len == 0 ) - return PyString_FromString(""); + return PyBytes_FromStringAndSize("", 0); /* Allocate a buffer of reasonable size. Resized when needed */ out_len = in_len*2; - if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL ) + if ( (rv=PyBytes_FromStringAndSize(NULL, out_len)) == NULL ) return NULL; out_len_left = out_len; - out_data = (unsigned char *)PyString_AsString(rv); + out_data = (unsigned char *)PyBytes_AS_STRING(rv); /* ** We need two macros here to get/put bytes and handle @@ -670,9 +694,9 @@ binascii_rledecode_hqx(PyObject *self, PyObject *args) #define OUTBYTE(b) \ do { \ if ( --out_len_left < 0 ) { \ - _PyString_Resize(&rv, 2*out_len); \ - if ( rv == NULL ) return NULL; \ - out_data = (unsigned char *)PyString_AsString(rv) \ + if (PyBytes_Resize(rv, 2*out_len) < 0) \ + { Py_DECREF(rv); return NULL; } \ + out_data = (unsigned char *)PyBytes_AS_STRING(rv) \ + out_len; \ out_len_left = out_len-1; \ out_len = out_len * 2; \ @@ -720,8 +744,12 @@ binascii_rledecode_hqx(PyObject *self, PyObject *args) OUTBYTE(in_byte); } } - _PyString_Resize(&rv, (out_data - - (unsigned char *)PyString_AsString(rv))); + if (PyBytes_Resize(rv, + (out_data - + (unsigned char *)PyBytes_AS_STRING(rv))) < 0) { + Py_DECREF(rv); + rv = NULL; + } return rv; } @@ -912,12 +940,10 @@ binascii_hexlify(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "s#:b2a_hex", &argbuf, &arglen)) return NULL; - retval = PyString_FromStringAndSize(NULL, arglen*2); + retval = PyBytes_FromStringAndSize(NULL, arglen*2); if (!retval) return NULL; - retbuf = PyString_AsString(retval); - if (!retbuf) - goto finally; + retbuf = PyBytes_AS_STRING(retval); /* make hex version of string, taken from shamodule.c */ for (i=j=0; i < arglen; i++) { @@ -978,12 +1004,10 @@ binascii_unhexlify(PyObject *self, PyObject *args) return NULL; } - retval = PyString_FromStringAndSize(NULL, (arglen/2)); + retval = PyBytes_FromStringAndSize(NULL, (arglen/2)); if (!retval) return NULL; - retbuf = PyString_AsString(retval); - if (!retbuf) - goto finally; + retbuf = PyBytes_AS_STRING(retval); for (i=j=0; i < arglen; i += 2) { int top = to_int(Py_CHARMASK(argbuf[i])); @@ -1095,7 +1119,7 @@ binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs) out++; } } - if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) { + if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) { PyMem_Free(odata); return NULL; } @@ -1295,7 +1319,7 @@ binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs) } } } - if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) { + if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) { PyMem_Free(odata); return NULL; } -- cgit v0.12