diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2017-10-29 10:24:45 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-10-29 10:24:45 (GMT) |
commit | 9ea5a3a45b35d01b602e7e4da4f72b2db407e5c6 (patch) | |
tree | 11ee75c2a6cb1780e7aac0137058379498ca376e | |
parent | 0f1973d06e2116deafb19bbb9443b138187803c7 (diff) | |
download | cpython-9ea5a3a45b35d01b602e7e4da4f72b2db407e5c6.zip cpython-9ea5a3a45b35d01b602e7e4da4f72b2db407e5c6.tar.gz cpython-9ea5a3a45b35d01b602e7e4da4f72b2db407e5c6.tar.bz2 |
[3.6] bpo-20047: Make bytearray methods partition() and rpartition() reject separators that are not bytes-like objects. (GH-4158) (#4162)
(cherry picked from commit a2314283ff87c65e1745a42c2f2b716b1a209128)
-rw-r--r-- | Doc/library/stdtypes.rst | 10 | ||||
-rw-r--r-- | Lib/test/test_bytes.py | 35 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2017-10-28-19-11-05.bpo-20047.GuNAto.rst | 3 | ||||
-rw-r--r-- | Objects/bytearrayobject.c | 46 | ||||
-rw-r--r-- | Objects/bytesobject.c | 4 | ||||
-rw-r--r-- | Objects/clinic/bytearrayobject.c.h | 17 | ||||
-rw-r--r-- | Objects/clinic/bytesobject.c.h | 4 |
7 files changed, 85 insertions, 34 deletions
diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index b8c4d59..75e97b9 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -2564,8 +2564,9 @@ arbitrary binary data. bytearray.partition(sep) Split the sequence at the first occurrence of *sep*, and return a 3-tuple - containing the part before the separator, the separator, and the part - after the separator. If the separator is not found, return a 3-tuple + containing the part before the separator, the separator itself or its + bytearray copy, and the part after the separator. + If the separator is not found, return a 3-tuple containing a copy of the original sequence, followed by two empty bytes or bytearray objects. @@ -2620,8 +2621,9 @@ arbitrary binary data. bytearray.rpartition(sep) Split the sequence at the last occurrence of *sep*, and return a 3-tuple - containing the part before the separator, the separator, and the part - after the separator. If the separator is not found, return a 3-tuple + containing the part before the separator, the separator itself or its + bytearray copy, and the part after the separator. + If the separator is not found, return a 3-tuple containing a copy of the original sequence, followed by two empty bytes or bytearray objects. 
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index cd82fa6..6fcc26a 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -540,8 +540,16 @@ class BaseBytesTest: self.assertEqual(b.replace(b'i', b'a'), b'massassappa') self.assertEqual(b.replace(b'ss', b'x'), b'mixixippi') + def test_replace_int_error(self): + self.assertRaises(TypeError, self.type2test(b'a b').replace, 32, b'') + def test_split_string_error(self): self.assertRaises(TypeError, self.type2test(b'a b').split, ' ') + self.assertRaises(TypeError, self.type2test(b'a b').rsplit, ' ') + + def test_split_int_error(self): + self.assertRaises(TypeError, self.type2test(b'a b').split, 32) + self.assertRaises(TypeError, self.type2test(b'a b').rsplit, 32) def test_split_unicodewhitespace(self): for b in (b'a\x1Cb', b'a\x1Db', b'a\x1Eb', b'a\x1Fb'): @@ -550,9 +558,6 @@ class BaseBytesTest: b = self.type2test(b"\x09\x0A\x0B\x0C\x0D\x1C\x1D\x1E\x1F") self.assertEqual(b.split(), [b'\x1c\x1d\x1e\x1f']) - def test_rsplit_string_error(self): - self.assertRaises(TypeError, self.type2test(b'a b').rsplit, ' ') - def test_rsplit_unicodewhitespace(self): b = self.type2test(b"\x09\x0A\x0B\x0C\x0D\x1C\x1D\x1E\x1F") self.assertEqual(b.rsplit(), [b'\x1c\x1d\x1e\x1f']) @@ -568,6 +573,14 @@ class BaseBytesTest: self.assertEqual(b.rpartition(b'i'), (b'mississipp', b'i', b'')) self.assertEqual(b.rpartition(b'w'), (b'', b'', b'mississippi')) + def test_partition_string_error(self): + self.assertRaises(TypeError, self.type2test(b'a b').partition, ' ') + self.assertRaises(TypeError, self.type2test(b'a b').rpartition, ' ') + + def test_partition_int_error(self): + self.assertRaises(TypeError, self.type2test(b'a b').partition, 32) + self.assertRaises(TypeError, self.type2test(b'a b').rpartition, 32) + def test_pickling(self): for proto in range(pickle.HIGHEST_PROTOCOL + 1): for b in b"", b"a", b"abc", b"\xffab\x80", b"\0\0\377\0\0": @@ -600,9 +613,14 @@ class BaseBytesTest: 
self.assertEqual(self.type2test(b'abc').rstrip(memoryview(b'ac')), b'ab') def test_strip_string_error(self): - self.assertRaises(TypeError, self.type2test(b'abc').strip, 'b') - self.assertRaises(TypeError, self.type2test(b'abc').lstrip, 'b') - self.assertRaises(TypeError, self.type2test(b'abc').rstrip, 'b') + self.assertRaises(TypeError, self.type2test(b'abc').strip, 'ac') + self.assertRaises(TypeError, self.type2test(b'abc').lstrip, 'ac') + self.assertRaises(TypeError, self.type2test(b'abc').rstrip, 'ac') + + def test_strip_int_error(self): + self.assertRaises(TypeError, self.type2test(b' abc ').strip, 32) + self.assertRaises(TypeError, self.type2test(b' abc ').lstrip, 32) + self.assertRaises(TypeError, self.type2test(b' abc ').rstrip, 32) def test_center(self): # Fill character can be either bytes or bytearray (issue 12380) @@ -625,6 +643,11 @@ class BaseBytesTest: self.assertEqual(b.rjust(7, fill_type(b'-')), self.type2test(b'----abc')) + def test_xjust_int_error(self): + self.assertRaises(TypeError, self.type2test(b'abc').center, 7, 32) + self.assertRaises(TypeError, self.type2test(b'abc').ljust, 7, 32) + self.assertRaises(TypeError, self.type2test(b'abc').rjust, 7, 32) + def test_ord(self): b = self.type2test(b'\0A\x7f\x80\xff') self.assertEqual([ord(b[i:i+1]) for i in range(len(b))], diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-10-28-19-11-05.bpo-20047.GuNAto.rst b/Misc/NEWS.d/next/Core and Builtins/2017-10-28-19-11-05.bpo-20047.GuNAto.rst new file mode 100644 index 0000000..3594bac --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2017-10-28-19-11-05.bpo-20047.GuNAto.rst @@ -0,0 +1,3 @@ +Bytearray methods partition() and rpartition() now accept only bytes-like +objects as separator, as documented. In particular they now raise TypeError +rather of returning a bogus result when an integer is passed as a separator. 
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index a9c8ca6..7653322 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -102,6 +102,26 @@ PyByteArray_FromObject(PyObject *input) input, NULL); } +static PyObject * +_PyByteArray_FromBufferObject(PyObject *obj) +{ + PyObject *result; + Py_buffer view; + + if (PyObject_GetBuffer(obj, &view, PyBUF_FULL_RO) < 0) { + return NULL; + } + result = PyByteArray_FromStringAndSize(NULL, view.len); + if (result != NULL && + PyBuffer_ToContiguous(PyByteArray_AS_STRING(result), + &view, view.len, 'C') < 0) + { + Py_CLEAR(result); + } + PyBuffer_Release(&view); + return result; +} + PyObject * PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size) { @@ -534,7 +554,8 @@ bytearray_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi, if (values == (PyObject *)self) { /* Make a copy and call this function recursively */ int err; - values = PyByteArray_FromObject(values); + values = PyByteArray_FromStringAndSize(PyByteArray_AS_STRING(values), + PyByteArray_GET_SIZE(values)); if (values == NULL) return -1; err = bytearray_setslice(self, lo, hi, values); @@ -1381,19 +1402,19 @@ Partition the bytearray into three parts using the given separator. This will search for the separator sep in the bytearray. If the separator is found, returns a 3-tuple containing the part before the separator, the -separator itself, and the part after it. +separator itself, and the part after it as new bytearray objects. -If the separator is not found, returns a 3-tuple containing the original -bytearray object and two empty bytearray objects. +If the separator is not found, returns a 3-tuple containing the copy of the +original bytearray object and two empty bytearray objects. 
[clinic start generated code]*/ static PyObject * bytearray_partition(PyByteArrayObject *self, PyObject *sep) -/*[clinic end generated code: output=45d2525ddd35f957 input=86f89223892b70b5]*/ +/*[clinic end generated code: output=45d2525ddd35f957 input=8f644749ee4fc83a]*/ { PyObject *bytesep, *result; - bytesep = PyByteArray_FromObject(sep); + bytesep = _PyByteArray_FromBufferObject(sep); if (! bytesep) return NULL; @@ -1414,23 +1435,24 @@ bytearray.rpartition sep: object / -Partition the bytes into three parts using the given separator. +Partition the bytearray into three parts using the given separator. -This will search for the separator sep in the bytearray, starting and the end. +This will search for the separator sep in the bytearray, starting at the end. If the separator is found, returns a 3-tuple containing the part before the -separator, the separator itself, and the part after it. +separator, the separator itself, and the part after it as new bytearray +objects. If the separator is not found, returns a 3-tuple containing two empty bytearray -objects and the original bytearray object. +objects and the copy of the original bytearray object. [clinic start generated code]*/ static PyObject * bytearray_rpartition(PyByteArrayObject *self, PyObject *sep) -/*[clinic end generated code: output=440de3c9426115e8 input=5f4094f2de87c8f3]*/ +/*[clinic end generated code: output=440de3c9426115e8 input=7e3df3e6cb8fa0ac]*/ { PyObject *bytesep, *result; - bytesep = PyByteArray_FromObject(sep); + bytesep = _PyByteArray_FromBufferObject(sep); if (! bytesep) return NULL; diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 489062e..4950d01 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1832,7 +1832,7 @@ bytes.rpartition Partition the bytes into three parts using the given separator. -This will search for the separator sep in the bytes, starting and the end. If +This will search for the separator sep in the bytes, starting at the end. 
If the separator is found, returns a 3-tuple containing the part before the separator, the separator itself, and the part after it. @@ -1842,7 +1842,7 @@ objects and the original bytes object. static PyObject * bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep) -/*[clinic end generated code: output=191b114cbb028e50 input=67f689e63a62d478]*/ +/*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/ { return stringlib_rpartition( (PyObject*) self, diff --git a/Objects/clinic/bytearrayobject.c.h b/Objects/clinic/bytearrayobject.c.h index c75acb7..c164c79 100644 --- a/Objects/clinic/bytearrayobject.c.h +++ b/Objects/clinic/bytearrayobject.c.h @@ -214,10 +214,10 @@ PyDoc_STRVAR(bytearray_partition__doc__, "\n" "This will search for the separator sep in the bytearray. If the separator is\n" "found, returns a 3-tuple containing the part before the separator, the\n" -"separator itself, and the part after it.\n" +"separator itself, and the part after it as new bytearray objects.\n" "\n" -"If the separator is not found, returns a 3-tuple containing the original\n" -"bytearray object and two empty bytearray objects."); +"If the separator is not found, returns a 3-tuple containing the copy of the\n" +"original bytearray object and two empty bytearray objects."); #define BYTEARRAY_PARTITION_METHODDEF \ {"partition", (PyCFunction)bytearray_partition, METH_O, bytearray_partition__doc__}, @@ -226,14 +226,15 @@ PyDoc_STRVAR(bytearray_rpartition__doc__, "rpartition($self, sep, /)\n" "--\n" "\n" -"Partition the bytes into three parts using the given separator.\n" +"Partition the bytearray into three parts using the given separator.\n" "\n" -"This will search for the separator sep in the bytearray, starting and the end.\n" +"This will search for the separator sep in the bytearray, starting at the end.\n" "If the separator is found, returns a 3-tuple containing the part before the\n" -"separator, the separator itself, and the part after it.\n" 
+"separator, the separator itself, and the part after it as new bytearray\n" +"objects.\n" "\n" "If the separator is not found, returns a 3-tuple containing two empty bytearray\n" -"objects and the original bytearray object."); +"objects and the copy of the original bytearray object."); #define BYTEARRAY_RPARTITION_METHODDEF \ {"rpartition", (PyCFunction)bytearray_rpartition, METH_O, bytearray_rpartition__doc__}, @@ -711,4 +712,4 @@ bytearray_sizeof(PyByteArrayObject *self, PyObject *Py_UNUSED(ignored)) { return bytearray_sizeof_impl(self); } -/*[clinic end generated code: output=225342a680391b9c input=a9049054013a1b77]*/ +/*[clinic end generated code: output=8f022100f059226c input=a9049054013a1b77]*/ diff --git a/Objects/clinic/bytesobject.c.h b/Objects/clinic/bytesobject.c.h index a11ebd2..191de20 100644 --- a/Objects/clinic/bytesobject.c.h +++ b/Objects/clinic/bytesobject.c.h @@ -86,7 +86,7 @@ PyDoc_STRVAR(bytes_rpartition__doc__, "\n" "Partition the bytes into three parts using the given separator.\n" "\n" -"This will search for the separator sep in the bytes, starting and the end. If\n" +"This will search for the separator sep in the bytes, starting at the end. If\n" "the separator is found, returns a 3-tuple containing the part before the\n" "separator, the separator itself, and the part after it.\n" "\n" @@ -499,4 +499,4 @@ bytes_fromhex(PyTypeObject *type, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=2dc3c93cfd2dc440 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=4ac7e35150d47467 input=a9049054013a1b77]*/ |