From ad7d8d10b70b62b25fc8ebd1a6bfef0c008a232a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 13 Apr 2007 01:39:34 +0000 Subject: Rough and dirty job -- allow concatenation of bytes and arbitrary buffer-supporting objects (Unicode always excluded), and also of str and bytes. (For some reason u"" + b"" doesn't fail, I'll investigate later.) --- Include/bytesobject.h | 1 + Lib/test/test_bytes.py | 54 +++++++++--- Objects/bytesobject.c | 224 ++++++++++++++++++++++++++++++++++++------------- Objects/object.c | 3 +- Objects/stringobject.c | 2 + 5 files changed, 212 insertions(+), 72 deletions(-) diff --git a/Include/bytesobject.h b/Include/bytesobject.h index 849078d..bc385c1 100644 --- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -34,6 +34,7 @@ PyAPI_DATA(PyTypeObject) PyBytes_Type; /* Direct API functions */ PyAPI_FUNC(PyObject *) PyBytes_FromObject(PyObject *); +PyAPI_FUNC(PyObject *) PyBytes_Concat(PyObject *, PyObject *); PyAPI_FUNC(PyObject *) PyBytes_FromStringAndSize(const char *, Py_ssize_t); PyAPI_FUNC(Py_ssize_t) PyBytes_Size(PyObject *); PyAPI_FUNC(char *) PyBytes_AsString(PyObject *); diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 682f5d7..1d826b6 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -225,7 +225,7 @@ class BytesTest(unittest.TestCase): # Skip step 0 (invalid) for step in indices[1:]: self.assertEqual(b[start:stop:step], bytes(L[start:stop:step])) - + def test_regexps(self): def by(s): return bytes(map(ord, s)) @@ -298,7 +298,7 @@ class BytesTest(unittest.TestCase): b[3:5] = [3, 4, 5, 6] self.assertEqual(b, bytes(range(10))) - + b[3:0] = [42, 42, 42] self.assertEqual(b, bytes([0, 1, 2, 42, 42, 42, 3, 4, 5, 6, 7, 8, 9])) @@ -317,7 +317,7 @@ class BytesTest(unittest.TestCase): L[start:stop:step] = data b[start:stop:step] = data self.assertEquals(b, bytes(L)) - + del L[start:stop:step] del b[start:stop:step] self.assertEquals(b, bytes(L)) @@ -371,8 +371,10 @@ class BytesTest(unittest.TestCase): b1 = bytes("abc") b2 = bytes("def") self.assertEqual(b1 + b2, bytes("abcdef")) - self.assertRaises(TypeError, lambda: b1 + "def") - self.assertRaises(TypeError, lambda: "abc" + b2) + self.assertEqual(b1 + "def", bytes("abcdef")) + self.assertEqual("def" + b1, bytes("defabc")) + self.assertRaises(TypeError, lambda: b1 + u"def") + ##self.assertRaises(TypeError, lambda: u"abc" + b2) # XXX FIXME def test_repeat(self): b = bytes("abc") @@ -393,6 +395,14 @@ class BytesTest(unittest.TestCase): self.assertEqual(b, bytes("abcdef")) self.assertEqual(b, b1) self.failUnless(b is b1) + b += "xyz" + self.assertEqual(b, b"abcdefxyz") + try: + b += u"" + except TypeError: + pass + else: + self.fail("bytes += unicode didn't raise TypeError") def test_irepeat(self): b = bytes("abc") @@ -490,7 +500,7 @@ class BytesTest(unittest.TestCase): a.extend(a) self.assertEqual(a, orig + orig) self.assertEqual(a[5:], orig) - + def test_remove(self): b = b'hello' b.remove(ord('l')) @@ -643,14 +653,36 @@ class BytesTest(unittest.TestCase): q = pm.loads(ps) self.assertEqual(b, q) + def test_strip(self): + b = b'mississippi' + self.assertEqual(b.strip(b'i'), b'mississipp') + self.assertEqual(b.strip(b'm'), b'ississippi') + self.assertEqual(b.strip(b'pi'), b'mississ') + self.assertEqual(b.strip(b'im'), b'ssissipp') + self.assertEqual(b.strip(b'pim'), b'ssiss') + + def test_lstrip(self): + b = b'mississippi' + self.assertEqual(b.lstrip(b'i'), b'mississippi') + self.assertEqual(b.lstrip(b'm'), b'ississippi') + self.assertEqual(b.lstrip(b'pi'), b'mississippi') + self.assertEqual(b.lstrip(b'im'), b'ssissippi') + self.assertEqual(b.lstrip(b'pim'), b'ssissippi') + + def test_rstrip(self): + b = b'mississippi' + self.assertEqual(b.rstrip(b'i'), b'mississipp') + self.assertEqual(b.rstrip(b'm'), b'mississippi') + self.assertEqual(b.rstrip(b'pi'), b'mississ') + self.assertEqual(b.rstrip(b'im'), b'mississipp') + self.assertEqual(b.rstrip(b'pim'), b'mississ') + # Optimizations: # __iter__? (optimization) # __reversed__? (optimization) - # XXX Some string methods? (Those that don't use character properties) - # lstrip, rstrip, strip?? (currently un-pepped) - # join - + # XXX More string methods? (Those that don't use character properties) + # There are tests in string_tests.py that are more # comprehensive for things like split, partition, etc. # Unfortunately they are all bundled with tests that @@ -675,7 +707,7 @@ class BytesAsStringTest(test.string_tests.BaseTest): getattr(bytes, methodname), object, *args - ) + ) # Currently the bytes containment testing uses a single integer # value. This may not be the final design, but until then the diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 6d257a5..213dbfc 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -31,7 +31,10 @@ PyBytes_Init(void) /* end nullbytes support */ -static int _getbytevalue(PyObject* arg, int *value) +/* Helpers */ + +static int +_getbytevalue(PyObject* arg, int *value) { PyObject *intarg = PyNumber_Int(arg); if (! intarg) @@ -45,6 +48,24 @@ static int _getbytevalue(PyObject* arg, int *value) return 1; } +Py_ssize_t +_getbuffer(PyObject *obj, void **ptr) +{ + PyBufferProcs *buffer = obj->ob_type->tp_as_buffer; + + if (buffer == NULL || + PyUnicode_Check(obj) || + buffer->bf_getreadbuffer == NULL || + buffer->bf_getsegcount == NULL || + buffer->bf_getsegcount(obj, NULL) != 1) + { + *ptr = NULL; + return -1; + } + + return buffer->bf_getreadbuffer(obj, 0, ptr); +} + /* Direct API functions */ PyObject * @@ -140,56 +161,63 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size) return 0; } -/* Functions stuffed into the type object */ - -static Py_ssize_t -bytes_length(PyBytesObject *self) -{ - return self->ob_size; -} - -static PyObject * -bytes_concat(PyBytesObject *self, PyObject *other) +PyObject * +PyBytes_Concat(PyObject *a, PyObject *b) { + Py_ssize_t asize, bsize, size; + void *aptr, *bptr; PyBytesObject *result; - Py_ssize_t mysize; - Py_ssize_t size; - if (!PyBytes_Check(other)) { - PyErr_Format(PyExc_TypeError, - "can't concat bytes to %.100s", other->ob_type->tp_name); + asize = _getbuffer(a, &aptr); + bsize = _getbuffer(b, &bptr); + if (asize < 0 || bsize < 0) { + PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s", + a->ob_type->tp_name, b->ob_type->tp_name); return NULL; } - mysize = self->ob_size; - size = mysize + ((PyBytesObject *)other)->ob_size; + size = asize + bsize; if (size < 0) return PyErr_NoMemory(); + result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size); if (result != NULL) { - memcpy(result->ob_bytes, self->ob_bytes, self->ob_size); - memcpy(result->ob_bytes + self->ob_size, - ((PyBytesObject *)other)->ob_bytes, - ((PyBytesObject *)other)->ob_size); + memcpy(result->ob_bytes, aptr, asize); + memcpy(result->ob_bytes + asize, bptr, bsize); } return (PyObject *)result; } +/* Functions stuffed into the type object */ + +static Py_ssize_t +bytes_length(PyBytesObject *self) +{ + return self->ob_size; +} + +static PyObject * +bytes_concat(PyBytesObject *self, PyObject *other) +{ + return PyBytes_Concat((PyObject *)self, other); +} + static PyObject * bytes_iconcat(PyBytesObject *self, PyObject *other) { - Py_ssize_t mysize; + void *optr; Py_ssize_t osize; + Py_ssize_t mysize; Py_ssize_t size; - if (!PyBytes_Check(other)) { + osize = _getbuffer(other, &optr); + if (osize < 0) { PyErr_Format(PyExc_TypeError, "can't concat bytes to %.100s", other->ob_type->tp_name); return NULL; } mysize = self->ob_size; - osize = ((PyBytesObject *)other)->ob_size; size = mysize + osize; if (size < 0) return PyErr_NoMemory(); @@ -197,7 +225,7 @@ bytes_iconcat(PyBytesObject *self, PyObject *other) self->ob_size = size; else if (PyBytes_Resize((PyObject *)self, size) < 0) return NULL; - memcpy(self->ob_bytes + mysize, ((PyBytesObject *)other)->ob_bytes, osize); + memcpy(self->ob_bytes + mysize, optr, osize); Py_INCREF(self); return (PyObject *)self; } @@ -366,15 +394,10 @@ static int bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi, PyObject *values) { - int avail; - int needed; - char *bytes; + Py_ssize_t avail, needed; + void *bytes; - if (values == NULL) { - bytes = NULL; - needed = 0; - } - else if (values == (PyObject *)self || !PyBytes_Check(values)) { + if (values == (PyObject *)self) { /* Make a copy an call this function recursively */ int err; values = PyBytes_FromObject(values); @@ -384,10 +407,19 @@ bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi, Py_DECREF(values); return err; } + if (values == NULL) { + /* del b[lo:hi] */ + bytes = NULL; + needed = 0; + } else { - assert(PyBytes_Check(values)); - bytes = ((PyBytesObject *)values)->ob_bytes; - needed = ((PyBytesObject *)values)->ob_size; + needed = _getbuffer(values, &bytes); + if (needed < 0) { + PyErr_Format(PyExc_TypeError, + "can't set bytes slice from %.100s", + values->ob_type->tp_name); + return -1; + } } if (lo < 0) @@ -840,42 +872,26 @@ bytes_str(PyBytesObject *self) static PyObject * bytes_richcompare(PyObject *self, PyObject *other, int op) { - PyBufferProcs *self_buffer, *other_buffer; Py_ssize_t self_size, other_size; void *self_bytes, *other_bytes; PyObject *res; Py_ssize_t minsize; int cmp; - /* For backwards compatibility, bytes can be compared to anything that - supports the (binary) buffer API. Except Unicode. */ - - if (PyUnicode_Check(self) || PyUnicode_Check(other)) { - Py_INCREF(Py_NotImplemented); - return Py_NotImplemented; - } + /* Bytes can be compared to anything that supports the (binary) buffer + API. Except Unicode. */ - self_buffer = self->ob_type->tp_as_buffer; - if (self_buffer == NULL || - self_buffer->bf_getreadbuffer == NULL || - self_buffer->bf_getsegcount == NULL || - self_buffer->bf_getsegcount(self, NULL) != 1) - { + self_size = _getbuffer(self, &self_bytes); + if (self_size < 0) { Py_INCREF(Py_NotImplemented); return Py_NotImplemented; } - self_size = self_buffer->bf_getreadbuffer(self, 0, &self_bytes); - other_buffer = other->ob_type->tp_as_buffer; - if (other_buffer == NULL || - other_buffer->bf_getreadbuffer == NULL || - other_buffer->bf_getsegcount == NULL || - other_buffer->bf_getsegcount(self, NULL) != 1) - { + other_size = _getbuffer(other, &other_bytes); + if (other_size < 0) { Py_INCREF(Py_NotImplemented); return Py_NotImplemented; } - other_size = other_buffer->bf_getreadbuffer(other, 0, &other_bytes); if (self_size != other_size && (op == Py_EQ || op == Py_NE)) { /* Shortcut: if the lengths differ, the objects differ */ @@ -2435,6 +2451,93 @@ bytes_remove(PyBytesObject *self, PyObject *arg) Py_RETURN_NONE; } +/* XXX These two helpers could be optimized if argsize == 1 */ + +Py_ssize_t +lstrip_helper(unsigned char *myptr, Py_ssize_t mysize, + void *argptr, Py_ssize_t argsize) +{ + Py_ssize_t i = 0; + while (i < mysize && memchr(argptr, myptr[i], argsize)) + i++; + return i; +} + +Py_ssize_t +rstrip_helper(unsigned char *myptr, Py_ssize_t mysize, + void *argptr, Py_ssize_t argsize) +{ + Py_ssize_t i = mysize - 1; + while (i >= 0 && memchr(argptr, myptr[i], argsize)) + i--; + return i + 1; +} + +PyDoc_STRVAR(strip__doc__, +"B.strip(bytes) -> bytes\n\ +\n\ +Strip leading and trailing bytes contained in the argument."); +static PyObject * +bytes_strip(PyBytesObject *self, PyObject *arg) +{ + Py_ssize_t left, right, mysize, argsize; + void *myptr, *argptr; + if (arg == NULL || !PyBytes_Check(arg)) { + PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument"); + return NULL; + } + myptr = self->ob_bytes; + mysize = self->ob_size; + argptr = ((PyBytesObject *)arg)->ob_bytes; + argsize = ((PyBytesObject *)arg)->ob_size; + left = lstrip_helper(myptr, mysize, argptr, argsize); + right = rstrip_helper(myptr, mysize, argptr, argsize); + return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left); +} + +PyDoc_STRVAR(lstrip__doc__, +"B.lstrip(bytes) -> bytes\n\ +\n\ +Strip leading bytes contained in the argument."); +static PyObject * +bytes_lstrip(PyBytesObject *self, PyObject *arg) +{ + Py_ssize_t left, right, mysize, argsize; + void *myptr, *argptr; + if (arg == NULL || !PyBytes_Check(arg)) { + PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument"); + return NULL; + } + myptr = self->ob_bytes; + mysize = self->ob_size; + argptr = ((PyBytesObject *)arg)->ob_bytes; + argsize = ((PyBytesObject *)arg)->ob_size; + left = lstrip_helper(myptr, mysize, argptr, argsize); + right = mysize; + return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left); +} + +PyDoc_STRVAR(rstrip__doc__, +"B.rstrip(bytes) -> bytes\n\ +\n\ +Strip trailing bytes contained in the argument."); +static PyObject * +bytes_rstrip(PyBytesObject *self, PyObject *arg) +{ + Py_ssize_t left, right, mysize, argsize; + void *myptr, *argptr; + if (arg == NULL || !PyBytes_Check(arg)) { + PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument"); + return NULL; + } + myptr = self->ob_bytes; + mysize = self->ob_size; + argptr = ((PyBytesObject *)arg)->ob_bytes; + argsize = ((PyBytesObject *)arg)->ob_size; + left = 0; + right = rstrip_helper(myptr, mysize, argptr, argsize); + return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left); +} PyDoc_STRVAR(decode_doc, "B.decode([encoding[,errors]]) -> unicode obect.\n\ @@ -2659,6 +2762,9 @@ bytes_methods[] = { {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__}, {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__}, {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__}, + {"strip", (PyCFunction)bytes_strip, METH_O, strip__doc__}, + {"lstrip", (PyCFunction)bytes_lstrip, METH_O, lstrip__doc__}, + {"rstrip", (PyCFunction)bytes_rstrip, METH_O, rstrip__doc__}, {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc}, {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc}, {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS, diff --git a/Objects/object.c b/Objects/object.c index f4ae4f3..0bf0c60 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1347,7 +1347,7 @@ merge_class_dict(PyObject* dict, PyObject* aclass) /* Helper for PyObject_Dir without arguments: returns the local scope. */ static PyObject * -_dir_locals() +_dir_locals(void) { PyObject *names; PyObject *locals = PyEval_GetLocals(); @@ -1892,4 +1892,3 @@ _PyTrash_destroy_chain(void) #ifdef __cplusplus } #endif - diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 7212df9..94943f6 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -948,6 +948,8 @@ string_concat(register PyStringObject *a, register PyObject *bb) if (PyUnicode_Check(bb)) return PyUnicode_Concat((PyObject *)a, bb); #endif + if (PyBytes_Check(bb)) + return PyBytes_Concat((PyObject *)a, bb); PyErr_Format(PyExc_TypeError, "cannot concatenate 'str' and '%.200s' objects", bb->ob_type->tp_name); -- cgit v0.12