From 4dfe8a1131b551687659b9339eaee163a24f82f1 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 22 Apr 2006 23:28:04 +0000 Subject: Here is a bytes type. It's very minimal but it's a start. --- Include/Python.h | 1 + Include/bytesobject.h | 47 +++++++ Lib/test/test_bytes.py | 109 +++++++++++++++ Makefile.pre.in | 2 + Objects/bytesobject.c | 373 +++++++++++++++++++++++++++++++++++++++++++++++++ Objects/object.c | 3 + Python/bltinmodule.c | 1 + 7 files changed, 536 insertions(+) create mode 100644 Include/bytesobject.h create mode 100644 Lib/test/test_bytes.py create mode 100644 Objects/bytesobject.c diff --git a/Include/Python.h b/Include/Python.h index 161c838..fffc688 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -78,6 +78,7 @@ #include "pydebug.h" +#include "bytesobject.h" #include "unicodeobject.h" #include "intobject.h" #include "boolobject.h" diff --git a/Include/bytesobject.h b/Include/bytesobject.h new file mode 100644 index 0000000..9c11624 --- /dev/null +++ b/Include/bytesobject.h @@ -0,0 +1,47 @@ +/* Bytes object interface */ + +#ifndef Py_BYTESOBJECT_H +#define Py_BYTESOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdarg.h> + +/* Type PyBytesObject represents a mutable array of bytes. + * The Python API is that of a sequence; + * the bytes are mapped to ints in [0, 256). + * Bytes are not characters; they may be used to encode characters. + * The only way to go between bytes and str/unicode is via encoding + * and decoding. + * For the convenience of C programmers, the bytes type is considered + * to contain a char pointer, not an unsigned char pointer. 
+ */ + +/* Object layout */ +typedef struct { + PyObject_VAR_HEAD + char *ob_sval; +} PyBytesObject; + +/* Type object */ +PyAPI_DATA(PyTypeObject) PyBytes_Type; + +/* Type check macros */ +#define PyBytes_Check(self) PyObject_TypeCheck(self, &PyBytes_Type) +#define PyBytes_CheckExact(self) ((self)->ob_type == &PyBytes_Type) + +/* Direct API functions */ +PyAPI_FUNC(PyObject *) PyBytes_FromStringAndSize(const char *, Py_ssize_t); +PyAPI_FUNC(Py_ssize_t) PyBytes_Size(PyObject *); +PyAPI_FUNC(char *) PyBytes_AsString(PyObject *); +PyAPI_FUNC(int) PyBytes_Resize(PyObject *, Py_ssize_t); + +/* Macros, trading safety for speed */ +#define PyBytes_AS_STRING(self) (((PyBytesObject *)(self))->ob_sval) +#define PyBytes_GET_SIZE(self) (((PyBytesObject *)(self))->ob_size) + +#ifdef __cplusplus +} +#endif +#endif /* !Py_BYTESOBJECT_H */ diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py new file mode 100644 index 0000000..ce224c2 --- /dev/null +++ b/Lib/test/test_bytes.py @@ -0,0 +1,109 @@ +"""Unit tests for the bytes type.""" + +import sys +import unittest +import test.test_support + + +class BytesTest(unittest.TestCase): + + def test_basics(self): + b = bytes() + self.assertEqual(type(b), bytes) + self.assertEqual(b.__class__, bytes) + + def test_empty_sequence(self): + b = bytes() + self.assertEqual(len(b), 0) + self.assertRaises(IndexError, lambda: b[0]) + self.assertRaises(IndexError, lambda: b[1]) + self.assertRaises(IndexError, lambda: b[sys.maxint]) + self.assertRaises(IndexError, lambda: b[sys.maxint+1]) + self.assertRaises(IndexError, lambda: b[10**100]) + self.assertRaises(IndexError, lambda: b[-1]) + self.assertRaises(IndexError, lambda: b[-2]) + self.assertRaises(IndexError, lambda: b[-sys.maxint]) + self.assertRaises(IndexError, lambda: b[-sys.maxint-1]) + self.assertRaises(IndexError, lambda: b[-sys.maxint-2]) + self.assertRaises(IndexError, lambda: b[-10**100]) + + def test_from_list(self): + ints = list(range(256)) + b = bytes(i for i in ints) + 
self.assertEqual(len(b), 256) + self.assertEqual(list(b), ints) + + def test_from_index(self): + class C: + def __init__(self, i=0): + self.i = i + def __index__(self): + return self.i + b = bytes([C(), C(1), C(254), C(255)]) + self.assertEqual(list(b), [0, 1, 254, 255]) + self.assertRaises(ValueError, lambda: bytes([C(-1)])) + self.assertRaises(ValueError, lambda: bytes([C(256)])) + + def test_constructor_type_errors(self): + class C: + pass + self.assertRaises(TypeError, lambda: bytes(["0"])) + self.assertRaises(TypeError, lambda: bytes([0.0])) + self.assertRaises(TypeError, lambda: bytes([None])) + self.assertRaises(TypeError, lambda: bytes([C()])) + + def test_constructor_value_errors(self): + self.assertRaises(ValueError, lambda: bytes([-1])) + self.assertRaises(ValueError, lambda: bytes([-sys.maxint])) + self.assertRaises(ValueError, lambda: bytes([-sys.maxint-1])) + self.assertRaises(ValueError, lambda: bytes([-sys.maxint-2])) + self.assertRaises(ValueError, lambda: bytes([-10**100])) + self.assertRaises(ValueError, lambda: bytes([256])) + self.assertRaises(ValueError, lambda: bytes([257])) + self.assertRaises(ValueError, lambda: bytes([sys.maxint])) + self.assertRaises(ValueError, lambda: bytes([sys.maxint+1])) + self.assertRaises(ValueError, lambda: bytes([10**100])) + + def test_repr(self): + self.assertEqual(repr(bytes()), "bytes()") + self.assertEqual(repr(bytes([0])), "bytes([0x00])") + self.assertEqual(repr(bytes([0, 1, 254, 255])), "bytes([0x00, 0x01, 0xfe, 0xff])") + + def test_compare(self): + b1 = bytes([1, 2, 3]) + b2 = bytes([1, 2, 3]) + b3 = bytes([1, 3]) + + self.failUnless(b1 == b2) + self.failUnless(b2 != b3) + self.failUnless(b1 <= b2) + self.failUnless(b1 <= b3) + self.failUnless(b1 < b3) + self.failUnless(b1 >= b2) + self.failUnless(b3 >= b2) + self.failUnless(b3 > b2) + + self.failIf(b1 != b2) + self.failIf(b2 == b3) + self.failIf(b1 > b2) + self.failIf(b1 > b3) + self.failIf(b1 >= b3) + self.failIf(b1 < b2) + self.failIf(b3 < b2) + 
self.failIf(b3 <= b2) + + def test_nohash(self): + self.assertRaises(TypeError, hash, bytes()) + + def test_doc(self): + self.failUnless(bytes.__doc__ is not None) + self.failUnless(bytes.__doc__.startswith("bytes(")) + + +def test_main(): + test.test_support.run_unittest(BytesTest) + + +if __name__ == "__main__": + ##test_main() + unittest.main() diff --git a/Makefile.pre.in b/Makefile.pre.in index f8a7481..c3af94c 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -278,6 +278,7 @@ OBJECT_OBJS= \ Objects/abstract.o \ Objects/boolobject.o \ Objects/bufferobject.o \ + Objects/bytesobject.o \ Objects/cellobject.o \ Objects/classobject.o \ Objects/cobject.o \ @@ -494,6 +495,7 @@ PYTHON_HEADERS= \ Include/abstract.h \ Include/boolobject.h \ Include/bufferobject.h \ + Include/bytesobject.h \ Include/ceval.h \ Include/classobject.h \ Include/cobject.h \ diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c new file mode 100644 index 0000000..8fc089b --- /dev/null +++ b/Objects/bytesobject.c @@ -0,0 +1,373 @@ +/* Bytes object implementation */ + +/* XXX TO DO: optimizations */ + +#define PY_SSIZE_T_CLEAN +#include "Python.h" + +/* Direct API functions */ + +PyObject * +PyBytes_FromStringAndSize(const char *sval, Py_ssize_t size) +{ + PyBytesObject *new; + + if (size != 0) { + assert(sval != NULL); + assert(size > 0); + } + + new = PyObject_New(PyBytesObject, &PyBytes_Type); + if (new == NULL) + return NULL; + new->ob_sval = NULL; new->ob_size = 0; /* PyObject_New() does not initialize these; bytes_dealloc() reads ob_sval */ + if (size > 0) { + new->ob_sval = PyMem_Malloc(size); + if (new->ob_sval == NULL) { + Py_DECREF(new); + return PyErr_NoMemory(); /* sets MemoryError and returns NULL */ + } + memcpy(new->ob_sval, sval, size); + new->ob_size = size; + } + + return (PyObject *)new; +} + +Py_ssize_t +PyBytes_Size(PyObject *self) +{ + assert(self != NULL); + assert(PyBytes_Check(self)); + + return ((PyBytesObject *)self)->ob_size; +} + +char * +PyBytes_AsString(PyObject *self) +{ + assert(self != NULL); + assert(PyBytes_Check(self)); + + return ((PyBytesObject *)self)->ob_sval; +} + +int +PyBytes_Resize(PyObject *self, Py_ssize_t 
size) +{ + void *sval; + + assert(self != NULL); + assert(PyBytes_Check(self)); + assert(size >= 0); + + sval = PyMem_Realloc(((PyBytesObject *)self)->ob_sval, size); + if (sval == NULL) { + PyErr_NoMemory(); + return -1; + } + + ((PyBytesObject *)self)->ob_sval = sval; + ((PyBytesObject *)self)->ob_size = size; + + return 0; +} + +/* Functions stuffed into the type object */ + +static Py_ssize_t +bytes_length(PyBytesObject *self) +{ + return self->ob_size; +} + +static PyObject * +bytes_getitem(PyBytesObject *self, Py_ssize_t i) +{ + if (i < 0) + i += self->ob_size; + if (i < 0 || i >= self->ob_size) { + PyErr_SetString(PyExc_IndexError, "bytes index out of range"); + return NULL; + } + return PyInt_FromLong((unsigned char)(self->ob_sval[i])); +} + +static long +bytes_nohash(PyObject *self) +{ + PyErr_SetString(PyExc_TypeError, "bytes objects are unhashable"); + return -1; +} + +static int +bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds) +{ + static char *kwlist[] = {"sequence", 0}; + PyObject *arg = NULL; + PyObject *it; /* iter(arg) */ + PyObject *(*iternext)(PyObject *); + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:bytes", kwlist, &arg)) + return -1; + + /* Verify bytes invariants established by PyType_GenericAlloc() */ + if (self->ob_size != 0) { + assert(self->ob_sval != NULL); + assert(self->ob_size > 0); + } + + /* Empty previous contents */ + if (PyBytes_Resize((PyObject *)self, 0) < 0) + return -1; + + /* Quick check if we're done */ + if (arg == 0) + return 0; + + /* XXX Optimize this if the argument is a list, tuple, or bytes */ + + /* Get the iterator */ + it = PyObject_GetIter(arg); + if (it == NULL) + return -1; /* PyObject_GetIter() has set the exception; report failure */ + iternext = *it->ob_type->tp_iternext; + + /* Run the iterator to exhaustion */ + for (;;) { + PyObject *item; + Py_ssize_t value; + + /* Get the next item */ + item = iternext(it); + if (item == NULL) { + if (PyErr_Occurred()) { + if (!PyErr_ExceptionMatches(PyExc_StopIteration)) + goto error; + PyErr_Clear(); + } + 
break; + } + + /* Interpret it as an int (__index__) */ + value = PyNumber_Index(item); Py_DECREF(item); /* iternext() returned a new reference; release it on every path */ + if (value == -1 && PyErr_Occurred()) + goto error; + + /* Range check */ + if (value < 0 || value >= 256) { + PyErr_SetString(PyExc_ValueError, "bytes must be in range(0, 256)"); + goto error; + } + + /* Append the byte */ + /* XXX Speed this up */ + if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0) + goto error; + self->ob_sval[self->ob_size-1] = value; + } + + /* Clean up and return success */ + Py_DECREF(it); + return 0; + + error: + /* Error handling when it != NULL */ + Py_DECREF(it); + return -1; +} + +static PyObject * +bytes_repr(PyBytesObject *self) +{ + PyObject *list; + PyObject *str; + PyObject *result; + int err; + Py_ssize_t i; + + if (self->ob_size == 0) + return PyString_FromString("bytes()"); + + list = PyList_New(0); + if (list == NULL) + return NULL; + + str = PyString_FromString("bytes(["); + if (str == NULL) + goto error; + + err = PyList_Append(list, str); + Py_DECREF(str); + if (err < 0) + goto error; + + for (i = 0; i < self->ob_size; i++) { + char buffer[20]; + sprintf(buffer, ", 0x%02x", (unsigned char) (self->ob_sval[i])); + str = PyString_FromString((i == 0) ? 
buffer+2 : buffer); + if (str == NULL) + goto error; + err = PyList_Append(list, str); + Py_DECREF(str); + if (err < 0) + goto error; + } + + str = PyString_FromString("])"); + if (str == NULL) + goto error; + + err = PyList_Append(list, str); + Py_DECREF(str); + if (err < 0) + goto error; + + str = PyString_FromString(""); + if (str == NULL) + goto error; + + result = _PyString_Join(str, list); + Py_DECREF(str); + Py_DECREF(list); + return result; + + error: + /* Error handling when list != NULL */ + Py_DECREF(list); + return NULL; +} + +static PyObject * +bytes_richcompare(PyBytesObject *self, PyBytesObject *other, int op) +{ + PyObject *res; + Py_ssize_t minsize; /* same type as ob_size, so the memcmp() length is not truncated */ + int cmp; + + if (!PyBytes_Check(self) || !PyBytes_Check(other)) { + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + + if (self->ob_size != other->ob_size && (op == Py_EQ || op == Py_NE)) { + /* Shortcut: if the lengths differ, the objects differ */ + cmp = (op == Py_NE); + } + else { + minsize = self->ob_size; + if (other->ob_size < minsize) + minsize = other->ob_size; + + cmp = memcmp(self->ob_sval, other->ob_sval, minsize); + /* In ISO C, memcmp() guarantees to use unsigned bytes! */ + + if (cmp == 0) { + if (self->ob_size < other->ob_size) + cmp = -1; + else if (self->ob_size > other->ob_size) + cmp = 1; + } + + switch (op) { + case Py_LT: cmp = cmp < 0; break; + case Py_LE: cmp = cmp <= 0; break; + case Py_EQ: cmp = cmp == 0; break; + case Py_NE: cmp = cmp != 0; break; + case Py_GT: cmp = cmp > 0; break; + case Py_GE: cmp = cmp >= 0; break; + } + } + + res = cmp ? 
Py_True : Py_False; + Py_INCREF(res); + return res; +} + +static void +bytes_dealloc(PyBytesObject *self) +{ + if (self->ob_sval != 0) { + PyMem_Free(self->ob_sval); + } + self->ob_type->tp_free((PyObject *)self); +} + +static PySequenceMethods bytes_as_sequence = { + (lenfunc)bytes_length, /*sq_length*/ + (binaryfunc)0, /*sq_concat*/ + (ssizeargfunc)0, /*sq_repeat*/ + (ssizeargfunc)bytes_getitem, /*sq_item*/ + (ssizessizeargfunc)0, /*sq_slice*/ + 0, /*sq_ass_item*/ + 0, /*sq_ass_slice*/ + (objobjproc)0, /*sq_contains*/ +}; + +static PyMappingMethods bytes_as_mapping = { + (lenfunc)bytes_length, + (binaryfunc)0, + 0, +}; + +static PyBufferProcs bytes_as_buffer = { +/* + (readbufferproc)bytes_buffer_getreadbuf, + (writebufferproc)bytes_buffer_getwritebuf, + (segcountproc)bytes_buffer_getsegcount, + (charbufferproc)bytes_buffer_getcharbuf, +*/ +}; + +static PyMethodDef +bytes_methods[] = { + {NULL, NULL} +}; + +PyDoc_STRVAR(bytes_doc, +"bytes([iterable]) -> new array of bytes.\n\ +\n\ +If an argument is given it must be an iterable yielding ints in range(256)."); + +PyTypeObject PyBytes_Type = { + PyObject_HEAD_INIT(&PyType_Type) + 0, + "bytes", + sizeof(PyBytesObject), + 0, + (destructor)bytes_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + (reprfunc)bytes_repr, /* tp_repr */ + 0, /* tp_as_number */ + &bytes_as_sequence, /* tp_as_sequence */ + &bytes_as_mapping, /* tp_as_mapping */ + bytes_nohash, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + &bytes_as_buffer, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES, /* tp_flags */ /* bytes is 'final' or 'sealed' */ + bytes_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + (richcmpfunc)bytes_richcompare, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + bytes_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* 
tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)bytes_init, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ + PyObject_Del, /* tp_free */ +}; diff --git a/Objects/object.c b/Objects/object.c index a75c14e..9a451d2 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1881,6 +1881,9 @@ _Py_ReadyTypes(void) if (PyType_Ready(&PyBool_Type) < 0) Py_FatalError("Can't initialize 'bool'"); + if (PyType_Ready(&PyBytes_Type) < 0) + Py_FatalError("Can't initialize 'bytes'"); + if (PyType_Ready(&PyString_Type) < 0) Py_FatalError("Can't initialize 'str'"); diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 914e0d1..54e8fe8 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -2139,6 +2139,7 @@ _PyBuiltin_Init(void) SETBUILTIN("basestring", &PyBaseString_Type); SETBUILTIN("bool", &PyBool_Type); SETBUILTIN("buffer", &PyBuffer_Type); + SETBUILTIN("bytes", &PyBytes_Type); SETBUILTIN("classmethod", &PyClassMethod_Type); #ifndef WITHOUT_COMPLEX SETBUILTIN("complex", &PyComplex_Type); -- cgit v0.12