summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Include/Python.h1
-rw-r--r--Include/bytesobject.h47
-rw-r--r--Lib/test/test_bytes.py109
-rw-r--r--Makefile.pre.in2
-rw-r--r--Objects/bytesobject.c373
-rw-r--r--Objects/object.c3
-rw-r--r--Python/bltinmodule.c1
7 files changed, 536 insertions, 0 deletions
diff --git a/Include/Python.h b/Include/Python.h
index 161c838..fffc688 100644
--- a/Include/Python.h
+++ b/Include/Python.h
@@ -78,6 +78,7 @@
#include "pydebug.h"
+#include "bytesobject.h"
#include "unicodeobject.h"
#include "intobject.h"
#include "boolobject.h"
diff --git a/Include/bytesobject.h b/Include/bytesobject.h
new file mode 100644
index 0000000..9c11624
--- /dev/null
+++ b/Include/bytesobject.h
@@ -0,0 +1,47 @@
+/* Bytes object interface */
+
+#ifndef Py_BYTESOBJECT_H
+#define Py_BYTESOBJECT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdarg.h>
+
+/* Type PyBytesObject represents a mutable array of bytes.
+ * The Python API is that of a sequence;
+ * the bytes are mapped to ints in [0, 256).
+ * Bytes are not characters; they may be used to encode characters.
+ * The only way to go between bytes and str/unicode is via encoding
+ * and decoding.
+ * For the convenience of C programmers, the bytes type is considered
+ * to contain a char pointer, not an unsigned char pointer.
+ */
+
+/* Object layout.
+ * ob_size (from PyObject_VAR_HEAD) is the number of bytes stored;
+ * ob_sval points at a separately allocated buffer (PyMem_Malloc /
+ * PyMem_Realloc) that the object owns and frees on deallocation.
+ */
+typedef struct {
+ PyObject_VAR_HEAD
+ char *ob_sval;
+} PyBytesObject;
+
+/* Type object */
+PyAPI_DATA(PyTypeObject) PyBytes_Type;
+
+/* Type check macros.
+ * PyBytes_Check accepts PyBytes_Type and anything PyObject_TypeCheck
+ * treats as an instance of it; PyBytes_CheckExact compares ob_type only.
+ */
+#define PyBytes_Check(self) PyObject_TypeCheck(self, &PyBytes_Type)
+#define PyBytes_CheckExact(self) ((self)->ob_type == &PyBytes_Type)
+
+/* Direct API functions.
+ * PyBytes_FromStringAndSize: new bytes object holding a copy of the given
+ *   buffer; returns NULL on failure.
+ * PyBytes_Size: number of bytes stored in the object.
+ * PyBytes_AsString: pointer to the object's internal buffer (not a copy).
+ * PyBytes_Resize: reallocate the buffer to the given size; returns 0 on
+ *   success, -1 with MemoryError set on failure.
+ * Size, AsString and Resize check their argument only with assert().
+ */
+PyAPI_FUNC(PyObject *) PyBytes_FromStringAndSize(const char *, Py_ssize_t);
+PyAPI_FUNC(Py_ssize_t) PyBytes_Size(PyObject *);
+PyAPI_FUNC(char *) PyBytes_AsString(PyObject *);
+PyAPI_FUNC(int) PyBytes_Resize(PyObject *, Py_ssize_t);
+
+/* Macros, trading safety for speed: plain casts and field reads, with no
+ * NULL or type checking of the argument. */
+#define PyBytes_AS_STRING(self) (((PyBytesObject *)(self))->ob_sval)
+#define PyBytes_GET_SIZE(self) (((PyBytesObject *)(self))->ob_size)
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_BYTESOBJECT_H */
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
new file mode 100644
index 0000000..ce224c2
--- /dev/null
+++ b/Lib/test/test_bytes.py
@@ -0,0 +1,109 @@
+"""Unit tests for the bytes type."""
+
+import sys
+import unittest
+import test.test_support
+
+
+class BytesTest(unittest.TestCase):
+
+ def test_basics(self):
+ b = bytes()
+ self.assertEqual(type(b), bytes)
+ self.assertEqual(b.__class__, bytes)
+
+ def test_empty_sequence(self):
+ b = bytes()
+ self.assertEqual(len(b), 0)
+ self.assertRaises(IndexError, lambda: b[0])
+ self.assertRaises(IndexError, lambda: b[1])
+ self.assertRaises(IndexError, lambda: b[sys.maxint])
+ self.assertRaises(IndexError, lambda: b[sys.maxint+1])
+ self.assertRaises(IndexError, lambda: b[10**100])
+ self.assertRaises(IndexError, lambda: b[-1])
+ self.assertRaises(IndexError, lambda: b[-2])
+ self.assertRaises(IndexError, lambda: b[-sys.maxint])
+ self.assertRaises(IndexError, lambda: b[-sys.maxint-1])
+ self.assertRaises(IndexError, lambda: b[-sys.maxint-2])
+ self.assertRaises(IndexError, lambda: b[-10**100])
+
+ def test_from_list(self):
+ ints = list(range(256))
+ b = bytes(i for i in ints)
+ self.assertEqual(len(b), 256)
+ self.assertEqual(list(b), ints)
+
+ def test_from_index(self):
+ class C:
+ def __init__(self, i=0):
+ self.i = i
+ def __index__(self):
+ return self.i
+ b = bytes([C(), C(1), C(254), C(255)])
+ self.assertEqual(list(b), [0, 1, 254, 255])
+ self.assertRaises(ValueError, lambda: bytes([C(-1)]))
+ self.assertRaises(ValueError, lambda: bytes([C(256)]))
+
+ def test_constructor_type_errors(self):
+ class C:
+ pass
+ self.assertRaises(TypeError, lambda: bytes(["0"]))
+ self.assertRaises(TypeError, lambda: bytes([0.0]))
+ self.assertRaises(TypeError, lambda: bytes([None]))
+ self.assertRaises(TypeError, lambda: bytes([C()]))
+
+ def test_constructor_value_errors(self):
+ self.assertRaises(ValueError, lambda: bytes([-1]))
+ self.assertRaises(ValueError, lambda: bytes([-sys.maxint]))
+ self.assertRaises(ValueError, lambda: bytes([-sys.maxint-1]))
+ self.assertRaises(ValueError, lambda: bytes([-sys.maxint-2]))
+ self.assertRaises(ValueError, lambda: bytes([-10**100]))
+ self.assertRaises(ValueError, lambda: bytes([256]))
+ self.assertRaises(ValueError, lambda: bytes([257]))
+ self.assertRaises(ValueError, lambda: bytes([sys.maxint]))
+ self.assertRaises(ValueError, lambda: bytes([sys.maxint+1]))
+ self.assertRaises(ValueError, lambda: bytes([10**100]))
+
+ def test_repr(self):
+ self.assertEqual(repr(bytes()), "bytes()")
+ self.assertEqual(repr(bytes([0])), "bytes([0x00])")
+ self.assertEqual(repr(bytes([0, 1, 254, 255])), "bytes([0x00, 0x01, 0xfe, 0xff])")
+
+ def test_compare(self):
+ b1 = bytes([1, 2, 3])
+ b2 = bytes([1, 2, 3])
+ b3 = bytes([1, 3])
+
+ self.failUnless(b1 == b2)
+ self.failUnless(b2 != b3)
+ self.failUnless(b1 <= b2)
+ self.failUnless(b1 <= b3)
+ self.failUnless(b1 < b3)
+ self.failUnless(b1 >= b2)
+ self.failUnless(b3 >= b2)
+ self.failUnless(b3 > b2)
+
+ self.failIf(b1 != b2)
+ self.failIf(b2 == b3)
+ self.failIf(b1 > b2)
+ self.failIf(b1 > b3)
+ self.failIf(b1 >= b3)
+ self.failIf(b1 < b2)
+ self.failIf(b3 < b2)
+ self.failIf(b3 <= b2)
+
+ def test_nohash(self):
+ self.assertRaises(TypeError, hash, bytes())
+
+ def test_doc(self):
+ self.failUnless(bytes.__doc__ != None)
+ self.failUnless(bytes.__doc__.startswith("bytes("))
+
+
+def test_main():
+ test.test_support.run_unittest(XrangeTest)
+
+
+if __name__ == "__main__":
+ # When executed as a script, hand control to unittest's own runner.
+ # The test_main() call is left disabled.
+ ##test_main()
+ unittest.main()
diff --git a/Makefile.pre.in b/Makefile.pre.in
index f8a7481..c3af94c 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -278,6 +278,7 @@ OBJECT_OBJS= \
Objects/abstract.o \
Objects/boolobject.o \
Objects/bufferobject.o \
+ Objects/bytesobject.o \
Objects/cellobject.o \
Objects/classobject.o \
Objects/cobject.o \
@@ -494,6 +495,7 @@ PYTHON_HEADERS= \
Include/abstract.h \
Include/boolobject.h \
Include/bufferobject.h \
+ Include/bytesobject.h \
Include/ceval.h \
Include/classobject.h \
Include/cobject.h \
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
new file mode 100644
index 0000000..8fc089b
--- /dev/null
+++ b/Objects/bytesobject.c
@@ -0,0 +1,373 @@
+/* Bytes object implementation */
+
+/* XXX TO DO: optimizations */
+
+#define PY_SSIZE_T_CLEAN
+#include "Python.h"
+
+/* Direct API functions */
+
+/* Create a new bytes object holding a copy of the `size` bytes at `sval`.
+ *
+ * `sval` may be NULL only when `size` is 0 (checked with assert()).
+ * Returns a new reference, or NULL with an exception set if either the
+ * object or its buffer cannot be allocated.
+ */
+PyObject *
+PyBytes_FromStringAndSize(const char *sval, Py_ssize_t size)
+{
+    PyBytesObject *new;
+
+    if (size != 0) {
+        assert(sval != NULL);
+        assert(size > 0);
+    }
+
+    new = PyObject_New(PyBytesObject, &PyBytes_Type);
+    if (new == NULL)
+        return NULL;
+
+    /* PyObject_New() does not zero the object's fields, so give the empty
+     * case (and the deallocator) a well-defined state before going on. */
+    new->ob_sval = NULL;
+    new->ob_size = 0;
+
+    if (size > 0) {
+        new->ob_sval = PyMem_Malloc(size);
+        if (new->ob_sval == NULL) {
+            Py_DECREF(new);
+            /* PyMem_Malloc() does not set an exception itself. */
+            return PyErr_NoMemory();
+        }
+        memcpy(new->ob_sval, sval, size);
+        new->ob_size = size;
+    }
+
+    return (PyObject *)new;
+}
+
+/* Return the number of bytes stored in `self`.
+ * The argument is validated with assert() only. */
+Py_ssize_t
+PyBytes_Size(PyObject *self)
+{
+    assert(self != NULL);
+    assert(PyBytes_Check(self));
+
+    return PyBytes_GET_SIZE(self);
+}
+
+/* Return a pointer to the internal buffer of `self` (not a copy).
+ * The argument is validated with assert() only. */
+char *
+PyBytes_AsString(PyObject *self)
+{
+    assert(self != NULL);
+    assert(PyBytes_Check(self));
+
+    return PyBytes_AS_STRING(self);
+}
+
+/* Reallocate the buffer of `self` so that it holds `size` bytes.
+ *
+ * Returns 0 on success.  On allocation failure sets MemoryError, leaves
+ * the object's buffer pointer and size untouched, and returns -1.
+ * Arguments are validated with assert() only.
+ */
+int
+PyBytes_Resize(PyObject *self, Py_ssize_t size)
+{
+    PyBytesObject *bytes = (PyBytesObject *)self;
+    void *buffer;
+
+    assert(self != NULL);
+    assert(PyBytes_Check(self));
+    assert(size >= 0);
+
+    buffer = PyMem_Realloc(bytes->ob_sval, size);
+    if (buffer != NULL) {
+        bytes->ob_sval = buffer;
+        bytes->ob_size = size;
+        return 0;
+    }
+
+    PyErr_NoMemory();
+    return -1;
+}
+
+/* Functions stuffed into the type object */
+
+/* sq_length / mp_length slot: the number of bytes stored. */
+static Py_ssize_t
+bytes_length(PyBytesObject *self)
+{
+    return PyBytes_GET_SIZE(self);
+}
+
+/* sq_item slot: return the byte at index `i` as an int in [0, 256).
+ * A negative index counts from the end.  Sets IndexError and returns
+ * NULL when the index is out of range. */
+static PyObject *
+bytes_getitem(PyBytesObject *self, Py_ssize_t i)
+{
+    const Py_ssize_t length = self->ob_size;
+    const Py_ssize_t index = (i < 0) ? i + length : i;
+
+    if (index >= 0 && index < length) {
+        /* The buffer is plain char; go through unsigned char so bytes
+         * above 0x7f do not come out negative. */
+        return PyInt_FromLong((unsigned char)(self->ob_sval[index]));
+    }
+
+    PyErr_SetString(PyExc_IndexError, "bytes index out of range");
+    return NULL;
+}
+
+/* tp_hash slot: always fails.  Sets TypeError and returns -1, the error
+ * value for a hash function; `self` is not examined. */
+static long
+bytes_nohash(PyObject *self)
+{
+ PyErr_SetString(PyExc_TypeError, "bytes objects are unhashable");
+ return -1;
+}
+
+/* tp_init slot: bytes([sequence]).
+ *
+ * Discards any previous contents, then (if an argument was given) iterates
+ * over it and appends each item, which must convert through __index__ to a
+ * value in range(0, 256).
+ *
+ * Returns 0 on success, -1 with an exception set on failure.  On failure
+ * the object keeps whatever bytes were appended before the error.
+ */
+static int
+bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
+{
+    static char *kwlist[] = {"sequence", 0};
+    PyObject *arg = NULL;
+    PyObject *it; /* iter(arg) */
+    PyObject *(*iternext)(PyObject *);
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:bytes", kwlist, &arg))
+        return -1;
+
+    /* Verify bytes invariants established by PyType_GenericAlloc() */
+    if (self->ob_size != 0) {
+        assert(self->ob_sval != NULL);
+        assert(self->ob_size > 0);
+    }
+
+    /* Empty previous contents */
+    if (PyBytes_Resize((PyObject *)self, 0) < 0)
+        return -1;
+
+    /* Quick check if we're done */
+    if (arg == NULL)
+        return 0;
+
+    /* XXX Optimize this if the arguments is a list, tuple, or bytes */
+
+    /* Get the iterator.  A NULL result means an exception is pending
+     * (e.g. the argument is not iterable), so report failure. */
+    it = PyObject_GetIter(arg);
+    if (it == NULL)
+        return -1;
+    iternext = *it->ob_type->tp_iternext;
+
+    /* Run the iterator to exhaustion */
+    for (;;) {
+        PyObject *item;
+        Py_ssize_t value;
+
+        /* Get the next item */
+        item = iternext(it);
+        if (item == NULL) {
+            /* tp_iternext may signal exhaustion with or without
+             * StopIteration; anything else is a real error. */
+            if (PyErr_Occurred()) {
+                if (!PyErr_ExceptionMatches(PyExc_StopIteration))
+                    goto error;
+                PyErr_Clear();
+            }
+            break;
+        }
+
+        /* Interpret it as an int (__index__).  The item is a new
+         * reference and is not needed after the conversion. */
+        value = PyNumber_Index(item);
+        Py_DECREF(item);
+        if (value == -1 && PyErr_Occurred())
+            goto error;
+
+        /* Range check */
+        if (value < 0 || value >= 256) {
+            PyErr_SetString(PyExc_ValueError, "bytes must be in range(0, 256)");
+            goto error;
+        }
+
+        /* Append the byte */
+        /* XXX Speed this up */
+        if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
+            goto error;
+        self->ob_sval[self->ob_size-1] = value;
+    }
+
+    /* Clean up and return success */
+    Py_DECREF(it);
+    return 0;
+
+ error:
+    /* Error handling when it != NULL */
+    Py_DECREF(it);
+    return -1;
+}
+
+/* Append a new string object built from `text` to `list`.
+ * Returns 0 on success, -1 with an exception set on failure. */
+static int
+bytes_repr_append(PyObject *list, const char *text)
+{
+    PyObject *str;
+    int err;
+
+    str = PyString_FromString(text);
+    if (str == NULL)
+        return -1;
+    err = PyList_Append(list, str);
+    Py_DECREF(str);
+    return err;
+}
+
+/* tp_repr slot: "bytes()" for an empty object, otherwise
+ * "bytes([0x00, 0x01, ...])" with one two-digit lowercase hex literal
+ * per byte.  Returns a new string, or NULL with an exception set.
+ */
+static PyObject *
+bytes_repr(PyBytesObject *self)
+{
+    PyObject *list;
+    PyObject *sep;
+    PyObject *result;
+    Py_ssize_t i;   /* same width as ob_size, so it cannot truncate */
+
+    if (self->ob_size == 0)
+        return PyString_FromString("bytes()");
+
+    /* Collect the pieces in a list and join them once at the end. */
+    list = PyList_New(0);
+    if (list == NULL)
+        return NULL;
+
+    if (bytes_repr_append(list, "bytes([") < 0)
+        goto error;
+
+    for (i = 0; i < self->ob_size; i++) {
+        char buffer[20];
+        sprintf(buffer, ", 0x%02x", (unsigned char) (self->ob_sval[i]));
+        /* The first element skips the leading ", " separator. */
+        if (bytes_repr_append(list, (i == 0) ? buffer+2 : buffer) < 0)
+            goto error;
+    }
+
+    if (bytes_repr_append(list, "])") < 0)
+        goto error;
+
+    sep = PyString_FromString("");
+    if (sep == NULL)
+        goto error;
+
+    result = _PyString_Join(sep, list);
+    Py_DECREF(sep);
+    Py_DECREF(list);
+    return result;
+
+ error:
+    /* Error handling when list != NULL */
+    Py_DECREF(list);
+    return NULL;
+}
+
+/* tp_richcompare slot: lexicographic comparison of two bytes objects,
+ * treating each byte as unsigned.  Returns a new reference to Py_True or
+ * Py_False, or to Py_NotImplemented when either operand is not a bytes
+ * object.
+ */
+static PyObject *
+bytes_richcompare(PyBytesObject *self, PyBytesObject *other, int op)
+{
+    PyObject *res;
+    Py_ssize_t minsize;   /* same width as ob_size, so it cannot truncate */
+    int cmp;
+
+    if (!PyBytes_Check(self) || !PyBytes_Check(other)) {
+        Py_INCREF(Py_NotImplemented);
+        return Py_NotImplemented;
+    }
+
+    if (self->ob_size != other->ob_size && (op == Py_EQ || op == Py_NE)) {
+        /* Shortcut: if the lengths differ, the objects differ */
+        cmp = (op == Py_NE);
+    }
+    else {
+        minsize = self->ob_size;
+        if (other->ob_size < minsize)
+            minsize = other->ob_size;
+
+        /* Calling memcmp() with a null pointer is undefined even for a
+         * zero length, and an empty object need not own a buffer (the
+         * deallocator allows ob_sval to be NULL), so skip the call when
+         * there is nothing to compare. */
+        cmp = (minsize > 0)
+            ? memcmp(self->ob_sval, other->ob_sval, minsize)
+            : 0;
+        /* In ISO C, memcmp() guarantees to use unsigned bytes! */
+
+        /* Equal common prefix: the shorter object sorts first. */
+        if (cmp == 0) {
+            if (self->ob_size < other->ob_size)
+                cmp = -1;
+            else if (self->ob_size > other->ob_size)
+                cmp = 1;
+        }
+
+        /* Turn the three-way result into the boolean for this operator. */
+        switch (op) {
+        case Py_LT: cmp = cmp < 0; break;
+        case Py_LE: cmp = cmp <= 0; break;
+        case Py_EQ: cmp = cmp == 0; break;
+        case Py_NE: cmp = cmp != 0; break;
+        case Py_GT: cmp = cmp > 0; break;
+        case Py_GE: cmp = cmp >= 0; break;
+        }
+    }
+
+    res = cmp ? Py_True : Py_False;
+    Py_INCREF(res);
+    return res;
+}
+
+/* tp_dealloc slot: release the byte buffer (if any), then hand the object
+ * itself to the type's tp_free. */
+static void
+bytes_dealloc(PyBytesObject *self)
+{
+    char *buffer = self->ob_sval;
+
+    if (buffer != NULL)
+        PyMem_Free(buffer);
+    self->ob_type->tp_free((PyObject *)self);
+}
+
+/* Sequence protocol: only length and integer indexing are provided; the
+ * concat, repeat, slice, assignment and contains slots are left empty. */
+static PySequenceMethods bytes_as_sequence = {
+ (lenfunc)bytes_length, /*sq_length*/
+ (binaryfunc)0, /*sq_concat*/
+ (ssizeargfunc)0, /*sq_repeat*/
+ (ssizeargfunc)bytes_getitem, /*sq_item*/
+ (ssizessizeargfunc)0, /*sq_slice*/
+ 0, /*sq_ass_item*/
+ 0, /*sq_ass_slice*/
+ (objobjproc)0, /*sq_contains*/
+};
+
+/* Mapping protocol: only the length slot is filled in; the subscript and
+ * subscript-assignment slots are empty. */
+static PyMappingMethods bytes_as_mapping = {
+ (lenfunc)bytes_length, /*mp_length*/
+ (binaryfunc)0, /*mp_subscript*/
+ 0, /*mp_ass_subscript*/
+};
+
+/* Buffer protocol: not implemented yet, so every slot is an explicit NULL.
+ * The slots are spelled out because an empty initializer list is not
+ * valid ISO C (before C23) and is rejected by some compilers.  The name
+ * noted beside each slot is the function planned for it.
+ */
+static PyBufferProcs bytes_as_buffer = {
+    (readbufferproc)0,   /*bf_getreadbuffer: bytes_buffer_getreadbuf*/
+    (writebufferproc)0,  /*bf_getwritebuffer: bytes_buffer_getwritebuf*/
+    (segcountproc)0,     /*bf_getsegcount: bytes_buffer_getsegcount*/
+    (charbufferproc)0,   /*bf_getcharbuffer: bytes_buffer_getcharbuf*/
+};
+
+/* Method table: no methods are defined yet; the table holds only the
+ * terminating sentinel entry. */
+static PyMethodDef
+bytes_methods[] = {
+ {NULL, NULL}
+};
+
+/* Docstring installed as tp_doc of PyBytes_Type, i.e. bytes.__doc__. */
+PyDoc_STRVAR(bytes_doc,
+"bytes([iterable]) -> new array of bytes.\n\
+\n\
+If an argument is given it must be an iterable yielding ints in range(256).");
+
+/* Type object for bytes.
+ * Instances are fixed-size objects (tp_itemsize is 0); the byte data
+ * lives in the separately allocated ob_sval buffer.  Py_TPFLAGS_BASETYPE
+ * is not among tp_flags, which is what the 'final' remark below refers to.
+ * Object creation uses the generic alloc/new; contents are filled in by
+ * bytes_init.
+ */
+PyTypeObject PyBytes_Type = {
+ PyObject_HEAD_INIT(&PyType_Type)
+ 0, /* ob_size */
+ "bytes", /* tp_name */
+ sizeof(PyBytesObject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)bytes_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ (reprfunc)bytes_repr, /* tp_repr */
+ 0, /* tp_as_number */
+ &bytes_as_sequence, /* tp_as_sequence */
+ &bytes_as_mapping, /* tp_as_mapping */
+ bytes_nohash, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ &bytes_as_buffer, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES, /* tp_flags */ /* bytes is 'final' or 'sealed' */
+ bytes_doc, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ (richcmpfunc)bytes_richcompare, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ bytes_methods, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ (initproc)bytes_init, /* tp_init */
+ PyType_GenericAlloc, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+ PyObject_Del, /* tp_free */
+};
diff --git a/Objects/object.c b/Objects/object.c
index a75c14e..9a451d2 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -1881,6 +1881,9 @@ _Py_ReadyTypes(void)
if (PyType_Ready(&PyBool_Type) < 0)
Py_FatalError("Can't initialize 'bool'");
+ if (PyType_Ready(&PyBytes_Type) < 0)
+ Py_FatalError("Can't initialize 'bytes'");
+
if (PyType_Ready(&PyString_Type) < 0)
Py_FatalError("Can't initialize 'str'");
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index 914e0d1..54e8fe8 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -2139,6 +2139,7 @@ _PyBuiltin_Init(void)
SETBUILTIN("basestring", &PyBaseString_Type);
SETBUILTIN("bool", &PyBool_Type);
SETBUILTIN("buffer", &PyBuffer_Type);
+ SETBUILTIN("bytes", &PyBytes_Type);
SETBUILTIN("classmethod", &PyClassMethod_Type);
#ifndef WITHOUT_COMPLEX
SETBUILTIN("complex", &PyComplex_Type);