summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Include/bytesobject.h5
-rw-r--r--Lib/test/test_bytes.py264
-rw-r--r--Lib/test/test_file.py19
-rw-r--r--Objects/bytesobject.c630
-rw-r--r--Objects/fileobject.c5
5 files changed, 717 insertions, 206 deletions
diff --git a/Include/bytesobject.h b/Include/bytesobject.h
index 9c11624..72ca076 100644
--- a/Include/bytesobject.h
+++ b/Include/bytesobject.h
@@ -21,7 +21,7 @@ extern "C" {
/* Object layout */
typedef struct {
PyObject_VAR_HEAD
- char *ob_sval;
+ char *ob_bytes;
} PyBytesObject;
/* Type object */
@@ -32,13 +32,14 @@ PyAPI_DATA(PyTypeObject) PyBytes_Type;
#define PyBytes_CheckExact(self) ((self)->ob_type == &PyBytes_Type)
/* Direct API functions */
+PyAPI_FUNC(PyObject *) PyBytes_FromObject(PyObject *);
PyAPI_FUNC(PyObject *) PyBytes_FromStringAndSize(const char *, Py_ssize_t);
PyAPI_FUNC(Py_ssize_t) PyBytes_Size(PyObject *);
PyAPI_FUNC(char *) PyBytes_AsString(PyObject *);
PyAPI_FUNC(int) PyBytes_Resize(PyObject *, Py_ssize_t);
/* Macros, trading safety for speed */
-#define PyBytes_AS_STRING(self) (((PyBytesObject *)(self))->ob_sval)
+#define PyBytes_AS_STRING(self) (((PyBytesObject *)(self))->ob_bytes)
#define PyBytes_GET_SIZE(self) (((PyBytesObject *)(self))->ob_size)
#ifdef __cplusplus
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
index 1ba5e11..cf5cd5a 100644
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -1,6 +1,9 @@
"""Unit tests for the bytes type."""
+import os
+import re
import sys
+import tempfile
import unittest
import test.test_support
@@ -45,7 +48,7 @@ class BytesTest(unittest.TestCase):
self.assertRaises(ValueError, bytes, [C(256)])
def test_constructor_type_errors(self):
- self.assertRaises(TypeError, bytes, 0)
+ self.assertRaises(TypeError, bytes, 0.0)
class C:
pass
self.assertRaises(TypeError, bytes, ["0"])
@@ -100,36 +103,233 @@ class BytesTest(unittest.TestCase):
self.failUnless(bytes.__doc__ != None)
self.failUnless(bytes.__doc__.startswith("bytes("))
- # XXX More stuff to test and build (TDD):
- # constructor from str: bytes(<str>) == bytes(map(ord, <str>))?
- # encoding constructor: bytes(<unicode>[, <encoding>[, <errors>]])
- # default encoding Latin-1? (Matching ord)
- # slicing
- # extended slicing?
- # item assignment
- # slice assignment
- # extended slice assignment?
- # __contains__ with simple int arg
- # __contains__ with another bytes arg?
- # find/index? (int or bytes arg?)
- # count? (int arg)
- # concatenation (+)
- # repeat?
- # extend?
- # append?
- # insert?
- # pop?
- # __reversed__?
- # reverse? (inplace)
- # NOT sort!
+ def test_buffer_api(self):
+ short_sample = "Hello world\n"
+ sample = short_sample + "x"*(20 - len(short_sample))
+ tfn = tempfile.mktemp()
+ try:
+ # Prepare
+ with open(tfn, "wb") as f:
+ f.write(short_sample)
+ # Test readinto
+ with open(tfn, "rb") as f:
+ b = bytes([ord('x')]*20)
+ n = f.readinto(b)
+ self.assertEqual(n, len(short_sample))
+ self.assertEqual(list(b), map(ord, sample))
+ # Test writing in binary mode
+ with open(tfn, "wb") as f:
+ f.write(b)
+ with open(tfn, "rb") as f:
+ self.assertEqual(f.read(), sample)
+ # Test writing in text mode
+ with open(tfn, "w") as f:
+ f.write(b)
+ with open(tfn, "r") as f:
+ self.assertEqual(f.read(), sample)
+ # Can't use readinto in text mode
+ with open(tfn, "r") as f:
+ self.assertRaises(TypeError, f.readinto, b)
+ finally:
+ try:
+ os.remove(tfn)
+ except os.error:
+ pass
+
+ def test_reversed(self):
+ input = map(ord, "Hello")
+ b = bytes(input)
+ output = list(reversed(b))
+ input.reverse()
+ self.assertEqual(output, input)
+
+ def test_getslice(self):
+ def by(s):
+ return bytes(map(ord, s))
+ b = by("Hello, world")
+
+ self.assertEqual(b[:5], by("Hello"))
+ self.assertEqual(b[1:5], by("ello"))
+ self.assertEqual(b[5:7], by(", "))
+ self.assertEqual(b[7:], by("world"))
+ self.assertEqual(b[7:12], by("world"))
+ self.assertEqual(b[7:100], by("world"))
+
+ self.assertEqual(b[:-7], by("Hello"))
+ self.assertEqual(b[-11:-7], by("ello"))
+ self.assertEqual(b[-7:-5], by(", "))
+ self.assertEqual(b[-5:], by("world"))
+ self.assertEqual(b[-5:12], by("world"))
+ self.assertEqual(b[-5:100], by("world"))
+ self.assertEqual(b[-100:5], by("Hello"))
+
+ def test_regexps(self):
+ def by(s):
+ return bytes(map(ord, s))
+ b = by("Hello, world")
+ self.assertEqual(re.findall(r"\w+", b), [by("Hello"), by("world")])
+
+ def test_setitem(self):
+ b = bytes([1, 2, 3])
+ b[1] = 100
+ self.assertEqual(b, bytes([1, 100, 3]))
+ b[-1] = 200
+ self.assertEqual(b, bytes([1, 100, 200]))
+ class C:
+ def __init__(self, i=0):
+ self.i = i
+ def __index__(self):
+ return self.i
+ b[0] = C(10)
+ self.assertEqual(b, bytes([10, 100, 200]))
+ try:
+ b[3] = 0
+ self.fail("Didn't raise IndexError")
+ except IndexError:
+ pass
+ try:
+ b[-10] = 0
+ self.fail("Didn't raise IndexError")
+ except IndexError:
+ pass
+ try:
+ b[0] = 256
+ self.fail("Didn't raise ValueError")
+ except ValueError:
+ pass
+ try:
+ b[0] = C(-1)
+ self.fail("Didn't raise ValueError")
+ except ValueError:
+ pass
+ try:
+ b[0] = None
+ self.fail("Didn't raise TypeError")
+ except TypeError:
+ pass
+
+ def test_delitem(self):
+ b = bytes(range(10))
+ del b[0]
+ self.assertEqual(b, bytes(range(1, 10)))
+ del b[-1]
+ self.assertEqual(b, bytes(range(1, 9)))
+ del b[4]
+ self.assertEqual(b, bytes([1, 2, 3, 4, 6, 7, 8]))
+
+ def test_setslice(self):
+ b = bytes(range(10))
+ self.assertEqual(list(b), list(range(10)))
+
+ b[0:5] = bytes([1, 1, 1, 1, 1])
+ self.assertEqual(b, bytes([1, 1, 1, 1, 1, 5, 6, 7, 8, 9]))
+
+ del b[0:-5]
+ self.assertEqual(b, bytes([5, 6, 7, 8, 9]))
+
+ b[0:0] = bytes([0, 1, 2, 3, 4])
+ self.assertEqual(b, bytes(range(10)))
+
+ b[-7:-3] = bytes([100, 101])
+ self.assertEqual(b, bytes([0, 1, 2, 100, 101, 7, 8, 9]))
+
+ b[3:5] = [3, 4, 5, 6]
+ self.assertEqual(b, bytes(range(10)))
+
+ def test_setslice_trap(self):
+ # This test verifies that we correctly handle assigning self
+ # to a slice of self (the old Lambert Meertens trap).
+ b = bytes(range(256))
+ b[8:] = b
+ self.assertEqual(b, bytes(list(range(8)) + list(range(256))))
+
+ def test_encoding(self):
+ sample = u"Hello world\n\u1234\u5678\u9abc\udef0"
+ for enc in ("utf8", "utf16"):
+ b = bytes(sample, enc)
+ self.assertEqual(b, bytes(map(ord, sample.encode(enc))))
+ self.assertRaises(UnicodeEncodeError, bytes, sample, "latin1")
+ b = bytes(sample, "latin1", "ignore")
+ self.assertEqual(b, bytes(sample[:-4]))
+
+ def test_decode(self):
+ sample = u"Hello world\n\u1234\u5678\u9abc\def0\def0"
+ for enc in ("utf8", "utf16"):
+ b = bytes(sample, enc)
+ self.assertEqual(b.decode(enc), sample)
+ sample = u"Hello world\n\x80\x81\xfe\xff"
+ b = bytes(sample, "latin1")
+ self.assertRaises(UnicodeDecodeError, b.decode, "utf8")
+ self.assertEqual(b.decode("utf8", "ignore"), "Hello world\n")
+
+ def test_from_buffer(self):
+ sample = "Hello world\n\x80\x81\xfe\xff"
+ buf = buffer(sample)
+ b = bytes(buf)
+ self.assertEqual(b, bytes(map(ord, sample)))
+
+ def test_to_str(self):
+ sample = "Hello world\n\x80\x81\xfe\xff"
+ b = bytes(sample)
+ self.assertEqual(str(b), sample)
+
+ def test_from_int(self):
+ b = bytes(0)
+ self.assertEqual(b, bytes())
+ b = bytes(10)
+ self.assertEqual(b, bytes([0]*10))
+ b = bytes(10000)
+ self.assertEqual(b, bytes([0]*10000))
+
+ def test_concat(self):
+ b1 = bytes("abc")
+ b2 = bytes("def")
+ self.assertEqual(b1 + b2, bytes("abcdef"))
+ self.assertRaises(TypeError, lambda: b1 + "def")
+ self.assertRaises(TypeError, lambda: "abc" + b2)
+
+ def test_repeat(self):
+ b = bytes("abc")
+ self.assertEqual(b * 3, bytes("abcabcabc"))
+ self.assertEqual(b * 0, bytes())
+ self.assertEqual(b * -1, bytes())
+ self.assertRaises(TypeError, lambda: b * 3.14)
+ self.assertRaises(TypeError, lambda: 3.14 * b)
+ self.assertRaises(MemoryError, lambda: b * sys.maxint)
+ self.assertEqual(bytes('x')*100, bytes('x'*100))
+
+ # Optimizations:
# __iter__? (optimization)
- # __str__? (could return "".join(map(chr, self))
- # decode
- # buffer API
- # check that regexp searches work
- # (I suppose re.sub() returns a string)
- # file.readinto
- # file.write
+ # __reversed__? (optimization)
+
+ # XXX Some list methods?
+ # extended slicing
+ # extended slice assignment
+ # extend (same as b[len(b):] = src)
+ # reverse (in-place)
+ # remove
+ # pop
+ # NOT sort!
+ # With int arg:
+ # __contains__
+ # index
+ # count
+ # append
+ # insert
+
+ # XXX Some string methods? (Those that don't use character properties)
+ # startswith
+ # endswith
+ # find, rfind
+ # __contains__ (bytes arg)
+ # index, rindex (bytes arg)
+ # join
+ # replace
+ # translate
+ # split, rsplit
+ # lstrip, rstrip, strip??
+
+ # XXX pickle and marshal support?
def test_main():
@@ -137,5 +337,5 @@ def test_main():
if __name__ == "__main__":
- ##test_main()
- unittest.main()
+ test_main()
+ ##unittest.main()
diff --git a/Lib/test/test_file.py b/Lib/test/test_file.py
index ab3da86..cfc1019 100644
--- a/Lib/test/test_file.py
+++ b/Lib/test/test_file.py
@@ -67,6 +67,17 @@ n = f.readinto(a)
f.close()
verify(buf == a.tostring()[:n])
+# verify readinto refuses text files
+a = array('c', 'x'*10)
+f = open(TESTFN, 'r')
+try:
+ f.readinto(a)
+ raise TestFailed("readinto shouldn't work in text mode")
+except TypeError:
+ pass
+finally:
+ f.close()
+
# verify writelines with integers
f = open(TESTFN, 'wb')
try:
@@ -261,13 +272,13 @@ methods = [("readline", ()), ("read", ()), ("readlines", ()),
try:
# Prepare the testfile
- bag = open(TESTFN, "w")
+ bag = open(TESTFN, "wb")
bag.write(filler * nchunks)
bag.writelines(testlines)
bag.close()
# Test for appropriate errors mixing read* and iteration
for methodname, args in methods:
- f = open(TESTFN)
+ f = open(TESTFN, 'rb')
if f.next() != filler:
raise TestFailed, "Broken testfile"
meth = getattr(f, methodname)
@@ -286,7 +297,7 @@ try:
# Each line in the bag o' ham is 4 bytes ("h", "a", "m", "\n"), so
# 4096 lines of that should get us exactly on the buffer boundary for
# any power-of-2 buffersize between 4 and 16384 (inclusive).
- f = open(TESTFN)
+ f = open(TESTFN, 'rb')
for i in range(nchunks):
f.next()
testline = testlines.pop(0)
@@ -328,7 +339,7 @@ try:
raise TestFailed("readlines() after next() with empty buffer "
"failed. Got %r, expected %r" % (line, testline))
# Reading after iteration hit EOF shouldn't hurt either
- f = open(TESTFN)
+ f = open(TESTFN, 'rb')
try:
for line in f:
pass
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index f221395..36b4424 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -8,27 +8,34 @@
/* Direct API functions */
PyObject *
-PyBytes_FromStringAndSize(const char *sval, Py_ssize_t size)
+PyBytes_FromObject(PyObject *input)
+{
+ return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
+ input, NULL);
+}
+
+PyObject *
+PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
{
PyBytesObject *new;
- if (size != 0) {
- assert(sval != NULL);
- assert(size > 0);
- }
+ assert(size >= 0);
new = PyObject_New(PyBytesObject, &PyBytes_Type);
if (new == NULL)
- return NULL;
-
- if (size > 0) {
- new->ob_sval = PyMem_Malloc(size);
- if (new->ob_sval == NULL) {
- Py_DECREF(new);
- return NULL;
- }
- memcpy(new->ob_sval, sval, size);
- new->ob_size = size;
+ return NULL;
+
+ new->ob_size = size;
+ if (size == 0)
+ new->ob_bytes = NULL;
+ else {
+ new->ob_bytes = PyMem_Malloc(size);
+ if (new->ob_bytes == NULL) {
+ Py_DECREF(new);
+ return NULL;
+ }
+ if (bytes != NULL)
+ memcpy(new->ob_bytes, bytes, size);
}
return (PyObject *)new;
@@ -49,7 +56,7 @@ PyBytes_AsString(PyObject *self)
assert(self != NULL);
assert(PyBytes_Check(self));
- return ((PyBytesObject *)self)->ob_sval;
+ return ((PyBytesObject *)self)->ob_bytes;
}
int
@@ -61,13 +68,13 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
assert(PyBytes_Check(self));
assert(size >= 0);
- sval = PyMem_Realloc(((PyBytesObject *)self)->ob_sval, size);
+ sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, size);
if (sval == NULL) {
- PyErr_NoMemory();
- return -1;
+ PyErr_NoMemory();
+ return -1;
}
- ((PyBytesObject *)self)->ob_sval = sval;
+ ((PyBytesObject *)self)->ob_bytes = sval;
((PyBytesObject *)self)->ob_size = size;
return 0;
@@ -82,15 +89,178 @@ bytes_length(PyBytesObject *self)
}
static PyObject *
+bytes_concat(PyBytesObject *self, PyObject *other)
+{
+ PyBytesObject *result;
+ Py_ssize_t mysize;
+ Py_ssize_t size;
+
+ if (!PyBytes_Check(other)) {
+ PyErr_Format(PyExc_TypeError,
+ "can't concat bytes to %.100s", other->ob_type->tp_name);
+ return NULL;
+ }
+
+ mysize = self->ob_size;
+ size = mysize + ((PyBytesObject *)other)->ob_size;
+ if (size < 0)
+ return PyErr_NoMemory();
+ result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
+ if (result != NULL) {
+ memcpy(result->ob_bytes, self->ob_bytes, self->ob_size);
+ memcpy(result->ob_bytes + self->ob_size,
+ ((PyBytesObject *)other)->ob_bytes,
+ ((PyBytesObject *)other)->ob_size);
+ }
+ return (PyObject *)result;
+}
+
+static PyObject *
+bytes_repeat(PyBytesObject *self, Py_ssize_t count)
+{
+ PyBytesObject *result;
+ Py_ssize_t mysize;
+ Py_ssize_t size;
+
+ if (count < 0)
+ count = 0;
+ mysize = self->ob_size;
+ size = mysize * count;
+ if (count != 0 && size / count != mysize)
+ return PyErr_NoMemory();
+ result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
+ if (result != NULL && size != 0) {
+ if (mysize == 1)
+ memset(result->ob_bytes, self->ob_bytes[0], size);
+ else {
+ int i;
+ for (i = 0; i < count; i++)
+ memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
+ }
+ }
+ return (PyObject *)result;
+}
+
+static PyObject *
bytes_getitem(PyBytesObject *self, Py_ssize_t i)
{
if (i < 0)
- i += self->ob_size;
+ i += self->ob_size;
if (i < 0 || i >= self->ob_size) {
- PyErr_SetString(PyExc_IndexError, "bytes index out of range");
- return NULL;
+ PyErr_SetString(PyExc_IndexError, "bytes index out of range");
+ return NULL;
}
- return PyInt_FromLong((unsigned char)(self->ob_sval[i]));
+ return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
+}
+
+static PyObject *
+bytes_getslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi)
+{
+ if (lo < 0)
+ lo = 0;
+ if (hi > self->ob_size)
+ hi = self->ob_size;
+ if (lo >= hi)
+ lo = hi = 0;
+ return PyBytes_FromStringAndSize(self->ob_bytes + lo, hi - lo);
+}
+
+static int
+bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
+ PyObject *values)
+{
+ int avail;
+ int needed;
+ char *bytes;
+
+ if (values == NULL) {
+ bytes = NULL;
+ needed = 0;
+ }
+ else if (values == (PyObject *)self || !PyBytes_Check(values)) {
+ /* Make a copy and call this function recursively */
+ int err;
+ values = PyBytes_FromObject(values);
+ if (values == NULL)
+ return -1;
+ err = bytes_setslice(self, lo, hi, values);
+ Py_DECREF(values);
+ return err;
+ }
+ else {
+ assert(PyBytes_Check(values));
+ bytes = ((PyBytesObject *)values)->ob_bytes;
+ needed = ((PyBytesObject *)values)->ob_size;
+ }
+
+ if (lo < 0)
+ lo = 0;
+ if (hi > self->ob_size)
+ hi = self->ob_size;
+
+ avail = hi - lo;
+ if (avail < 0)
+ lo = hi = avail = 0;
+
+ if (avail != needed) {
+ if (avail > needed) {
+ /*
+ 0 lo hi old_size
+ | |<----avail----->|<-----tomove------>|
+ | |<-needed->|<-----tomove------>|
+ 0 lo new_hi new_size
+ */
+ memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
+ self->ob_size - hi);
+ }
+ if (PyBytes_Resize((PyObject *)self,
+ self->ob_size + needed - avail) < 0)
+ return -1;
+ if (avail < needed) {
+ /*
+ 0 lo hi old_size
+ | |<-avail->|<-----tomove------>|
+ | |<----needed---->|<-----tomove------>|
+ 0 lo new_hi new_size
+ */
+ memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
+ self->ob_size - lo - needed);
+ }
+ }
+
+ if (needed > 0)
+ memcpy(self->ob_bytes + lo, bytes, needed);
+
+ return 0;
+}
+
+static int
+bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
+{
+ Py_ssize_t ival;
+
+ if (i < 0)
+ i += self->ob_size;
+
+ if (i < 0 || i >= self->ob_size) {
+ PyErr_SetString(PyExc_IndexError, "bytes index out of range");
+ return -1;
+ }
+
+ if (value == NULL)
+ return bytes_setslice(self, i, i+1, NULL);
+
+ ival = PyNumber_Index(value);
+ if (ival == -1 && PyErr_Occurred())
+ return -1;
+
+ if (ival < 0 || ival >= 256) {
+ PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
+ return -1;
+ }
+
+ self->ob_bytes[i] = ival;
+ return 0;
}
static long
@@ -103,69 +273,138 @@ bytes_nohash(PyObject *self)
static int
bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
{
- static char *kwlist[] = {"sequence", 0};
+ static char *kwlist[] = {"source", "encoding", "errors", 0};
PyObject *arg = NULL;
- PyObject *it; /* iter(arg) */
+ const char *encoding = NULL;
+ const char *errors = NULL;
+ Py_ssize_t count;
+ PyObject *it;
PyObject *(*iternext)(PyObject *);
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:bytes", kwlist, &arg))
- return -1;
+ /* Empty previous contents (yes, do this first of all!) */
+ if (PyBytes_Resize((PyObject *)self, 0) < 0)
+ return -1;
+
+ /* Parse arguments */
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
+ &arg, &encoding, &errors))
+ return -1;
+
+ /* Make a quick exit if no first argument */
+ if (arg == NULL) {
+ if (encoding != NULL || errors != NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "encoding or errors without sequence argument");
+ return -1;
+ }
+ return 0;
+ }
- /* Verify list invariants established by PyType_GenericAlloc() */
- if (self->ob_size != 0) {
- assert(self->ob_sval != NULL);
- assert(self->ob_size > 0);
+ if (PyUnicode_Check(arg)) {
+ /* Encode via the codec registry */
+ PyObject *encoded;
+ char *bytes;
+ Py_ssize_t size;
+ if (encoding == NULL)
+ encoding = PyUnicode_GetDefaultEncoding();
+ encoded = PyCodec_Encode(arg, encoding, errors);
+ if (encoded == NULL)
+ return -1;
+ if (!PyString_Check(encoded)) {
+ PyErr_Format(PyExc_TypeError,
+ "encoder did not return a string object (type=%.400s)",
+ encoded->ob_type->tp_name);
+ Py_DECREF(encoded);
+ return -1;
+ }
+ bytes = PyString_AS_STRING(encoded);
+ size = PyString_GET_SIZE(encoded);
+ if (PyBytes_Resize((PyObject *)self, size) < 0) {
+ Py_DECREF(encoded);
+ return -1;
+ }
+ memcpy(self->ob_bytes, bytes, size);
+ Py_DECREF(encoded);
+ return 0;
}
- /* Empty previous contents */
- if (PyBytes_Resize((PyObject *)self, 0) < 0)
- return -1;
+ /* If it's not unicode, there can't be encoding or errors */
+ if (encoding != NULL || errors != NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "encoding or errors without a string argument");
+ return -1;
+ }
- /* Quick check if we're done */
- if (arg == 0)
- return 0;
+ /* Is it an int? */
+ count = PyNumber_Index(arg);
+ if (count == -1 && PyErr_Occurred())
+ PyErr_Clear();
+ else {
+ if (count < 0) {
+ PyErr_SetString(PyExc_ValueError, "negative count");
+ return -1;
+ }
+ if (count > 0) {
+ if (PyBytes_Resize((PyObject *)self, count))
+ return -1;
+ memset(self->ob_bytes, 0, count);
+ }
+ return 0;
+ }
+
+ if (PyObject_CheckReadBuffer(arg)) {
+ const void *bytes;
+ Py_ssize_t size;
+ if (PyObject_AsReadBuffer(arg, &bytes, &size) < 0)
+ return -1;
+ if (PyBytes_Resize((PyObject *)self, size) < 0)
+ return -1;
+ memcpy(self->ob_bytes, bytes, size);
+ return 0;
+ }
- /* XXX Optimize this if the arguments is a list, tuple, or bytes */
+ /* XXX Optimize this if the argument is a list or tuple */
/* Get the iterator */
it = PyObject_GetIter(arg);
if (it == NULL)
- return -1;
+ return -1;
iternext = *it->ob_type->tp_iternext;
/* Run the iterator to exhaustion */
for (;;) {
- PyObject *item;
- Py_ssize_t value;
-
- /* Get the next item */
- item = iternext(it);
- if (item == NULL) {
- if (PyErr_Occurred()) {
- if (!PyErr_ExceptionMatches(PyExc_StopIteration))
- goto error;
- PyErr_Clear();
- }
- break;
- }
-
- /* Interpret it as an int (__index__) */
- value = PyNumber_Index(item);
- Py_DECREF(item);
- if (value == -1 && PyErr_Occurred())
- goto error;
-
- /* Range check */
- if (value < 0 || value >= 256) {
- PyErr_SetString(PyExc_ValueError, "bytes must be in range(0, 256)");
- goto error;
- }
-
- /* Append the byte */
- /* XXX Speed this up */
- if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
- goto error;
- self->ob_sval[self->ob_size-1] = value;
+ PyObject *item;
+ Py_ssize_t value;
+
+ /* Get the next item */
+ item = iternext(it);
+ if (item == NULL) {
+ if (PyErr_Occurred()) {
+ if (!PyErr_ExceptionMatches(PyExc_StopIteration))
+ goto error;
+ PyErr_Clear();
+ }
+ break;
+ }
+
+ /* Interpret it as an int (__index__) */
+ value = PyNumber_Index(item);
+ Py_DECREF(item);
+ if (value == -1 && PyErr_Occurred())
+ goto error;
+
+ /* Range check */
+ if (value < 0 || value >= 256) {
+ PyErr_SetString(PyExc_ValueError,
+ "bytes must be in range(0, 256)");
+ goto error;
+ }
+
+ /* Append the byte */
+ /* XXX Speed this up */
+ if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
+ goto error;
+ self->ob_bytes[self->ob_size-1] = value;
}
/* Clean up and return success */
@@ -188,45 +427,45 @@ bytes_repr(PyBytesObject *self)
int i;
if (self->ob_size == 0)
- return PyString_FromString("bytes()");
+ return PyString_FromString("bytes()");
list = PyList_New(0);
if (list == NULL)
- return NULL;
+ return NULL;
str = PyString_FromString("bytes([");
if (str == NULL)
- goto error;
+ goto error;
err = PyList_Append(list, str);
Py_DECREF(str);
if (err < 0)
- goto error;
+ goto error;
for (i = 0; i < self->ob_size; i++) {
- char buffer[20];
- sprintf(buffer, ", 0x%02x", (unsigned char) (self->ob_sval[i]));
- str = PyString_FromString((i == 0) ? buffer+2 : buffer);
- if (str == NULL)
- goto error;
- err = PyList_Append(list, str);
- Py_DECREF(str);
- if (err < 0)
- goto error;
+ char buffer[20];
+ sprintf(buffer, ", 0x%02x", (unsigned char) (self->ob_bytes[i]));
+ str = PyString_FromString((i == 0) ? buffer+2 : buffer);
+ if (str == NULL)
+ goto error;
+ err = PyList_Append(list, str);
+ Py_DECREF(str);
+ if (err < 0)
+ goto error;
}
str = PyString_FromString("])");
if (str == NULL)
- goto error;
+ goto error;
err = PyList_Append(list, str);
Py_DECREF(str);
if (err < 0)
- goto error;
+ goto error;
str = PyString_FromString("");
if (str == NULL)
- goto error;
+ goto error;
result = _PyString_Join(str, list);
Py_DECREF(str);
@@ -240,6 +479,12 @@ bytes_repr(PyBytesObject *self)
}
static PyObject *
+bytes_str(PyBytesObject *self)
+{
+ return PyString_FromStringAndSize(self->ob_bytes, self->ob_size);
+}
+
+static PyObject *
bytes_richcompare(PyBytesObject *self, PyBytesObject *other, int op)
{
PyObject *res;
@@ -247,37 +492,37 @@ bytes_richcompare(PyBytesObject *self, PyBytesObject *other, int op)
int cmp;
if (!PyBytes_Check(self) || !PyBytes_Check(other)) {
- Py_INCREF(Py_NotImplemented);
- return Py_NotImplemented;
+ Py_INCREF(Py_NotImplemented);
+ return Py_NotImplemented;
}
if (self->ob_size != other->ob_size && (op == Py_EQ || op == Py_NE)) {
- /* Shortcut: if the lengths differ, the objects differ */
- cmp = (op == Py_NE);
+ /* Shortcut: if the lengths differ, the objects differ */
+ cmp = (op == Py_NE);
}
else {
- minsize = self->ob_size;
- if (other->ob_size < minsize)
- minsize = other->ob_size;
-
- cmp = memcmp(self->ob_sval, other->ob_sval, minsize);
- /* In ISO C, memcmp() guarantees to use unsigned bytes! */
-
- if (cmp == 0) {
- if (self->ob_size < other->ob_size)
- cmp = -1;
- else if (self->ob_size > other->ob_size)
- cmp = 1;
- }
-
- switch (op) {
- case Py_LT: cmp = cmp < 0; break;
- case Py_LE: cmp = cmp <= 0; break;
- case Py_EQ: cmp = cmp == 0; break;
- case Py_NE: cmp = cmp != 0; break;
- case Py_GT: cmp = cmp > 0; break;
- case Py_GE: cmp = cmp >= 0; break;
- }
+ minsize = self->ob_size;
+ if (other->ob_size < minsize)
+ minsize = other->ob_size;
+
+ cmp = memcmp(self->ob_bytes, other->ob_bytes, minsize);
+ /* In ISO C, memcmp() guarantees to use unsigned bytes! */
+
+ if (cmp == 0) {
+ if (self->ob_size < other->ob_size)
+ cmp = -1;
+ else if (self->ob_size > other->ob_size)
+ cmp = 1;
+ }
+
+ switch (op) {
+ case Py_LT: cmp = cmp < 0; break;
+ case Py_LE: cmp = cmp <= 0; break;
+ case Py_EQ: cmp = cmp == 0; break;
+ case Py_NE: cmp = cmp != 0; break;
+ case Py_GT: cmp = cmp > 0; break;
+ case Py_GE: cmp = cmp >= 0; break;
+ }
}
res = cmp ? Py_True : Py_False;
@@ -288,41 +533,89 @@ bytes_richcompare(PyBytesObject *self, PyBytesObject *other, int op)
static void
bytes_dealloc(PyBytesObject *self)
{
- if (self->ob_sval != 0) {
- PyMem_Free(self->ob_sval);
+ if (self->ob_bytes != 0) {
+ PyMem_Free(self->ob_bytes);
}
self->ob_type->tp_free((PyObject *)self);
}
+static Py_ssize_t
+bytes_getbuffer(PyBytesObject *self, Py_ssize_t index, const void **ptr)
+{
+ if (index != 0) {
+ PyErr_SetString(PyExc_SystemError,
+ "accessing non-existent string segment");
+ return -1;
+ }
+ *ptr = (void *)self->ob_bytes;
+ return self->ob_size;
+}
+
+static Py_ssize_t
+bytes_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
+{
+ if (lenp)
+ *lenp = self->ob_size;
+ return 1;
+}
+
+PyDoc_STRVAR(decode_doc,
+"B.decode([encoding[,errors]]) -> unicode obect.\n\
+\n\
+Decodes B using the codec registered for encoding. encoding defaults\n\
+to the default encoding. errors may be given to set a different error\n\
+handling scheme. Default is 'strict' meaning that encoding errors raise\n\
+a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
+as well as any other name registerd with codecs.register_error that is\n\
+able to handle UnicodeDecodeErrors.");
+
+static PyObject *
+bytes_decode(PyObject *self, PyObject *args)
+{
+ const char *encoding = NULL;
+ const char *errors = NULL;
+
+ if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
+ return NULL;
+ if (encoding == NULL)
+ encoding = PyUnicode_GetDefaultEncoding();
+ return PyCodec_Decode(self, encoding, errors);
+}
+
static PySequenceMethods bytes_as_sequence = {
- (lenfunc)bytes_length, /*sq_length*/
- (binaryfunc)0, /*sq_concat*/
- (ssizeargfunc)0, /*sq_repeat*/
- (ssizeargfunc)bytes_getitem, /*sq_item*/
- (ssizessizeargfunc)0, /*sq_slice*/
- 0, /*sq_ass_item*/
- 0, /*sq_ass_slice*/
- (objobjproc)0, /*sq_contains*/
+ (lenfunc)bytes_length, /*sq_length*/
+ (binaryfunc)bytes_concat, /*sq_concat*/
+ (ssizeargfunc)bytes_repeat, /*sq_repeat*/
+ (ssizeargfunc)bytes_getitem, /*sq_item*/
+ (ssizessizeargfunc)bytes_getslice, /*sq_slice*/
+ (ssizeobjargproc)bytes_setitem, /*sq_ass_item*/
+ (ssizessizeobjargproc)bytes_setslice, /* sq_ass_slice */
+#if 0
+ (objobjproc)bytes_contains, /* sq_contains */
+ (binaryfunc)bytes_inplace_concat, /* sq_inplace_concat */
+ (ssizeargfunc)bytes_inplace_repeat, /* sq_inplace_repeat */
+#endif
};
static PyMappingMethods bytes_as_mapping = {
- (lenfunc)bytes_length,
- (binaryfunc)0,
- 0,
+ (lenfunc)bytes_length,
+ (binaryfunc)0,
+ 0,
};
static PyBufferProcs bytes_as_buffer = {
-/*
- (readbufferproc)bytes_buffer_getreadbuf,
- (writebufferproc)bytes_buffer_getwritebuf,
- (segcountproc)bytes_buffer_getsegcount,
- (charbufferproc)bytes_buffer_getcharbuf,
-*/
+ (readbufferproc)bytes_getbuffer,
+ (writebufferproc)bytes_getbuffer,
+ (segcountproc)bytes_getsegcount,
+ /* XXX Bytes are not characters! But we need to implement
+ bf_getcharbuffer() so we can be used as 't#' argument to codecs. */
+ (charbufferproc)bytes_getbuffer,
};
static PyMethodDef
bytes_methods[] = {
- {NULL, NULL}
+ {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
+ {NULL, NULL}
};
PyDoc_STRVAR(bytes_doc,
@@ -336,39 +629,40 @@ PyTypeObject PyBytes_Type = {
"bytes",
sizeof(PyBytesObject),
0,
- (destructor)bytes_dealloc, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_compare */
- (reprfunc)bytes_repr, /* tp_repr */
- 0, /* tp_as_number */
- &bytes_as_sequence, /* tp_as_sequence */
- &bytes_as_mapping, /* tp_as_mapping */
- bytes_nohash, /* tp_hash */
- 0, /* tp_call */
- 0, /* tp_str */
- PyObject_GenericGetAttr, /* tp_getattro */
- 0, /* tp_setattro */
- &bytes_as_buffer, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES, /* tp_flags */ /* bytes is 'final' or 'sealed' */
- bytes_doc, /* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- (richcmpfunc)bytes_richcompare, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- 0, /* tp_iter */
- 0, /* tp_iternext */
- bytes_methods, /* tp_methods */
- 0, /* tp_members */
- 0, /* tp_getset */
- 0, /* tp_base */
- 0, /* tp_dict */
- 0, /* tp_descr_get */
- 0, /* tp_descr_set */
- 0, /* tp_dictoffset */
- (initproc)bytes_init, /* tp_init */
- PyType_GenericAlloc, /* tp_alloc */
- PyType_GenericNew, /* tp_new */
- PyObject_Del, /* tp_free */
+ (destructor)bytes_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ (reprfunc)bytes_repr, /* tp_repr */
+ 0, /* tp_as_number */
+ &bytes_as_sequence, /* tp_as_sequence */
+ &bytes_as_mapping, /* tp_as_mapping */
+ bytes_nohash, /* tp_hash */
+ 0, /* tp_call */
+ (reprfunc)bytes_str, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ &bytes_as_buffer, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES, /* tp_flags */
+ /* bytes is 'final' or 'sealed' */
+ bytes_doc, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ (richcmpfunc)bytes_richcompare, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ bytes_methods, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ (initproc)bytes_init, /* tp_init */
+ PyType_GenericAlloc, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+ PyObject_Del, /* tp_free */
};
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
index 632ab04..ab2616d 100644
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -880,6 +880,11 @@ file_readinto(PyFileObject *f, PyObject *args)
if (f->f_fp == NULL)
return err_closed();
+ if (!f->f_binary) {
+ PyErr_SetString(PyExc_TypeError,
+ "readinto() requires binary mode");
+ return NULL;
+ }
/* refuse to mix with f.next() */
if (f->f_buf != NULL &&
(f->f_bufend - f->f_bufptr) > 0 &&