diff options
Diffstat (limited to 'Modules/_io/bytesio.c')
-rw-r--r-- | Modules/_io/bytesio.c | 263 |
1 files changed, 260 insertions, 3 deletions
diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index c8fb3eb..b40513f 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -10,8 +10,15 @@ typedef struct { size_t buf_size; PyObject *dict; PyObject *weakreflist; + Py_ssize_t exports; } bytesio; +typedef struct { + PyObject_HEAD + bytesio *source; +} bytesiobuf; + + #define CHECK_CLOSED(self) \ if ((self)->buf == NULL) { \ PyErr_SetString(PyExc_ValueError, \ @@ -19,6 +26,14 @@ typedef struct { return NULL; \ } +#define CHECK_EXPORTS(self) \ + if ((self)->exports > 0) { \ + PyErr_SetString(PyExc_BufferError, \ + "Existing exports of data: object cannot be re-sized"); \ + return NULL; \ + } + + /* Internal routine to get a line from the buffer of a BytesIO object. Returns the length between the current position to the next newline character. */ @@ -173,6 +188,30 @@ bytesio_flush(bytesio *self) Py_RETURN_NONE; } +PyDoc_STRVAR(getbuffer_doc, +"getbuffer() -> bytes.\n" +"\n" +"Get a read-write view over the contents of the BytesIO object."); + +static PyObject * +bytesio_getbuffer(bytesio *self) +{ + PyTypeObject *type = &_PyBytesIOBuffer_Type; + bytesiobuf *buf; + PyObject *view; + + CHECK_CLOSED(self); + + buf = (bytesiobuf *) type->tp_alloc(type, 0); + if (buf == NULL) + return NULL; + Py_INCREF(self); + buf->source = self; + view = PyMemoryView_FromObject((PyObject *) buf); + Py_DECREF(buf); + return view; +} + PyDoc_STRVAR(getval_doc, "getvalue() -> bytes.\n" "\n" @@ -427,6 +466,7 @@ bytesio_truncate(bytesio *self, PyObject *args) PyObject *arg = Py_None; CHECK_CLOSED(self); + CHECK_EXPORTS(self); if (!PyArg_ParseTuple(args, "|O:truncate", &arg)) return NULL; @@ -548,6 +588,7 @@ bytesio_write(bytesio *self, PyObject *obj) PyObject *result = NULL; CHECK_CLOSED(self); + CHECK_EXPORTS(self); if (PyObject_GetBuffer(obj, &buf, PyBUF_CONTIG_RO) < 0) return NULL; @@ -611,10 +652,130 @@ bytesio_close(bytesio *self) Py_RETURN_NONE; } +/* Pickling support. + + Note that only pickle protocol 2 and onward are supported since we use + extended __reduce__ API of PEP 307 to make BytesIO instances picklable. + + Providing support for protocol < 2 would require the __reduce_ex__ method + which is notably long-winded when defined properly. + + For BytesIO, the implementation would similar to one coded for + object.__reduce_ex__, but slightly less general. To be more specific, we + could call bytesio_getstate directly and avoid checking for the presence of + a fallback __reduce__ method. However, we would still need a __newobj__ + function to use the efficient instance representation of PEP 307. + */ + +static PyObject * +bytesio_getstate(bytesio *self) +{ + PyObject *initvalue = bytesio_getvalue(self); + PyObject *dict; + PyObject *state; + + if (initvalue == NULL) + return NULL; + if (self->dict == NULL) { + Py_INCREF(Py_None); + dict = Py_None; + } + else { + dict = PyDict_Copy(self->dict); + if (dict == NULL) + return NULL; + } + + state = Py_BuildValue("(OnN)", initvalue, self->pos, dict); + Py_DECREF(initvalue); + return state; +} + +static PyObject * +bytesio_setstate(bytesio *self, PyObject *state) +{ + PyObject *result; + PyObject *position_obj; + PyObject *dict; + Py_ssize_t pos; + + assert(state != NULL); + + /* We allow the state tuple to be longer than 3, because we may need + someday to extend the object's state without breaking + backward-compatibility. */ + if (!PyTuple_Check(state) || Py_SIZE(state) < 3) { + PyErr_Format(PyExc_TypeError, + "%.200s.__setstate__ argument should be 3-tuple, got %.200s", + Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name); + return NULL; + } + CHECK_EXPORTS(self); + /* Reset the object to its default state. This is only needed to handle + the case of repeated calls to __setstate__. */ + self->string_size = 0; + self->pos = 0; + + /* Set the value of the internal buffer. If state[0] does not support the + buffer protocol, bytesio_write will raise the appropriate TypeError. */ + result = bytesio_write(self, PyTuple_GET_ITEM(state, 0)); + if (result == NULL) + return NULL; + Py_DECREF(result); + + /* Set carefully the position value. Alternatively, we could use the seek + method instead of modifying self->pos directly to better protect the + object internal state against errneous (or malicious) inputs. */ + position_obj = PyTuple_GET_ITEM(state, 1); + if (!PyLong_Check(position_obj)) { + PyErr_Format(PyExc_TypeError, + "second item of state must be an integer, not %.200s", + Py_TYPE(position_obj)->tp_name); + return NULL; + } + pos = PyLong_AsSsize_t(position_obj); + if (pos == -1 && PyErr_Occurred()) + return NULL; + if (pos < 0) { + PyErr_SetString(PyExc_ValueError, + "position value cannot be negative"); + return NULL; + } + self->pos = pos; + + /* Set the dictionary of the instance variables. */ + dict = PyTuple_GET_ITEM(state, 2); + if (dict != Py_None) { + if (!PyDict_Check(dict)) { + PyErr_Format(PyExc_TypeError, + "third item of state should be a dict, got a %.200s", + Py_TYPE(dict)->tp_name); + return NULL; + } + if (self->dict) { + /* Alternatively, we could replace the internal dictionary + completely. However, it seems more practical to just update it. */ + if (PyDict_Update(self->dict, dict) < 0) + return NULL; + } + else { + Py_INCREF(dict); + self->dict = dict; + } + } + + Py_RETURN_NONE; +} + static void bytesio_dealloc(bytesio *self) { _PyObject_GC_UNTRACK(self); + if (self->exports > 0) { + PyErr_SetString(PyExc_SystemError, + "deallocated BytesIO object has exported buffers"); + PyErr_Print(); + } if (self->buf != NULL) { PyMem_Free(self->buf); self->buf = NULL; @@ -635,9 +796,9 @@ bytesio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (self == NULL) return NULL; - self->string_size = 0; - self->pos = 0; - self->buf_size = 0; + /* tp_alloc initializes all the fields to zero. So we don't have to + initialize them here. */ + self->buf = (char *)PyMem_Malloc(0); if (self->buf == NULL) { Py_DECREF(self); @@ -709,9 +870,12 @@ static struct PyMethodDef bytesio_methods[] = { {"readline", (PyCFunction)bytesio_readline, METH_VARARGS, readline_doc}, {"readlines", (PyCFunction)bytesio_readlines, METH_VARARGS, readlines_doc}, {"read", (PyCFunction)bytesio_read, METH_VARARGS, read_doc}, + {"getbuffer", (PyCFunction)bytesio_getbuffer, METH_NOARGS, getbuffer_doc}, {"getvalue", (PyCFunction)bytesio_getvalue, METH_NOARGS, getval_doc}, {"seek", (PyCFunction)bytesio_seek, METH_VARARGS, seek_doc}, {"truncate", (PyCFunction)bytesio_truncate, METH_VARARGS, truncate_doc}, + {"__getstate__", (PyCFunction)bytesio_getstate, METH_NOARGS, NULL}, + {"__setstate__", (PyCFunction)bytesio_setstate, METH_O, NULL}, {NULL, NULL} /* sentinel */ }; @@ -762,3 +926,96 @@ PyTypeObject PyBytesIO_Type = { 0, /*tp_alloc*/ bytesio_new, /*tp_new*/ }; + + +/* + * Implementation of the small intermediate object used by getbuffer(). + * getbuffer() returns a memoryview over this object, which should make it + * invisible from Python code. + */ + +static int +bytesiobuf_getbuffer(bytesiobuf *obj, Py_buffer *view, int flags) +{ + int ret; + void *ptr; + bytesio *b = (bytesio *) obj->source; + if (view == NULL) { + b->exports++; + return 0; + } + ptr = (void *) obj; + ret = PyBuffer_FillInfo(view, (PyObject*)obj, b->buf, b->string_size, + 0, flags); + if (ret >= 0) { + b->exports++; + } + return ret; +} + +static void +bytesiobuf_releasebuffer(bytesiobuf *obj, Py_buffer *view) +{ + bytesio *b = (bytesio *) obj->source; + b->exports--; +} + +static int +bytesiobuf_traverse(bytesiobuf *self, visitproc visit, void *arg) +{ + Py_VISIT(self->source); + return 0; +} + +static void +bytesiobuf_dealloc(bytesiobuf *self) +{ + Py_CLEAR(self->source); + Py_TYPE(self)->tp_free(self); +} + +static PyBufferProcs bytesiobuf_as_buffer = { + (getbufferproc) bytesiobuf_getbuffer, + (releasebufferproc) bytesiobuf_releasebuffer, +}; + +PyTypeObject _PyBytesIOBuffer_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io._BytesIOBuffer", /*tp_name*/ + sizeof(bytesiobuf), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)bytesiobuf_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_reserved*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + &bytesiobuf_as_buffer, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + 0, /*tp_doc*/ + (traverseproc)bytesiobuf_traverse, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + 0, /*tp_methods*/ + 0, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + 0, /*tp_dictoffset*/ + 0, /*tp_init*/ + 0, /*tp_alloc*/ + 0, /*tp_new*/ +}; |