summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/io.rst18
-rw-r--r--Lib/_pyio.py5
-rw-r--r--Lib/test/test_memoryio.py26
-rw-r--r--Misc/NEWS4
-rw-r--r--Modules/_io/_iomodule.c2
-rw-r--r--Modules/_io/_iomodule.h2
-rw-r--r--Modules/_io/bytesio.c141
7 files changed, 197 insertions, 1 deletions
diff --git a/Doc/library/io.rst b/Doc/library/io.rst
index 2476acc..e61aa90 100644
--- a/Doc/library/io.rst
+++ b/Doc/library/io.rst
@@ -518,6 +518,24 @@ In many situations, buffered I/O streams will provide higher performance
:class:`BytesIO` provides or overrides these methods in addition to those
from :class:`BufferedIOBase` and :class:`IOBase`:
+ .. method:: getbuffer()
+
+ Return a readable and writable view over the contents of the buffer
+ without copying them. Also, mutating the view will transparently
+ update the contents of the buffer::
+
+ >>> b = io.BytesIO(b"abcdef")
+ >>> view = b.getbuffer()
+ >>> view[2:4] = b"56"
+ >>> b.getvalue()
+ b'ab56ef'
+
+ .. note::
+ As long as the view exists, the :class:`BytesIO` object cannot be
+ resized.
+
+ .. versionadded:: 3.2
+
.. method:: getvalue()
Return ``bytes`` containing the entire contents of the buffer.
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index 12ae4b6..6b25640 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -785,6 +785,11 @@ class BytesIO(BufferedIOBase):
raise ValueError("getvalue on closed file")
return bytes(self._buffer)
+ def getbuffer(self):
+ """Return a readable and writable view of the buffer.
+ """
+ return memoryview(self._buffer)
+
def read(self, n=None):
if self.closed:
raise ValueError("read from closed file")
diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py
index 0decda5..dcf6d51 100644
--- a/Lib/test/test_memoryio.py
+++ b/Lib/test/test_memoryio.py
@@ -384,7 +384,31 @@ class MemoryTestMixin:
del __main__.PickleTestMemIO
-class PyBytesIOTest(MemoryTestMixin, MemorySeekTestMixin, unittest.TestCase):
+class BytesIOMixin:
+
+ def test_getbuffer(self):
+ memio = self.ioclass(b"1234567890")
+ buf = memio.getbuffer()
+ self.assertEqual(bytes(buf), b"1234567890")
+ memio.seek(5)
+ buf = memio.getbuffer()
+ self.assertEqual(bytes(buf), b"1234567890")
+ # Trying to change the size of the BytesIO while a buffer is exported
+ # raises a BufferError.
+ self.assertRaises(BufferError, memio.write, b'x' * 100)
+ self.assertRaises(BufferError, memio.truncate)
+ # Mutating the buffer updates the BytesIO
+ buf[3:6] = b"abc"
+ self.assertEqual(bytes(buf), b"123abc7890")
+ self.assertEqual(memio.getvalue(), b"123abc7890")
+ # After the buffer gets released, we can resize the BytesIO again
+ del buf
+ support.gc_collect()
+ memio.truncate()
+
+
+class PyBytesIOTest(MemoryTestMixin, MemorySeekTestMixin,
+ BytesIOMixin, unittest.TestCase):
UnsupportedOperation = pyio.UnsupportedOperation
diff --git a/Misc/NEWS b/Misc/NEWS
index 1ac1dec..a64f694 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -13,6 +13,10 @@ Core and Builtins
Library
-------
+- Issue #5506: BytesIO objects now have a getbuffer() method exporting a
+ view of their contents without duplicating them. The view is both readable
+ and writable.
+
- Issue #7566: Implement os.path.sameopenfile for Windows.
- Issue #9293: I/O streams now raise ``io.UnsupportedOperation`` when an
diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c
index 733a7b9..c0c8154 100644
--- a/Modules/_io/_iomodule.c
+++ b/Modules/_io/_iomodule.c
@@ -710,6 +710,8 @@ PyInit__io(void)
/* BytesIO */
PyBytesIO_Type.tp_base = &PyBufferedIOBase_Type;
ADD_TYPE(&PyBytesIO_Type, "BytesIO");
+ if (PyType_Ready(&_PyBytesIOBuffer_Type) < 0)
+ goto fail;
/* StringIO */
PyStringIO_Type.tp_base = &PyTextIOBase_Type;
diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h
index 2b8e8a1..925e4f2 100644
--- a/Modules/_io/_iomodule.h
+++ b/Modules/_io/_iomodule.h
@@ -169,3 +169,5 @@ extern PyObject *_PyIO_str_write;
extern PyObject *_PyIO_empty_str;
extern PyObject *_PyIO_empty_bytes;
extern PyObject *_PyIO_zero;
+
+extern PyTypeObject _PyBytesIOBuffer_Type;
diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c
index 3ef9e2e..c565404 100644
--- a/Modules/_io/bytesio.c
+++ b/Modules/_io/bytesio.c
@@ -10,8 +10,15 @@ typedef struct {
size_t buf_size;
PyObject *dict;
PyObject *weakreflist;
+ Py_ssize_t exports;
} bytesio;
+typedef struct {  /* helper object that the memoryview returned by getbuffer() is built over */
+ PyObject_HEAD
+ bytesio *source;  /* BytesIO whose buffer is exposed; a reference is held (taken in bytesio_getbuffer, dropped in bytesiobuf_dealloc) */
+} bytesiobuf;
+
+
#define CHECK_CLOSED(self) \
if ((self)->buf == NULL) { \
PyErr_SetString(PyExc_ValueError, \
@@ -19,6 +26,14 @@ typedef struct {
return NULL; \
}
+#define CHECK_EXPORTS(self) \
+ if ((self)->exports > 0) { \
+ PyErr_SetString(PyExc_BufferError, \
+ "Existing exports of data: object cannot be re-sized"); \
+ return NULL; \
+ }  /* guard for resizing operations: sets BufferError and makes the caller return NULL while any buffer view is exported */
+
+
/* Internal routine to get a line from the buffer of a BytesIO
object. Returns the length between the current position to the
next newline character. */
@@ -173,6 +188,30 @@ bytesio_flush(bytesio *self)
Py_RETURN_NONE;
}
+PyDoc_STRVAR(getbuffer_doc,
+"getbuffer() -> memoryview.\n"
+"\n"
+"Get a read-write view over the contents of the BytesIO object.");
+
+/* Return a memoryview over the contents of the BytesIO object.
+ *
+ * The memoryview is not built over the BytesIO itself but over a small
+ * bytesiobuf object that holds a reference to the BytesIO and implements
+ * the buffer protocol on its behalf.
+ *
+ * Returns a new reference, or NULL with an exception set when the
+ * CHECK_CLOSED guard fails, the allocation fails, or the memoryview
+ * cannot be created.
+ */
+static PyObject *
+bytesio_getbuffer(bytesio *self)
+{
+    PyTypeObject *type = &_PyBytesIOBuffer_Type;
+    bytesiobuf *buf;
+    PyObject *view;
+
+    CHECK_CLOSED(self);
+
+    buf = (bytesiobuf *) type->tp_alloc(type, 0);
+    if (buf == NULL)
+        return NULL;
+    Py_INCREF(self);
+    buf->source = self;
+    view = PyMemoryView_FromObject((PyObject *) buf);
+    /* Drop our own reference: on success the view keeps buf alive, on
+       failure this frees buf (and releases its reference to self). */
+    Py_DECREF(buf);
+    return view;
+}
+
PyDoc_STRVAR(getval_doc,
"getvalue() -> bytes.\n"
"\n"
@@ -422,6 +461,7 @@ bytesio_truncate(bytesio *self, PyObject *args)
PyObject *arg = Py_None;
CHECK_CLOSED(self);
+ CHECK_EXPORTS(self);
if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
return NULL;
@@ -543,6 +583,7 @@ bytesio_write(bytesio *self, PyObject *obj)
PyObject *result = NULL;
CHECK_CLOSED(self);
+ CHECK_EXPORTS(self);
if (PyObject_GetBuffer(obj, &buf, PyBUF_CONTIG_RO) < 0)
return NULL;
@@ -664,6 +705,7 @@ bytesio_setstate(bytesio *self, PyObject *state)
Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
return NULL;
}
+ CHECK_EXPORTS(self);
/* Reset the object to its default state. This is only needed to handle
the case of repeated calls to __setstate__. */
self->string_size = 0;
@@ -724,6 +766,11 @@ static void
bytesio_dealloc(bytesio *self)
{
_PyObject_GC_UNTRACK(self);
+ if (self->exports > 0) {
+ PyErr_SetString(PyExc_SystemError,
+ "deallocated BytesIO object has exported buffers");
+ PyErr_Print();
+ }
if (self->buf != NULL) {
PyMem_Free(self->buf);
self->buf = NULL;
@@ -818,6 +865,7 @@ static struct PyMethodDef bytesio_methods[] = {
{"readline", (PyCFunction)bytesio_readline, METH_VARARGS, readline_doc},
{"readlines", (PyCFunction)bytesio_readlines, METH_VARARGS, readlines_doc},
{"read", (PyCFunction)bytesio_read, METH_VARARGS, read_doc},
+ {"getbuffer", (PyCFunction)bytesio_getbuffer, METH_NOARGS, getbuffer_doc},
{"getvalue", (PyCFunction)bytesio_getvalue, METH_NOARGS, getval_doc},
{"seek", (PyCFunction)bytesio_seek, METH_VARARGS, seek_doc},
{"truncate", (PyCFunction)bytesio_truncate, METH_VARARGS, truncate_doc},
@@ -873,3 +921,96 @@ PyTypeObject PyBytesIO_Type = {
0, /*tp_alloc*/
bytesio_new, /*tp_new*/
};
+
+
+/*
+ * Implementation of the small intermediate object used by getbuffer().
+ * getbuffer() returns a memoryview over this object, which should make it
+ * invisible from Python code.
+ */
+
+/* Buffer-protocol "get" handler for the intermediate object.
+ *
+ * Exposes the source BytesIO's storage (b->buf, b->string_size) as a
+ * writable buffer (the readonly argument is 0) and increments b->exports
+ * on success, so that operations guarded by CHECK_EXPORTS are refused
+ * until the export is released.
+ *
+ * Returns the result of PyBuffer_FillInfo(), or 0 when view is NULL.
+ */
+static int
+bytesiobuf_getbuffer(bytesiobuf *obj, Py_buffer *view, int flags)
+{
+    int ret;
+    bytesio *b = (bytesio *) obj->source;
+
+    if (view == NULL) {
+        /* Nothing to fill in: only record the export. */
+        b->exports++;
+        return 0;
+    }
+    ret = PyBuffer_FillInfo(view, (PyObject*)obj, b->buf, b->string_size,
+                            0, flags);
+    if (ret >= 0) {
+        b->exports++;
+    }
+    return ret;
+}
+
+/* Buffer-protocol "release" handler: one export of the source BytesIO has
+   gone away, so decrement its export counter. */
+static void
+bytesiobuf_releasebuffer(bytesiobuf *obj, Py_buffer *view)
+{
+    obj->source->exports--;
+}
+
+static int
+bytesiobuf_traverse(bytesiobuf *self, visitproc visit, void *arg)
+{
+ Py_VISIT(self->source);  /* report the referenced BytesIO to the cyclic GC; the macro relies on the parameter names visit and arg */
+ return 0;
+}
+
+/* Deallocator: release the reference to the source BytesIO and free the
+   object's memory. */
+static void
+bytesiobuf_dealloc(bytesiobuf *self)
+{
+    /* The type is declared with Py_TPFLAGS_HAVE_GC, so the object must be
+       removed from the collector's tracking before its fields are torn
+       down; otherwise a collection triggered during Py_CLEAR could
+       traverse a half-destroyed object. */
+    PyObject_GC_UnTrack(self);
+    Py_CLEAR(self->source);
+    Py_TYPE(self)->tp_free(self);
+}
+
+static PyBufferProcs bytesiobuf_as_buffer = {
+ (getbufferproc) bytesiobuf_getbuffer,  /* bf_getbuffer */
+ (releasebufferproc) bytesiobuf_releasebuffer,  /* bf_releasebuffer */
+};
+
+PyTypeObject _PyBytesIOBuffer_Type = {  /* type of the intermediate object created by bytesio_getbuffer(); only the dealloc, buffer and GC-traverse slots are filled in */
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "_io._BytesIOBuffer", /*tp_name*/
+ sizeof(bytesiobuf), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ (destructor)bytesiobuf_dealloc, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ 0, /*tp_reserved*/
+ 0, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ 0, /*tp_hash*/
+ 0, /*tp_call*/
+ 0, /*tp_str*/
+ 0, /*tp_getattro*/
+ 0, /*tp_setattro*/
+ &bytesiobuf_as_buffer, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
+ 0, /*tp_doc*/
+ (traverseproc)bytesiobuf_traverse, /*tp_traverse*/
+ 0, /*tp_clear*/
+ 0, /*tp_richcompare*/
+ 0, /*tp_weaklistoffset*/
+ 0, /*tp_iter*/
+ 0, /*tp_iternext*/
+ 0, /*tp_methods*/
+ 0, /*tp_members*/
+ 0, /*tp_getset*/
+ 0, /*tp_base*/
+ 0, /*tp_dict*/
+ 0, /*tp_descr_get*/
+ 0, /*tp_descr_set*/
+ 0, /*tp_dictoffset*/
+ 0, /*tp_init*/
+ 0, /*tp_alloc*/
+ 0, /*tp_new*/
+};