From 3486a98dcd7f11215b61be3428edbbc9b6aa3164 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 12 May 2011 01:57:53 +0200 Subject: Issue #9971: Write an optimized implementation of BufferedReader.readinto(). Patch by John O'Connor. --- Lib/test/test_io.py | 6 ++++ Misc/ACKS | 1 + Misc/NEWS | 4 +++ Modules/_io/bufferedio.c | 86 +++++++++++++++++++++++++++++++++++++++--------- 4 files changed, 81 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 8d293d0..153548e 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -794,6 +794,12 @@ class BufferedReaderTest(unittest.TestCase, CommonBufferedTests): self.assertEqual(b, b"gf") self.assertEqual(bufio.readinto(b), 0) self.assertEqual(b, b"gf") + rawio = self.MockRawIO((b"abc", None)) + bufio = self.tp(rawio) + self.assertEqual(bufio.readinto(b), 2) + self.assertEqual(b, b"ab") + self.assertEqual(bufio.readinto(b), 1) + self.assertEqual(b, b"cb") def test_readlines(self): def bufio(): diff --git a/Misc/ACKS b/Misc/ACKS index 36778ff..506e5c9 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -645,6 +645,7 @@ Neal Norwitz Michal Nowikowski Steffen Daode Nurpmeso Nigel O'Brian +John O'Connor Kevin O'Connor Tim O'Malley Pascal Oberndoerfer diff --git a/Misc/NEWS b/Misc/NEWS index c9946bc..be26c72 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -142,6 +142,10 @@ Core and Builtins Library ------- + +- Issue #9971: Write an optimized implementation of BufferedReader.readinto(). + Patch by John O'Connor. + - Issue #1028: Tk returns invalid Unicode null in %A: UnicodeDecodeError. With Tk < 8.5 _tkinter.c:PythonCmd() raised UnicodeDecodeError, caused IDLE to exit. Converted to valid Unicode null in PythonCmd(). diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index 386a880..8e3d3c6 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -1,9 +1,9 @@ /* An implementation of Buffered I/O as defined by PEP 3116 - "New I/O" - + Classes defined here: BufferedIOBase, BufferedReader, BufferedWriter, BufferedRandom. - + Written by Amaury Forgeot d'Arc and Antoine Pitrou */ @@ -198,7 +198,7 @@ typedef struct { int readable; int writable; int deallocating; - + /* True if this is a vanilla Buffered object (rather than a user derived class) *and* the raw stream is a vanilla FileIO object. */ int fast_closed_checks; @@ -237,7 +237,7 @@ typedef struct { /* Implementation notes: - + * BufferedReader, BufferedWriter and BufferedRandom try to share most methods (this is helped by the members `readable` and `writable`, which are initialized in the respective constructors) @@ -255,7 +255,7 @@ typedef struct { NOTE: we should try to maintain block alignment of reads and writes to the raw stream (according to the buffer size), but for now it is only done in read() and friends. - + */ /* These macros protect the buffered object against concurrent operations. */ @@ -596,7 +596,8 @@ static PyObject * _bufferedreader_read_fast(buffered *self, Py_ssize_t); static PyObject * _bufferedreader_read_generic(buffered *self, Py_ssize_t); - +static Py_ssize_t +_bufferedreader_raw_read(buffered *self, char *start, Py_ssize_t len); /* * Helpers @@ -635,7 +636,7 @@ _buffered_raw_tell(buffered *self) if (!PyErr_Occurred()) PyErr_Format(PyExc_IOError, "Raw stream returned invalid position %" PY_PRIdOFF, - (PY_OFF_T_COMPAT)n); + (PY_OFF_T_COMPAT)n); return -1; } self->abs_pos = n; @@ -668,7 +669,7 @@ _buffered_raw_seek(buffered *self, Py_off_t target, int whence) if (!PyErr_Occurred()) PyErr_Format(PyExc_IOError, "Raw stream returned invalid position %" PY_PRIdOFF, - (PY_OFF_T_COMPAT)n); + (PY_OFF_T_COMPAT)n); return -1; } self->abs_pos = n; @@ -863,7 +864,7 @@ buffered_read1(buffered *self, PyObject *args) if (!ENTER_BUFFERED(self)) return NULL; - + if (self->writable) { res = _bufferedwriter_flush_unlocked(self, 1); if (res == NULL) @@ -912,23 +913,75 @@ end: static PyObject * buffered_readinto(buffered *self, PyObject *args) { + Py_buffer buf; + Py_ssize_t n, written = 0, remaining; PyObject *res = NULL; CHECK_INITIALIZED(self) - - /* TODO: use raw.readinto() instead! */ + + if (!PyArg_ParseTuple(args, "w*:readinto", &buf)) + return NULL; + + n = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); + if (n > 0) { + if (n >= buf.len) { + memcpy(buf.buf, self->buffer + self->pos, buf.len); + self->pos += buf.len; + res = PyLong_FromSsize_t(buf.len); + goto end_unlocked; + } + memcpy(buf.buf, self->buffer + self->pos, n); + self->pos += n; + written = n; + } + + if (!ENTER_BUFFERED(self)) + goto end_unlocked; + if (self->writable) { - if (!ENTER_BUFFERED(self)) - return NULL; res = _bufferedwriter_flush_unlocked(self, 0); - LEAVE_BUFFERED(self) if (res == NULL) goto end; - Py_DECREF(res); + Py_CLEAR(res); } - res = bufferediobase_readinto((PyObject *)self, args); + + _bufferedreader_reset_buf(self); + self->pos = 0; + + for (remaining = buf.len - written; + remaining > 0; + written += n, remaining -= n) { + /* If remaining bytes is larger than internal buffer size, copy + * directly into caller's buffer. */ + if (remaining > self->buffer_size) { + n = _bufferedreader_raw_read(self, buf.buf + written, remaining); + } + else { + n = _bufferedreader_fill_buffer(self); + if (n > 0) { + if (n > remaining) + n = remaining; + memcpy(buf.buf + written, self->buffer + self->pos, n); + self->pos += n; + continue; /* short circuit */ + } + } + if (n == 0 || (n == -2 && written > 0)) + break; + if (n < 0) { + if (n == -2) { + Py_INCREF(Py_None); + res = Py_None; + } + goto end; + } + } + res = PyLong_FromSsize_t(written); end: + LEAVE_BUFFERED(self); +end_unlocked: + PyBuffer_Release(&buf); return res; } @@ -1573,6 +1626,7 @@ static PyMethodDef bufferedreader_methods[] = { {"read", (PyCFunction)buffered_read, METH_VARARGS}, {"peek", (PyCFunction)buffered_peek, METH_VARARGS}, {"read1", (PyCFunction)buffered_read1, METH_VARARGS}, + {"readinto", (PyCFunction)buffered_readinto, METH_VARARGS}, {"readline", (PyCFunction)buffered_readline, METH_VARARGS}, {"seek", (PyCFunction)buffered_seek, METH_VARARGS}, {"tell", (PyCFunction)buffered_tell, METH_NOARGS}, -- cgit v0.12