6 files changed, 258 insertions, 76 deletions
diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c
index 44bdac6..6f5bd48 100644
--- a/Modules/_io/_iomodule.c
+++ b/Modules/_io/_iomodule.c
@@ -36,6 +36,7 @@ PyObject *_PyIO_str_nl;
 PyObject *_PyIO_str_read;
 PyObject *_PyIO_str_read1;
 PyObject *_PyIO_str_readable;
+PyObject *_PyIO_str_readall;
 PyObject *_PyIO_str_readinto;
 PyObject *_PyIO_str_readline;
 PyObject *_PyIO_str_reset;
@@ -767,6 +768,8 @@ PyInit__io(void)
         goto fail;
     if (!(_PyIO_str_readable = PyUnicode_InternFromString("readable")))
         goto fail;
+    if (!(_PyIO_str_readall = PyUnicode_InternFromString("readall")))
+        goto fail;
     if (!(_PyIO_str_readinto = PyUnicode_InternFromString("readinto")))
         goto fail;
     if (!(_PyIO_str_readline = PyUnicode_InternFromString("readline")))
diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h
index 925e4f2..9174bdd 100644
--- a/Modules/_io/_iomodule.h
+++ b/Modules/_io/_iomodule.h
@@ -155,6 +155,7 @@ extern PyObject *_PyIO_str_nl;
 extern PyObject *_PyIO_str_read;
 extern PyObject *_PyIO_str_read1;
 extern PyObject *_PyIO_str_readable;
+extern PyObject *_PyIO_str_readall;
 extern PyObject *_PyIO_str_readinto;
 extern PyObject *_PyIO_str_readline;
 extern PyObject *_PyIO_str_reset;
diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c
index d6f0c9c..a8631e0 100644
--- a/Modules/_io/bufferedio.c
+++ b/Modules/_io/bufferedio.c
@@ -1,9 +1,9 @@
 /*
     An implementation of Buffered I/O as defined by PEP 3116 - "New I/O"
-    
+
     Classes defined here: BufferedIOBase, BufferedReader, BufferedWriter,
     BufferedRandom.
-    
+
     Written by Amaury Forgeot d'Arc and Antoine Pitrou
 */
 
@@ -198,7 +198,7 @@ typedef struct {
     int readable;
     int writable;
     int deallocating;
-    
+
     /* True if this is a vanilla Buffered object (rather than a user derived
        class) *and* the raw stream is a vanilla FileIO object. */
     int fast_closed_checks;
@@ -237,7 +237,7 @@ typedef struct {
 
 /*
     Implementation notes:
-    
+
     * BufferedReader, BufferedWriter and BufferedRandom try to share most
       methods (this is helped by the members `readable` and `writable`, which
       are initialized in the respective constructors)
@@ -255,7 +255,7 @@ typedef struct {
     NOTE: we should try to maintain block alignment of reads and writes to the
     raw stream (according to the buffer size), but for now it is only done
     in read() and friends.
-    
+
 */
 
 /* These macros protect the buffered object against concurrent operations. */
@@ -589,14 +589,15 @@ _bufferedreader_reset_buf(buffered *self);
 static void
 _bufferedwriter_reset_buf(buffered *self);
 static PyObject *
-_bufferedreader_peek_unlocked(buffered *self, Py_ssize_t);
+_bufferedreader_peek_unlocked(buffered *self);
 static PyObject *
 _bufferedreader_read_all(buffered *self);
 static PyObject *
 _bufferedreader_read_fast(buffered *self, Py_ssize_t);
 static PyObject *
 _bufferedreader_read_generic(buffered *self, Py_ssize_t);
-
+static Py_ssize_t
+_bufferedreader_raw_read(buffered *self, char *start, Py_ssize_t len);
 
 /*
  * Helpers
@@ -635,7 +636,7 @@ _buffered_raw_tell(buffered *self)
         if (!PyErr_Occurred())
             PyErr_Format(PyExc_IOError,
                          "Raw stream returned invalid position %" PY_PRIdOFF,
-			 (PY_OFF_T_COMPAT)n);
+                         (PY_OFF_T_COMPAT)n);
         return -1;
     }
     self->abs_pos = n;
@@ -668,7 +669,7 @@ _buffered_raw_seek(buffered *self, Py_off_t target, int whence)
         if (!PyErr_Occurred())
             PyErr_Format(PyExc_IOError,
                          "Raw stream returned invalid position %" PY_PRIdOFF,
-			 (PY_OFF_T_COMPAT)n);
+                         (PY_OFF_T_COMPAT)n);
         return -1;
     }
     self->abs_pos = n;
@@ -809,7 +810,7 @@ buffered_peek(buffered *self, PyObject *args)
             goto end;
         Py_CLEAR(res);
     }
-    res = _bufferedreader_peek_unlocked(self, n);
+    res = _bufferedreader_peek_unlocked(self);
 
 end:
     LEAVE_BUFFERED(self)
@@ -875,7 +876,7 @@ buffered_read1(buffered *self, PyObject *args)
 
     if (!ENTER_BUFFERED(self))
         return NULL;
-    
+
     /* Return up to n bytes.  If at least one byte is buffered, we
        only return buffered bytes.  Otherwise, we do one raw read. */
 
@@ -924,10 +925,78 @@ end:
 static PyObject *
 buffered_readinto(buffered *self, PyObject *args)
 {
+    Py_buffer buf;
+    Py_ssize_t n, written = 0, remaining;
+    PyObject *res = NULL;
+
     CHECK_INITIALIZED(self)
-    
-    /* TODO: use raw.readinto() (or a direct copy from our buffer) instead! */
-    return bufferediobase_readinto((PyObject *)self, args);
+
+    if (!PyArg_ParseTuple(args, "w*:readinto", &buf))
+        return NULL;
+
+    n = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t);
+    if (n > 0) {
+        if (n >= buf.len) {
+            memcpy(buf.buf, self->buffer + self->pos, buf.len);
+            self->pos += buf.len;
+            res = PyLong_FromSsize_t(buf.len);
+            goto end_unlocked;
+        }
+        memcpy(buf.buf, self->buffer + self->pos, n);
+        self->pos += n;
+        written = n;
+    }
+
+    if (!ENTER_BUFFERED(self))
+        goto end_unlocked;
+
+    if (self->writable) {
+        res = buffered_flush_and_rewind_unlocked(self);
+        if (res == NULL)
+            goto end;
+        Py_CLEAR(res);
+    }
+
+    _bufferedreader_reset_buf(self);
+    self->pos = 0;
+
+    for (remaining = buf.len - written;
+         remaining > 0;
+         written += n, remaining -= n) {
+        /* If more bytes remain than the internal buffer can hold, copy
+         * directly into the caller's buffer. */
+        if (remaining > self->buffer_size) {
+            n = _bufferedreader_raw_read(self, (char *) buf.buf + written,
+                                         remaining);
+        }
+        else {
+            n = _bufferedreader_fill_buffer(self);
+            if (n > 0) {
+                if (n > remaining)
+                    n = remaining;
+                memcpy((char *) buf.buf + written,
+                       self->buffer + self->pos, n);
+                self->pos += n;
+                continue; /* short circuit */
+            }
+        }
+        if (n == 0 || (n == -2 && written > 0))
+            break;
+        if (n < 0) {
+            if (n == -2) {
+                Py_INCREF(Py_None);
+                res = Py_None;
+            }
+            goto end;
+        }
+    }
+    res = PyLong_FromSsize_t(written);
+
+end:
+    LEAVE_BUFFERED(self);
+end_unlocked:
+    PyBuffer_Release(&buf);
+    return res;
 }
 
 static PyObject *
@@ -1342,33 +1411,58 @@ static PyObject *
 _bufferedreader_read_all(buffered *self)
 {
     Py_ssize_t current_size;
-    PyObject *res, *data = NULL;
-    PyObject *chunks = PyList_New(0);
-
-    if (chunks == NULL)
-        return NULL;
+    PyObject *res, *data = NULL, *chunk, *chunks;
 
     /* First copy what we have in the current buffer. */
     current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t);
     if (current_size) {
         data = PyBytes_FromStringAndSize(
             self->buffer + self->pos, current_size);
-        if (data == NULL) {
-            Py_DECREF(chunks);
+        if (data == NULL)
             return NULL;
-        }
         self->pos += current_size;
     }
     /* We're going past the buffer's bounds, flush it */
     if (self->writable) {
         res = buffered_flush_and_rewind_unlocked(self);
-        if (res == NULL) {
-            Py_DECREF(chunks);
+        if (res == NULL)
             return NULL;
-        }
         Py_CLEAR(res);
     }
     _bufferedreader_reset_buf(self);
+
+    if (PyObject_HasAttr(self->raw, _PyIO_str_readall)) {
+        chunk = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_readall, NULL);
+        if (chunk == NULL)
+            return NULL;
+        if (chunk != Py_None && !PyBytes_Check(chunk)) {
+            Py_XDECREF(data);
+            Py_DECREF(chunk);
+            PyErr_SetString(PyExc_TypeError, "readall() should return bytes");
+            return NULL;
+        }
+        if (chunk == Py_None) {
+            if (current_size == 0)
+                return chunk;
+            else {
+                Py_DECREF(chunk);
+                return data;
+            }
+        }
+        else if (current_size) {
+            PyBytes_Concat(&data, chunk);
+            Py_DECREF(chunk);
+            if (data == NULL)
+                return NULL;
+            return data;
+        } else
+            return chunk;
+    }
+
+    chunks = PyList_New(0);
+    if (chunks == NULL)
+        return NULL;
+
     while (1) {
         if (data) {
             if (PyList_Append(chunks, data) < 0) {
@@ -1530,7 +1624,7 @@ error:
 }
 
 static PyObject *
-_bufferedreader_peek_unlocked(buffered *self, Py_ssize_t n)
+_bufferedreader_peek_unlocked(buffered *self)
 {
     Py_ssize_t have, r;
 
@@ -1572,6 +1666,7 @@ static PyMethodDef bufferedreader_methods[] = {
     {"read", (PyCFunction)buffered_read, METH_VARARGS},
     {"peek", (PyCFunction)buffered_peek, METH_VARARGS},
     {"read1", (PyCFunction)buffered_read1, METH_VARARGS},
+    {"readinto", (PyCFunction)buffered_readinto, METH_VARARGS},
     {"readline", (PyCFunction)buffered_readline, METH_VARARGS},
     {"seek", (PyCFunction)buffered_seek, METH_VARARGS},
     {"tell", (PyCFunction)buffered_tell, METH_NOARGS},
diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c
index b40513f..65ec931 100644
--- a/Modules/_io/bytesio.c
+++ b/Modules/_io/bytesio.c
@@ -938,13 +938,11 @@ static int
 bytesiobuf_getbuffer(bytesiobuf *obj, Py_buffer *view, int flags)
 {
     int ret;
-    void *ptr;
     bytesio *b = (bytesio *) obj->source;
     if (view == NULL) {
         b->exports++;
         return 0;
     }
-    ptr = (void *) obj;
     ret = PyBuffer_FillInfo(view, (PyObject*)obj, b->buf, b->string_size,
                             0, flags);
     if (ret >= 0) {
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
index b1d492b..3de1ff5 100644
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -547,14 +547,14 @@ fileio_readinto(fileio *self, PyObject *args)
 }
 
 static size_t
-new_buffersize(fileio *self, size_t currentsize)
+new_buffersize(fileio *self, size_t currentsize
+#ifdef HAVE_FSTAT
+               , off_t pos, off_t end
+#endif
+               )
 {
 #ifdef HAVE_FSTAT
-    off_t pos, end;
-    struct stat st;
-    if (fstat(self->fd, &st) == 0) {
-        end = st.st_size;
-        pos = lseek(self->fd, 0L, SEEK_CUR);
+    if (end != (off_t)-1) {
         /* Files claiming a size smaller than SMALLCHUNK may
            actually be streaming pseudo-files. In this case, we
            apply the more aggressive algorithm below.
@@ -579,9 +579,14 @@ new_buffersize(fileio *self, size_t currentsize)
 static PyObject *
 fileio_readall(fileio *self)
 {
+#ifdef HAVE_FSTAT
+    struct stat st;
+    off_t pos, end;
+#endif
     PyObject *result;
     Py_ssize_t total = 0;
     int n;
+    size_t newsize;
 
     if (self->fd < 0)
         return err_closed();
@@ -592,8 +597,23 @@ fileio_readall(fileio *self)
     if (result == NULL)
         return NULL;
 
+#ifdef HAVE_FSTAT
+#if defined(MS_WIN64) || defined(MS_WINDOWS)
+    pos = _lseeki64(self->fd, 0L, SEEK_CUR);
+#else
+    pos = lseek(self->fd, 0L, SEEK_CUR);
+#endif
+    if (fstat(self->fd, &st) == 0)
+        end = st.st_size;
+    else
+        end = (off_t)-1;
+#endif
     while (1) {
-        size_t newsize = new_buffersize(self, total);
+#ifdef HAVE_FSTAT
+        newsize = new_buffersize(self, total, pos, end);
+#else
+        newsize = new_buffersize(self, total);
+#endif
         if (newsize > PY_SSIZE_T_MAX || newsize <= 0) {
             PyErr_SetString(PyExc_OverflowError,
                 "unbounded read returned more bytes "
@@ -632,6 +652,9 @@ fileio_readall(fileio *self)
             return NULL;
         }
         total += n;
+#ifdef HAVE_FSTAT
+        pos += n;
+#endif
     }
 
     if (PyBytes_GET_SIZE(result) > total) {
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index 9c5f441..13d4bd9 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -680,12 +680,16 @@ typedef struct
     PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                       written, or NULL */
     Py_ssize_t pending_bytes_count;
-    PyObject *snapshot;
+
     /* snapshot is either None, or a tuple (dec_flags, next_input) where
      * dec_flags is the second (integer) item of the decoder state and
      * next_input is the chunk of input bytes that comes next after the
      * snapshot point.  We use this to reconstruct decoder states in tell().
      */
+    PyObject *snapshot;
+    /* Bytes-to-characters ratio for the current chunk. Serves as input for
+       the heuristic in tell(). */
+    double b2cratio;
 
     /* Cache raw object if it's a FileIO object */
     PyObject *raw;
@@ -852,6 +856,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
     self->decoded_chars_used = 0;
     self->pending_bytes_count = 0;
     self->encodefunc = NULL;
+    self->b2cratio = 0.0;
 
     if (encoding == NULL) {
         /* Try os.device_encoding(fileno) */
@@ -1397,6 +1402,7 @@ textiowrapper_read_chunk(textio *self)
     PyObject *dec_flags = NULL;
     PyObject *input_chunk = NULL;
     PyObject *decoded_chars, *chunk_size;
+    Py_ssize_t nbytes, nchars;
     int eof;
 
     /* The return value is True unless EOF was reached.  The decoded string is
@@ -1443,7 +1449,8 @@ textiowrapper_read_chunk(textio *self)
         goto fail;
     assert(PyBytes_Check(input_chunk));
 
-    eof = (PyBytes_Size(input_chunk) == 0);
+    nbytes = PyBytes_Size(input_chunk);
+    eof = (nbytes == 0);
 
     if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
         decoded_chars = _PyIncrementalNewlineDecoder_decode(
@@ -1458,7 +1465,12 @@ textiowrapper_read_chunk(textio *self)
     if (decoded_chars == NULL)
         goto fail;
     textiowrapper_set_decoded_chars(self, decoded_chars);
-    if (PyUnicode_GET_SIZE(decoded_chars) > 0)
+    nchars = PyUnicode_GET_SIZE(decoded_chars);
+    if (nchars > 0)
+        self->b2cratio = (double) nbytes / nchars;
+    else
+        self->b2cratio = 0.0;
+    if (nchars > 0)
         eof = 0;
 
     if (self->telling) {
@@ -1509,8 +1521,13 @@ textiowrapper_read(textio *self, PyObject *args)
         PyObject *decoded;
         if (bytes == NULL)
             goto fail;
-        decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
-                                             bytes, Py_True, NULL);
+
+        if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
+            decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
+                                                          bytes, 1);
+        else
+            decoded = PyObject_CallMethodObjArgs(
+                self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
         Py_DECREF(bytes);
         if (decoded == NULL)
             goto fail;
@@ -2147,8 +2164,12 @@ textiowrapper_tell(textio *self, PyObject *args)
     cookie_type cookie = {0,0,0,0,0};
     PyObject *next_input;
     Py_ssize_t chars_to_skip, chars_decoded;
+    Py_ssize_t skip_bytes, skip_back;
     PyObject *saved_state = NULL;
     char *input, *input_end;
+    char *dec_buffer;
+    Py_ssize_t dec_buffer_len;
+    int dec_flags;
 
     CHECK_INITIALIZED(self);
     CHECK_CLOSED(self);
@@ -2184,6 +2205,7 @@ textiowrapper_tell(textio *self, PyObject *args)
 #else
     cookie.start_pos = PyLong_AsLong(posobj);
 #endif
+    Py_DECREF(posobj);
     if (PyErr_Occurred())
         goto fail;
 
@@ -2198,57 +2220,99 @@ textiowrapper_tell(textio *self, PyObject *args)
     /* How many decoded characters have been used up since the snapshot? */
     if (self->decoded_chars_used == 0)  {
         /* We haven't moved from the snapshot point. */
-        Py_DECREF(posobj);
         return textiowrapper_build_cookie(&cookie);
     }
 
     chars_to_skip = self->decoded_chars_used;
 
-    /* Starting from the snapshot position, we will walk the decoder
-     * forward until it gives us enough decoded characters.
-     */
+    /* Decoder state will be restored at the end */
     saved_state = PyObject_CallMethodObjArgs(self->decoder,
                                              _PyIO_str_getstate, NULL);
     if (saved_state == NULL)
         goto fail;
 
-    /* Note our initial start point. */
-    if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
-        goto fail;
+#define DECODER_GETSTATE() do { \
+        PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
+            _PyIO_str_getstate, NULL); \
+        if (_state == NULL) \
+            goto fail; \
+        if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
+            Py_DECREF(_state); \
+            goto fail; \
+        } \
+        Py_DECREF(_state); \
+    } while (0)
+
+    /* TODO: replace assert with exception */
+#define DECODER_DECODE(start, len, res) do { \
+        PyObject *_decoded = PyObject_CallMethod( \
+            self->decoder, "decode", "y#", start, len); \
+        if (_decoded == NULL) \
+            goto fail; \
+        assert (PyUnicode_Check(_decoded)); \
+        res = PyUnicode_GET_SIZE(_decoded); \
+        Py_DECREF(_decoded); \
+    } while (0)
+
+    /* Fast search for an acceptable start point, close to our
+       current pos */
+    skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
+    skip_back = 1;
+    assert(skip_back <= PyBytes_GET_SIZE(next_input));
+    input = PyBytes_AS_STRING(next_input);
+    while (skip_bytes > 0) {
+        /* Decode up to tentative start point */
+        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
+            goto fail;
+        DECODER_DECODE(input, skip_bytes, chars_decoded);
+        if (chars_decoded <= chars_to_skip) {
+            DECODER_GETSTATE();
+            if (dec_buffer_len == 0) {
+                /* Before pos and no bytes buffered in decoder => OK */
+                cookie.dec_flags = dec_flags;
+                chars_to_skip -= chars_decoded;
+                break;
+            }
+            /* Skip back by buffered amount and reset heuristic */
+            skip_bytes -= dec_buffer_len;
+            skip_back = 1;
+        }
+        else {
+            /* We're too far ahead, skip back a bit */
+            skip_bytes -= skip_back;
+            skip_back *= 2;
+        }
+    }
+    if (skip_bytes <= 0) {
+        skip_bytes = 0;
+        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
+            goto fail;
+    }
 
-    /* Feed the decoder one byte at a time.  As we go, note the
-     * nearest "safe start point" before the current location
-     * (a point where the decoder has nothing buffered, so seek()
+    /* Note our initial start point. */
+    cookie.start_pos += skip_bytes;
+    cookie.chars_to_skip = chars_to_skip;
+    if (chars_to_skip == 0)
+        goto finally;
+
+    /* We should be close to the desired position.  Now feed the decoder one
+     * byte at a time until we reach the `chars_to_skip` target.
+     * As we go, note the nearest "safe start point" before the current
+     * location (a point where the decoder has nothing buffered, so seek()
      * can safely start from there and advance to this location).
      */
     chars_decoded = 0;
     input = PyBytes_AS_STRING(next_input);
     input_end = input + PyBytes_GET_SIZE(next_input);
+    input += skip_bytes;
     while (input < input_end) {
-        PyObject *state;
-        char *dec_buffer;
-        Py_ssize_t dec_buffer_len;
-        int dec_flags;
-
-        PyObject *decoded = PyObject_CallMethod(
-            self->decoder, "decode", "y#", input, 1);
-        if (decoded == NULL)
-            goto fail;
-        assert (PyUnicode_Check(decoded));
-        chars_decoded += PyUnicode_GET_SIZE(decoded);
-        Py_DECREF(decoded);
+        Py_ssize_t n;
 
+        DECODER_DECODE(input, 1, n);
+        /* We got n chars for 1 byte */
+        chars_decoded += n;
         cookie.bytes_to_feed += 1;
-
-        state = PyObject_CallMethodObjArgs(self->decoder,
-                                           _PyIO_str_getstate, NULL);
-        if (state == NULL)
-            goto fail;
-        if (!PyArg_Parse(state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
-            Py_DECREF(state);
-            goto fail;
-        }
-        Py_DECREF(state);
+        DECODER_GETSTATE();
 
         if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
             /* Decoder buffer is empty, so this is a safe start point. */
@@ -2280,8 +2344,7 @@ textiowrapper_tell(textio *self, PyObject *args)
         }
     }
 
-    /* finally */
-    Py_XDECREF(posobj);
+finally:
     res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
     Py_DECREF(saved_state);
     if (res == NULL)
@@ -2292,8 +2355,7 @@ textiowrapper_tell(textio *self, PyObject *args)
     cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
     return textiowrapper_build_cookie(&cookie);
 
-  fail:
-    Py_XDECREF(posobj);
+fail:
     if (saved_state) {
         PyObject *type, *value, *traceback;
         PyErr_Fetch(&type, &value, &traceback);