diff options
Diffstat (limited to 'Modules/_io')
-rw-r--r-- | Modules/_io/_iomodule.c | 3 | ||||
-rw-r--r-- | Modules/_io/_iomodule.h | 1 | ||||
-rw-r--r-- | Modules/_io/bufferedio.c | 147 | ||||
-rw-r--r-- | Modules/_io/bytesio.c | 2 | ||||
-rw-r--r-- | Modules/_io/fileio.c | 37 | ||||
-rw-r--r-- | Modules/_io/textio.c | 144 |
6 files changed, 258 insertions, 76 deletions
diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 44bdac6..6f5bd48 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -36,6 +36,7 @@ PyObject *_PyIO_str_nl; PyObject *_PyIO_str_read; PyObject *_PyIO_str_read1; PyObject *_PyIO_str_readable; +PyObject *_PyIO_str_readall; PyObject *_PyIO_str_readinto; PyObject *_PyIO_str_readline; PyObject *_PyIO_str_reset; @@ -767,6 +768,8 @@ PyInit__io(void) goto fail; if (!(_PyIO_str_readable = PyUnicode_InternFromString("readable"))) goto fail; + if (!(_PyIO_str_readall = PyUnicode_InternFromString("readall"))) + goto fail; if (!(_PyIO_str_readinto = PyUnicode_InternFromString("readinto"))) goto fail; if (!(_PyIO_str_readline = PyUnicode_InternFromString("readline"))) diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h index 925e4f2..9174bdd 100644 --- a/Modules/_io/_iomodule.h +++ b/Modules/_io/_iomodule.h @@ -155,6 +155,7 @@ extern PyObject *_PyIO_str_nl; extern PyObject *_PyIO_str_read; extern PyObject *_PyIO_str_read1; extern PyObject *_PyIO_str_readable; +extern PyObject *_PyIO_str_readall; extern PyObject *_PyIO_str_readinto; extern PyObject *_PyIO_str_readline; extern PyObject *_PyIO_str_reset; diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index d6f0c9c..a8631e0 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -1,9 +1,9 @@ /* An implementation of Buffered I/O as defined by PEP 3116 - "New I/O" - + Classes defined here: BufferedIOBase, BufferedReader, BufferedWriter, BufferedRandom. - + Written by Amaury Forgeot d'Arc and Antoine Pitrou */ @@ -198,7 +198,7 @@ typedef struct { int readable; int writable; int deallocating; - + /* True if this is a vanilla Buffered object (rather than a user derived class) *and* the raw stream is a vanilla FileIO object. */ int fast_closed_checks; @@ -237,7 +237,7 @@ typedef struct { /* Implementation notes: - + * BufferedReader, BufferedWriter and BufferedRandom try to share most methods (this is helped by the members `readable` and `writable`, which are initialized in the respective constructors) @@ -255,7 +255,7 @@ typedef struct { NOTE: we should try to maintain block alignment of reads and writes to the raw stream (according to the buffer size), but for now it is only done in read() and friends. - + */ /* These macros protect the buffered object against concurrent operations. */ @@ -589,14 +589,15 @@ _bufferedreader_reset_buf(buffered *self); static void _bufferedwriter_reset_buf(buffered *self); static PyObject * -_bufferedreader_peek_unlocked(buffered *self, Py_ssize_t); +_bufferedreader_peek_unlocked(buffered *self); static PyObject * _bufferedreader_read_all(buffered *self); static PyObject * _bufferedreader_read_fast(buffered *self, Py_ssize_t); static PyObject * _bufferedreader_read_generic(buffered *self, Py_ssize_t); - +static Py_ssize_t +_bufferedreader_raw_read(buffered *self, char *start, Py_ssize_t len); /* * Helpers @@ -635,7 +636,7 @@ _buffered_raw_tell(buffered *self) if (!PyErr_Occurred()) PyErr_Format(PyExc_IOError, "Raw stream returned invalid position %" PY_PRIdOFF, - (PY_OFF_T_COMPAT)n); + (PY_OFF_T_COMPAT)n); return -1; } self->abs_pos = n; @@ -668,7 +669,7 @@ _buffered_raw_seek(buffered *self, Py_off_t target, int whence) if (!PyErr_Occurred()) PyErr_Format(PyExc_IOError, "Raw stream returned invalid position %" PY_PRIdOFF, - (PY_OFF_T_COMPAT)n); + (PY_OFF_T_COMPAT)n); return -1; } self->abs_pos = n; @@ -809,7 +810,7 @@ buffered_peek(buffered *self, PyObject *args) goto end; Py_CLEAR(res); } - res = _bufferedreader_peek_unlocked(self, n); + res = _bufferedreader_peek_unlocked(self); end: LEAVE_BUFFERED(self) @@ -875,7 +876,7 @@ buffered_read1(buffered *self, PyObject *args) if (!ENTER_BUFFERED(self)) return NULL; - + /* Return up to n bytes. If at least one byte is buffered, we only return buffered bytes. Otherwise, we do one raw read. */ @@ -924,10 +925,78 @@ end: static PyObject * buffered_readinto(buffered *self, PyObject *args) { + Py_buffer buf; + Py_ssize_t n, written = 0, remaining; + PyObject *res = NULL; + CHECK_INITIALIZED(self) - - /* TODO: use raw.readinto() (or a direct copy from our buffer) instead! */ - return bufferediobase_readinto((PyObject *)self, args); + + if (!PyArg_ParseTuple(args, "w*:readinto", &buf)) + return NULL; + + n = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); + if (n > 0) { + if (n >= buf.len) { + memcpy(buf.buf, self->buffer + self->pos, buf.len); + self->pos += buf.len; + res = PyLong_FromSsize_t(buf.len); + goto end_unlocked; + } + memcpy(buf.buf, self->buffer + self->pos, n); + self->pos += n; + written = n; + } + + if (!ENTER_BUFFERED(self)) + goto end_unlocked; + + if (self->writable) { + res = buffered_flush_and_rewind_unlocked(self); + if (res == NULL) + goto end; + Py_CLEAR(res); + } + + _bufferedreader_reset_buf(self); + self->pos = 0; + + for (remaining = buf.len - written; + remaining > 0; + written += n, remaining -= n) { + /* If remaining bytes is larger than internal buffer size, copy + * directly into caller's buffer. */ + if (remaining > self->buffer_size) { + n = _bufferedreader_raw_read(self, (char *) buf.buf + written, + remaining); + } + else { + n = _bufferedreader_fill_buffer(self); + if (n > 0) { + if (n > remaining) + n = remaining; + memcpy((char *) buf.buf + written, + self->buffer + self->pos, n); + self->pos += n; + continue; /* short circuit */ + } + } + if (n == 0 || (n == -2 && written > 0)) + break; + if (n < 0) { + if (n == -2) { + Py_INCREF(Py_None); + res = Py_None; + } + goto end; + } + } + res = PyLong_FromSsize_t(written); + +end: + LEAVE_BUFFERED(self); +end_unlocked: + PyBuffer_Release(&buf); + return res; } static PyObject * @@ -1342,33 +1411,58 @@ static PyObject * _bufferedreader_read_all(buffered *self) { Py_ssize_t current_size; - PyObject *res, *data = NULL; - PyObject *chunks = PyList_New(0); - - if (chunks == NULL) - return NULL; + PyObject *res, *data = NULL, *chunk, *chunks; /* First copy what we have in the current buffer. */ current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); if (current_size) { data = PyBytes_FromStringAndSize( self->buffer + self->pos, current_size); - if (data == NULL) { - Py_DECREF(chunks); + if (data == NULL) return NULL; - } self->pos += current_size; } /* We're going past the buffer's bounds, flush it */ if (self->writable) { res = buffered_flush_and_rewind_unlocked(self); - if (res == NULL) { - Py_DECREF(chunks); + if (res == NULL) return NULL; - } Py_CLEAR(res); } _bufferedreader_reset_buf(self); + + if (PyObject_HasAttr(self->raw, _PyIO_str_readall)) { + chunk = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_readall, NULL); + if (chunk == NULL) + return NULL; + if (chunk != Py_None && !PyBytes_Check(chunk)) { + Py_XDECREF(data); + Py_DECREF(chunk); + PyErr_SetString(PyExc_TypeError, "readall() should return bytes"); + return NULL; + } + if (chunk == Py_None) { + if (current_size == 0) + return chunk; + else { + Py_DECREF(chunk); + return data; + } + } + else if (current_size) { + PyBytes_Concat(&data, chunk); + Py_DECREF(chunk); + if (data == NULL) + return NULL; + return data; + } else + return chunk; + } + + chunks = PyList_New(0); + if (chunks == NULL) + return NULL; + while (1) { if (data) { if (PyList_Append(chunks, data) < 0) { @@ -1530,7 +1624,7 @@ error: } static PyObject * -_bufferedreader_peek_unlocked(buffered *self, Py_ssize_t n) +_bufferedreader_peek_unlocked(buffered *self) { Py_ssize_t have, r; @@ -1572,6 +1666,7 @@ static PyMethodDef bufferedreader_methods[] = { {"read", (PyCFunction)buffered_read, METH_VARARGS}, {"peek", (PyCFunction)buffered_peek, METH_VARARGS}, {"read1", (PyCFunction)buffered_read1, METH_VARARGS}, + {"readinto", (PyCFunction)buffered_readinto, METH_VARARGS}, {"readline", (PyCFunction)buffered_readline, METH_VARARGS}, {"seek", (PyCFunction)buffered_seek, METH_VARARGS}, {"tell", (PyCFunction)buffered_tell, METH_NOARGS}, diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index b40513f..65ec931 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -938,13 +938,11 @@ static int bytesiobuf_getbuffer(bytesiobuf *obj, Py_buffer *view, int flags) { int ret; - void *ptr; bytesio *b = (bytesio *) obj->source; if (view == NULL) { b->exports++; return 0; } - ptr = (void *) obj; ret = PyBuffer_FillInfo(view, (PyObject*)obj, b->buf, b->string_size, 0, flags); if (ret >= 0) { diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index b1d492b..3de1ff5 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -547,14 +547,14 @@ fileio_readinto(fileio *self, PyObject *args) } static size_t -new_buffersize(fileio *self, size_t currentsize) +new_buffersize(fileio *self, size_t currentsize +#ifdef HAVE_FSTAT + , off_t pos, off_t end +#endif + ) { #ifdef HAVE_FSTAT - off_t pos, end; - struct stat st; - if (fstat(self->fd, &st) == 0) { - end = st.st_size; - pos = lseek(self->fd, 0L, SEEK_CUR); + if (end != (off_t)-1) { /* Files claiming a size smaller than SMALLCHUNK may actually be streaming pseudo-files. In this case, we apply the more aggressive algorithm below. @@ -579,9 +579,14 @@ new_buffersize(fileio *self, size_t currentsize) static PyObject * fileio_readall(fileio *self) { +#ifdef HAVE_FSTAT + struct stat st; + off_t pos, end; +#endif PyObject *result; Py_ssize_t total = 0; int n; + size_t newsize; if (self->fd < 0) return err_closed(); @@ -592,8 +597,23 @@ fileio_readall(fileio *self) if (result == NULL) return NULL; +#ifdef HAVE_FSTAT +#if defined(MS_WIN64) || defined(MS_WINDOWS) + pos = _lseeki64(self->fd, 0L, SEEK_CUR); +#else + pos = lseek(self->fd, 0L, SEEK_CUR); +#endif + if (fstat(self->fd, &st) == 0) + end = st.st_size; + else + end = (off_t)-1; +#endif while (1) { - size_t newsize = new_buffersize(self, total); +#ifdef HAVE_FSTAT + newsize = new_buffersize(self, total, pos, end); +#else + newsize = new_buffersize(self, total); +#endif if (newsize > PY_SSIZE_T_MAX || newsize <= 0) { PyErr_SetString(PyExc_OverflowError, "unbounded read returned more bytes " @@ -632,6 +652,9 @@ fileio_readall(fileio *self) return NULL; } total += n; +#ifdef HAVE_FSTAT + pos += n; +#endif } if (PyBytes_GET_SIZE(result) > total) { diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 9c5f441..13d4bd9 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -680,12 +680,16 @@ typedef struct PyObject *pending_bytes; /* list of bytes objects waiting to be written, or NULL */ Py_ssize_t pending_bytes_count; - PyObject *snapshot; + /* snapshot is either None, or a tuple (dec_flags, next_input) where * dec_flags is the second (integer) item of the decoder state and * next_input is the chunk of input bytes that comes next after the * snapshot point. We use this to reconstruct decoder states in tell(). */ + PyObject *snapshot; + /* Bytes-to-characters ratio for the current chunk. Serves as input for + the heuristic in tell(). */ + double b2cratio; /* Cache raw object if it's a FileIO object */ PyObject *raw; @@ -852,6 +856,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds) self->decoded_chars_used = 0; self->pending_bytes_count = 0; self->encodefunc = NULL; + self->b2cratio = 0.0; if (encoding == NULL) { /* Try os.device_encoding(fileno) */ @@ -1397,6 +1402,7 @@ textiowrapper_read_chunk(textio *self) PyObject *dec_flags = NULL; PyObject *input_chunk = NULL; PyObject *decoded_chars, *chunk_size; + Py_ssize_t nbytes, nchars; int eof; /* The return value is True unless EOF was reached. The decoded string is @@ -1443,7 +1449,8 @@ textiowrapper_read_chunk(textio *self) goto fail; assert(PyBytes_Check(input_chunk)); - eof = (PyBytes_Size(input_chunk) == 0); + nbytes = PyBytes_Size(input_chunk); + eof = (nbytes == 0); if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) { decoded_chars = _PyIncrementalNewlineDecoder_decode( @@ -1458,7 +1465,12 @@ textiowrapper_read_chunk(textio *self) if (decoded_chars == NULL) goto fail; textiowrapper_set_decoded_chars(self, decoded_chars); - if (PyUnicode_GET_SIZE(decoded_chars) > 0) + nchars = PyUnicode_GET_SIZE(decoded_chars); + if (nchars > 0) + self->b2cratio = (double) nbytes / nchars; + else + self->b2cratio = 0.0; + if (nchars > 0) eof = 0; if (self->telling) { @@ -1509,8 +1521,13 @@ textiowrapper_read(textio *self, PyObject *args) PyObject *decoded; if (bytes == NULL) goto fail; - decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode, - bytes, Py_True, NULL); + + if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) + decoded = _PyIncrementalNewlineDecoder_decode(self->decoder, + bytes, 1); + else + decoded = PyObject_CallMethodObjArgs( + self->decoder, _PyIO_str_decode, bytes, Py_True, NULL); Py_DECREF(bytes); if (decoded == NULL) goto fail; @@ -2147,8 +2164,12 @@ textiowrapper_tell(textio *self, PyObject *args) cookie_type cookie = {0,0,0,0,0}; PyObject *next_input; Py_ssize_t chars_to_skip, chars_decoded; + Py_ssize_t skip_bytes, skip_back; PyObject *saved_state = NULL; char *input, *input_end; + char *dec_buffer; + Py_ssize_t dec_buffer_len; + int dec_flags; CHECK_INITIALIZED(self); CHECK_CLOSED(self); @@ -2184,6 +2205,7 @@ textiowrapper_tell(textio *self, PyObject *args) #else cookie.start_pos = PyLong_AsLong(posobj); #endif + Py_DECREF(posobj); if (PyErr_Occurred()) goto fail; @@ -2198,57 +2220,99 @@ textiowrapper_tell(textio *self, PyObject *args) /* How many decoded characters have been used up since the snapshot? */ if (self->decoded_chars_used == 0) { /* We haven't moved from the snapshot point. */ - Py_DECREF(posobj); return textiowrapper_build_cookie(&cookie); } chars_to_skip = self->decoded_chars_used; - /* Starting from the snapshot position, we will walk the decoder - * forward until it gives us enough decoded characters. - */ + /* Decoder state will be restored at the end */ saved_state = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_getstate, NULL); if (saved_state == NULL) goto fail; - /* Note our initial start point. */ - if (_textiowrapper_decoder_setstate(self, &cookie) < 0) - goto fail; +#define DECODER_GETSTATE() do { \ + PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \ + _PyIO_str_getstate, NULL); \ + if (_state == NULL) \ + goto fail; \ + if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \ + Py_DECREF(_state); \ + goto fail; \ + } \ + Py_DECREF(_state); \ + } while (0) + + /* TODO: replace assert with exception */ +#define DECODER_DECODE(start, len, res) do { \ + PyObject *_decoded = PyObject_CallMethod( \ + self->decoder, "decode", "y#", start, len); \ + if (_decoded == NULL) \ + goto fail; \ + assert (PyUnicode_Check(_decoded)); \ + res = PyUnicode_GET_SIZE(_decoded); \ + Py_DECREF(_decoded); \ + } while (0) + + /* Fast search for an acceptable start point, close to our + current pos */ + skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip); + skip_back = 1; + assert(skip_back <= PyBytes_GET_SIZE(next_input)); + input = PyBytes_AS_STRING(next_input); + while (skip_bytes > 0) { + /* Decode up to temptative start point */ + if (_textiowrapper_decoder_setstate(self, &cookie) < 0) + goto fail; + DECODER_DECODE(input, skip_bytes, chars_decoded); + if (chars_decoded <= chars_to_skip) { + DECODER_GETSTATE(); + if (dec_buffer_len == 0) { + /* Before pos and no bytes buffered in decoder => OK */ + cookie.dec_flags = dec_flags; + chars_to_skip -= chars_decoded; + break; + } + /* Skip back by buffered amount and reset heuristic */ + skip_bytes -= dec_buffer_len; + skip_back = 1; + } + else { + /* We're too far ahead, skip back a bit */ + skip_bytes -= skip_back; + skip_back *= 2; + } + } + if (skip_bytes <= 0) { + skip_bytes = 0; + if (_textiowrapper_decoder_setstate(self, &cookie) < 0) + goto fail; + } - /* Feed the decoder one byte at a time. As we go, note the - * nearest "safe start point" before the current location - * (a point where the decoder has nothing buffered, so seek() + /* Note our initial start point. */ + cookie.start_pos += skip_bytes; + cookie.chars_to_skip = chars_to_skip; + if (chars_to_skip == 0) + goto finally; + + /* We should be close to the desired position. Now feed the decoder one + * byte at a time until we reach the `chars_to_skip` target. + * As we go, note the nearest "safe start point" before the current + * location (a point where the decoder has nothing buffered, so seek() * can safely start from there and advance to this location). */ chars_decoded = 0; input = PyBytes_AS_STRING(next_input); input_end = input + PyBytes_GET_SIZE(next_input); + input += skip_bytes; while (input < input_end) { - PyObject *state; - char *dec_buffer; - Py_ssize_t dec_buffer_len; - int dec_flags; - - PyObject *decoded = PyObject_CallMethod( - self->decoder, "decode", "y#", input, 1); - if (decoded == NULL) - goto fail; - assert (PyUnicode_Check(decoded)); - chars_decoded += PyUnicode_GET_SIZE(decoded); - Py_DECREF(decoded); + Py_ssize_t n; + DECODER_DECODE(input, 1, n); + /* We got n chars for 1 byte */ + chars_decoded += n; cookie.bytes_to_feed += 1; - - state = PyObject_CallMethodObjArgs(self->decoder, - _PyIO_str_getstate, NULL); - if (state == NULL) - goto fail; - if (!PyArg_Parse(state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { - Py_DECREF(state); - goto fail; - } - Py_DECREF(state); + DECODER_GETSTATE(); if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) { /* Decoder buffer is empty, so this is a safe start point. */ @@ -2280,8 +2344,7 @@ textiowrapper_tell(textio *self, PyObject *args) } } - /* finally */ - Py_XDECREF(posobj); +finally: res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state); Py_DECREF(saved_state); if (res == NULL) @@ -2292,8 +2355,7 @@ textiowrapper_tell(textio *self, PyObject *args) cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int); return textiowrapper_build_cookie(&cookie); - fail: - Py_XDECREF(posobj); +fail: if (saved_state) { PyObject *type, *value, *traceback; PyErr_Fetch(&type, &value, &traceback); |