diff options
Diffstat (limited to 'Modules/_io')
-rw-r--r-- | Modules/_io/_iomodule.c | 28 | ||||
-rw-r--r-- | Modules/_io/bufferedio.c | 66 | ||||
-rw-r--r-- | Modules/_io/bytesio.c | 260 | ||||
-rw-r--r-- | Modules/_io/fileio.c | 58 | ||||
-rw-r--r-- | Modules/_io/textio.c | 42 |
5 files changed, 305 insertions, 149 deletions
diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 45c31a5..e70c4b7 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -237,8 +237,8 @@ io_open(PyObject *self, PyObject *args, PyObject *kwds) PyObject *raw, *modeobj = NULL, *buffer, *wrapper, *result = NULL; + _Py_IDENTIFIER(_blksize); _Py_IDENTIFIER(isatty); - _Py_IDENTIFIER(fileno); _Py_IDENTIFIER(mode); _Py_IDENTIFIER(close); @@ -380,24 +380,14 @@ io_open(PyObject *self, PyObject *args, PyObject *kwds) line_buffering = 0; if (buffering < 0) { - buffering = DEFAULT_BUFFER_SIZE; -#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE - { - struct stat st; - long fileno; - PyObject *res = _PyObject_CallMethodId(raw, &PyId_fileno, NULL); - if (res == NULL) - goto error; - - fileno = PyLong_AsLong(res); - Py_DECREF(res); - if (fileno == -1 && PyErr_Occurred()) - goto error; - - if (fstat(fileno, &st) >= 0 && st.st_blksize > 1) - buffering = st.st_blksize; - } -#endif + PyObject *blksize_obj; + blksize_obj = _PyObject_GetAttrId(raw, &PyId__blksize); + if (blksize_obj == NULL) + goto error; + buffering = PyLong_AsLong(blksize_obj); + Py_DECREF(blksize_obj); + if (buffering == -1 && PyErr_Occurred()) + goto error; } if (buffering < 0) { PyErr_SetString(PyExc_ValueError, diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index 8084aae..ce85f56 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -24,6 +24,7 @@ _Py_IDENTIFIER(read); _Py_IDENTIFIER(read1); _Py_IDENTIFIER(readable); _Py_IDENTIFIER(readinto); +_Py_IDENTIFIER(readinto1); _Py_IDENTIFIER(writable); _Py_IDENTIFIER(write); @@ -47,17 +48,21 @@ PyDoc_STRVAR(bufferediobase_doc, ); static PyObject * -bufferediobase_readinto(PyObject *self, PyObject *args) +_bufferediobase_readinto_generic(PyObject *self, PyObject *args, char readinto1) { Py_buffer buf; Py_ssize_t len; PyObject *data; - if (!PyArg_ParseTuple(args, "w*:readinto", &buf)) { + if (!PyArg_ParseTuple(args, + readinto1 ? "w*:readinto1" : "w*:readinto", + &buf)) { return NULL; } - data = _PyObject_CallMethodId(self, &PyId_read, "n", buf.len); + data = _PyObject_CallMethodId(self, + readinto1 ? &PyId_read1 : &PyId_read, + "n", buf.len); if (data == NULL) goto error; @@ -89,6 +94,18 @@ bufferediobase_readinto(PyObject *self, PyObject *args) } static PyObject * +bufferediobase_readinto(PyObject *self, PyObject *args) +{ + return _bufferediobase_readinto_generic(self, args, 0); +} + +static PyObject * +bufferediobase_readinto1(PyObject *self, PyObject *args) +{ + return _bufferediobase_readinto_generic(self, args, 1); +} + +static PyObject * bufferediobase_unsupported(const char *message) { _PyIO_State *state = IO_STATE(); @@ -167,6 +184,7 @@ static PyMethodDef bufferediobase_methods[] = { {"read", bufferediobase_read, METH_VARARGS, bufferediobase_read_doc}, {"read1", bufferediobase_read1, METH_VARARGS, bufferediobase_read1_doc}, {"readinto", bufferediobase_readinto, METH_VARARGS, NULL}, + {"readinto1", bufferediobase_readinto1, METH_VARARGS, NULL}, {"write", bufferediobase_write, METH_VARARGS, bufferediobase_write_doc}, {NULL, NULL} }; @@ -977,7 +995,7 @@ buffered_read1(buffered *self, PyObject *args) } static PyObject * -buffered_readinto(buffered *self, PyObject *args) +_buffered_readinto_generic(buffered *self, PyObject *args, char readinto1) { Py_buffer buf; Py_ssize_t n, written = 0, remaining; @@ -985,7 +1003,9 @@ buffered_readinto(buffered *self, PyObject *args) CHECK_INITIALIZED(self) - if (!PyArg_ParseTuple(args, "w*:readinto", &buf)) + if (!PyArg_ParseTuple(args, + readinto1 ? "w*:readinto1" : "w*:readinto", + &buf)) return NULL; n = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); @@ -1023,7 +1043,10 @@ buffered_readinto(buffered *self, PyObject *args) n = _bufferedreader_raw_read(self, (char *) buf.buf + written, remaining); } - else { + + /* In readinto1 mode, we do not want to fill the internal + buffer if we already have some data to return */ + else if (!(readinto1 && written)) { n = _bufferedreader_fill_buffer(self); if (n > 0) { if (n > remaining) @@ -1034,6 +1057,9 @@ buffered_readinto(buffered *self, PyObject *args) continue; /* short circuit */ } } + else + n = 0; + if (n == 0 || (n == -2 && written > 0)) break; if (n < 0) { @@ -1043,6 +1069,12 @@ buffered_readinto(buffered *self, PyObject *args) } goto end; } + + /* At most one read in readinto1 mode */ + if (readinto1) { + written += n; + break; + } } res = PyLong_FromSsize_t(written); @@ -1054,6 +1086,19 @@ end_unlocked: } static PyObject * +buffered_readinto(buffered *self, PyObject *args) +{ + return _buffered_readinto_generic(self, args, 0); +} + +static PyObject * +buffered_readinto1(buffered *self, PyObject *args) +{ + return _buffered_readinto_generic(self, args, 1); +} + + +static PyObject * _buffered_readline(buffered *self, Py_ssize_t limit) { PyObject *res = NULL; @@ -1738,6 +1783,7 @@ static PyMethodDef bufferedreader_methods[] = { {"peek", (PyCFunction)buffered_peek, METH_VARARGS}, {"read1", (PyCFunction)buffered_read1, METH_VARARGS}, {"readinto", (PyCFunction)buffered_readinto, METH_VARARGS}, + {"readinto1", (PyCFunction)buffered_readinto1, METH_VARARGS}, {"readline", (PyCFunction)buffered_readline, METH_VARARGS}, {"seek", (PyCFunction)buffered_seek, METH_VARARGS}, {"tell", (PyCFunction)buffered_tell, METH_NOARGS}, @@ -2339,6 +2385,12 @@ bufferedrwpair_readinto(rwpair *self, PyObject *args) } static PyObject * +bufferedrwpair_readinto1(rwpair *self, PyObject *args) +{ + return _forward_call(self->reader, &PyId_readinto1, args); +} + +static PyObject * bufferedrwpair_write(rwpair *self, PyObject *args) { return _forward_call(self->writer, &PyId_write, args); @@ -2403,6 +2455,7 @@ static PyMethodDef bufferedrwpair_methods[] = { {"peek", (PyCFunction)bufferedrwpair_peek, METH_VARARGS}, {"read1", (PyCFunction)bufferedrwpair_read1, METH_VARARGS}, {"readinto", (PyCFunction)bufferedrwpair_readinto, METH_VARARGS}, + {"readinto1", (PyCFunction)bufferedrwpair_readinto1, METH_VARARGS}, {"write", (PyCFunction)bufferedrwpair_write, METH_VARARGS}, {"flush", (PyCFunction)bufferedrwpair_flush, METH_NOARGS}, @@ -2551,6 +2604,7 @@ static PyMethodDef bufferedrandom_methods[] = { {"read", (PyCFunction)buffered_read, METH_VARARGS}, {"read1", (PyCFunction)buffered_read1, METH_VARARGS}, {"readinto", (PyCFunction)buffered_readinto, METH_VARARGS}, + {"readinto1", (PyCFunction)buffered_readinto1, METH_VARARGS}, {"readline", (PyCFunction)buffered_readline, METH_VARARGS}, {"peek", (PyCFunction)buffered_peek, METH_VARARGS}, {"write", (PyCFunction)bufferedwriter_write, METH_VARARGS}, diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 54840bb..56ad788 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -11,6 +11,10 @@ typedef struct { PyObject *dict; PyObject *weakreflist; Py_ssize_t exports; + /** If `initvalue' != NULL, `buf' is a read-only pointer into the PyBytes + * referenced by `initvalue'. It must be copied prior to mutation, and + * released during finalization */ + PyObject *initvalue; } bytesio; typedef struct { @@ -19,11 +23,11 @@ typedef struct { } bytesiobuf; -#define CHECK_CLOSED(self) \ +#define CHECK_CLOSED(self, ret) \ if ((self)->buf == NULL) { \ PyErr_SetString(PyExc_ValueError, \ "I/O operation on closed file."); \ - return NULL; \ + return ret; \ } #define CHECK_EXPORTS(self) \ @@ -33,36 +37,74 @@ typedef struct { return NULL; \ } +/* Ensure we have a buffer suitable for writing, in the case that an initvalue + * object was provided, and we're currently borrowing its buffer. `size' + * indicates the new buffer size allocated as part of unsharing, to avoid a + * redundant reallocation caused by any subsequent mutation. `truncate' + * indicates whether truncation should occur if `size` < self->string_size. + * + * Do nothing if the buffer wasn't shared. Returns 0 on success, or sets an + * exception and returns -1 on failure. Existing state is preserved on failure. + */ +static int +unshare(bytesio *self, size_t preferred_size, int truncate) +{ + if (self->initvalue) { + Py_ssize_t copy_size; + char *new_buf; + + if((! truncate) && preferred_size < (size_t)self->string_size) { + preferred_size = self->string_size; + } + + /* PyMem_Malloc() returns NULL if preferred_size is bigger + than PY_SSIZE_T_MAX */ + new_buf = (char *)PyMem_Malloc(preferred_size); + if (new_buf == NULL) { + PyErr_NoMemory(); + return -1; + } + + copy_size = self->string_size; + if ((size_t)copy_size > preferred_size) { + copy_size = preferred_size; + } + + memcpy(new_buf, self->buf, copy_size); + Py_CLEAR(self->initvalue); + self->buf = new_buf; + self->buf_size = preferred_size; + self->string_size = (Py_ssize_t) copy_size; + } + return 0; +} /* Internal routine to get a line from the buffer of a BytesIO object. Returns the length between the current position to the next newline character. */ static Py_ssize_t -get_line(bytesio *self, char **output) +scan_eol(bytesio *self, Py_ssize_t len) { - char *n; - const char *str_end; - Py_ssize_t len; + const char *start, *n; + Py_ssize_t maxlen; assert(self->buf != NULL); /* Move to the end of the line, up to the end of the string, s. */ - str_end = self->buf + self->string_size; - for (n = self->buf + self->pos; - n < str_end && *n != '\n'; - n++); - - /* Skip the newline character */ - if (n < str_end) - n++; - - /* Get the length from the current position to the end of the line. */ - len = n - (self->buf + self->pos); - *output = self->buf + self->pos; - + start = self->buf + self->pos; + maxlen = self->string_size - self->pos; + if (len < 0 || len > maxlen) + len = maxlen; + + if (len) { + n = memchr(start, '\n', len); + if (n) + /* Get the length from the current position to the end of + the line. */ + len = n - start + 1; + } assert(len >= 0); assert(self->pos < PY_SSIZE_T_MAX - len); - self->pos += len; return len; } @@ -125,11 +167,18 @@ resize_buffer(bytesio *self, size_t size) static Py_ssize_t write_bytes(bytesio *self, const char *bytes, Py_ssize_t len) { + size_t desired; + assert(self->buf != NULL); assert(self->pos >= 0); assert(len >= 0); - if ((size_t)self->pos + len > self->buf_size) { + desired = (size_t)self->pos + len; + if (unshare(self, desired, 0) < 0) { + return -1; + } + + if (desired > self->buf_size) { if (resize_buffer(self, (size_t)self->pos + len) < 0) return -1; } @@ -160,6 +209,74 @@ write_bytes(bytesio *self, const char *bytes, Py_ssize_t len) return len; } +/* Release or free any existing buffer, and place the BytesIO in the closed + * state. */ +static void +reset(bytesio *self) +{ + if (self->initvalue) { + Py_CLEAR(self->initvalue); + } else if (self->buf) { + PyMem_Free(self->buf); + } + self->buf = NULL; + self->string_size = 0; + self->pos = 0; +} + +/* Reinitialize with a new heap-allocated buffer of size `size`. Returns 0 on + * success, or sets an exception and returns -1 on failure. Existing state is + * preserved on failure. */ +static int +reinit_private(bytesio *self, Py_ssize_t size) +{ + char *tmp = (char *)PyMem_Malloc(size); + if (tmp == NULL) { + PyErr_NoMemory(); + return -1; + } + reset(self); + self->buf = tmp; + self->buf_size = size; + return 0; +} + +/* Internal version of BytesIO.__init__; resets the object to its initial + * (closed) state before repopulating it, optionally by sharing a PyBytes + * buffer provided by `initvalue'. Returns 0 on success, or sets an exception + * and returns -1 on failure. */ +static int +reinit(bytesio *self, PyObject *initvalue) +{ + CHECK_CLOSED(self, -1); + + if (initvalue == NULL || initvalue == Py_None) { + if (reinit_private(self, 0) < 0) { + return -1; + } + } else if (PyBytes_CheckExact(initvalue)) { + reset(self); + Py_INCREF(initvalue); + self->initvalue = initvalue; + self->buf = PyBytes_AS_STRING(initvalue); + self->buf_size = PyBytes_GET_SIZE(initvalue); + self->string_size = PyBytes_GET_SIZE(initvalue); + } else { + Py_buffer buf; + if (PyObject_GetBuffer(initvalue, &buf, PyBUF_CONTIG_RO) < 0) { + return -1; + } + if (reinit_private(self, buf.len) < 0) { + PyBuffer_Release(&buf); + return -1; + } + memcpy(self->buf, buf.buf, buf.len); + self->string_size = buf.len; + PyBuffer_Release(&buf); + } + return 0; +} + static PyObject * bytesio_get_closed(bytesio *self) { @@ -184,7 +301,7 @@ PyDoc_STRVAR(seekable_doc, static PyObject * return_not_closed(bytesio *self) { - CHECK_CLOSED(self); + CHECK_CLOSED(self, NULL); Py_RETURN_TRUE; } @@ -194,7 +311,7 @@ PyDoc_STRVAR(flush_doc, static PyObject * bytesio_flush(bytesio *self) { - CHECK_CLOSED(self); + CHECK_CLOSED(self, NULL); Py_RETURN_NONE; } @@ -210,7 +327,7 @@ bytesio_getbuffer(bytesio *self) bytesiobuf *buf; PyObject *view; - CHECK_CLOSED(self); + CHECK_CLOSED(self, NULL); buf = (bytesiobuf *) type->tp_alloc(type, 0); if (buf == NULL) @@ -230,7 +347,7 @@ PyDoc_STRVAR(getval_doc, static PyObject * bytesio_getvalue(bytesio *self) { - CHECK_CLOSED(self); + CHECK_CLOSED(self, NULL); return PyBytes_FromStringAndSize(self->buf, self->string_size); } @@ -243,7 +360,7 @@ PyDoc_STRVAR(isatty_doc, static PyObject * bytesio_isatty(bytesio *self) { - CHECK_CLOSED(self); + CHECK_CLOSED(self, NULL); Py_RETURN_FALSE; } @@ -253,7 +370,7 @@ PyDoc_STRVAR(tell_doc, static PyObject * bytesio_tell(bytesio *self) { - CHECK_CLOSED(self); + CHECK_CLOSED(self, NULL); return PyLong_FromSsize_t(self->pos); } @@ -270,7 +387,7 @@ bytesio_read(bytesio *self, PyObject *args) char *output; PyObject *arg = Py_None; - CHECK_CLOSED(self); + CHECK_CLOSED(self, NULL); if (!PyArg_ParseTuple(args, "|O:read", &arg)) return NULL; @@ -339,7 +456,7 @@ bytesio_readline(bytesio *self, PyObject *args) char *output; PyObject *arg = Py_None; - CHECK_CLOSED(self); + CHECK_CLOSED(self, NULL); if (!PyArg_ParseTuple(args, "|O:readline", &arg)) return NULL; @@ -359,14 +476,10 @@ bytesio_readline(bytesio *self, PyObject *args) return NULL; } - n = get_line(self, &output); - - if (size >= 0 && size < n) { - size = n - size; - n -= size; - self->pos -= size; - } + n = scan_eol(self, size); + output = self->buf + self->pos; + self->pos += n; return PyBytes_FromStringAndSize(output, n); } @@ -385,7 +498,7 @@ bytesio_readlines(bytesio *self, PyObject *args) char *output; PyObject *arg = Py_None; - CHECK_CLOSED(self); + CHECK_CLOSED(self, NULL); if (!PyArg_ParseTuple(args, "|O:readlines", &arg)) return NULL; @@ -410,7 +523,9 @@ bytesio_readlines(bytesio *self, PyObject *args) if (!result) return NULL; - while ((n = get_line(self, &output)) != 0) { + output = self->buf + self->pos; + while ((n = scan_eol(self, -1)) != 0) { + self->pos += n; line = PyBytes_FromStringAndSize(output, n); if (!line) goto on_error; @@ -422,6 +537,7 @@ bytesio_readlines(bytesio *self, PyObject *args) size += n; if (maxsize > 0 && size >= maxsize) break; + output += n; } return result; @@ -442,7 +558,7 @@ bytesio_readinto(bytesio *self, PyObject *buffer) void *raw_buffer; Py_ssize_t len, n; - CHECK_CLOSED(self); + CHECK_CLOSED(self, NULL); if (PyObject_AsWriteBuffer(buffer, &raw_buffer, &len) == -1) return NULL; @@ -475,7 +591,7 @@ bytesio_truncate(bytesio *self, PyObject *args) Py_ssize_t size; PyObject *arg = Py_None; - CHECK_CLOSED(self); + CHECK_CLOSED(self, NULL); CHECK_EXPORTS(self); if (!PyArg_ParseTuple(args, "|O:truncate", &arg)) @@ -502,6 +618,10 @@ bytesio_truncate(bytesio *self, PyObject *args) return NULL; } + if (unshare(self, size, 1) < 0) { + return NULL; + } + if (size < self->string_size) { self->string_size = size; if (resize_buffer(self, size) < 0) @@ -514,16 +634,18 @@ bytesio_truncate(bytesio *self, PyObject *args) static PyObject * bytesio_iternext(bytesio *self) { - char *next; + const char *next; Py_ssize_t n; - CHECK_CLOSED(self); + CHECK_CLOSED(self, NULL); - n = get_line(self, &next); + n = scan_eol(self, -1); - if (!next || n == 0) + if (n == 0) return NULL; + next = self->buf + self->pos; + self->pos += n; return PyBytes_FromStringAndSize(next, n); } @@ -542,7 +664,7 @@ bytesio_seek(bytesio *self, PyObject *args) Py_ssize_t pos; int mode = 0; - CHECK_CLOSED(self); + CHECK_CLOSED(self, NULL); if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode)) return NULL; @@ -597,7 +719,7 @@ bytesio_write(bytesio *self, PyObject *obj) Py_buffer buf; PyObject *result = NULL; - CHECK_CLOSED(self); + CHECK_CLOSED(self, NULL); CHECK_EXPORTS(self); if (PyObject_GetBuffer(obj, &buf, PyBUF_CONTIG_RO) < 0) @@ -625,7 +747,7 @@ bytesio_writelines(bytesio *self, PyObject *v) PyObject *it, *item; PyObject *ret; - CHECK_CLOSED(self); + CHECK_CLOSED(self, NULL); it = PyObject_GetIter(v); if (it == NULL) @@ -655,10 +777,7 @@ PyDoc_STRVAR(close_doc, static PyObject * bytesio_close(bytesio *self) { - if (self->buf != NULL) { - PyMem_Free(self->buf); - self->buf = NULL; - } + reset(self); Py_RETURN_NONE; } @@ -706,11 +825,11 @@ bytesio_getstate(bytesio *self) static PyObject * bytesio_setstate(bytesio *self, PyObject *state) { - PyObject *result; PyObject *position_obj; PyObject *dict; Py_ssize_t pos; + CHECK_EXPORTS(self); assert(state != NULL); /* We allow the state tuple to be longer than 3, because we may need @@ -722,18 +841,13 @@ bytesio_setstate(bytesio *self, PyObject *state) Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name); return NULL; } - CHECK_EXPORTS(self); - /* Reset the object to its default state. This is only needed to handle - the case of repeated calls to __setstate__. */ - self->string_size = 0; - self->pos = 0; - /* Set the value of the internal buffer. If state[0] does not support the - buffer protocol, bytesio_write will raise the appropriate TypeError. */ - result = bytesio_write(self, PyTuple_GET_ITEM(state, 0)); - if (result == NULL) + /* Reset the object to its default state and set the value of the internal + * buffer. If state[0] does not support the buffer protocol, reinit() will + * raise the appropriate TypeError. */ + if (reinit(self, PyTuple_GET_ITEM(state, 0)) < 0) { return NULL; - Py_DECREF(result); + } /* Set carefully the position value. Alternatively, we could use the seek method instead of modifying self->pos directly to better protect the @@ -788,10 +902,9 @@ bytesio_dealloc(bytesio *self) "deallocated BytesIO object has exported buffers"); PyErr_Print(); } - if (self->buf != NULL) { - PyMem_Free(self->buf); - self->buf = NULL; - } + + reset(self); + Py_CLEAR(self->dict); if (self->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) self); @@ -830,20 +943,7 @@ bytesio_init(bytesio *self, PyObject *args, PyObject *kwds) &initvalue)) return -1; - /* In case, __init__ is called multiple times. */ - self->string_size = 0; - self->pos = 0; - - if (initvalue && initvalue != Py_None) { - PyObject *res; - res = bytesio_write(self, initvalue); - if (res == NULL) - return -1; - Py_DECREF(res); - self->pos = 0; - } - - return 0; + return reinit(self, initvalue); } static PyObject * diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index a2b253b..5c1316e 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -53,6 +53,7 @@ typedef struct { signed int seekable : 2; /* -1 means unknown */ unsigned int closefd : 1; char finalizing; + unsigned int blksize; PyObject *weakreflist; PyObject *dict; } fileio; @@ -161,6 +162,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) self->writable = 0; self->appending = 0; self->seekable = -1; + self->blksize = 0; self->closefd = 1; self->weakreflist = NULL; } @@ -168,26 +170,6 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return (PyObject *) self; } -/* On Unix, open will succeed for directories. - In Python, there should be no file objects referring to - directories, so we need a check. */ - -static int -dircheck(fileio* self, PyObject *nameobj) -{ -#if defined(HAVE_FSTAT) && defined(S_ISDIR) && defined(EISDIR) - struct stat buf; - if (self->fd < 0) - return 0; - if (fstat(self->fd, &buf) == 0 && S_ISDIR(buf.st_mode)) { - errno = EISDIR; - PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, nameobj); - return -1; - } -#endif - return 0; -} - static int check_fd(int fd) { @@ -233,6 +215,9 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) #elif !defined(MS_WINDOWS) int *atomic_flag_works = NULL; #endif +#ifdef HAVE_FSTAT + struct stat fdfstat; +#endif assert(PyFileIO_Check(oself)); if (self->fd >= 0) { @@ -271,7 +256,7 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) int rv = _PyUnicode_HasNULChars(nameobj); if (rv) { if (rv != -1) - PyErr_SetString(PyExc_TypeError, "embedded NUL character"); + PyErr_SetString(PyExc_ValueError, "embedded null character"); return -1; } widename = PyUnicode_AsUnicode(nameobj); @@ -421,8 +406,26 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) goto error; #endif } - if (dircheck(self, nameobj) < 0) + + self->blksize = DEFAULT_BUFFER_SIZE; +#ifdef HAVE_FSTAT + if (fstat(self->fd, &fdfstat) < 0) goto error; +#if defined(S_ISDIR) && defined(EISDIR) + /* On Unix, open will succeed for directories. + In Python, there should be no file objects referring to + directories, so we need a check. */ + if (S_ISDIR(fdfstat.st_mode)) { + errno = EISDIR; + PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, nameobj); + goto error; + } +#endif /* defined(S_ISDIR) */ +#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE + if (fdfstat.st_blksize > 1) + self->blksize = fdfstat.st_blksize; +#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */ +#endif /* HAVE_FSTAT */ #if defined(MS_WINDOWS) || defined(__CYGWIN__) /* don't translate newlines (\r\n <=> \n) */ @@ -1051,12 +1054,14 @@ fileio_repr(fileio *self) PyErr_Clear(); else return NULL; - res = PyUnicode_FromFormat("<_io.FileIO fd=%d mode='%s'>", - self->fd, mode_string(self)); + res = PyUnicode_FromFormat( + "<_io.FileIO fd=%d mode='%s' closefd='%d'>", + self->fd, mode_string(self), self->closefd); } else { - res = PyUnicode_FromFormat("<_io.FileIO name=%R mode='%s'>", - nameobj, mode_string(self)); + res = PyUnicode_FromFormat( + "<_io.FileIO name=%R mode='%s' closefd='%d'>", + nameobj, mode_string(self), self->closefd); Py_DECREF(nameobj); } return res; @@ -1218,6 +1223,7 @@ static PyGetSetDef fileio_getsetlist[] = { }; static PyMemberDef fileio_members[] = { + {"_blksize", T_UINT, offsetof(fileio, blksize), 0}, {"_finalizing", T_BOOL, offsetof(fileio, finalizing), 0}, {NULL} }; diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 4555bf7..0f53b0f 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -224,8 +224,8 @@ typedef struct { PyObject_HEAD PyObject *decoder; PyObject *errors; - signed int pendingcr: 1; - signed int translate: 1; + unsigned int pendingcr: 1; + unsigned int translate: 1; unsigned int seennl: 3; } nldecoder_object; @@ -546,7 +546,7 @@ incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state) if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) return NULL; - self->pendingcr = (int) flag & 1; + self->pendingcr = (int) (flag & 1); flag >>= 1; if (self->decoder != Py_None) @@ -1440,6 +1440,7 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) PyObject *dec_buffer = NULL; PyObject *dec_flags = NULL; PyObject *input_chunk = NULL; + Py_buffer input_chunk_buf; PyObject *decoded_chars, *chunk_size; Py_ssize_t nbytes, nchars; int eof; @@ -1471,6 +1472,15 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) Py_DECREF(state); return -1; } + + if (!PyBytes_Check(dec_buffer)) { + PyErr_Format(PyExc_TypeError, + "decoder getstate() should have returned a bytes " + "object, not '%.200s'", + Py_TYPE(dec_buffer)->tp_name); + Py_DECREF(state); + return -1; + } Py_INCREF(dec_buffer); Py_INCREF(dec_flags); Py_DECREF(state); @@ -1483,23 +1493,24 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint)); if (chunk_size == NULL) goto fail; + input_chunk = PyObject_CallMethodObjArgs(self->buffer, (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read), chunk_size, NULL); Py_DECREF(chunk_size); if (input_chunk == NULL) goto fail; - if (!PyBytes_Check(input_chunk)) { + + if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) { PyErr_Format(PyExc_TypeError, - "underlying %s() should have returned a bytes object, " + "underlying %s() should have returned a bytes-like object, " "not '%.200s'", (self->has_read1 ? "read1": "read"), Py_TYPE(input_chunk)->tp_name); goto fail; } - nbytes = PyBytes_Size(input_chunk); + nbytes = input_chunk_buf.len; eof = (nbytes == 0); - if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) { decoded_chars = _PyIncrementalNewlineDecoder_decode( self->decoder, input_chunk, eof); @@ -1508,6 +1519,7 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) decoded_chars = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL); } + PyBuffer_Release(&input_chunk_buf); if (check_decoded(decoded_chars) < 0) goto fail; @@ -1524,18 +1536,12 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) /* At the snapshot point, len(dec_buffer) bytes before the read, the * next input to be decoded is dec_buffer + input_chunk. */ - PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk); - if (next_input == NULL) - goto fail; - if (!PyBytes_Check(next_input)) { - PyErr_Format(PyExc_TypeError, - "decoder getstate() should have returned a bytes " - "object, not '%.200s'", - Py_TYPE(next_input)->tp_name); - Py_DECREF(next_input); + PyObject *next_input = dec_buffer; + PyBytes_Concat(&next_input, input_chunk); + if (next_input == NULL) { + dec_buffer = NULL; /* Reference lost to PyBytes_Concat */ goto fail; } - Py_DECREF(dec_buffer); Py_CLEAR(self->snapshot); self->snapshot = Py_BuildValue("NN", dec_flags, next_input); } @@ -1724,7 +1730,7 @@ _PyIO_find_line_ending( else { /* Non-universal mode. */ Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl); - char *nl = PyUnicode_DATA(readnl); + Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl); /* Assume that readnl is an ASCII character. */ assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND); if (readnl_len == 1) { |