diff options
Diffstat (limited to 'Modules/_io')
-rw-r--r-- | Modules/_io/_iomodule.c | 2 | ||||
-rw-r--r-- | Modules/_io/_iomodule.h | 20 | ||||
-rw-r--r-- | Modules/_io/bufferedio.c | 116 | ||||
-rw-r--r-- | Modules/_io/bytesio.c | 263 | ||||
-rw-r--r-- | Modules/_io/fileio.c | 97 | ||||
-rw-r--r-- | Modules/_io/iobase.c | 15 | ||||
-rw-r--r-- | Modules/_io/stringio.c | 140 | ||||
-rw-r--r-- | Modules/_io/textio.c | 94 |
8 files changed, 682 insertions, 65 deletions
diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index a1c451e..44bdac6 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -710,6 +710,8 @@ PyInit__io(void) /* BytesIO */ PyBytesIO_Type.tp_base = &PyBufferedIOBase_Type; ADD_TYPE(&PyBytesIO_Type, "BytesIO"); + if (PyType_Ready(&_PyBytesIOBuffer_Type) < 0) + goto fail; /* StringIO */ PyStringIO_Type.tp_base = &PyTextIOBase_Type; diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h index 16ab6c2..925e4f2 100644 --- a/Modules/_io/_iomodule.h +++ b/Modules/_io/_iomodule.h @@ -73,6 +73,14 @@ PyAPI_DATA(PyObject *) PyExc_BlockingIOError; * Offset type for positioning. */ +/* Printing a variable of type off_t (with e.g., PyString_FromFormat) + correctly and without producing compiler warnings is surprisingly painful. + We identify an integer type whose size matches off_t and then: (1) cast the + off_t to that integer type and (2) use the appropriate conversion + specification. The cast is necessary: gcc complains about formatting a + long with "%lld" even when both long and long long have the same + precision. */ + #if defined(MS_WIN64) || defined(MS_WINDOWS) /* Windows uses long long for offsets */ @@ -81,6 +89,8 @@ typedef PY_LONG_LONG Py_off_t; # define PyLong_FromOff_t PyLong_FromLongLong # define PY_OFF_T_MAX PY_LLONG_MAX # define PY_OFF_T_MIN PY_LLONG_MIN +# define PY_OFF_T_COMPAT PY_LONG_LONG /* type compatible with off_t */ +# define PY_PRIdOFF "lld" /* format to use for that type */ #else @@ -91,16 +101,22 @@ typedef off_t Py_off_t; # define PyLong_FromOff_t PyLong_FromSsize_t # define PY_OFF_T_MAX PY_SSIZE_T_MAX # define PY_OFF_T_MIN PY_SSIZE_T_MIN -#elif (SIZEOF_OFF_T == SIZEOF_LONG_LONG) +# define PY_OFF_T_COMPAT Py_ssize_t +# define PY_PRIdOFF "zd" +#elif (HAVE_LONG_LONG && SIZEOF_OFF_T == SIZEOF_LONG_LONG) # define PyLong_AsOff_t PyLong_AsLongLong # define PyLong_FromOff_t PyLong_FromLongLong # define PY_OFF_T_MAX PY_LLONG_MAX # define PY_OFF_T_MIN PY_LLONG_MIN +# define PY_OFF_T_COMPAT PY_LONG_LONG +# define PY_PRIdOFF "lld" #elif (SIZEOF_OFF_T == SIZEOF_LONG) # define PyLong_AsOff_t PyLong_AsLong # define PyLong_FromOff_t PyLong_FromLong # define PY_OFF_T_MAX LONG_MAX # define PY_OFF_T_MIN LONG_MIN +# define PY_OFF_T_COMPAT long +# define PY_PRIdOFF "ld" #else # error off_t does not match either size_t, long, or long long! #endif @@ -153,3 +169,5 @@ extern PyObject *_PyIO_str_write; extern PyObject *_PyIO_empty_str; extern PyObject *_PyIO_empty_bytes; extern PyObject *_PyIO_zero; + +extern PyTypeObject _PyBytesIOBuffer_Type; diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index 25b91e3..26ebde0 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -197,6 +197,7 @@ typedef struct { int detached; int readable; int writable; + int deallocating; /* True if this is a vanilla Buffered object (rather than a user derived class) *and* the raw stream is a vanilla FileIO object. */ @@ -260,6 +261,7 @@ typedef struct { /* These macros protect the buffered object against concurrent operations. */ #ifdef WITH_THREAD + static int _enter_buffered_busy(buffered *self) { @@ -359,6 +361,7 @@ _enter_buffered_busy(buffered *self) static void buffered_dealloc(buffered *self) { + self->deallocating = 1; if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0) return; _PyObject_GC_UNTRACK(self); @@ -399,6 +402,23 @@ buffered_clear(buffered *self) return 0; } +/* Because this can call arbitrary code, it shouldn't be called when + the refcount is 0 (that is, not directly from tp_dealloc unless + the refcount has been temporarily re-incremented). */ +static PyObject * +buffered_dealloc_warn(buffered *self, PyObject *source) +{ + if (self->ok && self->raw) { + PyObject *r; + r = PyObject_CallMethod(self->raw, "_dealloc_warn", "O", source); + if (r) + Py_DECREF(r); + else + PyErr_Clear(); + } + Py_RETURN_NONE; +} + /* * _BufferedIOMixin methods * This is not a class, just a collection of methods that will be reused @@ -453,6 +473,14 @@ buffered_close(buffered *self, PyObject *args) Py_INCREF(res); goto end; } + + if (self->deallocating) { + PyObject *r = buffered_dealloc_warn(self, (PyObject *) self); + if (r) + Py_DECREF(r); + else + PyErr_Clear(); + } /* flush() will most probably re-take the lock, so drop it first */ LEAVE_BUFFERED(self) res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL); @@ -541,6 +569,15 @@ buffered_isatty(buffered *self, PyObject *args) return PyObject_CallMethodObjArgs(self->raw, _PyIO_str_isatty, NULL); } +/* Serialization */ + +static PyObject * +buffered_getstate(buffered *self, PyObject *args) +{ + PyErr_Format(PyExc_TypeError, + "cannot serialize '%s' object", Py_TYPE(self)->tp_name); + return NULL; +} /* Forward decls */ static PyObject * @@ -597,7 +634,8 @@ _buffered_raw_tell(buffered *self) if (n < 0) { if (!PyErr_Occurred()) PyErr_Format(PyExc_IOError, - "Raw stream returned invalid position %zd", n); + "Raw stream returned invalid position %" PY_PRIdOFF, + (PY_OFF_T_COMPAT)n); return -1; } self->abs_pos = n; @@ -629,7 +667,8 @@ _buffered_raw_seek(buffered *self, Py_off_t target, int whence) if (n < 0) { if (!PyErr_Occurred()) PyErr_Format(PyExc_IOError, - "Raw stream returned invalid position %zd", n); + "Raw stream returned invalid position %" PY_PRIdOFF, + (PY_OFF_T_COMPAT)n); return -1; } self->abs_pos = n; @@ -675,6 +714,39 @@ _buffered_init(buffered *self) return 0; } +/* Return 1 if an EnvironmentError with errno == EINTR is set (and then + clears the error indicator), 0 otherwise. + Should only be called when PyErr_Occurred() is true. +*/ +static int +_trap_eintr(void) +{ + static PyObject *eintr_int = NULL; + PyObject *typ, *val, *tb; + PyEnvironmentErrorObject *env_err; + + if (eintr_int == NULL) { + eintr_int = PyLong_FromLong(EINTR); + assert(eintr_int != NULL); + } + if (!PyErr_ExceptionMatches(PyExc_EnvironmentError)) + return 0; + PyErr_Fetch(&typ, &val, &tb); + PyErr_NormalizeException(&typ, &val, &tb); + env_err = (PyEnvironmentErrorObject *) val; + assert(env_err != NULL); + if (env_err->myerrno != NULL && + PyObject_RichCompareBool(env_err->myerrno, eintr_int, Py_EQ) > 0) { + Py_DECREF(typ); + Py_DECREF(val); + Py_XDECREF(tb); + return 1; + } + /* This silences any error set by PyObject_RichCompareBool() */ + PyErr_Restore(typ, val, tb); + return 0; +} + /* * Shared methods and wrappers */ @@ -1230,7 +1302,14 @@ _bufferedreader_raw_read(buffered *self, char *start, Py_ssize_t len) memobj = PyMemoryView_FromBuffer(&buf); if (memobj == NULL) return -1; - res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_readinto, memobj, NULL); + /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals() when EINTR + occurs so we needn't do it ourselves. + We then retry reading, ignoring the signal if no handler has + raised (see issue #10956). + */ + do { + res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_readinto, memobj, NULL); + } while (res == NULL && _trap_eintr()); Py_DECREF(memobj); if (res == NULL) return -1; @@ -1488,6 +1567,8 @@ static PyMethodDef bufferedreader_methods[] = { {"writable", (PyCFunction)buffered_writable, METH_NOARGS}, {"fileno", (PyCFunction)buffered_fileno, METH_NOARGS}, {"isatty", (PyCFunction)buffered_isatty, METH_NOARGS}, + {"_dealloc_warn", (PyCFunction)buffered_dealloc_warn, METH_O}, + {"__getstate__", (PyCFunction)buffered_getstate, METH_NOARGS}, {"read", (PyCFunction)buffered_read, METH_VARARGS}, {"peek", (PyCFunction)buffered_peek, METH_VARARGS}, @@ -1637,7 +1718,14 @@ _bufferedwriter_raw_write(buffered *self, char *start, Py_ssize_t len) memobj = PyMemoryView_FromBuffer(&buf); if (memobj == NULL) return -1; - res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_write, memobj, NULL); + /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals() when EINTR + occurs so we needn't do it ourselves. + We then retry writing, ignoring the signal if no handler has + raised (see issue #10956). + */ + do { + res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_write, memobj, NULL); + } while (res == NULL && _trap_eintr()); Py_DECREF(memobj); if (res == NULL) return -1; @@ -1723,7 +1811,8 @@ bufferedwriter_write(buffered *self, PyObject *args) { PyObject *res = NULL; Py_buffer buf; - Py_ssize_t written, avail, remaining, n; + Py_ssize_t written, avail, remaining; + Py_off_t offset; CHECK_INITIALIZED(self) if (!PyArg_ParseTuple(args, "y*:write", &buf)) { @@ -1801,18 +1890,18 @@ bufferedwriter_write(buffered *self, PyObject *args) the raw stream by itself). Fixes issue #6629. */ - n = RAW_OFFSET(self); - if (n != 0) { - if (_buffered_raw_seek(self, -n, 1) < 0) + offset = RAW_OFFSET(self); + if (offset != 0) { + if (_buffered_raw_seek(self, -offset, 1) < 0) goto error; - self->raw_pos -= n; + self->raw_pos -= offset; } /* Then write buf itself. At this point the buffer has been emptied. */ remaining = buf.len; written = 0; while (remaining > self->buffer_size) { - n = _bufferedwriter_raw_write( + Py_ssize_t n = _bufferedwriter_raw_write( self, (char *) buf.buf + written, buf.len - written); if (n == -1) { Py_ssize_t *w = _buffered_check_blocking_error(); @@ -1872,6 +1961,8 @@ static PyMethodDef bufferedwriter_methods[] = { {"writable", (PyCFunction)buffered_writable, METH_NOARGS}, {"fileno", (PyCFunction)buffered_fileno, METH_NOARGS}, {"isatty", (PyCFunction)buffered_isatty, METH_NOARGS}, + {"_dealloc_warn", (PyCFunction)buffered_dealloc_warn, METH_O}, + {"__getstate__", (PyCFunction)buffered_getstate, METH_NOARGS}, {"write", (PyCFunction)bufferedwriter_write, METH_VARARGS}, {"truncate", (PyCFunction)buffered_truncate, METH_VARARGS}, @@ -2137,6 +2228,8 @@ static PyMethodDef bufferedrwpair_methods[] = { {"close", (PyCFunction)bufferedrwpair_close, METH_NOARGS}, {"isatty", (PyCFunction)bufferedrwpair_isatty, METH_NOARGS}, + {"__getstate__", (PyCFunction)buffered_getstate, METH_NOARGS}, + {NULL, NULL} }; @@ -2256,6 +2349,8 @@ static PyMethodDef bufferedrandom_methods[] = { {"writable", (PyCFunction)buffered_writable, METH_NOARGS}, {"fileno", (PyCFunction)buffered_fileno, METH_NOARGS}, {"isatty", (PyCFunction)buffered_isatty, METH_NOARGS}, + {"_dealloc_warn", (PyCFunction)buffered_dealloc_warn, METH_O}, + {"__getstate__", (PyCFunction)buffered_getstate, METH_NOARGS}, {"flush", (PyCFunction)buffered_flush, METH_NOARGS}, @@ -2325,4 +2420,3 @@ PyTypeObject PyBufferedRandom_Type = { 0, /* tp_alloc */ PyType_GenericNew, /* tp_new */ }; - diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index c8fb3eb..b40513f 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -10,8 +10,15 @@ typedef struct { size_t buf_size; PyObject *dict; PyObject *weakreflist; + Py_ssize_t exports; } bytesio; +typedef struct { + PyObject_HEAD + bytesio *source; +} bytesiobuf; + + #define CHECK_CLOSED(self) \ if ((self)->buf == NULL) { \ PyErr_SetString(PyExc_ValueError, \ @@ -19,6 +26,14 @@ typedef struct { return NULL; \ } +#define CHECK_EXPORTS(self) \ + if ((self)->exports > 0) { \ + PyErr_SetString(PyExc_BufferError, \ + "Existing exports of data: object cannot be re-sized"); \ + return NULL; \ + } + + /* Internal routine to get a line from the buffer of a BytesIO object. Returns the length between the current position to the next newline character. */ @@ -173,6 +188,30 @@ bytesio_flush(bytesio *self) Py_RETURN_NONE; } +PyDoc_STRVAR(getbuffer_doc, +"getbuffer() -> bytes.\n" +"\n" +"Get a read-write view over the contents of the BytesIO object."); + +static PyObject * +bytesio_getbuffer(bytesio *self) +{ + PyTypeObject *type = &_PyBytesIOBuffer_Type; + bytesiobuf *buf; + PyObject *view; + + CHECK_CLOSED(self); + + buf = (bytesiobuf *) type->tp_alloc(type, 0); + if (buf == NULL) + return NULL; + Py_INCREF(self); + buf->source = self; + view = PyMemoryView_FromObject((PyObject *) buf); + Py_DECREF(buf); + return view; +} + PyDoc_STRVAR(getval_doc, "getvalue() -> bytes.\n" "\n" @@ -427,6 +466,7 @@ bytesio_truncate(bytesio *self, PyObject *args) PyObject *arg = Py_None; CHECK_CLOSED(self); + CHECK_EXPORTS(self); if (!PyArg_ParseTuple(args, "|O:truncate", &arg)) return NULL; @@ -548,6 +588,7 @@ bytesio_write(bytesio *self, PyObject *obj) PyObject *result = NULL; CHECK_CLOSED(self); + CHECK_EXPORTS(self); if (PyObject_GetBuffer(obj, &buf, PyBUF_CONTIG_RO) < 0) return NULL; @@ -611,10 +652,130 @@ bytesio_close(bytesio *self) Py_RETURN_NONE; } +/* Pickling support. + + Note that only pickle protocol 2 and onward are supported since we use + extended __reduce__ API of PEP 307 to make BytesIO instances picklable. + + Providing support for protocol < 2 would require the __reduce_ex__ method + which is notably long-winded when defined properly. + + For BytesIO, the implementation would similar to one coded for + object.__reduce_ex__, but slightly less general. To be more specific, we + could call bytesio_getstate directly and avoid checking for the presence of + a fallback __reduce__ method. However, we would still need a __newobj__ + function to use the efficient instance representation of PEP 307. + */ + +static PyObject * +bytesio_getstate(bytesio *self) +{ + PyObject *initvalue = bytesio_getvalue(self); + PyObject *dict; + PyObject *state; + + if (initvalue == NULL) + return NULL; + if (self->dict == NULL) { + Py_INCREF(Py_None); + dict = Py_None; + } + else { + dict = PyDict_Copy(self->dict); + if (dict == NULL) + return NULL; + } + + state = Py_BuildValue("(OnN)", initvalue, self->pos, dict); + Py_DECREF(initvalue); + return state; +} + +static PyObject * +bytesio_setstate(bytesio *self, PyObject *state) +{ + PyObject *result; + PyObject *position_obj; + PyObject *dict; + Py_ssize_t pos; + + assert(state != NULL); + + /* We allow the state tuple to be longer than 3, because we may need + someday to extend the object's state without breaking + backward-compatibility. */ + if (!PyTuple_Check(state) || Py_SIZE(state) < 3) { + PyErr_Format(PyExc_TypeError, + "%.200s.__setstate__ argument should be 3-tuple, got %.200s", + Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name); + return NULL; + } + CHECK_EXPORTS(self); + /* Reset the object to its default state. This is only needed to handle + the case of repeated calls to __setstate__. */ + self->string_size = 0; + self->pos = 0; + + /* Set the value of the internal buffer. If state[0] does not support the + buffer protocol, bytesio_write will raise the appropriate TypeError. */ + result = bytesio_write(self, PyTuple_GET_ITEM(state, 0)); + if (result == NULL) + return NULL; + Py_DECREF(result); + + /* Set carefully the position value. Alternatively, we could use the seek + method instead of modifying self->pos directly to better protect the + object internal state against errneous (or malicious) inputs. */ + position_obj = PyTuple_GET_ITEM(state, 1); + if (!PyLong_Check(position_obj)) { + PyErr_Format(PyExc_TypeError, + "second item of state must be an integer, not %.200s", + Py_TYPE(position_obj)->tp_name); + return NULL; + } + pos = PyLong_AsSsize_t(position_obj); + if (pos == -1 && PyErr_Occurred()) + return NULL; + if (pos < 0) { + PyErr_SetString(PyExc_ValueError, + "position value cannot be negative"); + return NULL; + } + self->pos = pos; + + /* Set the dictionary of the instance variables. */ + dict = PyTuple_GET_ITEM(state, 2); + if (dict != Py_None) { + if (!PyDict_Check(dict)) { + PyErr_Format(PyExc_TypeError, + "third item of state should be a dict, got a %.200s", + Py_TYPE(dict)->tp_name); + return NULL; + } + if (self->dict) { + /* Alternatively, we could replace the internal dictionary + completely. However, it seems more practical to just update it. */ + if (PyDict_Update(self->dict, dict) < 0) + return NULL; + } + else { + Py_INCREF(dict); + self->dict = dict; + } + } + + Py_RETURN_NONE; +} + static void bytesio_dealloc(bytesio *self) { _PyObject_GC_UNTRACK(self); + if (self->exports > 0) { + PyErr_SetString(PyExc_SystemError, + "deallocated BytesIO object has exported buffers"); + PyErr_Print(); + } if (self->buf != NULL) { PyMem_Free(self->buf); self->buf = NULL; @@ -635,9 +796,9 @@ bytesio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (self == NULL) return NULL; - self->string_size = 0; - self->pos = 0; - self->buf_size = 0; + /* tp_alloc initializes all the fields to zero. So we don't have to + initialize them here. */ + self->buf = (char *)PyMem_Malloc(0); if (self->buf == NULL) { Py_DECREF(self); @@ -709,9 +870,12 @@ static struct PyMethodDef bytesio_methods[] = { {"readline", (PyCFunction)bytesio_readline, METH_VARARGS, readline_doc}, {"readlines", (PyCFunction)bytesio_readlines, METH_VARARGS, readlines_doc}, {"read", (PyCFunction)bytesio_read, METH_VARARGS, read_doc}, + {"getbuffer", (PyCFunction)bytesio_getbuffer, METH_NOARGS, getbuffer_doc}, {"getvalue", (PyCFunction)bytesio_getvalue, METH_NOARGS, getval_doc}, {"seek", (PyCFunction)bytesio_seek, METH_VARARGS, seek_doc}, {"truncate", (PyCFunction)bytesio_truncate, METH_VARARGS, truncate_doc}, + {"__getstate__", (PyCFunction)bytesio_getstate, METH_NOARGS, NULL}, + {"__setstate__", (PyCFunction)bytesio_setstate, METH_O, NULL}, {NULL, NULL} /* sentinel */ }; @@ -762,3 +926,96 @@ PyTypeObject PyBytesIO_Type = { 0, /*tp_alloc*/ bytesio_new, /*tp_new*/ }; + + +/* + * Implementation of the small intermediate object used by getbuffer(). + * getbuffer() returns a memoryview over this object, which should make it + * invisible from Python code. + */ + +static int +bytesiobuf_getbuffer(bytesiobuf *obj, Py_buffer *view, int flags) +{ + int ret; + void *ptr; + bytesio *b = (bytesio *) obj->source; + if (view == NULL) { + b->exports++; + return 0; + } + ptr = (void *) obj; + ret = PyBuffer_FillInfo(view, (PyObject*)obj, b->buf, b->string_size, + 0, flags); + if (ret >= 0) { + b->exports++; + } + return ret; +} + +static void +bytesiobuf_releasebuffer(bytesiobuf *obj, Py_buffer *view) +{ + bytesio *b = (bytesio *) obj->source; + b->exports--; +} + +static int +bytesiobuf_traverse(bytesiobuf *self, visitproc visit, void *arg) +{ + Py_VISIT(self->source); + return 0; +} + +static void +bytesiobuf_dealloc(bytesiobuf *self) +{ + Py_CLEAR(self->source); + Py_TYPE(self)->tp_free(self); +} + +static PyBufferProcs bytesiobuf_as_buffer = { + (getbufferproc) bytesiobuf_getbuffer, + (releasebufferproc) bytesiobuf_releasebuffer, +}; + +PyTypeObject _PyBytesIOBuffer_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_io._BytesIOBuffer", /*tp_name*/ + sizeof(bytesiobuf), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)bytesiobuf_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_reserved*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + &bytesiobuf_as_buffer, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + 0, /*tp_doc*/ + (traverseproc)bytesiobuf_traverse, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + 0, /*tp_methods*/ + 0, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + 0, /*tp_dictoffset*/ + 0, /*tp_init*/ + 0, /*tp_alloc*/ + 0, /*tp_new*/ +}; diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 473919b..1aa5ee9 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -2,9 +2,16 @@ #define PY_SSIZE_T_CLEAN #include "Python.h" +#include "structmember.h" +#ifdef HAVE_SYS_TYPES_H #include <sys/types.h> +#endif +#ifdef HAVE_SYS_STAT_H #include <sys/stat.h> +#endif +#ifdef HAVE_FCNTL_H #include <fcntl.h> +#endif #include <stddef.h> /* For offsetof */ #include "_iomodule.h" @@ -49,6 +56,7 @@ typedef struct { unsigned int writable : 1; signed int seekable : 2; /* -1 means unknown */ unsigned int closefd : 1; + unsigned int deallocating: 1; PyObject *weakreflist; PyObject *dict; } fileio; @@ -63,6 +71,26 @@ _PyFileIO_closed(PyObject *self) return ((fileio *)self)->fd < 0; } +/* Because this can call arbitrary code, it shouldn't be called when + the refcount is 0 (that is, not directly from tp_dealloc unless + the refcount has been temporarily re-incremented). */ +static PyObject * +fileio_dealloc_warn(fileio *self, PyObject *source) +{ + if (self->fd >= 0 && self->closefd) { + PyObject *exc, *val, *tb; + PyErr_Fetch(&exc, &val, &tb); + if (PyErr_WarnFormat(PyExc_ResourceWarning, 1, + "unclosed file %R", source)) { + /* Spurious errors can appear at shutdown */ + if (PyErr_ExceptionMatches(PyExc_Warning)) + PyErr_WriteUnraisable((PyObject *) self); + } + PyErr_Restore(exc, val, tb); + } + Py_RETURN_NONE; +} + static PyObject * portable_lseek(int fd, PyObject *posobj, int whence); @@ -104,6 +132,13 @@ fileio_close(fileio *self) self->fd = -1; Py_RETURN_NONE; } + if (self->deallocating) { + PyObject *r = fileio_dealloc_warn(self, (PyObject *) self); + if (r) + Py_DECREF(r); + else + PyErr_Clear(); + } errno = internal_close(self); if (errno < 0) return NULL; @@ -224,11 +259,8 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) } #ifdef MS_WINDOWS - if (GetVersion() < 0x80000000) { - /* On NT, so wide API available */ - if (PyUnicode_Check(nameobj)) - widename = PyUnicode_AS_UNICODE(nameobj); - } + if (PyUnicode_Check(nameobj)) + widename = PyUnicode_AS_UNICODE(nameobj); if (widename == NULL) #endif if (fd < 0) @@ -244,8 +276,7 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) if (u == NULL) return -1; - stringobj = PyUnicode_AsEncodedString( - u, Py_FileSystemDefaultEncoding, "surrogateescape"); + stringobj = PyUnicode_EncodeFSDefault(u); Py_DECREF(u); if (stringobj == NULL) return -1; @@ -353,10 +384,15 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) PyErr_SetFromErrnoWithFilename(PyExc_IOError, name); goto error; } - if(dircheck(self, name) < 0) + if (dircheck(self, name) < 0) goto error; } +#if defined(MS_WINDOWS) || defined(__CYGWIN__) + /* don't translate newlines (\r\n <=> \n) */ + _setmode(self->fd, O_BINARY); +#endif + if (PyObject_SetAttrString((PyObject *)self, "name", nameobj) < 0) goto error; @@ -379,6 +415,8 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) error: ret = -1; + if (self->fd >= 0) + internal_close(self); done: Py_CLEAR(stringobj); @@ -402,6 +440,7 @@ fileio_clear(fileio *self) static void fileio_dealloc(fileio *self) { + self->deallocating = 1; if (_PyIOBase_finalize((PyObject *) self) < 0) return; _PyObject_GC_UNTRACK(self); @@ -421,7 +460,8 @@ err_closed(void) static PyObject * err_mode(char *action) { - PyErr_Format(PyExc_ValueError, "File not open for %s", action); + PyErr_Format(IO_STATE->unsupported_operation, + "File not open for %s", action); return NULL; } @@ -471,7 +511,7 @@ static PyObject * fileio_readinto(fileio *self, PyObject *args) { Py_buffer pbuf; - Py_ssize_t n; + Py_ssize_t n, len; if (self->fd < 0) return err_closed(); @@ -482,9 +522,16 @@ fileio_readinto(fileio *self, PyObject *args) return NULL; if (_PyVerify_fd(self->fd)) { + len = pbuf.len; Py_BEGIN_ALLOW_THREADS errno = 0; - n = read(self->fd, pbuf.buf, pbuf.len); +#if defined(MS_WIN64) || defined(MS_WINDOWS) + if (len > INT_MAX) + len = INT_MAX; + n = read(self->fd, pbuf.buf, (int)len); +#else + n = read(self->fd, pbuf.buf, len); +#endif Py_END_ALLOW_THREADS } else n = -1; @@ -650,7 +697,7 @@ static PyObject * fileio_write(fileio *self, PyObject *args) { Py_buffer pbuf; - Py_ssize_t n; + Py_ssize_t n, len; if (self->fd < 0) return err_closed(); @@ -663,7 +710,21 @@ fileio_write(fileio *self, PyObject *args) if (_PyVerify_fd(self->fd)) { Py_BEGIN_ALLOW_THREADS errno = 0; - n = write(self->fd, pbuf.buf, pbuf.len); + len = pbuf.len; +#if defined(MS_WIN64) || defined(MS_WINDOWS) + if (len > 32767 && isatty(self->fd)) { + /* Issue #11395: the Windows console returns an error (12: not + enough space error) on writing into stdout if stdout mode is + binary and the length is greater than 66,000 bytes (or less, + depending on heap usage). */ + len = 32767; + } + else if (len > INT_MAX) + len = INT_MAX; + n = write(self->fd, pbuf.buf, (int)len); +#else + n = write(self->fd, pbuf.buf, len); +#endif Py_END_ALLOW_THREADS } else n = -1; @@ -917,6 +978,14 @@ fileio_isatty(fileio *self) return PyBool_FromLong(res); } +static PyObject * +fileio_getstate(fileio *self) +{ + PyErr_Format(PyExc_TypeError, + "cannot serialize '%s' object", Py_TYPE(self)->tp_name); + return NULL; +} + PyDoc_STRVAR(fileio_doc, "file(name: str[, mode: str]) -> file IO object\n" @@ -1010,6 +1079,8 @@ static PyMethodDef fileio_methods[] = { {"writable", (PyCFunction)fileio_writable, METH_NOARGS, writable_doc}, {"fileno", (PyCFunction)fileio_fileno, METH_NOARGS, fileno_doc}, {"isatty", (PyCFunction)fileio_isatty, METH_NOARGS, isatty_doc}, + {"_dealloc_warn", (PyCFunction)fileio_dealloc_warn, METH_O, NULL}, + {"__getstate__", (PyCFunction)fileio_getstate, METH_NOARGS, NULL}, {NULL, NULL} /* sentinel */ }; diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index 84b560a..ec7a242 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -35,7 +35,8 @@ PyDoc_STRVAR(iobase_doc, "Even though IOBase does not declare read, readinto, or write because\n" "their signatures will vary, implementations and clients should\n" "consider those methods part of the interface. Also, implementations\n" - "may raise a IOError when operations they do not support are called.\n" + "may raise UnsupportedOperation when operations they do not support are\n" + "called.\n" "\n" "The basic type used for binary data read from or written to a file is\n" "bytes. bytearrays are accepted too, and in some cases (such as\n" @@ -300,7 +301,7 @@ iobase_dealloc(iobase *self) PyDoc_STRVAR(iobase_seekable_doc, "Return whether object supports random access.\n" "\n" - "If False, seek(), tell() and truncate() will raise IOError.\n" + "If False, seek(), tell() and truncate() will raise UnsupportedOperation.\n" "This method may need to do a test seek()."); static PyObject * @@ -317,7 +318,7 @@ _PyIOBase_check_seekable(PyObject *self, PyObject *args) return NULL; if (res != Py_True) { Py_CLEAR(res); - PyErr_SetString(PyExc_IOError, "File or stream is not seekable."); + iobase_unsupported("File or stream is not seekable."); return NULL; } if (args == Py_True) { @@ -329,7 +330,7 @@ _PyIOBase_check_seekable(PyObject *self, PyObject *args) PyDoc_STRVAR(iobase_readable_doc, "Return whether object was opened for reading.\n" "\n" - "If False, read() will raise IOError."); + "If False, read() will raise UnsupportedOperation."); static PyObject * iobase_readable(PyObject *self, PyObject *args) @@ -346,7 +347,7 @@ _PyIOBase_check_readable(PyObject *self, PyObject *args) return NULL; if (res != Py_True) { Py_CLEAR(res); - PyErr_SetString(PyExc_IOError, "File or stream is not readable."); + iobase_unsupported("File or stream is not readable."); return NULL; } if (args == Py_True) { @@ -358,7 +359,7 @@ _PyIOBase_check_readable(PyObject *self, PyObject *args) PyDoc_STRVAR(iobase_writable_doc, "Return whether object was opened for writing.\n" "\n" - "If False, read() will raise IOError."); + "If False, write() will raise UnsupportedOperation."); static PyObject * iobase_writable(PyObject *self, PyObject *args) @@ -375,7 +376,7 @@ _PyIOBase_check_writable(PyObject *self, PyObject *args) return NULL; if (res != Py_True) { Py_CLEAR(res); - PyErr_SetString(PyExc_IOError, "File or stream is not writable."); + iobase_unsupported("File or stream is not writable."); return NULL; } if (args == Py_True) { diff --git a/Modules/_io/stringio.c b/Modules/_io/stringio.c index 8d83f73..c9d14b1 100644 --- a/Modules/_io/stringio.c +++ b/Modules/_io/stringio.c @@ -532,9 +532,9 @@ stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (self == NULL) return NULL; - self->string_size = 0; - self->pos = 0; - self->buf_size = 0; + /* tp_alloc initializes all the fields to zero. So we don't have to + initialize them here. */ + self->buf = (Py_UNICODE *)PyMem_Malloc(0); if (self->buf == NULL) { Py_DECREF(self); @@ -671,6 +671,135 @@ stringio_writable(stringio *self, PyObject *args) Py_RETURN_TRUE; } +/* Pickling support. + + The implementation of __getstate__ is similar to the one for BytesIO, + except that we also save the newline parameter. For __setstate__ and unlike + BytesIO, we call __init__ to restore the object's state. Doing so allows us + to avoid decoding the complex newline state while keeping the object + representation compact. + + See comment in bytesio.c regarding why only pickle protocols and onward are + supported. +*/ + +static PyObject * +stringio_getstate(stringio *self) +{ + PyObject *initvalue = stringio_getvalue(self); + PyObject *dict; + PyObject *state; + + if (initvalue == NULL) + return NULL; + if (self->dict == NULL) { + Py_INCREF(Py_None); + dict = Py_None; + } + else { + dict = PyDict_Copy(self->dict); + if (dict == NULL) + return NULL; + } + + state = Py_BuildValue("(OOnN)", initvalue, + self->readnl ? self->readnl : Py_None, + self->pos, dict); + Py_DECREF(initvalue); + return state; +} + +static PyObject * +stringio_setstate(stringio *self, PyObject *state) +{ + PyObject *initarg; + PyObject *position_obj; + PyObject *dict; + Py_ssize_t pos; + + assert(state != NULL); + CHECK_CLOSED(self); + + /* We allow the state tuple to be longer than 4, because we may need + someday to extend the object's state without breaking + backward-compatibility. */ + if (!PyTuple_Check(state) || Py_SIZE(state) < 4) { + PyErr_Format(PyExc_TypeError, + "%.200s.__setstate__ argument should be 4-tuple, got %.200s", + Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name); + return NULL; + } + + /* Initialize the object's state. */ + initarg = PyTuple_GetSlice(state, 0, 2); + if (initarg == NULL) + return NULL; + if (stringio_init(self, initarg, NULL) < 0) { + Py_DECREF(initarg); + return NULL; + } + Py_DECREF(initarg); + + /* Restore the buffer state. Even if __init__ did initialize the buffer, + we have to initialize it again since __init__ may translates the + newlines in the inital_value string. We clearly do not want that + because the string value in the state tuple has already been translated + once by __init__. So we do not take any chance and replace object's + buffer completely. */ + { + Py_UNICODE *buf = PyUnicode_AS_UNICODE(PyTuple_GET_ITEM(state, 0)); + Py_ssize_t bufsize = PyUnicode_GET_SIZE(PyTuple_GET_ITEM(state, 0)); + if (resize_buffer(self, bufsize) < 0) + return NULL; + memcpy(self->buf, buf, bufsize * sizeof(Py_UNICODE)); + self->string_size = bufsize; + } + + /* Set carefully the position value. Alternatively, we could use the seek + method instead of modifying self->pos directly to better protect the + object internal state against errneous (or malicious) inputs. */ + position_obj = PyTuple_GET_ITEM(state, 2); + if (!PyLong_Check(position_obj)) { + PyErr_Format(PyExc_TypeError, + "third item of state must be an integer, got %.200s", + Py_TYPE(position_obj)->tp_name); + return NULL; + } + pos = PyLong_AsSsize_t(position_obj); + if (pos == -1 && PyErr_Occurred()) + return NULL; + if (pos < 0) { + PyErr_SetString(PyExc_ValueError, + "position value cannot be negative"); + return NULL; + } + self->pos = pos; + + /* Set the dictionary of the instance variables. */ + dict = PyTuple_GET_ITEM(state, 3); + if (dict != Py_None) { + if (!PyDict_Check(dict)) { + PyErr_Format(PyExc_TypeError, + "fourth item of state should be a dict, got a %.200s", + Py_TYPE(dict)->tp_name); + return NULL; + } + if (self->dict) { + /* Alternatively, we could replace the internal dictionary + completely. However, it seems more practical to just update it. */ + if (PyDict_Update(self->dict, dict) < 0) + return NULL; + } + else { + Py_INCREF(dict); + self->dict = dict; + } + } + + Py_RETURN_NONE; +} + + static PyObject * stringio_closed(stringio *self, void *context) { @@ -705,10 +834,13 @@ static struct PyMethodDef stringio_methods[] = { {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, stringio_truncate_doc}, {"seek", (PyCFunction)stringio_seek, METH_VARARGS, stringio_seek_doc}, {"write", (PyCFunction)stringio_write, METH_O, stringio_write_doc}, - + {"seekable", (PyCFunction)stringio_seekable, METH_NOARGS}, {"readable", (PyCFunction)stringio_readable, METH_NOARGS}, {"writable", (PyCFunction)stringio_writable, METH_NOARGS}, + + {"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS}, + {"__setstate__", (PyCFunction)stringio_setstate, METH_O}, {NULL, NULL} /* sentinel */ }; diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index b039c2f..73d83a1 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -658,6 +658,7 @@ typedef struct char writetranslate; char seekable; char telling; + char deallocating; /* Specialized encoding func (see below) */ encodefunc_t encodefunc; /* Whether or not it's the start of the stream */ @@ -905,8 +906,11 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds) Py_CLEAR(self->encoding); } } - if (self->encoding != NULL) + if (self->encoding != NULL) { encoding = _PyUnicode_AsString(self->encoding); + if (encoding == NULL) + goto error; + } else if (encoding != NULL) { self->encoding = PyUnicode_FromString(encoding); if (self->encoding == NULL) @@ -935,6 +939,8 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds) self->writetranslate = (newline == NULL || newline[0] != '\0'); if (!self->readuniversal && self->readnl) { self->writenl = _PyUnicode_AsString(self->readnl); + if (self->writenl == NULL) + goto error; if (!strcmp(self->writenl, "\n")) self->writenl = NULL; } @@ -1089,6 +1095,7 @@ _textiowrapper_clear(textio *self) static void textiowrapper_dealloc(textio *self) { + self->deallocating = 1; if (_textiowrapper_clear(self) < 0) return; _PyObject_GC_UNTRACK(self); @@ -1254,10 +1261,8 @@ textiowrapper_write(textio *self, PyObject *args) CHECK_CLOSED(self); - if (self->encoder == NULL) { - PyErr_SetString(PyExc_IOError, "not writable"); - return NULL; - } + if (self->encoder == NULL) + return _unsupported("not writable"); Py_INCREF(text); @@ -1394,7 +1399,7 @@ textiowrapper_read_chunk(textio *self) */ if (self->decoder == NULL) { - PyErr_SetString(PyExc_IOError, "not readable"); + _unsupported("not readable"); return -1; } @@ -1484,10 +1489,8 @@ textiowrapper_read(textio *self, PyObject *args) CHECK_CLOSED(self); - if (self->decoder == NULL) { - PyErr_SetString(PyExc_IOError, "not readable"); - return NULL; - } + if (self->decoder == NULL) + return _unsupported("not readable"); if (_textiowrapper_writeflush(self) < 0) return NULL; @@ -1978,8 +1981,7 @@ textiowrapper_seek(textio *self, PyObject *args) Py_INCREF(cookieObj); if (!self->seekable) { - PyErr_SetString(PyExc_IOError, - "underlying stream is not seekable"); + _unsupported("underlying stream is not seekable"); goto fail; } @@ -1990,8 +1992,7 @@ textiowrapper_seek(textio *self, PyObject *args) goto fail; if (cmp == 0) { - PyErr_SetString(PyExc_IOError, - "can't do nonzero cur-relative seeks"); + _unsupported("can't do nonzero cur-relative seeks"); goto fail; } @@ -2011,8 +2012,7 @@ textiowrapper_seek(textio *self, PyObject *args) goto fail; if (cmp == 0) { - PyErr_SetString(PyExc_IOError, - "can't do nonzero end-relative seeks"); + _unsupported("can't do nonzero end-relative seeks"); goto fail; } @@ -2146,8 +2146,7 @@ textiowrapper_tell(textio *self, PyObject *args) CHECK_CLOSED(self); if (!self->seekable) { - PyErr_SetString(PyExc_IOError, - "underlying stream is not seekable"); + _unsupported("underlying stream is not seekable"); goto fail; } if (!self->telling) { @@ -2324,25 +2323,52 @@ textiowrapper_truncate(textio *self, PyObject *args) static PyObject * textiowrapper_repr(textio *self) { - PyObject *nameobj, *res; + PyObject *nameobj, *modeobj, *res, *s; CHECK_INITIALIZED(self); + res = PyUnicode_FromString("<_io.TextIOWrapper"); + if (res == NULL) + return NULL; nameobj = PyObject_GetAttrString((PyObject *) self, "name"); if (nameobj == NULL) { if (PyErr_ExceptionMatches(PyExc_AttributeError)) PyErr_Clear(); else - return NULL; - res = PyUnicode_FromFormat("<_io.TextIOWrapper encoding=%R>", - self->encoding); + goto error; } else { - res = PyUnicode_FromFormat("<_io.TextIOWrapper name=%R encoding=%R>", - nameobj, self->encoding); + s = PyUnicode_FromFormat(" name=%R", nameobj); Py_DECREF(nameobj); + if (s == NULL) + goto error; + PyUnicode_AppendAndDel(&res, s); + if (res == NULL) + return NULL; } - return res; + modeobj = PyObject_GetAttrString((PyObject *) self, "mode"); + if (modeobj == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) + PyErr_Clear(); + else + goto error; + } + else { + s = PyUnicode_FromFormat(" mode=%R", modeobj); + Py_DECREF(modeobj); + if (s == NULL) + goto error; + PyUnicode_AppendAndDel(&res, s); + if (res == NULL) + return NULL; + } + s = PyUnicode_FromFormat("%U encoding=%R>", + res, self->encoding); + Py_DECREF(res); + return s; +error: + Py_XDECREF(res); + return NULL; } @@ -2384,6 +2410,14 @@ textiowrapper_isatty(textio *self, PyObject *args) } static PyObject * +textiowrapper_getstate(textio *self, PyObject *args) +{ + PyErr_Format(PyExc_TypeError, + "cannot serialize '%s' object", Py_TYPE(self)->tp_name); + return NULL; +} + +static PyObject * textiowrapper_flush(textio *self, PyObject *args) { CHECK_INITIALIZED(self); @@ -2408,11 +2442,18 @@ textiowrapper_close(textio *self, PyObject *args) Py_DECREF(res); if (r < 0) return NULL; - + if (r > 0) { Py_RETURN_NONE; /* stream already closed */ } else { + if (self->deallocating) { + res = PyObject_CallMethod(self->buffer, "_dealloc_warn", "O", self); + if (res) + Py_DECREF(res); + else + PyErr_Clear(); + } res = PyObject_CallMethod((PyObject *)self, "flush", NULL); if (res == NULL) { return NULL; @@ -2540,6 +2581,7 @@ static PyMethodDef textiowrapper_methods[] = { {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS}, {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS}, {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS}, + {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS}, {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS}, {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS}, |