summary | refs | log | tree | commit | diff | stats
path: root/Modules/_io/fileio.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_io/fileio.c')
-rw-r--r-- Modules/_io/fileio.c | 83
1 files changed, 57 insertions, 26 deletions
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
index 5d9d87d..865b0e3 100644
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -74,8 +74,13 @@ typedef struct {
signed int seekable : 2; /* -1 means unknown */
unsigned int closefd : 1;
char finalizing;
- unsigned int blksize;
- Py_off_t estimated_size;
+ /* Stat result captured when the file was opened; used to make common
+ file I/O patterns more efficient. Treat it only as a hint / estimate:
+ it is subject to Time-Of-Check to Time-Of-Use (TOCTOU) issues, because
+ both the underlying file descriptor and the file may be modified
+ outside of the fileio object / Python (e.g. gh-90102, gh-121941,
+ gh-109523). NULL when no stat result is held. */
+ struct _Py_stat_struct *stat_atopen;
PyObject *weakreflist;
PyObject *dict;
} fileio;
@@ -199,8 +204,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
self->writable = 0;
self->appending = 0;
self->seekable = -1;
- self->blksize = 0;
- self->estimated_size = -1;
+ self->stat_atopen = NULL;
self->closefd = 1;
self->weakreflist = NULL;
}
@@ -256,7 +260,6 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
#elif !defined(MS_WINDOWS)
int *atomic_flag_works = NULL;
#endif
- struct _Py_stat_struct fdfstat;
int fstat_result;
int async_err = 0;
@@ -454,9 +457,13 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
#endif
}
- self->blksize = DEFAULT_BUFFER_SIZE;
+ self->stat_atopen = PyMem_New(struct _Py_stat_struct, 1);
+ if (self->stat_atopen == NULL) {
+ PyErr_NoMemory();
+ goto error;
+ }
Py_BEGIN_ALLOW_THREADS
- fstat_result = _Py_fstat_noraise(self->fd, &fdfstat);
+ fstat_result = _Py_fstat_noraise(self->fd, self->stat_atopen);
Py_END_ALLOW_THREADS
if (fstat_result < 0) {
/* Tolerate fstat() errors other than EBADF. See Issue #25717, where
@@ -471,25 +478,21 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
#endif
goto error;
}
+
+ PyMem_Free(self->stat_atopen);
+ self->stat_atopen = NULL;
}
else {
#if defined(S_ISDIR) && defined(EISDIR)
/* On Unix, open will succeed for directories.
In Python, there should be no file objects referring to
directories, so we need a check. */
- if (S_ISDIR(fdfstat.st_mode)) {
+ if (S_ISDIR(self->stat_atopen->st_mode)) {
errno = EISDIR;
PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, nameobj);
goto error;
}
#endif /* defined(S_ISDIR) */
-#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
- if (fdfstat.st_blksize > 1)
- self->blksize = fdfstat.st_blksize;
-#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
- if (fdfstat.st_size < PY_SSIZE_T_MAX) {
- self->estimated_size = (Py_off_t)fdfstat.st_size;
- }
}
#if defined(MS_WINDOWS) || defined(__CYGWIN__)
@@ -521,6 +524,10 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
internal_close(self);
_PyErr_ChainExceptions1(exc);
}
+ if (self->stat_atopen != NULL) {
+ PyMem_Free(self->stat_atopen);
+ self->stat_atopen = NULL;
+ }
done:
#ifdef MS_WINDOWS
@@ -553,6 +560,10 @@ fileio_dealloc(fileio *self)
if (_PyIOBase_finalize((PyObject *) self) < 0)
return;
_PyObject_GC_UNTRACK(self);
+ if (self->stat_atopen != NULL) {
+ PyMem_Free(self->stat_atopen);
+ self->stat_atopen = NULL;
+ }
if (self->weakreflist != NULL)
PyObject_ClearWeakRefs((PyObject *) self);
(void)fileio_clear(self);
@@ -725,20 +736,27 @@ _io_FileIO_readall_impl(fileio *self)
return err_closed();
}
- end = self->estimated_size;
+ if (self->stat_atopen != NULL && self->stat_atopen->st_size < _PY_READ_MAX) {
+ end = (Py_off_t)self->stat_atopen->st_size;
+ }
+ else {
+ end = -1;
+ }
if (end <= 0) {
/* Use a default size and resize as needed. */
bufsize = SMALLCHUNK;
}
else {
- /* This is probably a real file, so we try to allocate a
- buffer one byte larger than the rest of the file. If the
- calculation is right then we should get EOF without having
- to enlarge the buffer. */
+ /* This is probably a real file. */
if (end > _PY_READ_MAX - 1) {
bufsize = _PY_READ_MAX;
}
else {
+ /* Detecting end of file requires a read() of at least
+ 1 byte that returns 0. Oversize the buffer by 1 byte so
+ the I/O can complete with two read() calls (one for all
+ the data, one for EOF) without needing to resize the
+ buffer. */
bufsize = (size_t)end + 1;
}
@@ -1094,11 +1112,13 @@ _io_FileIO_truncate_impl(fileio *self, PyTypeObject *cls, PyObject *posobj)
return NULL;
}
- /* Sometimes a large file is truncated. While estimated_size is used as a
- estimate, that it is much larger than the actual size can result in a
- significant over allocation and sometimes a MemoryError / running out of
- memory. */
- self->estimated_size = pos;
+ /* Since the file was truncated, its size at open is no longer accurate
+ as an estimate. Clear out the stat result, and rely on dynamic resize
+ code if a readall is requested. */
+ if (self->stat_atopen != NULL) {
+ PyMem_Free(self->stat_atopen);
+ self->stat_atopen = NULL;
+ }
return posobj;
}
@@ -1229,16 +1249,27 @@ get_mode(fileio *self, void *closure)
return PyUnicode_FromString(mode_string(self));
}
+static PyObject *
+get_blksize(fileio *self, void *closure)
+{
+#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
+ if (self->stat_atopen != NULL && self->stat_atopen->st_blksize > 1) {
+ return PyLong_FromLong(self->stat_atopen->st_blksize);
+ }
+#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
+ return PyLong_FromLong(DEFAULT_BUFFER_SIZE);
+}
+
static PyGetSetDef fileio_getsetlist[] = {
{"closed", (getter)get_closed, NULL, "True if the file is closed"},
{"closefd", (getter)get_closefd, NULL,
"True if the file descriptor will be closed by close()."},
{"mode", (getter)get_mode, NULL, "String giving the file mode"},
+ {"_blksize", (getter)get_blksize, NULL, "Stat st_blksize if available"},
{NULL},
};
static PyMemberDef fileio_members[] = {
- {"_blksize", Py_T_UINT, offsetof(fileio, blksize), 0},
{"_finalizing", Py_T_BOOL, offsetof(fileio, finalizing), 0},
{"__weaklistoffset__", Py_T_PYSSIZET, offsetof(fileio, weakreflist), Py_READONLY},
{"__dictoffset__", Py_T_PYSSIZET, offsetof(fileio, dict), Py_READONLY},