summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Lib/_pyio.py 44
-rw-r--r-- Modules/_io/fileio.c 83
2 files changed, 81 insertions, 46 deletions
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index 75b5ad1..18849b3 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -242,14 +242,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
buffering = -1
line_buffering = True
if buffering < 0:
- buffering = DEFAULT_BUFFER_SIZE
- try:
- bs = os.fstat(raw.fileno()).st_blksize
- except (OSError, AttributeError):
- pass
- else:
- if bs > 1:
- buffering = bs
+ buffering = raw._blksize
if buffering < 0:
raise ValueError("invalid buffering size")
if buffering == 0:
@@ -1565,19 +1558,15 @@ class FileIO(RawIOBase):
os.set_inheritable(fd, False)
self._closefd = closefd
- fdfstat = os.fstat(fd)
+ self._stat_atopen = os.fstat(fd)
try:
- if stat.S_ISDIR(fdfstat.st_mode):
+ if stat.S_ISDIR(self._stat_atopen.st_mode):
raise IsADirectoryError(errno.EISDIR,
os.strerror(errno.EISDIR), file)
except AttributeError:
# Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
# don't exist.
pass
- self._blksize = getattr(fdfstat, 'st_blksize', 0)
- if self._blksize <= 1:
- self._blksize = DEFAULT_BUFFER_SIZE
- self._estimated_size = fdfstat.st_size
if _setmode:
# don't translate newlines (\r\n <=> \n)
@@ -1623,6 +1612,17 @@ class FileIO(RawIOBase):
return ('<%s name=%r mode=%r closefd=%r>' %
(class_name, name, self.mode, self._closefd))
+ @property
+ def _blksize(self):
+ if self._stat_atopen is None:
+ return DEFAULT_BUFFER_SIZE
+
+ blksize = getattr(self._stat_atopen, "st_blksize", 0)
+ # WASI sets st_blksize to 0
+ if not blksize:
+ return DEFAULT_BUFFER_SIZE
+ return blksize
+
def _checkReadable(self):
if not self._readable:
raise UnsupportedOperation('File not open for reading')
@@ -1655,16 +1655,20 @@ class FileIO(RawIOBase):
"""
self._checkClosed()
self._checkReadable()
- if self._estimated_size <= 0:
+ if self._stat_atopen is None or self._stat_atopen.st_size <= 0:
bufsize = DEFAULT_BUFFER_SIZE
else:
- bufsize = self._estimated_size + 1
+ # In order to detect end of file, need a read() of at least 1
+ # byte which returns size 0. Oversize the buffer by 1 byte so the
+ # I/O can be completed with two read() calls (one for all data, one
+ # for EOF) without needing to resize the buffer.
+ bufsize = self._stat_atopen.st_size + 1
- if self._estimated_size > 65536:
+ if self._stat_atopen.st_size > 65536:
try:
pos = os.lseek(self._fd, 0, SEEK_CUR)
- if self._estimated_size >= pos:
- bufsize = self._estimated_size - pos + 1
+ if self._stat_atopen.st_size >= pos:
+ bufsize = self._stat_atopen.st_size - pos + 1
except OSError:
pass
@@ -1742,7 +1746,7 @@ class FileIO(RawIOBase):
if size is None:
size = self.tell()
os.ftruncate(self._fd, size)
- self._estimated_size = size
+ self._stat_atopen = None
return size
def close(self):
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
index 5d9d87d..865b0e3 100644
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -74,8 +74,13 @@ typedef struct {
signed int seekable : 2; /* -1 means unknown */
unsigned int closefd : 1;
char finalizing;
- unsigned int blksize;
- Py_off_t estimated_size;
+ /* Stat result which was grabbed at file open, useful for optimizing common
+ File I/O patterns to be more efficient. This is only guidance / an
+ estimate, as it is subject to Time-Of-Check to Time-Of-Use (TOCTOU)
+ issues / bugs. Both the underlying file descriptor and file may be
+ modified outside of the fileio object / Python (e.g. gh-90102, gh-121941,
+ gh-109523). */
+ struct _Py_stat_struct *stat_atopen;
PyObject *weakreflist;
PyObject *dict;
} fileio;
@@ -199,8 +204,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
self->writable = 0;
self->appending = 0;
self->seekable = -1;
- self->blksize = 0;
- self->estimated_size = -1;
+ self->stat_atopen = NULL;
self->closefd = 1;
self->weakreflist = NULL;
}
@@ -256,7 +260,6 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
#elif !defined(MS_WINDOWS)
int *atomic_flag_works = NULL;
#endif
- struct _Py_stat_struct fdfstat;
int fstat_result;
int async_err = 0;
@@ -454,9 +457,13 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
#endif
}
- self->blksize = DEFAULT_BUFFER_SIZE;
+ self->stat_atopen = PyMem_New(struct _Py_stat_struct, 1);
+ if (self->stat_atopen == NULL) {
+ PyErr_NoMemory();
+ goto error;
+ }
Py_BEGIN_ALLOW_THREADS
- fstat_result = _Py_fstat_noraise(self->fd, &fdfstat);
+ fstat_result = _Py_fstat_noraise(self->fd, self->stat_atopen);
Py_END_ALLOW_THREADS
if (fstat_result < 0) {
/* Tolerate fstat() errors other than EBADF. See Issue #25717, where
@@ -471,25 +478,21 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
#endif
goto error;
}
+
+ PyMem_Free(self->stat_atopen);
+ self->stat_atopen = NULL;
}
else {
#if defined(S_ISDIR) && defined(EISDIR)
/* On Unix, open will succeed for directories.
In Python, there should be no file objects referring to
directories, so we need a check. */
- if (S_ISDIR(fdfstat.st_mode)) {
+ if (S_ISDIR(self->stat_atopen->st_mode)) {
errno = EISDIR;
PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, nameobj);
goto error;
}
#endif /* defined(S_ISDIR) */
-#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
- if (fdfstat.st_blksize > 1)
- self->blksize = fdfstat.st_blksize;
-#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
- if (fdfstat.st_size < PY_SSIZE_T_MAX) {
- self->estimated_size = (Py_off_t)fdfstat.st_size;
- }
}
#if defined(MS_WINDOWS) || defined(__CYGWIN__)
@@ -521,6 +524,10 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
internal_close(self);
_PyErr_ChainExceptions1(exc);
}
+ if (self->stat_atopen != NULL) {
+ PyMem_Free(self->stat_atopen);
+ self->stat_atopen = NULL;
+ }
done:
#ifdef MS_WINDOWS
@@ -553,6 +560,10 @@ fileio_dealloc(fileio *self)
if (_PyIOBase_finalize((PyObject *) self) < 0)
return;
_PyObject_GC_UNTRACK(self);
+ if (self->stat_atopen != NULL) {
+ PyMem_Free(self->stat_atopen);
+ self->stat_atopen = NULL;
+ }
if (self->weakreflist != NULL)
PyObject_ClearWeakRefs((PyObject *) self);
(void)fileio_clear(self);
@@ -725,20 +736,27 @@ _io_FileIO_readall_impl(fileio *self)
return err_closed();
}
- end = self->estimated_size;
+ if (self->stat_atopen != NULL && self->stat_atopen->st_size < _PY_READ_MAX) {
+ end = (Py_off_t)self->stat_atopen->st_size;
+ }
+ else {
+ end = -1;
+ }
if (end <= 0) {
/* Use a default size and resize as needed. */
bufsize = SMALLCHUNK;
}
else {
- /* This is probably a real file, so we try to allocate a
- buffer one byte larger than the rest of the file. If the
- calculation is right then we should get EOF without having
- to enlarge the buffer. */
+ /* This is probably a real file. */
if (end > _PY_READ_MAX - 1) {
bufsize = _PY_READ_MAX;
}
else {
+ /* In order to detect end of file, need a read() of at
+ least 1 byte which returns size 0. Oversize the buffer
+ by 1 byte so the I/O can be completed with two read()
+ calls (one for all data, one for EOF) without needing
+ to resize the buffer. */
bufsize = (size_t)end + 1;
}
@@ -1094,11 +1112,13 @@ _io_FileIO_truncate_impl(fileio *self, PyTypeObject *cls, PyObject *posobj)
return NULL;
}
- /* Sometimes a large file is truncated. While estimated_size is used as a
- estimate, that it is much larger than the actual size can result in a
- significant over allocation and sometimes a MemoryError / running out of
- memory. */
- self->estimated_size = pos;
+ /* Since the file was truncated, its size at open is no longer accurate
+ as an estimate. Clear out the stat result, and rely on dynamic resize
+ code if a readall is requested. */
+ if (self->stat_atopen != NULL) {
+ PyMem_Free(self->stat_atopen);
+ self->stat_atopen = NULL;
+ }
return posobj;
}
@@ -1229,16 +1249,27 @@ get_mode(fileio *self, void *closure)
return PyUnicode_FromString(mode_string(self));
}
+static PyObject *
+get_blksize(fileio *self, void *closure)
+{
+#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
+ if (self->stat_atopen != NULL && self->stat_atopen->st_blksize > 1) {
+ return PyLong_FromLong(self->stat_atopen->st_blksize);
+ }
+#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
+ return PyLong_FromLong(DEFAULT_BUFFER_SIZE);
+}
+
static PyGetSetDef fileio_getsetlist[] = {
{"closed", (getter)get_closed, NULL, "True if the file is closed"},
{"closefd", (getter)get_closefd, NULL,
"True if the file descriptor will be closed by close()."},
{"mode", (getter)get_mode, NULL, "String giving the file mode"},
+ {"_blksize", (getter)get_blksize, NULL, "Stat st_blksize if available"},
{NULL},
};
static PyMemberDef fileio_members[] = {
- {"_blksize", Py_T_UINT, offsetof(fileio, blksize), 0},
{"_finalizing", Py_T_BOOL, offsetof(fileio, finalizing), 0},
{"__weaklistoffset__", Py_T_PYSSIZET, offsetof(fileio, weakreflist), Py_READONLY},
{"__dictoffset__", Py_T_PYSSIZET, offsetof(fileio, dict), Py_READONLY},