summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRichard Oudkerk <shibturn@gmail.com>2013-05-17 22:34:42 (GMT)
committerRichard Oudkerk <shibturn@gmail.com>2013-05-17 22:34:42 (GMT)
commitaf7260e81a13f5ebe38d895cd8c32a080223302d (patch)
tree57a5af8764ba28ce310ea8e92ad94e1e625a01d4
parenta29ac45200ba89c474b7b5c7f2973b4d38116fe7 (diff)
downloadcpython-af7260e81a13f5ebe38d895cd8c32a080223302d.zip
cpython-af7260e81a13f5ebe38d895cd8c32a080223302d.tar.gz
cpython-af7260e81a13f5ebe38d895cd8c32a080223302d.tar.bz2
Issue #15758: Fix FileIO.readall() so it no longer has O(n**2) complexity.
-rw-r--r--Misc/NEWS2
-rw-r--r--Modules/_io/fileio.c118
2 files changed, 54 insertions, 66 deletions
diff --git a/Misc/NEWS b/Misc/NEWS
index d928df9..716378a 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -91,6 +91,8 @@ Core and Builtins
Library
-------
+- Issue #15758: Fix FileIO.readall() so it no longer has O(n**2) complexity.
+
- Issue #14596: The struct.Struct() objects now use more compact implementation.
- Issue #17981: Closed socket on error in SysLogHandler.
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
index fd7c1fc..56c02dd 100644
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -556,33 +556,27 @@ fileio_readinto(fileio *self, PyObject *args)
return PyLong_FromSsize_t(n);
}
+#ifndef HAVE_FSTAT
+
+static PyObject *
+fileio_readall(fileio *self)
+{
+ _Py_IDENTIFIER(readall);
+ return _PyObject_CallMethodId((PyObject*)&PyRawIOBase_Type,
+ &PyId_readall, "O", self);
+}
+
+#else
+
static size_t
-new_buffersize(fileio *self, size_t currentsize
-#ifdef HAVE_FSTAT
- , Py_off_t pos, Py_off_t end
-#endif
- )
+new_buffersize(fileio *self, size_t currentsize)
{
size_t addend;
-#ifdef HAVE_FSTAT
- if (end != (Py_off_t)-1) {
- /* Files claiming a size smaller than SMALLCHUNK may
- actually be streaming pseudo-files. In this case, we
- apply the more aggressive algorithm below.
- */
- if (end >= SMALLCHUNK && end >= pos && pos >= 0) {
- /* Add 1 so if the file were to grow we'd notice. */
- Py_off_t bufsize = currentsize + end - pos + 1;
- if (bufsize < PY_SSIZE_T_MAX)
- return (size_t)bufsize;
- else
- return PY_SSIZE_T_MAX;
- }
- }
-#endif
+
/* Expand the buffer by an amount proportional to the current size,
giving us amortized linear-time behavior. For bigger sizes, use a
less-than-double growth factor to avoid excessive allocation. */
+ assert(currentsize <= PY_SSIZE_T_MAX);
if (currentsize > 65536)
addend = currentsize >> 3;
else
@@ -596,25 +590,18 @@ new_buffersize(fileio *self, size_t currentsize
static PyObject *
fileio_readall(fileio *self)
{
-#ifdef HAVE_FSTAT
struct stat st;
Py_off_t pos, end;
-#endif
PyObject *result;
- Py_ssize_t total = 0;
+ Py_ssize_t bytes_read = 0;
Py_ssize_t n;
- size_t newsize;
+ size_t bufsize;
if (self->fd < 0)
return err_closed();
if (!_PyVerify_fd(self->fd))
return PyErr_SetFromErrno(PyExc_IOError);
- result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
- if (result == NULL)
- return NULL;
-
-#ifdef HAVE_FSTAT
#if defined(MS_WIN64) || defined(MS_WINDOWS)
pos = _lseeki64(self->fd, 0L, SEEK_CUR);
#else
@@ -624,44 +611,46 @@ fileio_readall(fileio *self)
end = st.st_size;
else
end = (Py_off_t)-1;
-#endif
+
+ if (end > 0 && end >= pos && pos >= 0 && end - pos < PY_SSIZE_T_MAX) {
+ /* This is probably a real file, so we try to allocate a
+ buffer one byte larger than the rest of the file. If the
+ calculation is right then we should get EOF without having
+ to enlarge the buffer. */
+ bufsize = (size_t)(end - pos + 1);
+ } else {
+ bufsize = SMALLCHUNK;
+ }
+
+ result = PyBytes_FromStringAndSize(NULL, bufsize);
+ if (result == NULL)
+ return NULL;
+
while (1) {
-#ifdef HAVE_FSTAT
- newsize = new_buffersize(self, total, pos, end);
-#else
- newsize = new_buffersize(self, total);
-#endif
- if (newsize > PY_SSIZE_T_MAX || newsize <= 0) {
- PyErr_SetString(PyExc_OverflowError,
- "unbounded read returned more bytes "
- "than a Python string can hold ");
- Py_DECREF(result);
- return NULL;
- }
+ if (bytes_read >= (Py_ssize_t)bufsize) {
+ bufsize = new_buffersize(self, bytes_read);
+ if (bufsize > PY_SSIZE_T_MAX || bufsize <= 0) {
+ PyErr_SetString(PyExc_OverflowError,
+ "unbounded read returned more bytes "
+ "than a Python string can hold ");
+ Py_DECREF(result);
+ return NULL;
+ }
- if (PyBytes_GET_SIZE(result) < (Py_ssize_t)newsize) {
- if (_PyBytes_Resize(&result, newsize) < 0) {
- if (total == 0) {
- Py_DECREF(result);
+ if (PyBytes_GET_SIZE(result) < (Py_ssize_t)bufsize) {
+ if (_PyBytes_Resize(&result, bufsize) < 0)
return NULL;
- }
- PyErr_Clear();
- break;
}
}
Py_BEGIN_ALLOW_THREADS
errno = 0;
- n = newsize - total;
+ n = bufsize - bytes_read;
#if defined(MS_WIN64) || defined(MS_WINDOWS)
if (n > INT_MAX)
n = INT_MAX;
- n = read(self->fd,
- PyBytes_AS_STRING(result) + total,
- (int)n);
+ n = read(self->fd, PyBytes_AS_STRING(result) + bytes_read, (int)n);
#else
- n = read(self->fd,
- PyBytes_AS_STRING(result) + total,
- n);
+ n = read(self->fd, PyBytes_AS_STRING(result) + bytes_read, n);
#endif
Py_END_ALLOW_THREADS
if (n == 0)
@@ -674,7 +663,7 @@ fileio_readall(fileio *self)
}
continue;
}
- if (total > 0)
+ if (bytes_read > 0)
break;
if (errno == EAGAIN) {
Py_DECREF(result);
@@ -684,22 +673,19 @@ fileio_readall(fileio *self)
PyErr_SetFromErrno(PyExc_IOError);
return NULL;
}
- total += n;
-#ifdef HAVE_FSTAT
+ bytes_read += n;
pos += n;
-#endif
}
- if (PyBytes_GET_SIZE(result) > total) {
- if (_PyBytes_Resize(&result, total) < 0) {
- /* This should never happen, but just in case */
- Py_DECREF(result);
+ if (PyBytes_GET_SIZE(result) > bytes_read) {
+ if (_PyBytes_Resize(&result, bytes_read) < 0)
return NULL;
- }
}
return result;
}
+#endif /* HAVE_FSTAT */
+
static PyObject *
fileio_read(fileio *self, PyObject *args)
{