diff options
author | Guido van Rossum <guido@python.org> | 1997-05-09 22:27:31 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 1997-05-09 22:27:31 (GMT) |
commit | 5449b6e123825fe8e37402cb28ab377dd662bb1e (patch) | |
tree | ccd3a3bc1790984cd80d4b47e1269320a36348c0 /Objects/fileobject.c | |
parent | 5878b662de976e55dbb502752608cade58372b7a (diff) | |
download | cpython-5449b6e123825fe8e37402cb28ab377dd662bb1e.zip cpython-5449b6e123825fe8e37402cb28ab377dd662bb1e.tar.gz cpython-5449b6e123825fe8e37402cb28ab377dd662bb1e.tar.bz2 |
Speed up read() (i.e. read till EOF) considerably by doing an fstat() to
see if we can guess the number of bytes until the end of the file. If we
can't, grow the buffer in increasing increments (doubling up to 0.5 Meg,
then adding 0.5 Meg at a time) to avoid realloc'ing too much.
Diffstat (limited to 'Objects/fileobject.c')
-rw-r--r-- | Objects/fileobject.c | 80 |
1 files changed, 65 insertions, 15 deletions
diff --git a/Objects/fileobject.c b/Objects/fileobject.c index 1b32061..057b424 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -381,47 +381,97 @@ file_isatty(f, args) return PyInt_FromLong(res); } +/* We expect that fstat exists on most systems. + It's confirmed on Unix, Mac and Windows. + If you don't have it, add #define DONT_HAVE_FSTAT to your config.h. */ +#ifndef DONT_HAVE_FSTAT +#define HAVE_FSTAT + +#include <sys/types.h> +#include <sys/stat.h> + +#endif + +#if BUFSIZ < 8192 +#define SMALLCHUNK 8192 +#else +#define SMALLCHUNK BUFSIZ +#endif + +#define BIGCHUNK (512*1024) + +static size_t +new_buffersize(f, currentsize) + PyFileObject *f; + size_t currentsize; +{ +#ifdef HAVE_FSTAT + long pos, end; + struct stat st; + if (fstat(fileno(f->f_fp), &st) == 0) { + end = st.st_size; + pos = ftell(f->f_fp); + if (end > pos && pos >= 0) + return end - pos + 1; + } +#endif + if (currentsize > SMALLCHUNK) { + /* Keep doubling until we reach BIGCHUNK; + then keep adding BIGCHUNK. */ + if (currentsize <= BIGCHUNK) + return currentsize + currentsize; + else + return currentsize + BIGCHUNK; + } + return currentsize + SMALLCHUNK; +} + static PyObject * file_read(f, args) PyFileObject *f; PyObject *args; { - int n, n1, n2, n3; + long bytesrequested; + size_t bytesread, buffersize, chunksize; PyObject *v; if (f->f_fp == NULL) return err_closed(); if (args == NULL) - n = -1; + bytesrequested = -1; else { - if (!PyArg_Parse(args, "i", &n)) + if (!PyArg_Parse(args, "l", &bytesrequested)) return NULL; } - n2 = n >= 0 ? 
n : BUFSIZ; - v = PyString_FromStringAndSize((char *)NULL, n2); + if (bytesrequested < 0) + buffersize = new_buffersize(f, 0); + else + buffersize = bytesrequested; + v = PyString_FromStringAndSize((char *)NULL, buffersize); if (v == NULL) return NULL; - n1 = 0; + bytesread = 0; Py_BEGIN_ALLOW_THREADS for (;;) { - n3 = fread(BUF(v)+n1, 1, n2-n1, f->f_fp); + chunksize = fread(BUF(v) + bytesread, 1, + buffersize - bytesread, f->f_fp); /* XXX Error check? */ - if (n3 == 0) + if (chunksize == 0) break; - n1 += n3; - if (n1 == n) + bytesread += chunksize; + if (bytesread < buffersize) break; - if (n < 0) { - n2 = n1 + BUFSIZ; + if (bytesrequested < 0) { + buffersize = new_buffersize(f, buffersize); Py_BLOCK_THREADS - if (_PyString_Resize(&v, n2) < 0) + if (_PyString_Resize(&v, buffersize) < 0) return NULL; Py_UNBLOCK_THREADS } } Py_END_ALLOW_THREADS - if (n1 != n2) - _PyString_Resize(&v, n1); + if (bytesread != buffersize) + _PyString_Resize(&v, bytesread); return v; } |