diff options
-rw-r--r-- | Include/fileobject.h | 9 | ||||
-rw-r--r-- | Objects/fileobject.c | 161 |
2 files changed, 136 insertions, 34 deletions
diff --git a/Include/fileobject.h b/Include/fileobject.h index dfc001c..fd04d35 100644 --- a/Include/fileobject.h +++ b/Include/fileobject.h @@ -13,9 +13,12 @@ typedef struct { PyObject *f_name; PyObject *f_mode; int (*f_close)(FILE *); - int f_softspace; /* Flag used by 'print' command */ - int f_binary; /* Flag which indicates whether the file is open - open in binary (1) or test (0) mode */ + int f_softspace; /* Flag used by 'print' command */ + int f_binary; /* Flag which indicates whether the file is + open in binary (1) or text (0) mode */ + char* f_buf; /* Allocated readahead buffer */ + char* f_bufend; /* Points after last occupied position */ + char* f_bufptr; /* Current buffer position */ #ifdef WITH_UNIVERSAL_NEWLINES int f_univ_newline; /* Handle any newline convention */ int f_newlinetypes; /* Types of newlines seen */ diff --git a/Objects/fileobject.c b/Objects/fileobject.c index 0b9bab8..726621a 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -1,4 +1,3 @@ - /* File object implementation */ #include "Python.h" @@ -116,6 +115,7 @@ fill_file_fields(PyFileObject *f, FILE *fp, char *name, char *mode, f->f_close = close; f->f_softspace = 0; f->f_binary = strchr(mode,'b') != NULL; + f->f_buf = NULL; #ifdef WITH_UNIVERSAL_NEWLINES f->f_univ_newline = (strchr(mode, 'U') != NULL); f->f_newlinetypes = NEWLINE_UNKNOWN; @@ -271,6 +271,8 @@ err_closed(void) return NULL; } +void drop_readahead(PyFileObject *); + /* Methods */ static void @@ -283,6 +285,7 @@ file_dealloc(PyFileObject *f) } Py_XDECREF(f->f_name); Py_XDECREF(f->f_mode); + drop_readahead(f); f->ob_type->tp_free((PyObject *)f); } @@ -405,6 +408,7 @@ file_seek(PyFileObject *f, PyObject *args) if (f->f_fp == NULL) return err_closed(); + drop_readahead(f); whence = 0; if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence)) return NULL; @@ -1178,28 +1182,6 @@ file_readline(PyFileObject *f, PyObject *args) } static PyObject * -file_xreadlines(PyFileObject *f) -{ - static PyObject* xreadlines_function = NULL; - - if (f->f_fp == NULL) - return err_closed(); - if (!xreadlines_function) { - PyObject *xreadlines_module = - PyImport_ImportModule("xreadlines"); - if(!xreadlines_module) - return NULL; - - xreadlines_function = PyObject_GetAttrString(xreadlines_module, - "xreadlines"); - Py_DECREF(xreadlines_module); - if(!xreadlines_function) - return NULL; - } - return PyObject_CallFunction(xreadlines_function, "(O)", f); -} - -static PyObject * file_readlines(PyFileObject *f, PyObject *args) { long sizehint = 0; @@ -1462,6 +1444,15 @@ file_writelines(PyFileObject *f, PyObject *seq) #undef CHUNKSIZE } +static PyObject * +file_getiter(PyFileObject *f) +{ + if (f->f_fp == NULL) + return err_closed(); + Py_INCREF(f); + return (PyObject *)f; +} + PyDoc_STRVAR(readline_doc, "readline([size]) -> next line from the file, as a string.\n" "\n" @@ -1517,10 +1508,10 @@ PyDoc_STRVAR(readlines_doc, "total number of bytes in the lines returned."); PyDoc_STRVAR(xreadlines_doc, -"xreadlines() -> next line from the file, as a string.\n" +"xreadlines() -> returns self.\n" "\n" -"Equivalent to xreadlines.xreadlines(file). This is like readline(), but\n" -"often quicker, due to reading ahead internally."); +"For backward compatibility. File objects now include the performance\n" +"optimizations previously implemented in the xreadlines module."); PyDoc_STRVAR(writelines_doc, "writelines(sequence_of_strings) -> None. Write the strings to the file.\n" @@ -1554,7 +1545,7 @@ static PyMethodDef file_methods[] = { {"tell", (PyCFunction)file_tell, METH_NOARGS, tell_doc}, {"readinto", (PyCFunction)file_readinto, METH_VARARGS, readinto_doc}, {"readlines", (PyCFunction)file_readlines, METH_VARARGS, readlines_doc}, - {"xreadlines", (PyCFunction)file_xreadlines, METH_NOARGS, xreadlines_doc}, + {"xreadlines", (PyCFunction)file_getiter, METH_NOARGS, xreadlines_doc}, {"writelines", (PyCFunction)file_writelines, METH_O, writelines_doc}, {"flush", (PyCFunction)file_flush, METH_NOARGS, flush_doc}, {"close", (PyCFunction)file_close, METH_NOARGS, close_doc}, @@ -1617,12 +1608,120 @@ static PyGetSetDef file_getsetlist[] = { {0}, }; +void +drop_readahead(PyFileObject *f) +{ + if (f->f_buf != NULL) { + PyMem_Free(f->f_buf); + f->f_buf = NULL; + } +} + +/* Make sure that file has a readahead buffer with at least one byte + (unless at EOF) and no more than bufsize. Returns negative value on + error */ +int readahead(PyFileObject *f, int bufsize) { + int chunksize; + + if (f->f_buf != NULL) { + if( (f->f_bufend - f->f_bufptr) >= 1) + return 0; + else + drop_readahead(f); + } + if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) { + return -1; + } + Py_BEGIN_ALLOW_THREADS + errno = 0; + chunksize = Py_UniversalNewlineFread( + f->f_buf, bufsize, f->f_fp, (PyObject *)f); + Py_END_ALLOW_THREADS + if (chunksize == 0) { + if (ferror(f->f_fp)) { + PyErr_SetFromErrno(PyExc_IOError); + clearerr(f->f_fp); + drop_readahead(f); + return -1; + } + } + f->f_bufptr = f->f_buf; + f->f_bufend = f->f_buf + chunksize; + return 0; +} + +/* Used by file_iternext. The returned string will start with 'skip' + uninitialized bytes followed by the remainder of the line. Don't be + horrified by the recursive call: maximum recursion depth is limited by + logarithmic buffer growth to about 50 even when reading a 1gb line. */ + +PyStringObject * +readahead_get_line_skip(PyFileObject *f, int skip, int bufsize) { + PyStringObject* s; + char *bufptr; + char *buf; + int len; + + if (f->f_buf == NULL) + if (readahead(f, bufsize) < 0) + return NULL; + + len = f->f_bufend - f->f_bufptr; + if (len == 0) + return (PyStringObject *) + PyString_FromStringAndSize(NULL, skip); + bufptr = memchr(f->f_bufptr, '\n', len); + if (bufptr != NULL) { + bufptr++; /* Count the '\n' */ + len = bufptr - f->f_bufptr; + s = (PyStringObject *) + PyString_FromStringAndSize(NULL, skip+len); + if (s == NULL) + return NULL; + memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len); + f->f_bufptr = bufptr; + if (bufptr == f->f_bufend) + drop_readahead(f); + } else { + bufptr = f->f_bufptr; + buf = f->f_buf; + f->f_buf = NULL; /* Force new readahead buffer */ + s = readahead_get_line_skip( + f, skip+len, bufsize + (bufsize>>2) ); + if (s == NULL) { + PyMem_Free(buf); + return NULL; + } + memcpy(PyString_AS_STRING(s)+skip, bufptr, len); + PyMem_Free(buf); + } + return s; +} + +/* A larger buffer size may actually decrease performance. */ +#define READAHEAD_BUFSIZE 8192 + static PyObject * -file_getiter(PyObject *f) +file_iternext(PyFileObject *f) { - return PyObject_CallMethod(f, "xreadlines", ""); + PyStringObject* l; + + int i; + + if (f->f_fp == NULL) + return err_closed(); + + i = f->f_softspace; + + l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE); + if (l == NULL || PyString_GET_SIZE(l) == 0) { + Py_XDECREF(l); + return NULL; + } + return (PyObject *)l; } + static PyObject * file_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { @@ -1742,8 +1841,8 @@ PyTypeObject PyFile_Type = { 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ - file_getiter, /* tp_iter */ - 0, /* tp_iternext */ + (getiterfunc)file_getiter, /* tp_iter */ + (iternextfunc)file_iternext, /* tp_iternext */ file_methods, /* tp_methods */ file_memberlist, /* tp_members */ file_getsetlist, /* tp_getset */ |