From 058b141ef77edcd8000bc169f3b9b7cc9d362ffa Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Sun, 21 Apr 2002 07:29:14 +0000 Subject: Py_UniversalNewlineFread(): Many changes. + Continued looping until n bytes in the buffer have been filled, not just when n bytes have been read from the file. This repairs the bug that f.readlines() only sucked up the first 8192 bytes of the file on Windows when universal newlines was enabled and f was opened in U mode (see Python-Dev -- this was the ultimate cause of the test_inspect.py failure). + Changed prototype to take a char* buffer (void* doesn't make much sense). + Squashed size_t vs int mismatches (in particular, besides the unsigned vs signed distinction, size_t may be larger than int). + Gets out under all error conditions now (it's possible for fread() to suffer an error even if it returns a number larger than 0 -- any "short read" is an error or EOF condition). + Rearranged and simplified declarations. --- Include/fileobject.h | 10 +++---- Objects/fileobject.c | 84 +++++++++++++++++++++++++++++----------------------- 2 files changed, 52 insertions(+), 42 deletions(-) diff --git a/Include/fileobject.h b/Include/fileobject.h index d696475..6c34223 100644 --- a/Include/fileobject.h +++ b/Include/fileobject.h @@ -41,7 +41,7 @@ extern DL_IMPORT(int) PyFile_WriteString(const char *, PyObject *); extern DL_IMPORT(int) PyObject_AsFileDescriptor(PyObject *); /* The default encoding used by the platform file system APIs - If non-NULL, this is different than the default encoding for strings + If non-NULL, this is different than the default encoding for strings */ extern DL_IMPORT(const char *) Py_FileSystemDefaultEncoding; @@ -51,12 +51,12 @@ extern DL_IMPORT(const char *) Py_FileSystemDefaultEncoding; */ #define PY_STDIOTEXTMODE "b" char *Py_UniversalNewlineFgets(char *, int, FILE*, PyObject *); -size_t Py_UniversalNewlineFread(void *, size_t, FILE *, PyObject *); +size_t Py_UniversalNewlineFread(char *, size_t, FILE *, PyObject 
*); #else #define PY_STDIOTEXTMODE "" -#define Py_UniversalNewlineFgets(buf, len, fp, obj) (fgets((buf), (len), (fp))) -#define Py_UniversalNewlineFread(buf, len, fp, obj) \ - (fread((buf), 1, (len), (fp))) +#define Py_UniversalNewlineFgets(buf, len, fp, obj) fgets((buf), (len), (fp)) +#define Py_UniversalNewlineFread(buf, len, fp, obj) + fread((buf), 1, (len), (fp)) #endif /* WITH_UNIVERSAL_NEWLINES */ #ifdef __cplusplus } diff --git a/Objects/fileobject.c b/Objects/fileobject.c index 152ba1a..044dfd8 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -1228,7 +1228,7 @@ file_readlines(PyFileObject *f, PyObject *args) else { Py_BEGIN_ALLOW_THREADS errno = 0; - nread = Py_UniversalNewlineFread(buffer+nfilled, + nread = Py_UniversalNewlineFread(buffer+nfilled, buffersize-nfilled, f->f_fp, (PyObject *)f); Py_END_ALLOW_THREADS shortread = (nread < buffersize-nfilled); @@ -1943,7 +1943,7 @@ Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) int newlinetypes = 0; int skipnextlf = 0; int univ_newline = 1; - + if (fobj) { if (!PyFile_Check(fobj)) { errno = ENXIO; /* What can you do... */ @@ -2024,61 +2024,71 @@ Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) ** the different types of newlines seen. */ size_t -Py_UniversalNewlineFread(void *buf, size_t n, +Py_UniversalNewlineFread(char *buf, size_t n, FILE *stream, PyObject *fobj) { - char *src = buf, *dst = buf, c; - int nread, ntodo=n; - int newlinetypes, skipnextlf, univ_newline; - + char *dst = buf; + PyFileObject *f = (PyFileObject *)fobj; + int newlinetypes, skipnextlf; + + assert(buf != NULL); + assert(stream != NULL); + if (!fobj || !PyFile_Check(fobj)) { errno = ENXIO; /* What can you do... 
*/ return -1; } - univ_newline = ((PyFileObject *)fobj)->f_univ_newline; - if ( !univ_newline ) + if (!f->f_univ_newline) return fread(buf, 1, n, stream); - newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes; - skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf; - while (ntodo > 0) { - if (ferror(stream)) - break; - nread = fread(dst, 1, ntodo, stream); - src = dst; - if (nread <= 0) { - if (skipnextlf) - newlinetypes |= NEWLINE_CR; - break; - } - ntodo -= nread; - while ( nread-- ) { - c = *src++; + newlinetypes = f->f_newlinetypes; + skipnextlf = f->f_skipnextlf; + /* Invariant: n is the number of bytes remaining to be filled + * in the buffer. + */ + while (n) { + size_t nread; + int shortread; + char *src = dst; + + nread = fread(dst, 1, n, stream); + assert(nread <= n); + shortread = nread != n; /* true iff EOF or error */ + while (nread--) { + char c = *src++; if (c == '\r') { - /* Save CR as LF and set flag to skip next newline - */ + /* Save as LF and set flag to skip next LF. */ *dst++ = '\n'; + --n; skipnextlf = 1; - } else if (skipnextlf && c == '\n') { - /* Skip an LF, and remember that we saw CR LF - */ + } + else if (skipnextlf && c == '\n') { + /* Skip LF, and remember we saw CR LF. */ skipnextlf = 0; newlinetypes |= NEWLINE_CRLF; - } else { - /* Normal char to be stored in buffer. Also update - ** the newlinetypes flag if either this is an LF - ** or the previous char was a CR. - */ + } + else { + /* Normal char to be stored in buffer. Also + * update the newlinetypes flag if either this + * is an LF or the previous char was a CR. + */ if (c == '\n') newlinetypes |= NEWLINE_LF; else if (skipnextlf) newlinetypes |= NEWLINE_CR; *dst++ = c; + --n; skipnextlf = 0; } } + if (shortread) { + /* If this is EOF, update type flags. 
*/ + if (skipnextlf && feof(stream)) + newlinetypes |= NEWLINE_CR; + break; + } } - ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes; - ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf; - return dst - (char *)buf; + f->f_newlinetypes = newlinetypes; + f->f_skipnextlf = skipnextlf; + return dst - buf; } #endif -- cgit v0.12