diff options
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/fileobject.c | 311 |
1 files changed, 294 insertions, 17 deletions
diff --git a/Objects/fileobject.c b/Objects/fileobject.c index 54d040d..152ba1a 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -37,6 +37,23 @@ #include <errno.h> #endif +#ifdef HAVE_GETC_UNLOCKED +#define GETC(f) getc_unlocked(f) +#define FLOCKFILE(f) flockfile(f) +#define FUNLOCKFILE(f) funlockfile(f) +#else +#define GETC(f) getc(f) +#define FLOCKFILE(f) +#define FUNLOCKFILE(f) +#endif + +#ifdef WITH_UNIVERSAL_NEWLINES +/* Bits in f_newlinetypes */ +#define NEWLINE_UNKNOWN 0 /* No newline seen, yet */ +#define NEWLINE_CR 1 /* \r newline seen */ +#define NEWLINE_LF 2 /* \n newline seen */ +#define NEWLINE_CRLF 4 /* \r\n newline seen */ +#endif FILE * PyFile_AsFile(PyObject *f) @@ -99,6 +116,11 @@ fill_file_fields(PyFileObject *f, FILE *fp, char *name, char *mode, f->f_close = close; f->f_softspace = 0; f->f_binary = strchr(mode,'b') != NULL; +#ifdef WITH_UNIVERSAL_NEWLINES + f->f_univ_newline = (strchr(mode, 'U') != NULL); + f->f_newlinetypes = NEWLINE_UNKNOWN; + f->f_skipnextlf = 0; +#endif if (f->f_name == NULL || f->f_mode == NULL) return NULL; @@ -134,6 +156,17 @@ open_the_file(PyFileObject *f, char *name, char *mode) #endif { Py_BEGIN_ALLOW_THREADS +#ifdef WITH_UNIVERSAL_NEWLINES + if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0) + mode = "rb"; +#else + /* Compatibility: specifying U in a Python without universal + ** newlines is allowed, and the file is opened as a normal text + ** file. + */ + if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0) + mode = "r"; +#endif f->f_fp = fopen(name, mode); Py_END_ALLOW_THREADS } @@ -394,6 +427,9 @@ file_seek(PyFileObject *f, PyObject *args) clearerr(f->f_fp); return NULL; } +#ifdef WITH_UNIVERSAL_NEWLINES + f->f_skipnextlf = 0; +#endif Py_INCREF(Py_None); return Py_None; } @@ -534,6 +570,16 @@ file_tell(PyFileObject *f) clearerr(f->f_fp); return NULL; } +#ifdef WITH_UNIVERSAL_NEWLINES + if (f->f_skipnextlf) { + int c; + c = GETC(f->f_fp); + if (c == '\n') { + pos++; + f->f_skipnextlf = 0; + } else if (c != EOF) ungetc(c, f->f_fp); + } +#endif #if !defined(HAVE_LARGEFILE_SUPPORT) return PyInt_FromLong(pos); #else @@ -665,8 +711,8 @@ file_read(PyFileObject *f, PyObject *args) for (;;) { Py_BEGIN_ALLOW_THREADS errno = 0; - chunksize = fread(BUF(v) + bytesread, 1, - buffersize - bytesread, f->f_fp); + chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread, + buffersize - bytesread, f->f_fp, (PyObject *)f); Py_END_ALLOW_THREADS if (chunksize == 0) { if (!ferror(f->f_fp)) @@ -705,7 +751,7 @@ file_readinto(PyFileObject *f, PyObject *args) while (ntodo > 0) { Py_BEGIN_ALLOW_THREADS errno = 0; - nnow = fread(ptr+ndone, 1, ntodo, f->f_fp); + nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp, (PyObject *)f); Py_END_ALLOW_THREADS if (nnow == 0) { if (!ferror(f->f_fp)) @@ -934,16 +980,6 @@ getline_via_fgets(FILE *fp) <= 0: read arbitrary line */ -#ifdef HAVE_GETC_UNLOCKED -#define GETC(f) getc_unlocked(f) -#define FLOCKFILE(f) flockfile(f) -#define FUNLOCKFILE(f) funlockfile(f) -#else -#define GETC(f) getc(f) -#define FLOCKFILE(f) -#define FUNLOCKFILE(f) -#endif - static PyObject * get_line(PyFileObject *f, int n) { @@ -954,9 +990,18 @@ get_line(PyFileObject *f, int n) size_t used_v_size; /* # used slots in buffer */ size_t increment; /* amount to increment the buffer */ PyObject *v; +#ifdef WITH_UNIVERSAL_NEWLINES + int newlinetypes = f->f_newlinetypes; + int skipnextlf = f->f_skipnextlf; + int univ_newline = f->f_univ_newline; +#endif -#ifdef USE_FGETS_IN_GETLINE +#if defined(USE_FGETS_IN_GETLINE) +#ifdef WITH_UNIVERSAL_NEWLINES + if (n <= 0 && !univ_newline ) +#else if (n <= 0) +#endif return getline_via_fgets(fp); #endif total_v_size = n > 0 ? n : 100; @@ -969,12 +1014,45 @@ get_line(PyFileObject *f, int n) for (;;) { Py_BEGIN_ALLOW_THREADS FLOCKFILE(fp); +#ifdef WITH_UNIVERSAL_NEWLINES + if (univ_newline) { + c = 'x'; /* Shut up gcc warning */ + while ( buf != end && (c = GETC(fp)) != EOF ) { + if (skipnextlf ) { + skipnextlf = 0; + if (c == '\n') { + /* Seeing a \n here with skipnextlf true + ** means we saw a \r before. + */ + newlinetypes |= NEWLINE_CRLF; + c = GETC(fp); + if (c == EOF) break; + } else { + newlinetypes |= NEWLINE_CR; + } + } + if (c == '\r') { + skipnextlf = 1; + c = '\n'; + } else if ( c == '\n') + newlinetypes |= NEWLINE_LF; + *buf++ = c; + if (c == '\n') break; + } + if ( c == EOF && skipnextlf ) + newlinetypes |= NEWLINE_CR; + } else /* If not universal newlines use the normal loop */ +#endif while ((c = GETC(fp)) != EOF && (*buf++ = c) != '\n' && buf != end) ; FUNLOCKFILE(fp); Py_END_ALLOW_THREADS +#ifdef WITH_UNIVERSAL_NEWLINES + f->f_newlinetypes = newlinetypes; + f->f_skipnextlf = skipnextlf; +#endif if (c == '\n') break; if (c == EOF) { @@ -1150,8 +1228,8 @@ file_readlines(PyFileObject *f, PyObject *args) else { Py_BEGIN_ALLOW_THREADS errno = 0; - nread = fread(buffer+nfilled, 1, - buffersize-nfilled, f->f_fp); + nread = Py_UniversalNewlineFread(buffer+nfilled, + buffersize-nfilled, f->f_fp, (PyObject *)f); Py_END_ALLOW_THREADS shortread = (nread < buffersize-nfilled); } @@ -1188,7 +1266,8 @@ file_readlines(PyFileObject *f, PyObject *args) } else { /* Grow the big buffer */ - _PyString_Resize(&big_buffer, buffersize); + if ( _PyString_Resize(&big_buffer, buffersize) < 0 ) + goto error; buffer = PyString_AS_STRING(big_buffer); } continue; @@ -1503,9 +1582,40 @@ get_closed(PyFileObject *f, void *closure) { return PyBool_FromLong((long)(f->f_fp == 0)); } +#ifdef WITH_UNIVERSAL_NEWLINES +static PyObject * +get_newlines(PyFileObject *f, void *closure) +{ + switch (f->f_newlinetypes) { + case NEWLINE_UNKNOWN: + Py_INCREF(Py_None); + return Py_None; + case NEWLINE_CR: + return PyString_FromString("\r"); + case NEWLINE_LF: + return PyString_FromString("\n"); + case NEWLINE_CR|NEWLINE_LF: + return Py_BuildValue("(ss)", "\r", "\n"); + case NEWLINE_CRLF: + return PyString_FromString("\r\n"); + case NEWLINE_CR|NEWLINE_CRLF: + return Py_BuildValue("(ss)", "\r", "\r\n"); + case NEWLINE_LF|NEWLINE_CRLF: + return Py_BuildValue("(ss)", "\n", "\r\n"); + case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF: + return Py_BuildValue("(sss)", "\r", "\n", "\r\n"); + default: + PyErr_Format(PyExc_SystemError, "Unknown newlines value 0x%x\n", f->f_newlinetypes); + return NULL; + } +} +#endif static PyGetSetDef file_getsetlist[] = { {"closed", (getter)get_closed, NULL, "True if the file is closed"}, +#ifdef WITH_UNIVERSAL_NEWLINES + {"newlines", (getter)get_newlines, NULL, "end-of-line convention used in this file"}, +#endif {0}, }; @@ -1805,3 +1915,170 @@ int PyObject_AsFileDescriptor(PyObject *o) } return fd; } + +#ifdef WITH_UNIVERSAL_NEWLINES +/* From here on we need access to the real fgets and fread */ +#undef fgets +#undef fread + +/* +** Py_UniversalNewlineFgets is an fgets variation that understands +** all of \r, \n and \r\n conventions. +** The stream should be opened in binary mode. +** If fobj is NULL the routine always does newline conversion, and +** it may peek one char ahead to gobble the second char in \r\n. +** If fobj is non-NULL it must be a PyFileObject. In this case there +** is no readahead but in stead a flag is used to skip a following +** \n on the next read. Also, if the file is open in binary mode +** the whole conversion is skipped. Finally, the routine keeps track of +** the different types of newlines seen. +** Note that we need no error handling: fgets() treats error and eof +** identically. +*/ +char * +Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) +{ + char *p = buf; + int c; + int newlinetypes = 0; + int skipnextlf = 0; + int univ_newline = 1; + + if (fobj) { + if (!PyFile_Check(fobj)) { + errno = ENXIO; /* What can you do... */ + return NULL; + } + univ_newline = ((PyFileObject *)fobj)->f_univ_newline; + if ( !univ_newline ) + return fgets(buf, n, stream); + newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes; + skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf; + } + FLOCKFILE(stream); + c = 'x'; /* Shut up gcc warning */ + while (--n > 0 && (c = GETC(stream)) != EOF ) { + if (skipnextlf ) { + skipnextlf = 0; + if (c == '\n') { + /* Seeing a \n here with skipnextlf true + ** means we saw a \r before. + */ + newlinetypes |= NEWLINE_CRLF; + c = GETC(stream); + if (c == EOF) break; + } else { + /* + ** Note that c == EOF also brings us here, + ** so we're okay if the last char in the file + ** is a CR. + */ + newlinetypes |= NEWLINE_CR; + } + } + if (c == '\r') { + /* A \r is translated into a \n, and we skip + ** an adjacent \n, if any. We don't set the + ** newlinetypes flag until we've seen the next char. + */ + skipnextlf = 1; + c = '\n'; + } else if ( c == '\n') { + newlinetypes |= NEWLINE_LF; + } + *p++ = c; + if (c == '\n') break; + } + if ( c == EOF && skipnextlf ) + newlinetypes |= NEWLINE_CR; + FUNLOCKFILE(stream); + *p = '\0'; + if (fobj) { + ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes; + ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf; + } else if ( skipnextlf ) { + /* If we have no file object we cannot save the + ** skipnextlf flag. We have to readahead, which + ** will cause a pause if we're reading from an + ** interactive stream, but that is very unlikely + ** unless we're doing something silly like + ** execfile("/dev/tty"). + */ + c = GETC(stream); + if ( c != '\n' ) + ungetc(c, stream); + } + if (p == buf) + return NULL; + return buf; +} + +/* +** Py_UniversalNewlineFread is an fread variation that understands +** all of \r, \n and \r\n conventions. +** The stream should be opened in binary mode. +** fobj must be a PyFileObject. In this case there +** is no readahead but in stead a flag is used to skip a following +** \n on the next read. Also, if the file is open in binary mode +** the whole conversion is skipped. Finally, the routine keeps track of +** the different types of newlines seen. +*/ +size_t +Py_UniversalNewlineFread(void *buf, size_t n, + FILE *stream, PyObject *fobj) +{ + char *src = buf, *dst = buf, c; + int nread, ntodo=n; + int newlinetypes, skipnextlf, univ_newline; + + if (!fobj || !PyFile_Check(fobj)) { + errno = ENXIO; /* What can you do... */ + return -1; + } + univ_newline = ((PyFileObject *)fobj)->f_univ_newline; + if ( !univ_newline ) + return fread(buf, 1, n, stream); + newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes; + skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf; + while (ntodo > 0) { + if (ferror(stream)) + break; + nread = fread(dst, 1, ntodo, stream); + src = dst; + if (nread <= 0) { + if (skipnextlf) + newlinetypes |= NEWLINE_CR; + break; + } + ntodo -= nread; + while ( nread-- ) { + c = *src++; + if (c == '\r') { + /* Save CR as LF and set flag to skip next newline + */ + *dst++ = '\n'; + skipnextlf = 1; + } else if (skipnextlf && c == '\n') { + /* Skip an LF, and remember that we saw CR LF + */ + skipnextlf = 0; + newlinetypes |= NEWLINE_CRLF; + } else { + /* Normal char to be stored in buffer. Also update + ** the newlinetypes flag if either this is an LF + ** or the previous char was a CR. + */ + if (c == '\n') + newlinetypes |= NEWLINE_LF; + else if (skipnextlf) + newlinetypes |= NEWLINE_CR; + *dst++ = c; + skipnextlf = 0; + } + } + } + ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes; + ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf; + return dst - (char *)buf; +} +#endif |