diff options
author | Jack Jansen <jack.jansen@cwi.nl> | 2002-04-14 20:12:41 (GMT) |
---|---|---|
committer | Jack Jansen <jack.jansen@cwi.nl> | 2002-04-14 20:12:41 (GMT) |
commit | 7b8c7546ebc1fc3688ef95768fa8b82f0f205490 (patch) | |
tree | 2e14b243347a9c38c82e2e774d4b201f23149916 /Objects/fileobject.c | |
parent | dcd2dc2fffce8614c5d2b8d303a303a599b88a23 (diff) | |
download | cpython-7b8c7546ebc1fc3688ef95768fa8b82f0f205490.zip cpython-7b8c7546ebc1fc3688ef95768fa8b82f0f205490.tar.gz cpython-7b8c7546ebc1fc3688ef95768fa8b82f0f205490.tar.bz2 |
Mass checkin of universal newline support.
Highlights: import and friends will understand any of \r, \n and \r\n
as end of line. Python file input will do the same if you use mode 'U'.
Everything can be disabled by configuring with --without-universal-newlines.
See PEP278 for details.
Diffstat (limited to 'Objects/fileobject.c')
-rw-r--r-- | Objects/fileobject.c | 311 |
1 files changed, 294 insertions, 17 deletions
diff --git a/Objects/fileobject.c b/Objects/fileobject.c index 54d040d..152ba1a 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -37,6 +37,23 @@ #include <errno.h> #endif +#ifdef HAVE_GETC_UNLOCKED +#define GETC(f) getc_unlocked(f) +#define FLOCKFILE(f) flockfile(f) +#define FUNLOCKFILE(f) funlockfile(f) +#else +#define GETC(f) getc(f) +#define FLOCKFILE(f) +#define FUNLOCKFILE(f) +#endif + +#ifdef WITH_UNIVERSAL_NEWLINES +/* Bits in f_newlinetypes */ +#define NEWLINE_UNKNOWN 0 /* No newline seen, yet */ +#define NEWLINE_CR 1 /* \r newline seen */ +#define NEWLINE_LF 2 /* \n newline seen */ +#define NEWLINE_CRLF 4 /* \r\n newline seen */ +#endif FILE * PyFile_AsFile(PyObject *f) @@ -99,6 +116,11 @@ fill_file_fields(PyFileObject *f, FILE *fp, char *name, char *mode, f->f_close = close; f->f_softspace = 0; f->f_binary = strchr(mode,'b') != NULL; +#ifdef WITH_UNIVERSAL_NEWLINES + f->f_univ_newline = (strchr(mode, 'U') != NULL); + f->f_newlinetypes = NEWLINE_UNKNOWN; + f->f_skipnextlf = 0; +#endif if (f->f_name == NULL || f->f_mode == NULL) return NULL; @@ -134,6 +156,17 @@ open_the_file(PyFileObject *f, char *name, char *mode) #endif { Py_BEGIN_ALLOW_THREADS +#ifdef WITH_UNIVERSAL_NEWLINES + if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0) + mode = "rb"; +#else + /* Compatibility: specifying U in a Python without universal + ** newlines is allowed, and the file is opened as a normal text + ** file. + */ + if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0) + mode = "r"; +#endif f->f_fp = fopen(name, mode); Py_END_ALLOW_THREADS } @@ -394,6 +427,9 @@ file_seek(PyFileObject *f, PyObject *args) clearerr(f->f_fp); return NULL; } +#ifdef WITH_UNIVERSAL_NEWLINES + f->f_skipnextlf = 0; +#endif Py_INCREF(Py_None); return Py_None; } @@ -534,6 +570,16 @@ file_tell(PyFileObject *f) clearerr(f->f_fp); return NULL; } +#ifdef WITH_UNIVERSAL_NEWLINES + if (f->f_skipnextlf) { + int c; + c = GETC(f->f_fp); + if (c == '\n') { + pos++; + f->f_skipnextlf = 0; + } else if (c != EOF) ungetc(c, f->f_fp); + } +#endif #if !defined(HAVE_LARGEFILE_SUPPORT) return PyInt_FromLong(pos); #else @@ -665,8 +711,8 @@ file_read(PyFileObject *f, PyObject *args) for (;;) { Py_BEGIN_ALLOW_THREADS errno = 0; - chunksize = fread(BUF(v) + bytesread, 1, - buffersize - bytesread, f->f_fp); + chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread, + buffersize - bytesread, f->f_fp, (PyObject *)f); Py_END_ALLOW_THREADS if (chunksize == 0) { if (!ferror(f->f_fp)) @@ -705,7 +751,7 @@ file_readinto(PyFileObject *f, PyObject *args) while (ntodo > 0) { Py_BEGIN_ALLOW_THREADS errno = 0; - nnow = fread(ptr+ndone, 1, ntodo, f->f_fp); + nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp, (PyObject *)f); Py_END_ALLOW_THREADS if (nnow == 0) { if (!ferror(f->f_fp)) @@ -934,16 +980,6 @@ getline_via_fgets(FILE *fp) <= 0: read arbitrary line */ -#ifdef HAVE_GETC_UNLOCKED -#define GETC(f) getc_unlocked(f) -#define FLOCKFILE(f) flockfile(f) -#define FUNLOCKFILE(f) funlockfile(f) -#else -#define GETC(f) getc(f) -#define FLOCKFILE(f) -#define FUNLOCKFILE(f) -#endif - static PyObject * get_line(PyFileObject *f, int n) { @@ -954,9 +990,18 @@ get_line(PyFileObject *f, int n) size_t used_v_size; /* # used slots in buffer */ size_t increment; /* amount to increment the buffer */ PyObject *v; +#ifdef WITH_UNIVERSAL_NEWLINES + int newlinetypes = f->f_newlinetypes; + int skipnextlf = f->f_skipnextlf; + int univ_newline = f->f_univ_newline; +#endif -#ifdef USE_FGETS_IN_GETLINE +#if defined(USE_FGETS_IN_GETLINE) +#ifdef WITH_UNIVERSAL_NEWLINES + if (n <= 0 && !univ_newline ) +#else if (n <= 0) +#endif return getline_via_fgets(fp); #endif total_v_size = n > 0 ? n : 100; @@ -969,12 +1014,45 @@ get_line(PyFileObject *f, int n) for (;;) { Py_BEGIN_ALLOW_THREADS FLOCKFILE(fp); +#ifdef WITH_UNIVERSAL_NEWLINES + if (univ_newline) { + c = 'x'; /* Shut up gcc warning */ + while ( buf != end && (c = GETC(fp)) != EOF ) { + if (skipnextlf ) { + skipnextlf = 0; + if (c == '\n') { + /* Seeing a \n here with skipnextlf true + ** means we saw a \r before. + */ + newlinetypes |= NEWLINE_CRLF; + c = GETC(fp); + if (c == EOF) break; + } else { + newlinetypes |= NEWLINE_CR; + } + } + if (c == '\r') { + skipnextlf = 1; + c = '\n'; + } else if ( c == '\n') + newlinetypes |= NEWLINE_LF; + *buf++ = c; + if (c == '\n') break; + } + if ( c == EOF && skipnextlf ) + newlinetypes |= NEWLINE_CR; + } else /* If not universal newlines use the normal loop */ +#endif while ((c = GETC(fp)) != EOF && (*buf++ = c) != '\n' && buf != end) ; FUNLOCKFILE(fp); Py_END_ALLOW_THREADS +#ifdef WITH_UNIVERSAL_NEWLINES + f->f_newlinetypes = newlinetypes; + f->f_skipnextlf = skipnextlf; +#endif if (c == '\n') break; if (c == EOF) { @@ -1150,8 +1228,8 @@ file_readlines(PyFileObject *f, PyObject *args) else { Py_BEGIN_ALLOW_THREADS errno = 0; - nread = fread(buffer+nfilled, 1, - buffersize-nfilled, f->f_fp); + nread = Py_UniversalNewlineFread(buffer+nfilled, + buffersize-nfilled, f->f_fp, (PyObject *)f); Py_END_ALLOW_THREADS shortread = (nread < buffersize-nfilled); } @@ -1188,7 +1266,8 @@ file_readlines(PyFileObject *f, PyObject *args) } else { /* Grow the big buffer */ - _PyString_Resize(&big_buffer, buffersize); + if ( _PyString_Resize(&big_buffer, buffersize) < 0 ) + goto error; buffer = PyString_AS_STRING(big_buffer); } continue; @@ -1503,9 +1582,40 @@ get_closed(PyFileObject *f, void *closure) { return PyBool_FromLong((long)(f->f_fp == 0)); } +#ifdef WITH_UNIVERSAL_NEWLINES +static PyObject * +get_newlines(PyFileObject *f, void *closure) +{ + switch (f->f_newlinetypes) { + case NEWLINE_UNKNOWN: + Py_INCREF(Py_None); + return Py_None; + case NEWLINE_CR: + return PyString_FromString("\r"); + case NEWLINE_LF: + return PyString_FromString("\n"); + case NEWLINE_CR|NEWLINE_LF: + return Py_BuildValue("(ss)", "\r", "\n"); + case NEWLINE_CRLF: + return PyString_FromString("\r\n"); + case NEWLINE_CR|NEWLINE_CRLF: + return Py_BuildValue("(ss)", "\r", "\r\n"); + case NEWLINE_LF|NEWLINE_CRLF: + return Py_BuildValue("(ss)", "\n", "\r\n"); + case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF: + return Py_BuildValue("(sss)", "\r", "\n", "\r\n"); + default: + PyErr_Format(PyExc_SystemError, "Unknown newlines value 0x%x\n", f->f_newlinetypes); + return NULL; + } +} +#endif static PyGetSetDef file_getsetlist[] = { {"closed", (getter)get_closed, NULL, "True if the file is closed"}, +#ifdef WITH_UNIVERSAL_NEWLINES + {"newlines", (getter)get_newlines, NULL, "end-of-line convention used in this file"}, +#endif {0}, }; @@ -1805,3 +1915,170 @@ int PyObject_AsFileDescriptor(PyObject *o) } return fd; } + +#ifdef WITH_UNIVERSAL_NEWLINES +/* From here on we need access to the real fgets and fread */ +#undef fgets +#undef fread + +/* +** Py_UniversalNewlineFgets is an fgets variation that understands +** all of \r, \n and \r\n conventions. +** The stream should be opened in binary mode. +** If fobj is NULL the routine always does newline conversion, and +** it may peek one char ahead to gobble the second char in \r\n. +** If fobj is non-NULL it must be a PyFileObject. In this case there +** is no readahead but in stead a flag is used to skip a following +** \n on the next read. Also, if the file is open in binary mode +** the whole conversion is skipped. Finally, the routine keeps track of +** the different types of newlines seen. +** Note that we need no error handling: fgets() treats error and eof +** identically. +*/ +char * +Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) +{ + char *p = buf; + int c; + int newlinetypes = 0; + int skipnextlf = 0; + int univ_newline = 1; + + if (fobj) { + if (!PyFile_Check(fobj)) { + errno = ENXIO; /* What can you do... */ + return NULL; + } + univ_newline = ((PyFileObject *)fobj)->f_univ_newline; + if ( !univ_newline ) + return fgets(buf, n, stream); + newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes; + skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf; + } + FLOCKFILE(stream); + c = 'x'; /* Shut up gcc warning */ + while (--n > 0 && (c = GETC(stream)) != EOF ) { + if (skipnextlf ) { + skipnextlf = 0; + if (c == '\n') { + /* Seeing a \n here with skipnextlf true + ** means we saw a \r before. + */ + newlinetypes |= NEWLINE_CRLF; + c = GETC(stream); + if (c == EOF) break; + } else { + /* + ** Note that c == EOF also brings us here, + ** so we're okay if the last char in the file + ** is a CR. + */ + newlinetypes |= NEWLINE_CR; + } + } + if (c == '\r') { + /* A \r is translated into a \n, and we skip + ** an adjacent \n, if any. We don't set the + ** newlinetypes flag until we've seen the next char. + */ + skipnextlf = 1; + c = '\n'; + } else if ( c == '\n') { + newlinetypes |= NEWLINE_LF; + } + *p++ = c; + if (c == '\n') break; + } + if ( c == EOF && skipnextlf ) + newlinetypes |= NEWLINE_CR; + FUNLOCKFILE(stream); + *p = '\0'; + if (fobj) { + ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes; + ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf; + } else if ( skipnextlf ) { + /* If we have no file object we cannot save the + ** skipnextlf flag. We have to readahead, which + ** will cause a pause if we're reading from an + ** interactive stream, but that is very unlikely + ** unless we're doing something silly like + ** execfile("/dev/tty"). + */ + c = GETC(stream); + if ( c != '\n' ) + ungetc(c, stream); + } + if (p == buf) + return NULL; + return buf; +} + +/* +** Py_UniversalNewlineFread is an fread variation that understands +** all of \r, \n and \r\n conventions. +** The stream should be opened in binary mode. +** fobj must be a PyFileObject. In this case there +** is no readahead but in stead a flag is used to skip a following +** \n on the next read. Also, if the file is open in binary mode +** the whole conversion is skipped. Finally, the routine keeps track of +** the different types of newlines seen. +*/ +size_t +Py_UniversalNewlineFread(void *buf, size_t n, + FILE *stream, PyObject *fobj) +{ + char *src = buf, *dst = buf, c; + int nread, ntodo=n; + int newlinetypes, skipnextlf, univ_newline; + + if (!fobj || !PyFile_Check(fobj)) { + errno = ENXIO; /* What can you do... */ + return -1; + } + univ_newline = ((PyFileObject *)fobj)->f_univ_newline; + if ( !univ_newline ) + return fread(buf, 1, n, stream); + newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes; + skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf; + while (ntodo > 0) { + if (ferror(stream)) + break; + nread = fread(dst, 1, ntodo, stream); + src = dst; + if (nread <= 0) { + if (skipnextlf) + newlinetypes |= NEWLINE_CR; + break; + } + ntodo -= nread; + while ( nread-- ) { + c = *src++; + if (c == '\r') { + /* Save CR as LF and set flag to skip next newline + */ + *dst++ = '\n'; + skipnextlf = 1; + } else if (skipnextlf && c == '\n') { + /* Skip an LF, and remember that we saw CR LF + */ + skipnextlf = 0; + newlinetypes |= NEWLINE_CRLF; + } else { + /* Normal char to be stored in buffer. Also update + ** the newlinetypes flag if either this is an LF + ** or the previous char was a CR. + */ + if (c == '\n') + newlinetypes |= NEWLINE_LF; + else if (skipnextlf) + newlinetypes |= NEWLINE_CR; + *dst++ = c; + skipnextlf = 0; + } + } + } + ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes; + ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf; + return dst - (char *)buf; +} +#endif |