summary refs log tree commit diff stats
path: root/Objects/fileobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/fileobject.c')
-rw-r--r-- Objects/fileobject.c 311
1 files changed, 294 insertions, 17 deletions
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
index 54d040d..152ba1a 100644
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -37,6 +37,23 @@
#include <errno.h>
#endif
+#ifdef HAVE_GETC_UNLOCKED
+#define GETC(f) getc_unlocked(f)
+#define FLOCKFILE(f) flockfile(f)
+#define FUNLOCKFILE(f) funlockfile(f)
+#else
+#define GETC(f) getc(f)
+#define FLOCKFILE(f)
+#define FUNLOCKFILE(f)
+#endif
+
+#ifdef WITH_UNIVERSAL_NEWLINES
+/* Bits in f_newlinetypes */
+#define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
+#define NEWLINE_CR 1 /* \r newline seen */
+#define NEWLINE_LF 2 /* \n newline seen */
+#define NEWLINE_CRLF 4 /* \r\n newline seen */
+#endif
FILE *
PyFile_AsFile(PyObject *f)
@@ -99,6 +116,11 @@ fill_file_fields(PyFileObject *f, FILE *fp, char *name, char *mode,
f->f_close = close;
f->f_softspace = 0;
f->f_binary = strchr(mode,'b') != NULL;
+#ifdef WITH_UNIVERSAL_NEWLINES
+ f->f_univ_newline = (strchr(mode, 'U') != NULL);
+ f->f_newlinetypes = NEWLINE_UNKNOWN;
+ f->f_skipnextlf = 0;
+#endif
if (f->f_name == NULL || f->f_mode == NULL)
return NULL;
@@ -134,6 +156,17 @@ open_the_file(PyFileObject *f, char *name, char *mode)
#endif
{
Py_BEGIN_ALLOW_THREADS
+#ifdef WITH_UNIVERSAL_NEWLINES
+ if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
+ mode = "rb";
+#else
+ /* Compatibility: specifying U in a Python without universal
+ ** newlines is allowed, and the file is opened as a normal text
+ ** file.
+ */
+ if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
+ mode = "r";
+#endif
f->f_fp = fopen(name, mode);
Py_END_ALLOW_THREADS
}
@@ -394,6 +427,9 @@ file_seek(PyFileObject *f, PyObject *args)
clearerr(f->f_fp);
return NULL;
}
+#ifdef WITH_UNIVERSAL_NEWLINES
+ f->f_skipnextlf = 0;
+#endif
Py_INCREF(Py_None);
return Py_None;
}
@@ -534,6 +570,16 @@ file_tell(PyFileObject *f)
clearerr(f->f_fp);
return NULL;
}
+#ifdef WITH_UNIVERSAL_NEWLINES
+ if (f->f_skipnextlf) {
+ int c;
+ c = GETC(f->f_fp);
+ if (c == '\n') {
+ pos++;
+ f->f_skipnextlf = 0;
+ } else if (c != EOF) ungetc(c, f->f_fp);
+ }
+#endif
#if !defined(HAVE_LARGEFILE_SUPPORT)
return PyInt_FromLong(pos);
#else
@@ -665,8 +711,8 @@ file_read(PyFileObject *f, PyObject *args)
for (;;) {
Py_BEGIN_ALLOW_THREADS
errno = 0;
- chunksize = fread(BUF(v) + bytesread, 1,
- buffersize - bytesread, f->f_fp);
+ chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
+ buffersize - bytesread, f->f_fp, (PyObject *)f);
Py_END_ALLOW_THREADS
if (chunksize == 0) {
if (!ferror(f->f_fp))
@@ -705,7 +751,7 @@ file_readinto(PyFileObject *f, PyObject *args)
while (ntodo > 0) {
Py_BEGIN_ALLOW_THREADS
errno = 0;
- nnow = fread(ptr+ndone, 1, ntodo, f->f_fp);
+ nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp, (PyObject *)f);
Py_END_ALLOW_THREADS
if (nnow == 0) {
if (!ferror(f->f_fp))
@@ -934,16 +980,6 @@ getline_via_fgets(FILE *fp)
<= 0: read arbitrary line
*/
-#ifdef HAVE_GETC_UNLOCKED
-#define GETC(f) getc_unlocked(f)
-#define FLOCKFILE(f) flockfile(f)
-#define FUNLOCKFILE(f) funlockfile(f)
-#else
-#define GETC(f) getc(f)
-#define FLOCKFILE(f)
-#define FUNLOCKFILE(f)
-#endif
-
static PyObject *
get_line(PyFileObject *f, int n)
{
@@ -954,9 +990,18 @@ get_line(PyFileObject *f, int n)
size_t used_v_size; /* # used slots in buffer */
size_t increment; /* amount to increment the buffer */
PyObject *v;
+#ifdef WITH_UNIVERSAL_NEWLINES
+ int newlinetypes = f->f_newlinetypes;
+ int skipnextlf = f->f_skipnextlf;
+ int univ_newline = f->f_univ_newline;
+#endif
-#ifdef USE_FGETS_IN_GETLINE
+#if defined(USE_FGETS_IN_GETLINE)
+#ifdef WITH_UNIVERSAL_NEWLINES
+ if (n <= 0 && !univ_newline )
+#else
if (n <= 0)
+#endif
return getline_via_fgets(fp);
#endif
total_v_size = n > 0 ? n : 100;
@@ -969,12 +1014,45 @@ get_line(PyFileObject *f, int n)
for (;;) {
Py_BEGIN_ALLOW_THREADS
FLOCKFILE(fp);
+#ifdef WITH_UNIVERSAL_NEWLINES
+ if (univ_newline) {
+ c = 'x'; /* Shut up gcc warning */
+ while ( buf != end && (c = GETC(fp)) != EOF ) {
+ if (skipnextlf ) {
+ skipnextlf = 0;
+ if (c == '\n') {
+ /* Seeing a \n here with skipnextlf true
+ ** means we saw a \r before.
+ */
+ newlinetypes |= NEWLINE_CRLF;
+ c = GETC(fp);
+ if (c == EOF) break;
+ } else {
+ newlinetypes |= NEWLINE_CR;
+ }
+ }
+ if (c == '\r') {
+ skipnextlf = 1;
+ c = '\n';
+ } else if ( c == '\n')
+ newlinetypes |= NEWLINE_LF;
+ *buf++ = c;
+ if (c == '\n') break;
+ }
+ if ( c == EOF && skipnextlf )
+ newlinetypes |= NEWLINE_CR;
+ } else /* If not universal newlines use the normal loop */
+#endif
while ((c = GETC(fp)) != EOF &&
(*buf++ = c) != '\n' &&
buf != end)
;
FUNLOCKFILE(fp);
Py_END_ALLOW_THREADS
+#ifdef WITH_UNIVERSAL_NEWLINES
+ f->f_newlinetypes = newlinetypes;
+ f->f_skipnextlf = skipnextlf;
+#endif
if (c == '\n')
break;
if (c == EOF) {
@@ -1150,8 +1228,8 @@ file_readlines(PyFileObject *f, PyObject *args)
else {
Py_BEGIN_ALLOW_THREADS
errno = 0;
- nread = fread(buffer+nfilled, 1,
- buffersize-nfilled, f->f_fp);
+ nread = Py_UniversalNewlineFread(buffer+nfilled,
+ buffersize-nfilled, f->f_fp, (PyObject *)f);
Py_END_ALLOW_THREADS
shortread = (nread < buffersize-nfilled);
}
@@ -1188,7 +1266,8 @@ file_readlines(PyFileObject *f, PyObject *args)
}
else {
/* Grow the big buffer */
- _PyString_Resize(&big_buffer, buffersize);
+ if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
+ goto error;
buffer = PyString_AS_STRING(big_buffer);
}
continue;
@@ -1503,9 +1582,40 @@ get_closed(PyFileObject *f, void *closure)
{
return PyBool_FromLong((long)(f->f_fp == 0));
}
+#ifdef WITH_UNIVERSAL_NEWLINES
+/* Getter for the "newlines" attribute.
+** Translates the f_newlinetypes bit set into None (value is
+** NEWLINE_UNKNOWN), a single string (exactly one bit set) or a tuple of
+** strings (several bits set).  Any value outside the known bit
+** combinations raises SystemError and returns NULL.
+*/
+static PyObject *
+get_newlines(PyFileObject *f, void *closure)
+{
+    const int seen = f->f_newlinetypes;
+
+    if (seen == NEWLINE_UNKNOWN) {
+        Py_INCREF(Py_None);
+        return Py_None;
+    }
+
+    /* Exactly one convention seen: return it as a plain string. */
+    if (seen == NEWLINE_CR)
+        return PyString_FromString("\r");
+    if (seen == NEWLINE_LF)
+        return PyString_FromString("\n");
+    if (seen == NEWLINE_CRLF)
+        return PyString_FromString("\r\n");
+
+    /* Several conventions seen: return them as a tuple. */
+    if (seen == (NEWLINE_CR|NEWLINE_LF))
+        return Py_BuildValue("(ss)", "\r", "\n");
+    if (seen == (NEWLINE_CR|NEWLINE_CRLF))
+        return Py_BuildValue("(ss)", "\r", "\r\n");
+    if (seen == (NEWLINE_LF|NEWLINE_CRLF))
+        return Py_BuildValue("(ss)", "\n", "\r\n");
+    if (seen == (NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF))
+        return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
+
+    PyErr_Format(PyExc_SystemError, "Unknown newlines value 0x%x\n", seen);
+    return NULL;
+}
+#endif
static PyGetSetDef file_getsetlist[] = {
{"closed", (getter)get_closed, NULL, "True if the file is closed"},
+#ifdef WITH_UNIVERSAL_NEWLINES
+ {"newlines", (getter)get_newlines, NULL, "end-of-line convention used in this file"},
+#endif
{0},
};
@@ -1805,3 +1915,170 @@ int PyObject_AsFileDescriptor(PyObject *o)
}
return fd;
}
+
+#ifdef WITH_UNIVERSAL_NEWLINES
+/* From here on we need access to the real fgets and fread */
+#undef fgets
+#undef fread
+
+/*
+** Py_UniversalNewlineFgets is an fgets variation that understands
+** all of \r, \n and \r\n conventions.
+** The stream should be opened in binary mode.
+** If fobj is NULL the routine always does newline conversion, and
+** it may peek one char ahead to gobble the second char in \r\n.
+** If fobj is non-NULL it must be a PyFileObject. In this case there
+** is no readahead but instead a flag (f_skipnextlf) is used to skip a
+** following \n on the next read. Also, if the file object does not have
+** universal newlines enabled (f_univ_newline is false) the whole
+** conversion is skipped and plain fgets() is used. Finally, the routine
+** keeps track of the different types of newlines seen (f_newlinetypes).
+**
+** Returns buf, or NULL if no character was stored.  If fobj is non-NULL
+** but not a file object, errno is set to ENXIO and NULL is returned.
+** Note that we need no error handling: fgets() treats error and eof
+** identically.
+*/
+char *
+Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
+{
+    char *p = buf;
+    int c;
+    int newlinetypes = 0;
+    int skipnextlf = 0;
+    int univ_newline = 1;
+
+    if (fobj) {
+        if (!PyFile_Check(fobj)) {
+            errno = ENXIO;    /* What can you do... */
+            return NULL;
+        }
+        univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
+        if ( !univ_newline )
+            return fgets(buf, n, stream);
+        newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
+        skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
+    }
+    FLOCKFILE(stream);
+    c = 'x'; /* Shut up gcc warning */
+    while (--n > 0 && (c = GETC(stream)) != EOF ) {
+        if (skipnextlf ) {
+            skipnextlf = 0;
+            if (c == '\n') {
+                /* Seeing a \n here with skipnextlf true
+                ** means we saw a \r before.
+                */
+                newlinetypes |= NEWLINE_CRLF;
+                c = GETC(stream);
+                if (c == EOF) break;
+            } else {
+                /*
+                ** The previous \r was a lone CR.  c cannot be
+                ** EOF here (the loop condition excludes it); a
+                ** CR that is the last char in the file is
+                ** accounted for after the loop.
+                */
+                newlinetypes |= NEWLINE_CR;
+            }
+        }
+        if (c == '\r') {
+            /* A \r is translated into a \n, and we skip
+            ** an adjacent \n, if any. We don't set the
+            ** newlinetypes flag until we've seen the next char.
+            */
+            skipnextlf = 1;
+            c = '\n';
+        } else if ( c == '\n') {
+            newlinetypes |= NEWLINE_LF;
+        }
+        *p++ = c;
+        if (c == '\n') break;
+    }
+    if ( c == EOF && skipnextlf )
+        newlinetypes |= NEWLINE_CR;
+    if (fobj == NULL && skipnextlf) {
+        /* If we have no file object we cannot save the
+        ** skipnextlf flag. We have to readahead, which
+        ** will cause a pause if we're reading from an
+        ** interactive stream, but that is very unlikely
+        ** unless we're doing something silly like
+        ** execfile("/dev/tty").
+        ** The peek is done while the stream is still locked,
+        ** because GETC may expand to getc_unlocked().
+        */
+        c = GETC(stream);
+        if (c != '\n' && c != EOF)
+            ungetc(c, stream);
+    }
+    FUNLOCKFILE(stream);
+    *p = '\0';
+    if (fobj) {
+        ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
+        ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
+    }
+    if (p == buf)
+        return NULL;
+    return buf;
+}
+
+/*
+** Py_UniversalNewlineFread is an fread variation that understands
+** all of \r, \n and \r\n conventions.
+** The stream should be opened in binary mode.
+** fobj must be a PyFileObject. There is no readahead; instead a flag
+** (f_skipnextlf) is used to skip a following \n on the next read.
+** If the file object does not have universal newlines enabled
+** (f_univ_newline is false) the whole conversion is skipped and plain
+** fread() is used. Finally, the routine keeps track of the different
+** types of newlines seen (f_newlinetypes).
+**
+** Returns the number of bytes stored in buf.  A result smaller than n
+** means fread() itself came up short (EOF or error), as with fread().
+** If fobj is NULL or not a file object, errno is set to ENXIO and 0 is
+** returned (the return type is unsigned, so -1 cannot be used).
+*/
+size_t
+Py_UniversalNewlineFread(void *buf, size_t n,
+             FILE *stream, PyObject *fobj)
+{
+    char *dst = buf;
+    PyFileObject *f;
+    int newlinetypes, skipnextlf;
+
+    if (!fobj || !PyFile_Check(fobj)) {
+        errno = ENXIO;    /* What can you do... */
+        return 0;
+    }
+    f = (PyFileObject *)fobj;
+    if ( !f->f_univ_newline )
+        return fread(buf, 1, n, stream);
+    newlinetypes = f->f_newlinetypes;
+    skipnextlf = f->f_skipnextlf;
+    /* Invariant: n is the number of bytes still to be stored in buf. */
+    while (n > 0) {
+        char *src = dst;
+        size_t nread;
+        int shortread;
+
+        if (ferror(stream))
+            break;
+        nread = fread(dst, 1, n, stream);
+        /* Assume one byte out for each byte in; corrected below
+        ** for every LF that is dropped.
+        */
+        n -= nread;
+        shortread = (n != 0);    /* true iff EOF or error */
+        while (nread-- > 0) {
+            char c = *src++;
+            if (c == '\r') {
+                /* Save CR as LF and set flag to skip next newline
+                */
+                *dst++ = '\n';
+                skipnextlf = 1;
+            } else if (skipnextlf && c == '\n') {
+                /* Skip an LF, and remember that we saw CR LF.
+                ** The dropped byte frees a slot in the buffer.
+                */
+                skipnextlf = 0;
+                newlinetypes |= NEWLINE_CRLF;
+                ++n;
+            } else {
+                /* Normal char to be stored in buffer. Also update
+                ** the newlinetypes flag if either this is an LF
+                ** or the previous char was a CR.
+                */
+                if (c == '\n')
+                    newlinetypes |= NEWLINE_LF;
+                else if (skipnextlf)
+                    newlinetypes |= NEWLINE_CR;
+                *dst++ = c;
+                skipnextlf = 0;
+            }
+        }
+        if (shortread) {
+            /* A pending CR is a complete newline only if the
+            ** stream really ended; on a read error leave the
+            ** flags alone.
+            */
+            if (skipnextlf && feof(stream))
+                newlinetypes |= NEWLINE_CR;
+            break;
+        }
+    }
+    f->f_newlinetypes = newlinetypes;
+    f->f_skipnextlf = skipnextlf;
+    return dst - (char *)buf;
+}
+#endif