New I/O code from Tony Lownds implements the newline feature correctly,

and implements .newlines attribute in a 2.x-compatible fashion.
author: Guido van Rossum <guido@python.org> 2007-08-18 21:39:55 (GMT)
committer: Guido van Rossum <guido@python.org> 2007-08-18 21:39:55 (GMT)
commit: 8358db22faa3d8fa9ac0ef2f2c1ff1770a843996 (patch)
tree: d3be700f72499f59816b5472ff3a03ae0150ac46 /Lib/io.py
parent: b99f762f10edb2646a634c2290ecb064bd52e5c7 (diff)
download: cpython-8358db22faa3d8fa9ac0ef2f2c1ff1770a843996.zip
cpython-8358db22faa3d8fa9ac0ef2f2c1ff1770a843996.tar.gz
cpython-8358db22faa3d8fa9ac0ef2f2c1ff1770a843996.tar.bz2
1 files changed, 160 insertions, 50 deletions
diff --git a/Lib/io.py b/Lib/io.py
index 2f2d3c5..40ea687 100644
--- a/Lib/io.py
+++ b/Lib/io.py
@@ -61,10 +61,26 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None):
                  can be: 0 = unbuffered, 1 = line buffered,
                  larger = fully buffered.
       encoding: optional string giving the text encoding.
-      newline: optional newlines specifier; must be None, '\n' or '\r\n';
-               specifies the line ending expected on input and written on
-               output.  If None, use universal newlines on input and
-               use os.linesep on output.
+      newline: optional newlines specifier; must be None, '', '\n', '\r'
+               or '\r\n'; all other values are illegal.  It controls the
+               handling of line endings.  It works as follows:
+
+        * On input, if `newline` is `None`, universal newlines
+          mode is enabled.  Lines in the input can end in `'\n'`,
+          `'\r'`, or `'\r\n'`, and these are translated into
+          `'\n'` before being returned to the caller.  If it is
+          `''`, universal newline mode is enabled, but line endings
+          are returned to the caller untranslated.  If it has any of
+          the other legal values, input lines are only terminated by
+          the given string, and the line ending is returned to the
+          caller untranslated.
+
+        * On output, if `newline` is `None`, any `'\n'`
+          characters written are translated to the system default
+          line separator, `os.linesep`.  If `newline` is `''`,
+          no translation takes place.  If `newline` is any of the
+          other legal values, any `'\n'` characters written are
+          translated to the given string.
 
     (*) If a file descriptor is given, it is closed when the returned
     I/O object is closed.  If you don't want this to happen, use
@@ -958,6 +974,17 @@ class TextIOBase(IOBase):
         """Subclasses should override."""
         return None
 
+    @property
+    def newlines(self):
+        """newlines -> None | str | tuple of str. Line endings translated
+        so far.
+
+        Only line endings translated during reading are considered.
+
+        Subclasses should override.
+        """
+        return None
+
 
 class TextIOWrapper(TextIOBase):
 
@@ -969,7 +996,7 @@ class TextIOWrapper(TextIOBase):
     _CHUNK_SIZE = 128
 
     def __init__(self, buffer, encoding=None, newline=None):
-        if newline not in (None, "\n", "\r\n"):
+        if newline not in (None, "", "\n", "\r", "\r\n"):
             raise ValueError("illegal newline value: %r" % (newline,))
         if encoding is None:
             try:
@@ -987,8 +1014,12 @@ class TextIOWrapper(TextIOBase):
 
         self.buffer = buffer
         self._encoding = encoding
-        self._newline = newline or os.linesep
-        self._fix_newlines = newline is None
+        self._readuniversal = not newline
+        self._readtranslate = newline is None
+        self._readnl = newline
+        self._writetranslate = newline != ''
+        self._writenl = newline or os.linesep
+        self._seennl = 0
         self._decoder = None
         self._pending = ""
         self._snapshot = None
@@ -1032,13 +1063,15 @@ class TextIOWrapper(TextIOBase):
     def write(self, s: str):
         if self.closed:
             raise ValueError("write to closed file")
+        haslf = "\n" in s
+        if haslf and self._writetranslate and self._writenl != "\n":
+            s = s.replace("\n", self._writenl)
         # XXX What if we were just reading?
         b = s.encode(self._encoding)
         if isinstance(b, str):
             b = bytes(b)
-        n = self.buffer.write(b)
-        if "\n" in s:
-            # XXX only if isatty
+        self.buffer.write(b)
+        if haslf and self.isatty():
             self.flush()
         self._snapshot = self._decoder = None
         return len(s)
@@ -1159,7 +1192,7 @@ class TextIOWrapper(TextIOBase):
             res += decoder.decode(self.buffer.read(), True)
             self._pending = ""
             self._snapshot = None
-            return res.replace("\r\n", "\n")
+            return self._replacenl(res)
         else:
             while len(res) < n:
                 readahead, pending = self._read_chunk()
@@ -1167,7 +1200,7 @@ class TextIOWrapper(TextIOBase):
                 if not readahead:
                     break
             self._pending = res[n:]
-            return res[:n].replace("\r\n", "\n")
+            return self._replacenl(res[:n])
 
     def __next__(self):
         self._telling = False
@@ -1189,59 +1222,136 @@ class TextIOWrapper(TextIOBase):
 
         line = self._pending
         start = 0
+        cr_eof = False
         decoder = self._decoder or self._get_decoder()
 
+        pos = endpos = None
+        ending = None
         while True:
-            # In C we'd look for these in parallel of course.
-            nlpos = line.find("\n", start)
-            crpos = line.find("\r", start)
-            if nlpos >= 0 and crpos >= 0:
-                endpos = min(nlpos, crpos)
-            else:
-                endpos = nlpos if nlpos >= 0 else crpos
-
-            if endpos != -1:
-                endc = line[endpos]
-                if endc == "\n":
-                    ending = "\n"
-                    break
-
-                # We've seen \r - is it standalone, \r\n or \r at end of line?
-                if endpos + 1 < len(line):
-                    if line[endpos+1] == "\n":
-                        ending = "\r\n"
+            if self._readuniversal:
+                # Universal newline search. Find any of \r, \r\n, \n
+
+                # In C we'd look for these in parallel of course.
+                nlpos = line.find("\n", start)
+                crpos = line.find("\r", start)
+                if crpos == -1:
+                    if nlpos == -1:
+                        start = len(line)
                     else:
-                        ending = "\r"
+                        # Found \n
+                        pos = nlpos
+                        endpos = pos + 1
+                        ending = self._LF
+                        break
+                elif nlpos == -1:
+                    if crpos == len(line) - 1:
+                        # Found \r at end of buffer, must keep reading
+                        start = crpos
+                        cr_eof = True
+                    else:
+                        # Found lone \r
+                        ending = self._CR
+                        pos = crpos
+                        endpos = pos + 1
+                        break
+                elif nlpos < crpos:
+                    # Found \n
+                    pos = nlpos
+                    endpos = pos + 1
+                    ending = self._LF
+                    break
+                elif nlpos == crpos + 1:
+                    # Found \r\n
+                    ending = self._CRLF
+                    pos = crpos
+                    endpos = pos + 2
+                    break
+                else:
+                    # Found \r
+                    pos = crpos
+                    endpos = pos + 1
+                    ending = self._CR
                     break
-                # There might be a following \n in the next block of data ...
-                start = endpos
             else:
-                start = len(line)
+                # non-universal
+                pos = line.find(self._readnl)
+                if pos >= 0:
+                    endpos = pos+len(self._readnl)
+                    ending = self._nlflag(self._readnl)
+                    break
 
             # No line ending seen yet - get more data
+            more_line = ''
             while True:
                 readahead, pending = self._read_chunk()
                 more_line = pending
                 if more_line or not readahead:
                     break
+            if more_line:
+                line += more_line
+            else:
+                # end of file
+                self._pending = ''
+                self._snapshot = None
+                if cr_eof:
+                    self._seennl |= self._CR
+                    return line[:-1] + '\n'
+                else:
+                    return line
 
-            if not more_line:
-                ending = ""
-                endpos = len(line)
-                break
-
-            line += more_line
-
-        nextpos = endpos + len(ending)
-        self._pending = line[nextpos:]
-
-        # XXX Update self.newlines here if we want to support that
-
-        if self._fix_newlines and ending not in ("\n", ""):
-            return line[:endpos] + "\n"
+        self._pending = line[endpos:]
+        if self._readtranslate:
+            self._seennl |= ending
+            if ending != self._LF:
+                return line[:pos] + '\n'
+            else:
+                return line[:endpos]
         else:
-            return line[:nextpos]
-
+            return line[:endpos]
+
+    def _replacenl(self, data):
+        # Replace newlines in data as needed and record that they have
+        # been seen.
+        if not self._readtranslate:
+            return data
+        if self._readuniversal:
+            crlf = data.count('\r\n')
+            cr = data.count('\r') - crlf
+            lf = data.count('\n') - crlf
+            self._seennl |= (lf and self._LF) | (cr and self._CR) \
+                         | (crlf and self._CRLF)
+            if crlf:
+                data = data.replace("\r\n", "\n")
+            if cr:
+                data = data.replace("\r", "\n")
+        elif self._readnl == '\n':
+            # Only need to detect if \n was seen.
+            if data.count('\n'):
+                self._seennl |= self._LF
+        else:
+            newdata = data.replace(self._readnl, '\n')
+            if newdata is not data:
+                self._seennl |= self._nlflag(self._readnl)
+            data = newdata
+        return data
+
+    _LF = 1
+    _CR = 2
+    _CRLF = 4
+    @property
+    def newlines(self):
+        return (None,
+                "\n",
+                "\r",
+                ("\r", "\n"),
+                "\r\n",
+                ("\n", "\r\n"),
+                ("\r", "\r\n"),
+                ("\r", "\n", "\r\n")
+               )[self._seennl]
+
+    def _nlflag(self, nlstr):
+        return [None, "\n", "\r", None, "\r\n"].index(nlstr)
 
 class StringIO(TextIOWrapper):
author	Guido van Rossum <guido@python.org>	2007-08-18 21:39:55 (GMT)
committer	Guido van Rossum <guido@python.org>	2007-08-18 21:39:55 (GMT)
commit	8358db22faa3d8fa9ac0ef2f2c1ff1770a843996 (patch)
tree	d3be700f72499f59816b5472ff3a03ae0150ac46 /Lib/io.py
parent	b99f762f10edb2646a634c2290ecb064bd52e5c7 (diff)
download	cpython-8358db22faa3d8fa9ac0ef2f2c1ff1770a843996.zip cpython-8358db22faa3d8fa9ac0ef2f2c1ff1770a843996.tar.gz cpython-8358db22faa3d8fa9ac0ef2f2c1ff1770a843996.tar.bz2