From bc8e642c1bfbef3b94f5c31dd8fdd824549039b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Walter=20D=C3=B6rwald?= <walter@livinglogic.de>
Date: Thu, 21 Apr 2005 21:32:03 +0000
Subject: If the data read from the bytestream in readline() ends in a '\r'
 read one more byte, even if the user has passed a size parameter. This extra
 byte shouldn't cause a buffer overflow in the tokenizer. The original plan
 was to return a line ending in '\r', which might be recognizable as a
 complete line and skip any '\n' that was read afterwards. Unfortunately this
 didn't work, as the tokenizer only recognizes '\n' as line ends, which in
 turn led to joined lines and SyntaxErrors, so this special treatment of a
 split '\r\n' has been dropped. (It can only happen with a temporarily
 exhausted bytestream now anyway.) Fixes parts of SF bugs #1163244 and
 #1175396.

---
 Lib/codecs.py | 16 ++++------------
 Misc/NEWS     |  6 ++++++
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/Lib/codecs.py b/Lib/codecs.py
index 58bba73..eb52ea2 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -230,7 +230,6 @@ class StreamReader(Codec):
         self.errors = errors
         self.bytebuffer = ""
         self.charbuffer = u""
-        self.atcr = False
 
     def decode(self, input, errors='strict'):
         raise NotImplementedError
@@ -306,18 +305,12 @@ class StreamReader(Codec):
         # If size is given, we call read() only once
         while True:
             data = self.read(readsize)
-            if self.atcr and data.startswith(u"\n"):
-                data = data[1:]
             if data:
-                self.atcr = data.endswith(u"\r")
-                # If we're at a "\r" (and are allowed to read more), read one
-                # extra character (which might be a "\n") to get a proper
-                # line ending. (If the stream is temporarily exhausted we return
-                # the wrong line ending, but at least we won't generate a bogus
-                # second line.)
-                if self.atcr and size is None:
+                # If we're at a "\r" read one extra character (which might
+                # be a "\n") to get a proper line ending. If the stream is
+                # temporarily exhausted we return the wrong line ending.
+                if data.endswith(u"\r"):
                     data += self.read(size=1, chars=1)
-                    self.atcr = data.endswith(u"\r")
 
             line += data
             lines = line.splitlines(True)
@@ -367,7 +360,6 @@ class StreamReader(Codec):
         """
         self.bytebuffer = ""
         self.charbuffer = u""
-        self.atcr = False
 
     def seek(self, offset, whence=0):
         """ Set the input stream's current position.
diff --git a/Misc/NEWS b/Misc/NEWS
index 01a1c73..23f760b 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -266,6 +266,12 @@ Library
 - Bug #1149508: ``textwrap`` now handles hyphenated numbers (eg. "2004-03-05")
   correctly.
 
+- Partial fixes for SF bugs #1163244 and #1175396: If a chunk read by
+  ``codecs.StreamReader.readline()`` has a trailing "\r", read one more
+  character, even if the user has passed a size parameter, to get a proper
+  line ending. Remove the special handling of a "\r\n" that has been split
+  between two lines.
+
 
 Build
 -----
-- 
cgit v0.12