summary refs log tree commit diff stats
path: root/Lib/codecs.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/codecs.py')
-rw-r--r-- Lib/codecs.py 71
1 files changed, 41 insertions, 30 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py
index ab12237..f6d480c 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -230,6 +230,7 @@ class StreamReader(Codec):
self.errors = errors
self.bytebuffer = ""
self.charbuffer = u""
+ self.atcr = False
def decode(self, input, errors='strict'):
raise NotImplementedError
@@ -256,41 +257,39 @@ class StreamReader(Codec):
definition of the encoding and the given size, e.g. if
optional encoding endings or state markers are available
on the stream, these should be read too.
-
"""
# read until we get the required number of characters (if available)
- done = False
while True:
# can the request can be satisfied from the character buffer?
if chars < 0:
if self.charbuffer:
- done = True
+ break
else:
if len(self.charbuffer) >= chars:
- done = True
- if done:
- if chars < 0:
- result = self.charbuffer
- self.charbuffer = u""
- break
- else:
- result = self.charbuffer[:chars]
- self.charbuffer = self.charbuffer[chars:]
break
# we need more data
if size < 0:
newdata = self.stream.read()
else:
newdata = self.stream.read(size)
+ # decode bytes (those remaining from the last call included)
data = self.bytebuffer + newdata
- object, decodedbytes = self.decode(data, self.errors)
+ newchars, decodedbytes = self.decode(data, self.errors)
# keep undecoded bytes until the next call
self.bytebuffer = data[decodedbytes:]
# put new characters in the character buffer
- self.charbuffer += object
+ self.charbuffer += newchars
# there was no data available
if not newdata:
- done = True
+ break
+ if chars < 0:
+ # Return everything we've got
+ result = self.charbuffer
+ self.charbuffer = u""
+ else:
+ # Return the first chars characters
+ result = self.charbuffer[:chars]
+ self.charbuffer = self.charbuffer[chars:]
return result
def readline(self, size=None, keepends=True):
@@ -302,24 +301,36 @@ class StreamReader(Codec):
read() method.
"""
- if size is None:
- size = 10
+ readsize = size or 72
line = u""
+ # If size is given, we call read() only once
while True:
- data = self.read(size)
+ data = self.read(readsize)
+ if self.atcr and data.startswith(u"\n"):
+ data = data[1:]
+ if data:
+ self.atcr = data.endswith(u"\r")
line += data
- pos = line.find("\n")
- if pos>=0:
- self.charbuffer = line[pos+1:] + self.charbuffer
- if keepends:
- line = line[:pos+1]
- else:
- line = line[:pos]
- return line
- elif not data:
- return line
- if size<8000:
- size *= 2
+ lines = line.splitlines(True)
+ if lines:
+ line0withend = lines[0]
+ line0withoutend = lines[0].splitlines(False)[0]
+ if line0withend != line0withoutend: # We really have a line end
+ # Put the rest back together and keep it until the next call
+ self.charbuffer = u"".join(lines[1:]) + self.charbuffer
+ if keepends:
+ line = line0withend
+ else:
+ line = line0withoutend
+ break
+ # we didn't get anything or this was our only try
+ elif not data or size is not None:
+ if line and not keepends:
+ line = line.splitlines(False)[0]
+ break
+ if readsize<8000:
+ readsize *= 2
+ return line
def readlines(self, sizehint=None, keepends=True):