summary | refs | log | tree | commit | diff | stats
path: root/Lib/codecs.py
diff options
context:
space:
mode:
author: Martin v. Löwis <martin@v.loewis.de> 2005-09-18 08:34:39 (GMT)
committer: Martin v. Löwis <martin@v.loewis.de> 2005-09-18 08:34:39 (GMT)
commit: 4ed673877dd91172775e0e549bd205efe2c25567 (patch)
tree: 78feca4f3a8657755cef2b78998710106b8a1c1e /Lib/codecs.py
parent: 8b291e2d66390b8511b9e9d861e34714d40d5f5b (diff)
download: cpython-4ed673877dd91172775e0e549bd205efe2c25567.zip
cpython-4ed673877dd91172775e0e549bd205efe2c25567.tar.gz
cpython-4ed673877dd91172775e0e549bd205efe2c25567.tar.bz2
Patch #1268314: Cache lines in StreamReader.readlines for performance.
Will backport to Python 2.4.
Diffstat (limited to 'Lib/codecs.py')
-rw-r--r-- Lib/codecs.py 37
1 files changed, 37 insertions, 0 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py
index d972a51..9d29acc 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -232,6 +232,7 @@ class StreamReader(Codec):
# For str->str decoding this will stay a str
# For str->unicode decoding the first read will promote it to unicode
self.charbuffer = ""
+ self.linebuffer = None
def decode(self, input, errors='strict'):
raise NotImplementedError
@@ -264,6 +265,11 @@ class StreamReader(Codec):
optional encoding endings or state markers are available
on the stream, these should be read too.
"""
+ # If we have lines cached, first merge them back into characters
+ if self.linebuffer:
+ self.charbuffer = "".join(self.linebuffer)
+ self.linebuffer = None
+
# read until we get the required number of characters (if available)
while True:
# can the request can be satisfied from the character buffer?
@@ -316,6 +322,20 @@ class StreamReader(Codec):
read() method.
"""
+ # If we have lines cached from an earlier read, return
+ # them unconditionally
+ if self.linebuffer:
+ line = self.linebuffer[0]
+ del self.linebuffer[0]
+ if len(self.linebuffer) == 1:
+ # revert to charbuffer mode; we might need more data
+ # next time
+ self.charbuffer = self.linebuffer[0]
+ self.linebuffer = None
+ if not keepends:
+ line = line.splitlines(False)[0]
+ return line
+
readsize = size or 72
line = ""
# If size is given, we call read() only once
@@ -331,6 +351,22 @@ class StreamReader(Codec):
line += data
lines = line.splitlines(True)
if lines:
+ if len(lines) > 1:
+ # More than one line result; the first line is a full line
+ # to return
+ line = lines[0]
+ del lines[0]
+ if len(lines) > 1:
+ # cache the remaining lines
+ lines[-1] += self.charbuffer
+ self.linebuffer = lines
+ self.charbuffer = None
+ else:
+ # only one remaining line, put it back into charbuffer
+ self.charbuffer = lines[0] + self.charbuffer
+ if not keepends:
+ line = line.splitlines(False)[0]
+ break
line0withend = lines[0]
line0withoutend = lines[0].splitlines(False)[0]
if line0withend != line0withoutend: # We really have a line end
@@ -376,6 +412,7 @@ class StreamReader(Codec):
"""
self.bytebuffer = ""
self.charbuffer = u""
+ self.linebuffer = None
def seek(self, offset, whence=0):
""" Set the input stream's current position.