diff options
| author | Martin v. Löwis <martin@v.loewis.de> | 2005-09-18 08:34:39 (GMT) | 
|---|---|---|
| committer | Martin v. Löwis <martin@v.loewis.de> | 2005-09-18 08:34:39 (GMT) | 
| commit | 4ed673877dd91172775e0e549bd205efe2c25567 (patch) | |
| tree | 78feca4f3a8657755cef2b78998710106b8a1c1e /Lib/codecs.py | |
| parent | 8b291e2d66390b8511b9e9d861e34714d40d5f5b (diff) | |
| download | cpython-4ed673877dd91172775e0e549bd205efe2c25567.zip cpython-4ed673877dd91172775e0e549bd205efe2c25567.tar.gz cpython-4ed673877dd91172775e0e549bd205efe2c25567.tar.bz2  | |
Patch #1268314: Cache lines in StreamReader.readlines for performance.
Will backport to Python 2.4.
Diffstat (limited to 'Lib/codecs.py')
| -rw-r--r-- | Lib/codecs.py | 37 | 
1 files changed, 37 insertions, 0 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py index d972a51..9d29acc 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -232,6 +232,7 @@ class StreamReader(Codec):          # For str->str decoding this will stay a str          # For str->unicode decoding the first read will promote it to unicode          self.charbuffer = "" +        self.linebuffer = None      def decode(self, input, errors='strict'):          raise NotImplementedError @@ -264,6 +265,11 @@ class StreamReader(Codec):              optional encoding endings or state markers are available              on the stream, these should be read too.          """ +        # If we have lines cached, first merge them back into characters +        if self.linebuffer: +            self.charbuffer = "".join(self.linebuffer) +            self.linebuffer = None +                      # read until we get the required number of characters (if available)          while True:              # can the request can be satisfied from the character buffer? @@ -316,6 +322,20 @@ class StreamReader(Codec):              read() method.          """ +        # If we have lines cached from an earlier read, return +        # them unconditionally +        if self.linebuffer: +            line = self.linebuffer[0] +            del self.linebuffer[0] +            if len(self.linebuffer) == 1: +                # revert to charbuffer mode; we might need more data +                # next time +                self.charbuffer = self.linebuffer[0] +                self.linebuffer = None +            if not keepends: +                line = line.splitlines(False)[0] +            return line +                      readsize = size or 72          line = ""          # If size is given, we call read() only once @@ -331,6 +351,22 @@ class StreamReader(Codec):              line += data              lines = line.splitlines(True)              if lines: +                if len(lines) > 1: +                    # More than one line result; the first line is a full line +                    # to return +                    line = lines[0] +                    del lines[0] +                    if len(lines) > 1: +                        # cache the remaining lines +                        lines[-1] += self.charbuffer +                        self.linebuffer = lines +                        self.charbuffer = None +                    else: +                        # only one remaining line, put it back into charbuffer +                        self.charbuffer = lines[0] + self.charbuffer +                    if not keepends: +                        line = line.splitlines(False)[0] +                    break                  line0withend = lines[0]                  line0withoutend = lines[0].splitlines(False)[0]                  if line0withend != line0withoutend: # We really have a line end @@ -376,6 +412,7 @@ class StreamReader(Codec):          """          self.bytebuffer = ""          self.charbuffer = u"" +        self.linebuffer = None      def seek(self, offset, whence=0):          """ Set the input stream's current position.  | 
