diff options
author | Bob Ippolito <bob@redivi.com> | 2006-05-22 15:22:46 (GMT) |
---|---|---|
committer | Bob Ippolito <bob@redivi.com> | 2006-05-22 15:22:46 (GMT) |
commit | b97597316b1176e62d538c812b9e468ff3372b6b (patch) | |
tree | aaeac4c49960ecbc0e0f0ab0a60df0331c387484 /Lib/gzip.py | |
parent | d72aab5e31f831edb2b8e837e2ab387f2db07aee (diff) | |
download | cpython-b97597316b1176e62d538c812b9e468ff3372b6b.zip cpython-b97597316b1176e62d538c812b9e468ff3372b6b.tar.gz cpython-b97597316b1176e62d538c812b9e468ff3372b6b.tar.bz2 |
Revert gzip readline performance patch #1281707 until a more generic performance improvement can be found
Diffstat (limited to 'Lib/gzip.py')
-rw-r--r-- | Lib/gzip.py | 48 |
1 files changed, 20 insertions, 28 deletions
diff --git a/Lib/gzip.py b/Lib/gzip.py
index 8c7870e..3c1ebf2 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -107,7 +107,6 @@ class GzipFile:
             self.extrabuf = ""
             self.extrasize = 0
             self.filename = filename
-            self.min_readsize = 64  # Starts small, scales exponentially
 
         elif mode[0:1] == 'w' or mode[0:1] == 'a':
             self.mode = WRITE
@@ -382,39 +381,32 @@ class GzipFile:
             self.read(count % 1024)
 
     def readline(self, size=-1):
-        if size < 0:
-            size = sys.maxint  # Line can be as long as maxint
-            readsize = self.min_readsize  # Read from file in small chunks
-        else:
-            readsize = size  # Only read in as much as specified
-
-        bufs = ""
-
+        if size < 0: size = sys.maxint
+        bufs = []
+        readsize = min(100, size)    # Read from the file in small chunks
         while True:
-            if size == 0: return bufs  # Return line (reached max len)
+            if size == 0:
+                return "".join(bufs) # Return resulting line
 
             c = self.read(readsize)
             i = c.find('\n')
+            if size is not None:
+                # We set i=size to break out of the loop under two
+                # conditions: 1) there's no newline, and the chunk is
+                # larger than size, or 2) there is a newline, but the
+                # resulting line would be longer than 'size'.
+                if i==-1 and len(c) > size: i=size-1
+                elif size <= i: i = size -1
 
-            # If there is a newline, or the string is empty
             if i >= 0 or c == '':
-                if size <= i: i = size - 1  # Another larger than size check
-
-                self._unread(c[i+1:])  # Push back rest of chunk
-
-                return bufs + c[:i+1]  # Stored line, plus new segment
-
-            # If there is no newline
-            else:
-                if len(c) > size: i = size - 1  # If lineis larger than size
-
-                bufs = bufs + c
-                size = size - len(c)
-                readsize = min(size, int(readsize * 1.1))
-
-                # Optimize future readline() calls
-                if readsize > self.min_readsize:
-                    self.min_readsize = readsize
+                bufs.append(c[:i+1])    # Add portion of last chunk
+                self._unread(c[i+1:])   # Push back rest of chunk
+                return ''.join(bufs)    # Return resulting line
+
+            # Append chunk to list, decrease 'size',
+            bufs.append(c)
+            size = size - len(c)
+            readsize = min(size, readsize * 2)
 
     def readlines(self, sizehint=0):
         # Negative numbers result in reading all the lines