summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Guido van Rossum <guido@python.org> 2007-04-11 14:19:59 (GMT)
committer Guido van Rossum <guido@python.org> 2007-04-11 14:19:59 (GMT)
commit cba608cadbf24f2f88f5db215a58fc9428a9aafd (patch)
tree b99f066084e02d12ba2a5419fa7b600205db382d
parent 0dd32e246cd232012d07926ae312205decb74b61 (diff)
download cpython-cba608cadbf24f2f88f5db215a58fc9428a9aafd.zip
cpython-cba608cadbf24f2f88f5db215a58fc9428a9aafd.tar.gz
cpython-cba608cadbf24f2f88f5db215a58fc9428a9aafd.tar.bz2
More efficient implementation of tell(); _read_chunk() doesn't have to
call self.buffer.tell().
-rw-r--r-- Lib/io.py 63
-rw-r--r-- Lib/test/test_io.py 6
2 files changed, 35 insertions, 34 deletions
diff --git a/Lib/io.py b/Lib/io.py
index fd39826..f9e846b 100644
--- a/Lib/io.py
+++ b/Lib/io.py
@@ -897,11 +897,11 @@ class TextIOWrapper(TextIOBase):
self._seekable = self.buffer.seekable()
# A word about _snapshot. This attribute is either None, or a
- # tuple (position, decoder_pickle, readahead) where position is a
- # position of the underlying buffer, decoder_pickle is a pickled
- # decoder state, and readahead is the chunk of bytes that was read
- # from that position. We use this to reconstruct intermediate
- # decoder states in tell().
+ # tuple (decoder_pickle, readahead, pending) where decoder_pickle
+ # is a pickled decoder state, readahead is the chunk of bytes that
+ # was read, and pending is the characters that were rendered by
+ # the decoder after feeding it those bytes. We use this to
+ # reconstruct intermediate decoder states in tell().
def _seekable(self):
return self._seekable
@@ -944,14 +944,16 @@ class TextIOWrapper(TextIOBase):
return decoder
def _read_chunk(self):
- if not self._seekable:
- return self.buffer.read(self._CHUNK_SIZE)
assert self._decoder is not None
- position = self.buffer.tell()
+ if not self._seekable:
+ readahead = self.buffer.read(self._CHUNK_SIZE)
+ pending = self._decoder.decode(readahead, not readahead)
+ return readahead, pending
decoder_state = pickle.dumps(self._decoder, 2)
readahead = self.buffer.read(self._CHUNK_SIZE)
- self._snapshot = (position, decoder_state, readahead)
- return readahead
+ pending = self._decoder.decode(readahead, not readahead)
+ self._snapshot = (decoder_state, readahead, pending)
+ return readahead, pending
def _encode_decoder_state(self, ds, pos):
if ds == self._decoder_in_rest_pickle:
@@ -975,21 +977,22 @@ class TextIOWrapper(TextIOBase):
if not self._seekable:
raise IOError("Underlying stream is not seekable")
self.flush()
+ position = self.buffer.tell()
if self._decoder is None or self._snapshot is None:
assert self._pending == ""
- return self.buffer.tell()
- position, decoder_state, readahead = self._snapshot
+ return position
+ decoder_state, readahead, pending = self._snapshot
+ position -= len(readahead)
+ needed = len(pending) - len(self._pending)
+ if not needed:
+ return self._encode_decoder_state(decoder_state, position)
decoder = pickle.loads(decoder_state)
- characters = ""
- sequence = []
+ n = 0
for i, b in enumerate(readahead):
- c = decoder.decode(bytes([b]))
- if c:
- characters += c
- sequence.append((characters, i+1, pickle.dumps(decoder, 2)))
- for ch, i, st in sequence:
- if ch + self._pending == characters:
- return self._encode_decoder_state(st, position + i)
+ n += len(decoder.decode(bytes([b])))
+ if n >= needed:
+ decoder_state = pickle.dumps(decoder, 2)
+ return self._encode_decoder_state(decoder_state, position+i+1)
raise IOError("Can't reconstruct logical file position")
def seek(self, pos, whence=0):
@@ -1023,9 +1026,11 @@ class TextIOWrapper(TextIOBase):
return pos
decoder = pickle.loads(ds)
self.buffer.seek(pos)
- self._snapshot = (pos, ds, "")
+ self._snapshot = (ds, b"", "")
self._pending = ""
- self._decoder = None
+ if not self._decoder_in_rest_pickle:
+ self._get_decoder() # For its side effect
+ self._decoder = decoder
return orig_pos
def read(self, n: int = -1):
@@ -1038,9 +1043,9 @@ class TextIOWrapper(TextIOBase):
return res
else:
while len(res) < n:
- data = self._read_chunk()
- res += decoder.decode(data, not data)
- if not data:
+ readahead, pending = self._read_chunk()
+ res += pending
+ if not readahead:
break
self._pending = res[n:]
return res[:n]
@@ -1087,9 +1092,9 @@ class TextIOWrapper(TextIOBase):
# No line ending seen yet - get more data
while True:
- data = self._read_chunk()
- more_line = decoder.decode(data, not data)
- if more_line or not data:
+ readahead, pending = self._read_chunk()
+ more_line = pending
+ if more_line or not readahead:
break
if not more_line:
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index d19b2a0..5542a5b 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -532,25 +532,21 @@ class TextIOWrapperTest(unittest.TestCase):
f.truncate()
sample = u"s\xff\u0fff\uffff"
wlines = []
- for size in (0, 1, 2, 3, 4, 5, 15, 16, 17, 31, 32, 33, 63, 64, 65,
- 100, 200, 300, 400, 500, 1000):
+ for size in (0, 1, 2, 3, 4, 5, 30, 31, 32, 33, 62, 63, 64, 65, 1000):
chars = []
for i in xrange(size):
chars.append(sample[i % len(sample)])
line = u"".join(chars) + "\n"
wlines.append((f.tell(), line))
f.write(line)
- wendpos = f.tell()
f.seek(0)
rlines = []
while True:
pos = f.tell()
line = f.readline()
if not line:
- rendpos = pos
break
rlines.append((pos, line))
- self.assertEquals(rendpos, wendpos)
self.assertEquals(rlines, wlines)