diff options
author | Lars Gustäbel <lars@gustaebel.de> | 2006-12-23 16:51:47 (GMT) |
---|---|---|
committer | Lars Gustäbel <lars@gustaebel.de> | 2006-12-23 16:51:47 (GMT) |
commit | aedb92e59c2f4c3c33fbb33d5dc4afefe344620c (patch) | |
tree | eb17ea20b2ac6b85bfb819630ce1d70e5cfa3fa3 | |
parent | 60775f29de0e6107a46f668144cb1c133d6e5147 (diff) | |
download | cpython-aedb92e59c2f4c3c33fbb33d5dc4afefe344620c.zip cpython-aedb92e59c2f4c3c33fbb33d5dc4afefe344620c.tar.gz cpython-aedb92e59c2f4c3c33fbb33d5dc4afefe344620c.tar.bz2 |
Patch #1230446: tarfile.py: fix ExFileObject so that read() and tell()
work correctly together with readline().
(backport from rev. 53153)
-rw-r--r-- | Lib/tarfile.py | 259 |
-rw-r--r-- | Lib/test/test_tarfile.py | 12 |
-rw-r--r-- | Misc/NEWS | 3 |
3 files changed, 167 insertions, 107 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 1b8f140..cffde45 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -622,140 +622,194 @@ class _BZ2Proxy(object): #------------------------ # Extraction file object #------------------------ -class ExFileObject(object): - """File-like object for reading an archive member. - Is returned by TarFile.extractfile(). Support for - sparse files included. +class _FileInFile(object): + """A thin wrapper around an existing file object that + provides a part of its data as an individual file + object. """ - def __init__(self, tarfile, tarinfo): - self.fileobj = tarfile.fileobj - self.name = tarinfo.name - self.mode = "r" - self.closed = False - self.offset = tarinfo.offset_data - self.size = tarinfo.size - self.pos = 0L - self.linebuffer = "" - if tarinfo.issparse(): - self.sparse = tarinfo.sparse - self.read = self._readsparse - else: - self.read = self._readnormal + def __init__(self, fileobj, offset, size, sparse=None): + self.fileobj = fileobj + self.offset = offset + self.size = size + self.sparse = sparse + self.position = 0 - def __read(self, size): - """Overloadable read method. + def tell(self): + """Return the current file position. """ - return self.fileobj.read(size) + return self.position - def readline(self, size=-1): - """Read a line with approx. size. If size is negative, - read a whole line. readline() and read() must not - be mixed up (!). + def seek(self, position): + """Seek to a position in the file. """ - if size < 0: - size = sys.maxint + self.position = position - nl = self.linebuffer.find("\n") - if nl >= 0: - nl = min(nl, size) + def read(self, size=None): + """Read data from the file. 
+ """ + if size is None: + size = self.size - self.position else: - size -= len(self.linebuffer) - while (nl < 0 and size > 0): - buf = self.read(min(size, 100)) - if not buf: - break - self.linebuffer += buf - size -= len(buf) - nl = self.linebuffer.find("\n") - if nl == -1: - s = self.linebuffer - self.linebuffer = "" - return s - buf = self.linebuffer[:nl] - self.linebuffer = self.linebuffer[nl + 1:] - while buf[-1:] == "\r": - buf = buf[:-1] - return buf + "\n" + size = min(size, self.size - self.position) - def readlines(self): - """Return a list with all (following) lines. - """ - result = [] - while True: - line = self.readline() - if not line: break - result.append(line) - return result + if self.sparse is None: + return self.readnormal(size) + else: + return self.readsparse(size) - def _readnormal(self, size=None): + def readnormal(self, size): """Read operation for regular files. """ - if self.closed: - raise ValueError("file is closed") - self.fileobj.seek(self.offset + self.pos) - bytesleft = self.size - self.pos - if size is None: - bytestoread = bytesleft - else: - bytestoread = min(size, bytesleft) - self.pos += bytestoread - return self.__read(bytestoread) + self.fileobj.seek(self.offset + self.position) + self.position += size + return self.fileobj.read(size) - def _readsparse(self, size=None): + def readsparse(self, size): """Read operation for sparse files. """ - if self.closed: - raise ValueError("file is closed") - - if size is None: - size = self.size - self.pos - data = [] while size > 0: - buf = self._readsparsesection(size) + buf = self.readsparsesection(size) if not buf: break size -= len(buf) data.append(buf) return "".join(data) - def _readsparsesection(self, size): + def readsparsesection(self, size): """Read a single section of a sparse file. 
""" - section = self.sparse.find(self.pos) + section = self.sparse.find(self.position) if section is None: return "" - toread = min(size, section.offset + section.size - self.pos) + size = min(size, section.offset + section.size - self.position) + if isinstance(section, _data): - realpos = section.realpos + self.pos - section.offset - self.pos += toread + realpos = section.realpos + self.position - section.offset self.fileobj.seek(self.offset + realpos) - return self.__read(toread) + self.position += size + return self.fileobj.read(size) else: - self.pos += toread - return NUL * toread + self.position += size + return NUL * size +#class _FileInFile + + +class ExFileObject(object): + """File-like object for reading an archive member. + Is returned by TarFile.extractfile(). + """ + blocksize = 1024 + + def __init__(self, tarfile, tarinfo): + self.fileobj = _FileInFile(tarfile.fileobj, + tarinfo.offset_data, + tarinfo.size, + getattr(tarinfo, "sparse", None)) + self.name = tarinfo.name + self.mode = "r" + self.closed = False + self.size = tarinfo.size + + self.position = 0 + self.buffer = "" + + def read(self, size=None): + """Read at most size bytes from the file. If size is not + present or None, read all data until EOF is reached. + """ + if self.closed: + raise ValueError("I/O operation on closed file") + + buf = "" + if self.buffer: + if size is None: + buf = self.buffer + self.buffer = "" + else: + buf = self.buffer[:size] + self.buffer = self.buffer[size:] + + if size is None: + buf += self.fileobj.read() + else: + buf += self.fileobj.read(size - len(buf)) + + self.position += len(buf) + return buf + + def readline(self, size=-1): + """Read one entire line from the file. If size is present + and non-negative, return a string with at most that + size, which may be an incomplete line. 
+ """ + if self.closed: + raise ValueError("I/O operation on closed file") + + if "\n" in self.buffer: + pos = self.buffer.find("\n") + 1 + else: + buffers = [self.buffer] + while True: + buf = self.fileobj.read(self.blocksize) + buffers.append(buf) + if not buf or "\n" in buf: + self.buffer = "".join(buffers) + pos = self.buffer.find("\n") + 1 + if pos == 0: + # no newline found. + pos = len(self.buffer) + break + + if size != -1: + pos = min(size, pos) + + buf = self.buffer[:pos] + self.buffer = self.buffer[pos:] + self.position += len(buf) + return buf + + def readlines(self): + """Return a list with all remaining lines. + """ + result = [] + while True: + line = self.readline() + if not line: break + result.append(line) + return result def tell(self): """Return the current file position. """ - return self.pos + if self.closed: + raise ValueError("I/O operation on closed file") + + return self.position - def seek(self, pos, whence=0): + def seek(self, pos, whence=os.SEEK_SET): """Seek to a position in the file. """ - self.linebuffer = "" - if whence == 0: - self.pos = min(max(pos, 0), self.size) - if whence == 1: + if self.closed: + raise ValueError("I/O operation on closed file") + + if whence == os.SEEK_SET: + self.position = min(max(pos, 0), self.size) + elif whence == os.SEEK_CUR: if pos < 0: - self.pos = max(self.pos + pos, 0) + self.position = max(self.position + pos, 0) else: - self.pos = min(self.pos + pos, self.size) - if whence == 2: - self.pos = max(min(self.size + pos, self.size), 0) + self.position = min(self.position + pos, self.size) + elif whence == os.SEEK_END: + self.position = max(min(self.size + pos, self.size), 0) + else: + raise ValueError("Invalid argument") + + self.buffer = "" + self.fileobj.seek(self.position) def close(self): """Close the file object. @@ -763,20 +817,13 @@ class ExFileObject(object): self.closed = True def __iter__(self): - """Get an iterator over the file object. 
- """ - if self.closed: - raise ValueError("I/O operation on closed file") - return self - - def next(self): - """Get the next item from the file iterator. + """Get an iterator over the file's lines. """ - result = self.readline() - if not result: - raise StopIteration - return result - + while True: + line = self.readline() + if not line: + break + yield line #class ExFileObject #------------------ diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 2685d67..f229fa5 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -110,7 +110,7 @@ class ReadTest(BaseTest): """Test seek() method of _FileObject, incl. random reading. """ if self.sep != "|": - filename = "0-REGTYPE" + filename = "0-REGTYPE-TEXT" self.tar.extract(filename, dirname()) f = open(os.path.join(dirname(), filename), "rb") data = f.read() @@ -149,6 +149,16 @@ class ReadTest(BaseTest): s2 = fobj.readlines() self.assert_(s1 == s2, "readlines() after seek failed") + fobj.seek(0) + self.assert_(len(fobj.readline()) == fobj.tell(), + "tell() after readline() failed") + fobj.seek(512) + self.assert_(len(fobj.readline()) + 512 == fobj.tell(), + "tell() after seek() and readline() failed") + fobj.seek(0) + line = fobj.readline() + self.assert_(fobj.read() == data[len(line):], + "read() after readline() failed") fobj.close() def test_old_dirtype(self): @@ -620,6 +620,9 @@ Core and builtins Library ------- +- Patch #1230446: tarfile.py: fix ExFileObject so that read() and tell() + work correctly together with readline(). + - Correction of patch #1455898: In the mbcs decoder, set final=False for stream decoder, but final=True for the decode function. |