diff options
author | John Jolly <john.jolly@gmail.com> | 2018-01-30 08:51:35 (GMT) |
---|---|---|
committer | Gregory P. Smith <greg@krypto.org> | 2018-01-30 08:51:35 (GMT) |
commit | 066df4fd454d6ff9be66e80b2a65995b10af174f (patch) | |
tree | 5806066d761dc72a1992e8adb2f53f78e4ab9a27 /Lib/zipfile.py | |
parent | 2e0ecde8d74f5fc0e3e3e39216975cc70efc4796 (diff) | |
download | cpython-066df4fd454d6ff9be66e80b2a65995b10af174f.zip cpython-066df4fd454d6ff9be66e80b2a65995b10af174f.tar.gz cpython-066df4fd454d6ff9be66e80b2a65995b10af174f.tar.bz2 |
bpo-22908: Add seek and tell functionality to ZipExtFile (GH-4966)
This allows for nested zip files, tar files within zip files, zip files within tar files, etc.
Contributed by: John Jolly
Diffstat (limited to 'Lib/zipfile.py')
-rw-r--r-- | Lib/zipfile.py | 82 |
1 file changed, 82 insertions, 0 deletions
```diff
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index f9db45f..5df7b1b 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -696,6 +696,18 @@ class _SharedFile:
         self._close = close
         self._lock = lock
         self._writing = writing
+        self.seekable = file.seekable
+        self.tell = file.tell
+
+    def seek(self, offset, whence=0):
+        with self._lock:
+            if self.writing():
+                raise ValueError("Can't reposition in the ZIP file while "
+                        "there is an open writing handle on it. "
+                        "Close the writing handle before trying to read.")
+            self._file.seek(self._pos)
+            self._pos = self._file.tell()
+            return self._pos
 
     def read(self, n=-1):
         with self._lock:
@@ -746,6 +758,9 @@ class ZipExtFile(io.BufferedIOBase):
     # Read from compressed files in 4k blocks.
     MIN_READ_SIZE = 4096
 
+    # Chunk size to read during seek
+    MAX_SEEK_READ = 1 << 24
+
     def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                  close_fileobj=False):
         self._fileobj = fileobj
@@ -778,6 +793,17 @@ class ZipExtFile(io.BufferedIOBase):
         else:
             self._expected_crc = None
 
+        self._seekable = False
+        try:
+            if fileobj.seekable():
+                self._orig_compress_start = fileobj.tell()
+                self._orig_compress_size = zipinfo.compress_size
+                self._orig_file_size = zipinfo.file_size
+                self._orig_start_crc = self._running_crc
+                self._seekable = True
+        except AttributeError:
+            pass
+
     def __repr__(self):
         result = ['<%s.%s' % (self.__class__.__module__,
                               self.__class__.__qualname__)]
@@ -963,6 +989,62 @@ class ZipExtFile(io.BufferedIOBase):
         finally:
             super().close()
 
+    def seekable(self):
+        return self._seekable
+
+    def seek(self, offset, whence=0):
+        if not self._seekable:
+            raise io.UnsupportedOperation("underlying stream is not seekable")
+        curr_pos = self.tell()
+        if whence == 0: # Seek from start of file
+            new_pos = offset
+        elif whence == 1: # Seek from current position
+            new_pos = curr_pos + offset
+        elif whence == 2: # Seek from EOF
+            new_pos = self._orig_file_size + offset
+        else:
+            raise ValueError("whence must be os.SEEK_SET (0), "
+                             "os.SEEK_CUR (1), or os.SEEK_END (2)")
+
+        if new_pos > self._orig_file_size:
+            new_pos = self._orig_file_size
+
+        if new_pos < 0:
+            new_pos = 0
+
+        read_offset = new_pos - curr_pos
+        buff_offset = read_offset + self._offset
+
+        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
+            # Just move the _offset index if the new position is in the _readbuffer
+            self._offset = buff_offset
+            read_offset = 0
+        elif read_offset < 0:
+            # Position is before the current position. Reset the ZipExtFile
+
+            self._fileobj.seek(self._orig_compress_start)
+            self._running_crc = self._orig_start_crc
+            self._compress_left = self._orig_compress_size
+            self._left = self._orig_file_size
+            self._readbuffer = b''
+            self._offset = 0
+            self._decompressor = zipfile._get_decompressor(self._compress_type)
+            self._eof = False
+            read_offset = new_pos
+
+        while read_offset > 0:
+            read_len = min(self.MAX_SEEK_READ, read_offset)
+            self.read(read_len)
+            read_offset -= read_len
+
+        return self.tell()
+
+    def tell(self):
+        if not self._seekable:
+            raise io.UnsupportedOperation("underlying stream is not seekable")
+        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
+        return filepos
+
 
 class _ZipWriteFile(io.BufferedIOBase):
     def __init__(self, zf, zinfo, zip64):
```