diff options
author | JuniorJPDJ <github.com@juniorjpdj.pl> | 2022-08-06 23:21:23 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-06 23:21:23 (GMT) |
commit | 330f1d58282517bdf1f19577ab9317fa9810bf95 (patch) | |
tree | c3e6ed80f849028056fbf64a9bb5cad8ad0d974d | |
parent | 56af5a200d60e86a8ac450264729d693053275e3 (diff) | |
download | cpython-330f1d58282517bdf1f19577ab9317fa9810bf95.zip cpython-330f1d58282517bdf1f19577ab9317fa9810bf95.tar.gz cpython-330f1d58282517bdf1f19577ab9317fa9810bf95.tar.bz2 |
gh-88339: enable fast seeking of uncompressed unencrypted zipfile.ZipExtFile (GH-27737)
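When the user seeks forward within an uncompressed (ZIP_STORED), unencrypted member of a zip file, seek the underlying file object directly instead of reading and discarding all of the intermediate data. CRC checking is disabled for the member after such a seek, because a checksum computed over partially skipped data would be invalid; it is restored when the member is reset by a backward seek.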
Contributed by: @JuniorJPDJ
-rw-r--r-- | Lib/test/test_zipfile.py | 2 | ||||
-rw-r--r-- | Lib/zipfile.py | 24 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2021-08-27-18-07-35.bpo-44173.oW92Ev.rst | 1 |
3 files changed, 22 insertions, 5 deletions
```diff
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index fa0ca5a..2125778 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -2032,6 +2032,7 @@ class OtherTests(unittest.TestCase):
                 fp.seek(bloc, os.SEEK_CUR)
                 self.assertEqual(fp.tell(), bloc)
                 self.assertEqual(fp.read(5), txt[bloc:bloc+5])
+                self.assertEqual(fp.tell(), bloc + 5)
                 fp.seek(0, os.SEEK_END)
                 self.assertEqual(fp.tell(), len(txt))
                 fp.seek(0, os.SEEK_SET)
@@ -2049,6 +2050,7 @@ class OtherTests(unittest.TestCase):
                 fp.seek(bloc, os.SEEK_CUR)
                 self.assertEqual(fp.tell(), bloc)
                 self.assertEqual(fp.read(5), txt[bloc:bloc+5])
+                self.assertEqual(fp.tell(), bloc + 5)
                 fp.seek(0, os.SEEK_END)
                 self.assertEqual(fp.tell(), len(txt))
                 fp.seek(0, os.SEEK_SET)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index e3b7a61..9815600 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -847,6 +847,7 @@ class ZipExtFile(io.BufferedIOBase):
                 self._orig_compress_size = zipinfo.compress_size
                 self._orig_file_size = zipinfo.file_size
                 self._orig_start_crc = self._running_crc
+                self._orig_crc = self._expected_crc
                 self._seekable = True
         except AttributeError:
             pass
@@ -1069,17 +1070,17 @@ class ZipExtFile(io.BufferedIOBase):
             raise ValueError("I/O operation on closed file.")
         return self._seekable
 
-    def seek(self, offset, whence=0):
+    def seek(self, offset, whence=os.SEEK_SET):
         if self.closed:
             raise ValueError("seek on closed file.")
         if not self._seekable:
             raise io.UnsupportedOperation("underlying stream is not seekable")
         curr_pos = self.tell()
-        if whence == 0: # Seek from start of file
+        if whence == os.SEEK_SET:
             new_pos = offset
-        elif whence == 1: # Seek from current position
+        elif whence == os.SEEK_CUR:
             new_pos = curr_pos + offset
-        elif whence == 2: # Seek from EOF
+        elif whence == os.SEEK_END:
             new_pos = self._orig_file_size + offset
         else:
             raise ValueError("whence must be os.SEEK_SET (0), "
@@ -1094,7 +1095,19 @@ class ZipExtFile(io.BufferedIOBase):
         read_offset = new_pos - curr_pos
         buff_offset = read_offset + self._offset
 
-        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
+        # Fast seek uncompressed unencrypted file
+        if self._compress_type == ZIP_STORED and self._decrypter is None and read_offset > 0:
+            # disable CRC checking after first seeking - it would be invalid
+            self._expected_crc = None
+            # seek actual file taking already buffered data into account
+            read_offset -= len(self._readbuffer) - self._offset
+            self._fileobj.seek(read_offset, os.SEEK_CUR)
+            self._left -= read_offset
+            read_offset = 0
+            # flush read buffer
+            self._readbuffer = b''
+            self._offset = 0
+        elif buff_offset >= 0 and buff_offset < len(self._readbuffer):
             # Just move the _offset index if the new position is in the _readbuffer
             self._offset = buff_offset
             read_offset = 0
@@ -1102,6 +1115,7 @@ class ZipExtFile(io.BufferedIOBase):
             # Position is before the current position. Reset the ZipExtFile
             self._fileobj.seek(self._orig_compress_start)
             self._running_crc = self._orig_start_crc
+            self._expected_crc = self._orig_crc
             self._compress_left = self._orig_compress_size
             self._left = self._orig_file_size
             self._readbuffer = b''
diff --git a/Misc/NEWS.d/next/Library/2021-08-27-18-07-35.bpo-44173.oW92Ev.rst b/Misc/NEWS.d/next/Library/2021-08-27-18-07-35.bpo-44173.oW92Ev.rst
new file mode 100644
index 0000000..abc9826
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-08-27-18-07-35.bpo-44173.oW92Ev.rst
@@ -0,0 +1 @@
+Enable fast seeking of uncompressed unencrypted :class:`zipfile.ZipExtFile`
```