summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: JuniorJPDJ <github.com@juniorjpdj.pl> 2022-08-06 23:21:23 (GMT)
committer: GitHub <noreply@github.com> 2022-08-06 23:21:23 (GMT)
commit: 330f1d58282517bdf1f19577ab9317fa9810bf95 (patch)
tree: c3e6ed80f849028056fbf64a9bb5cad8ad0d974d
parent: 56af5a200d60e86a8ac450264729d693053275e3 (diff)
download: cpython-330f1d58282517bdf1f19577ab9317fa9810bf95.zip
cpython-330f1d58282517bdf1f19577ab9317fa9810bf95.tar.gz
cpython-330f1d58282517bdf1f19577ab9317fa9810bf95.tar.bz2
gh-88339: enable fast seeking of uncompressed unencrypted zipfile.ZipExtFile (GH-27737)
Avoid reading all of the intermediate data in uncompressed items in a zip file when the user seeks forward. Contributed by: @JuniorJPDJ
-rw-r--r-- Lib/test/test_zipfile.py 2
-rw-r--r-- Lib/zipfile.py 24
-rw-r--r-- Misc/NEWS.d/next/Library/2021-08-27-18-07-35.bpo-44173.oW92Ev.rst 1
3 files changed, 22 insertions, 5 deletions
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index fa0ca5a..2125778 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -2032,6 +2032,7 @@ class OtherTests(unittest.TestCase):
fp.seek(bloc, os.SEEK_CUR)
self.assertEqual(fp.tell(), bloc)
self.assertEqual(fp.read(5), txt[bloc:bloc+5])
+ self.assertEqual(fp.tell(), bloc + 5)
fp.seek(0, os.SEEK_END)
self.assertEqual(fp.tell(), len(txt))
fp.seek(0, os.SEEK_SET)
@@ -2049,6 +2050,7 @@ class OtherTests(unittest.TestCase):
fp.seek(bloc, os.SEEK_CUR)
self.assertEqual(fp.tell(), bloc)
self.assertEqual(fp.read(5), txt[bloc:bloc+5])
+ self.assertEqual(fp.tell(), bloc + 5)
fp.seek(0, os.SEEK_END)
self.assertEqual(fp.tell(), len(txt))
fp.seek(0, os.SEEK_SET)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index e3b7a61..9815600 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -847,6 +847,7 @@ class ZipExtFile(io.BufferedIOBase):
self._orig_compress_size = zipinfo.compress_size
self._orig_file_size = zipinfo.file_size
self._orig_start_crc = self._running_crc
+ self._orig_crc = self._expected_crc
self._seekable = True
except AttributeError:
pass
@@ -1069,17 +1070,17 @@ class ZipExtFile(io.BufferedIOBase):
raise ValueError("I/O operation on closed file.")
return self._seekable
- def seek(self, offset, whence=0):
+ def seek(self, offset, whence=os.SEEK_SET):
if self.closed:
raise ValueError("seek on closed file.")
if not self._seekable:
raise io.UnsupportedOperation("underlying stream is not seekable")
curr_pos = self.tell()
- if whence == 0: # Seek from start of file
+ if whence == os.SEEK_SET:
new_pos = offset
- elif whence == 1: # Seek from current position
+ elif whence == os.SEEK_CUR:
new_pos = curr_pos + offset
- elif whence == 2: # Seek from EOF
+ elif whence == os.SEEK_END:
new_pos = self._orig_file_size + offset
else:
raise ValueError("whence must be os.SEEK_SET (0), "
@@ -1094,7 +1095,19 @@ class ZipExtFile(io.BufferedIOBase):
read_offset = new_pos - curr_pos
buff_offset = read_offset + self._offset
- if buff_offset >= 0 and buff_offset < len(self._readbuffer):
+ # Fast seek uncompressed unencrypted file
+ if self._compress_type == ZIP_STORED and self._decrypter is None and read_offset > 0:
+ # disable CRC checking after first seeking - it would be invalid
+ self._expected_crc = None
+ # seek actual file taking already buffered data into account
+ read_offset -= len(self._readbuffer) - self._offset
+ self._fileobj.seek(read_offset, os.SEEK_CUR)
+ self._left -= read_offset
+ read_offset = 0
+ # flush read buffer
+ self._readbuffer = b''
+ self._offset = 0
+ elif buff_offset >= 0 and buff_offset < len(self._readbuffer):
# Just move the _offset index if the new position is in the _readbuffer
self._offset = buff_offset
read_offset = 0
@@ -1102,6 +1115,7 @@ class ZipExtFile(io.BufferedIOBase):
# Position is before the current position. Reset the ZipExtFile
self._fileobj.seek(self._orig_compress_start)
self._running_crc = self._orig_start_crc
+ self._expected_crc = self._orig_crc
self._compress_left = self._orig_compress_size
self._left = self._orig_file_size
self._readbuffer = b''
diff --git a/Misc/NEWS.d/next/Library/2021-08-27-18-07-35.bpo-44173.oW92Ev.rst b/Misc/NEWS.d/next/Library/2021-08-27-18-07-35.bpo-44173.oW92Ev.rst
new file mode 100644
index 0000000..abc9826
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-08-27-18-07-35.bpo-44173.oW92Ev.rst
@@ -0,0 +1 @@
+Enable fast seeking of uncompressed unencrypted :class:`zipfile.ZipExtFile`