summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
Diffstat (limited to 'Lib')
-rw-r--r--Lib/test/test_zipfile/test_core.py153
-rw-r--r--Lib/zipfile/__init__.py32
2 files changed, 170 insertions, 15 deletions
diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py
index 73c6b01..9960259 100644
--- a/Lib/test/test_zipfile/test_core.py
+++ b/Lib/test/test_zipfile/test_core.py
@@ -1080,6 +1080,159 @@ class StoredTestZip64InSmallFiles(AbstractTestZip64InSmallFiles,
self.assertEqual(zinfo.header_offset, expected_header_offset)
self.assertEqual(zf.read(zinfo), expected_content)
+ def test_force_zip64(self):
+ """Test that forcing zip64 extensions correctly notes this in the zip file"""
+
+ # GH-103861 describes an issue where forcing a small file to use zip64
+ # extensions would add a zip64 extra record, but not change the data
+ # sizes to 0xFFFFFFFF to indicate to the extractor that the zip64
+ # record should be read. Additionally, it would not set the required
+ # version to indicate that zip64 extensions are required to extract it.
+ # This test replicates the situation and reads the raw data to specifically ensure:
+ # - The required extract version is always >= ZIP64_VERSION
+ # - The compressed and uncompressed size in the file headers are both
+ # 0xFFFFFFFF (ie. point to zip64 record)
+ # - The zip64 record is provided and has the correct sizes in it
+ # Other aspects of the zip are checked as well, but verifying the above is the main goal.
+ # Because this is hard to verify by parsing the data as a zip, the raw
+ # bytes are checked to ensure that they line up with the zip spec.
+ # The spec for this can be found at: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
+ # The relevant sections for this test are:
+ # - 4.3.7 for local file header
+ # - 4.5.3 for zip64 extra field
+
+ data = io.BytesIO()
+ with zipfile.ZipFile(data, mode="w", allowZip64=True) as zf:
+ with zf.open("text.txt", mode="w", force_zip64=True) as zi:
+ zi.write(b"_")
+
+ zipdata = data.getvalue()
+
+ # pull out and check zip information
+ (
+ header, vers, os, flags, comp, csize, usize, fn_len,
+ ex_total_len, filename, ex_id, ex_len, ex_usize, ex_csize, cd_sig
+ ) = struct.unpack("<4sBBHH8xIIHH8shhQQx4s", zipdata[:63])
+
+ self.assertEqual(header, b"PK\x03\x04") # local file header
+ self.assertGreaterEqual(vers, zipfile.ZIP64_VERSION) # requires zip64 to extract
+ self.assertEqual(os, 0) # compatible with MS-DOS
+ self.assertEqual(flags, 0) # no flags
+ self.assertEqual(comp, 0) # compression method = stored
+ self.assertEqual(csize, 0xFFFFFFFF) # sizes are in zip64 extra
+ self.assertEqual(usize, 0xFFFFFFFF)
+ self.assertEqual(fn_len, 8) # filename len
+ self.assertEqual(ex_total_len, 20) # size of extra records
+ self.assertEqual(ex_id, 1) # Zip64 extra record
+ self.assertEqual(ex_len, 16) # 16 bytes of data
+ self.assertEqual(ex_usize, 1) # uncompressed size
+ self.assertEqual(ex_csize, 1) # compressed size
+ self.assertEqual(cd_sig, b"PK\x01\x02") # ensure the central directory header is next
+
+ z = zipfile.ZipFile(io.BytesIO(zipdata))
+ zinfos = z.infolist()
+ self.assertEqual(len(zinfos), 1)
+ self.assertGreaterEqual(zinfos[0].extract_version, zipfile.ZIP64_VERSION) # requires zip64 to extract
+
+ def test_unseekable_zip_unknown_filesize(self):
+ """Test that creating a zip with/without seeking will raise a RuntimeError if zip64 was required but not used"""
+
+ def make_zip(fp):
+ with zipfile.ZipFile(fp, mode="w", allowZip64=True) as zf:
+ with zf.open("text.txt", mode="w", force_zip64=False) as zi:
+ zi.write(b"_" * (zipfile.ZIP64_LIMIT + 1))
+
+ self.assertRaises(RuntimeError, make_zip, io.BytesIO())
+ self.assertRaises(RuntimeError, make_zip, Unseekable(io.BytesIO()))
+
+ def test_zip64_required_not_allowed_fail(self):
+ """Test that trying to add a large file to a zip that doesn't allow zip64 extensions fails on add"""
+ def make_zip(fp):
+ with zipfile.ZipFile(fp, mode="w", allowZip64=False) as zf:
+ # pretend zipfile.ZipInfo.from_file was used to get the name and filesize
+ info = zipfile.ZipInfo("text.txt")
+ info.file_size = zipfile.ZIP64_LIMIT + 1
+ zf.open(info, mode="w")
+
+ self.assertRaises(zipfile.LargeZipFile, make_zip, io.BytesIO())
+ self.assertRaises(zipfile.LargeZipFile, make_zip, Unseekable(io.BytesIO()))
+
+ def test_unseekable_zip_known_filesize(self):
+ """Test that creating a zip without seeking will use zip64 extensions if the file size is provided up-front"""
+
+ # This test ensures that the zip will use a zip64 data descriptor (same
+ # as a regular data descriptor except the sizes are 8 bytes instead of
+ # 4) record to communicate the size of a file if the zip is being
+ # written to an unseekable stream.
+ # Because this sort of thing is hard to verify by parsing the data back
+ # in as a zip, this test looks at the raw bytes created to ensure that
+ # the correct data has been generated.
+ # The spec for this can be found at: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
+ # The relevant sections for this test are:
+ # - 4.3.7 for local file header
+ # - 4.3.9 for the data descriptor
+ # - 4.5.3 for zip64 extra field
+
+ file_size = zipfile.ZIP64_LIMIT + 1
+
+ def make_zip(fp):
+ with zipfile.ZipFile(fp, mode="w", allowZip64=True) as zf:
+ # pretend zipfile.ZipInfo.from_file was used to get the name and filesize
+ info = zipfile.ZipInfo("text.txt")
+ info.file_size = file_size
+ with zf.open(info, mode="w", force_zip64=False) as zi:
+ zi.write(b"_" * file_size)
+ return fp
+
+ # check seekable file information
+ seekable_data = make_zip(io.BytesIO()).getvalue()
+ (
+ header, vers, os, flags, comp, csize, usize, fn_len,
+ ex_total_len, filename, ex_id, ex_len, ex_usize, ex_csize,
+ cd_sig
+ ) = struct.unpack("<4sBBHH8xIIHH8shhQQ{}x4s".format(file_size), seekable_data[:62 + file_size])
+
+ self.assertEqual(header, b"PK\x03\x04") # local file header
+ self.assertGreaterEqual(vers, zipfile.ZIP64_VERSION) # requires zip64 to extract
+ self.assertEqual(os, 0) # compatible with MS-DOS
+ self.assertEqual(flags, 0) # no flags set
+ self.assertEqual(comp, 0) # compression method = stored
+ self.assertEqual(csize, 0xFFFFFFFF) # sizes are in zip64 extra
+ self.assertEqual(usize, 0xFFFFFFFF)
+ self.assertEqual(fn_len, 8) # filename len
+ self.assertEqual(ex_total_len, 20) # size of extra records
+ self.assertEqual(ex_id, 1) # Zip64 extra record
+ self.assertEqual(ex_len, 16) # 16 bytes of data
+ self.assertEqual(ex_usize, file_size) # uncompressed size
+ self.assertEqual(ex_csize, file_size) # compressed size
+ self.assertEqual(cd_sig, b"PK\x01\x02") # ensure the central directory header is next
+
+ # check unseekable file information
+ unseekable_data = make_zip(Unseekable(io.BytesIO())).fp.getvalue()
+ (
+ header, vers, os, flags, comp, csize, usize, fn_len,
+ ex_total_len, filename, ex_id, ex_len, ex_usize, ex_csize,
+ dd_header, dd_usize, dd_csize, cd_sig
+ ) = struct.unpack("<4sBBHH8xIIHH8shhQQ{}x4s4xQQ4s".format(file_size), unseekable_data[:86 + file_size])
+
+ self.assertEqual(header, b"PK\x03\x04") # local file header
+ self.assertGreaterEqual(vers, zipfile.ZIP64_VERSION) # requires zip64 to extract
+ self.assertEqual(os, 0) # compatible with MS-DOS
+ self.assertEqual("{:b}".format(flags), "1000") # streaming flag set
+ self.assertEqual(comp, 0) # compression method = stored
+ self.assertEqual(csize, 0xFFFFFFFF) # sizes are in zip64 extra
+ self.assertEqual(usize, 0xFFFFFFFF)
+ self.assertEqual(fn_len, 8) # filename len
+ self.assertEqual(ex_total_len, 20) # size of extra records
+ self.assertEqual(ex_id, 1) # Zip64 extra record
+ self.assertEqual(ex_len, 16) # 16 bytes of data
+ self.assertEqual(ex_usize, 0) # uncompressed size - 0 to defer to data descriptor
+ self.assertEqual(ex_csize, 0) # compressed size - 0 to defer to data descriptor
+ self.assertEqual(dd_header, b"PK\07\x08") # data descriptor
+ self.assertEqual(dd_usize, file_size) # file size (8 bytes because zip64)
+ self.assertEqual(dd_csize, file_size) # compressed size (8 bytes because zip64)
+ self.assertEqual(cd_sig, b"PK\x01\x02") # ensure the central directory header is next
+
@requires_zlib()
class DeflateTestZip64InSmallFiles(AbstractTestZip64InSmallFiles,
diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py
index 116b939..9fc1840 100644
--- a/Lib/zipfile/__init__.py
+++ b/Lib/zipfile/__init__.py
@@ -442,7 +442,12 @@ class ZipInfo (object):
return ''.join(result)
def FileHeader(self, zip64=None):
- """Return the per-file header as a bytes object."""
+ """Return the per-file header as a bytes object.
+
+ When the optional zip64 arg is None rather than a bool, we will
+ decide based upon the file_size and compress_size, if known,
+ False otherwise.
+ """
dt = self.date_time
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
@@ -458,16 +463,13 @@ class ZipInfo (object):
min_version = 0
if zip64 is None:
+ # We always explicitly pass zip64 within this module.... This
+ # remains for anyone using ZipInfo.FileHeader as a public API.
zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
if zip64:
fmt = '<HHQQ'
extra = extra + struct.pack(fmt,
1, struct.calcsize(fmt)-4, file_size, compress_size)
- if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
- if not zip64:
- raise LargeZipFile("Filesize would require ZIP64 extensions")
- # File is larger than what fits into a 4 byte integer,
- # fall back to the ZIP64 extension
file_size = 0xffffffff
compress_size = 0xffffffff
min_version = ZIP64_VERSION
@@ -1219,6 +1221,12 @@ class _ZipWriteFile(io.BufferedIOBase):
self._zinfo.CRC = self._crc
self._zinfo.file_size = self._file_size
+ if not self._zip64:
+ if self._file_size > ZIP64_LIMIT:
+ raise RuntimeError("File size too large, try using force_zip64")
+ if self._compress_size > ZIP64_LIMIT:
+ raise RuntimeError("Compressed size too large, try using force_zip64")
+
# Write updated header info
if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
# Write CRC and file sizes after the file data
@@ -1227,13 +1235,6 @@ class _ZipWriteFile(io.BufferedIOBase):
self._zinfo.compress_size, self._zinfo.file_size))
self._zipfile.start_dir = self._fileobj.tell()
else:
- if not self._zip64:
- if self._file_size > ZIP64_LIMIT:
- raise RuntimeError(
- 'File size too large, try using force_zip64')
- if self._compress_size > ZIP64_LIMIT:
- raise RuntimeError(
- 'Compressed size too large, try using force_zip64')
# Seek backwards and write file header (which will now include
# correct CRC and file sizes)
@@ -1672,8 +1673,9 @@ class ZipFile:
zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
# Compressed size can be larger than uncompressed size
- zip64 = self._allowZip64 and \
- (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
+ zip64 = force_zip64 or (zinfo.file_size * 1.05 > ZIP64_LIMIT)
+ if not self._allowZip64 and zip64:
+ raise LargeZipFile("Filesize would require ZIP64 extensions")
if self._seekable:
self.fp.seek(self.start_dir)