summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniel Hillier <daniel.hillier@gmail.com>2019-10-29 07:24:18 (GMT)
committerSerhiy Storchaka <storchaka@gmail.com>2019-10-29 07:24:18 (GMT)
commitda6ce58dd5ac109485af45878fca6bfd265b43e9 (patch)
tree05b7f6dc9bcdc8cd6b9a781da8a1caeb9b344399
parent4c155f738dc2e70ccb574f5169ad89c01753c6f7 (diff)
downloadcpython-da6ce58dd5ac109485af45878fca6bfd265b43e9.zip
cpython-da6ce58dd5ac109485af45878fca6bfd265b43e9.tar.gz
cpython-da6ce58dd5ac109485af45878fca6bfd265b43e9.tar.bz2
bpo-36993: Improve error reporting for zipfiles with bad zip64 extra data. (GH-14656)
-rw-r--r--Lib/test/test_zipfile.py222
-rw-r--r--Lib/zipfile.py12
-rw-r--r--Misc/NEWS.d/next/Library/2019-07-09-05-44-39.bpo-36993.4javqu.rst2
3 files changed, 236 insertions, 0 deletions
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index 6e1291e..1e1854b 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -1,6 +1,7 @@
import contextlib
import importlib.util
import io
+import itertools
import os
import pathlib
import posixpath
@@ -824,6 +825,227 @@ class StoredTestZip64InSmallFiles(AbstractTestZip64InSmallFiles,
zinfo = zipfp.getinfo("strfile")
self.assertEqual(zinfo.extra, extra)
+ def make_zip64_file(
+ self, file_size_64_set=False, file_size_extra=False,
+ compress_size_64_set=False, compress_size_extra=False,
+ header_offset_64_set=False, header_offset_extra=False,
+ ):
+ """Generate bytes sequence for a zip with (incomplete) zip64 data.
+
+ The actual values (not the zip 64 0xffffffff values) stored in the file
+ are:
+ file_size: 8
+ compress_size: 8
+ header_offset: 0
+ """
+ actual_size = 8
+ actual_header_offset = 0
+ local_zip64_fields = []
+ central_zip64_fields = []
+
+ file_size = actual_size
+ if file_size_64_set:
+ file_size = 0xffffffff
+ if file_size_extra:
+ local_zip64_fields.append(actual_size)
+ central_zip64_fields.append(actual_size)
+ file_size = struct.pack("<L", file_size)
+
+ compress_size = actual_size
+ if compress_size_64_set:
+ compress_size = 0xffffffff
+ if compress_size_extra:
+ local_zip64_fields.append(actual_size)
+ central_zip64_fields.append(actual_size)
+ compress_size = struct.pack("<L", compress_size)
+
+ header_offset = actual_header_offset
+ if header_offset_64_set:
+ header_offset = 0xffffffff
+ if header_offset_extra:
+ central_zip64_fields.append(actual_header_offset)
+ header_offset = struct.pack("<L", header_offset)
+
+ local_extra = struct.pack(
+ '<HH' + 'Q'*len(local_zip64_fields),
+ 0x0001,
+ 8*len(local_zip64_fields),
+ *local_zip64_fields
+ )
+
+ central_extra = struct.pack(
+ '<HH' + 'Q'*len(central_zip64_fields),
+ 0x0001,
+ 8*len(central_zip64_fields),
+ *central_zip64_fields
+ )
+
+ central_dir_size = struct.pack('<Q', 58 + 8 * len(central_zip64_fields))
+ offset_to_central_dir = struct.pack('<Q', 50 + 8 * len(local_zip64_fields))
+
+ local_extra_length = struct.pack("<H", 4 + 8 * len(local_zip64_fields))
+ central_extra_length = struct.pack("<H", 4 + 8 * len(central_zip64_fields))
+
+ filename = b"test.txt"
+ content = b"test1234"
+ filename_length = struct.pack("<H", len(filename))
+ zip64_contents = (
+ # Local file header
+ b"PK\x03\x04\x14\x00\x00\x00\x00\x00\x00\x00!\x00\x9e%\xf5\xaf"
+ + compress_size
+ + file_size
+ + filename_length
+ + local_extra_length
+ + filename
+ + local_extra
+ + content
+ # Central directory:
+ + b"PK\x01\x02-\x03-\x00\x00\x00\x00\x00\x00\x00!\x00\x9e%\xf5\xaf"
+ + compress_size
+ + file_size
+ + filename_length
+ + central_extra_length
+ + b"\x00\x00\x00\x00\x00\x00\x00\x00\x80\x01"
+ + header_offset
+ + filename
+ + central_extra
+ # Zip64 end of central directory
+ + b"PK\x06\x06,\x00\x00\x00\x00\x00\x00\x00-\x00-"
+ + b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00"
+ + b"\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00"
+ + central_dir_size
+ + offset_to_central_dir
+ # Zip64 end of central directory locator
+ + b"PK\x06\x07\x00\x00\x00\x00l\x00\x00\x00\x00\x00\x00\x00\x01"
+ + b"\x00\x00\x00"
+ # end of central directory
+ + b"PK\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00:\x00\x00\x002\x00"
+ + b"\x00\x00\x00\x00"
+ )
+ return zip64_contents
+
+ def test_bad_zip64_extra(self):
+ """Missing zip64 extra records raises an exception.
+
+ There are 4 fields that the zip64 format handles (the disk number is
+ not used in this module and so is ignored here). According to the zip
+ spec:
+ The order of the fields in the zip64 extended
+ information record is fixed, but the fields MUST
+ only appear if the corresponding Local or Central
+ directory record field is set to 0xFFFF or 0xFFFFFFFF.
+
+ If the zip64 extra content doesn't contain enough entries for the
+ number of fields marked with 0xFFFF or 0xFFFFFFFF, we raise an error.
+ This test mismatches the length of the zip64 extra field and the number
+ of fields set to indicate the presence of zip64 data.
+ """
+ # zip64 file size present, no fields in extra, expecting one, equals
+ # missing file size.
+ missing_file_size_extra = self.make_zip64_file(
+ file_size_64_set=True,
+ )
+ with self.assertRaises(zipfile.BadZipFile) as e:
+ zipfile.ZipFile(io.BytesIO(missing_file_size_extra))
+ self.assertIn('file size', str(e.exception).lower())
+
+ # zip64 file size present, zip64 compress size present, one field in
+ # extra, expecting two, equals missing compress size.
+ missing_compress_size_extra = self.make_zip64_file(
+ file_size_64_set=True,
+ file_size_extra=True,
+ compress_size_64_set=True,
+ )
+ with self.assertRaises(zipfile.BadZipFile) as e:
+ zipfile.ZipFile(io.BytesIO(missing_compress_size_extra))
+ self.assertIn('compress size', str(e.exception).lower())
+
+ # zip64 compress size present, no fields in extra, expecting one,
+ # equals missing compress size.
+ missing_compress_size_extra = self.make_zip64_file(
+ compress_size_64_set=True,
+ )
+ with self.assertRaises(zipfile.BadZipFile) as e:
+ zipfile.ZipFile(io.BytesIO(missing_compress_size_extra))
+ self.assertIn('compress size', str(e.exception).lower())
+
+ # zip64 file size present, zip64 compress size present, zip64 header
+ # offset present, two fields in extra, expecting three, equals missing
+ # header offset
+ missing_header_offset_extra = self.make_zip64_file(
+ file_size_64_set=True,
+ file_size_extra=True,
+ compress_size_64_set=True,
+ compress_size_extra=True,
+ header_offset_64_set=True,
+ )
+ with self.assertRaises(zipfile.BadZipFile) as e:
+ zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
+ self.assertIn('header offset', str(e.exception).lower())
+
+ # zip64 compress size present, zip64 header offset present, one field
+ # in extra, expecting two, equals missing header offset
+ missing_header_offset_extra = self.make_zip64_file(
+ file_size_64_set=False,
+ compress_size_64_set=True,
+ compress_size_extra=True,
+ header_offset_64_set=True,
+ )
+ with self.assertRaises(zipfile.BadZipFile) as e:
+ zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
+ self.assertIn('header offset', str(e.exception).lower())
+
+ # zip64 file size present, zip64 header offset present, one field in
+ # extra, expecting two, equals missing header offset
+ missing_header_offset_extra = self.make_zip64_file(
+ file_size_64_set=True,
+ file_size_extra=True,
+ compress_size_64_set=False,
+ header_offset_64_set=True,
+ )
+ with self.assertRaises(zipfile.BadZipFile) as e:
+ zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
+ self.assertIn('header offset', str(e.exception).lower())
+
+ # zip64 header offset present, no fields in extra, expecting one,
+ # equals missing header offset
+ missing_header_offset_extra = self.make_zip64_file(
+ file_size_64_set=False,
+ compress_size_64_set=False,
+ header_offset_64_set=True,
+ )
+ with self.assertRaises(zipfile.BadZipFile) as e:
+ zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
+ self.assertIn('header offset', str(e.exception).lower())
+
+ def test_generated_valid_zip64_extra(self):
+ # These values are what is set in the make_zip64_file method.
+ expected_file_size = 8
+ expected_compress_size = 8
+ expected_header_offset = 0
+ expected_content = b"test1234"
+
+ # Loop through the various valid combinations of zip64 masks
+ # present and extra fields present.
+ params = (
+ {"file_size_64_set": True, "file_size_extra": True},
+ {"compress_size_64_set": True, "compress_size_extra": True},
+ {"header_offset_64_set": True, "header_offset_extra": True},
+ )
+
+ for r in range(1, len(params) + 1):
+ for combo in itertools.combinations(params, r):
+ kwargs = {}
+ for c in combo:
+ kwargs.update(c)
+ with zipfile.ZipFile(io.BytesIO(self.make_zip64_file(**kwargs))) as zf:
+ zinfo = zf.infolist()[0]
+ self.assertEqual(zinfo.file_size, expected_file_size)
+ self.assertEqual(zinfo.compress_size, expected_compress_size)
+ self.assertEqual(zinfo.header_offset, expected_header_offset)
+ self.assertEqual(zf.read(zinfo), expected_content)
+
+
@requires_zlib
class DeflateTestZip64InSmallFiles(AbstractTestZip64InSmallFiles,
unittest.TestCase):
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 6201edc..6504e0e 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -480,14 +480,26 @@ class ZipInfo (object):
# ZIP64 extension (large files and/or large archives)
if self.file_size in (0xffffffffffffffff, 0xffffffff):
+ if len(counts) <= idx:
+ raise BadZipFile(
+ "Corrupt zip64 extra field. File size not found."
+ )
self.file_size = counts[idx]
idx += 1
if self.compress_size == 0xFFFFFFFF:
+ if len(counts) <= idx:
+ raise BadZipFile(
+ "Corrupt zip64 extra field. Compress size not found."
+ )
self.compress_size = counts[idx]
idx += 1
if self.header_offset == 0xffffffff:
+ if len(counts) <= idx:
+ raise BadZipFile(
+ "Corrupt zip64 extra field. Header offset not found."
+ )
old = self.header_offset
self.header_offset = counts[idx]
idx+=1
diff --git a/Misc/NEWS.d/next/Library/2019-07-09-05-44-39.bpo-36993.4javqu.rst b/Misc/NEWS.d/next/Library/2019-07-09-05-44-39.bpo-36993.4javqu.rst
new file mode 100644
index 0000000..009e07b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-07-09-05-44-39.bpo-36993.4javqu.rst
@@ -0,0 +1,2 @@
+Improve error reporting for corrupt zip files with bad zip64 extra data. Patch
+by Daniel Hillier.