summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2016-10-07 19:20:50 (GMT)
committerSerhiy Storchaka <storchaka@gmail.com>2016-10-07 19:20:50 (GMT)
commit8793b215253bf69cc699fab77b12d7f1313360d8 (patch)
tree822be3bb6aa99e0e38b988995e42c032ce72ff1f
parentcf3806026b4f9784f97fce168695c3b4a6bbd8a5 (diff)
downloadcpython-8793b215253bf69cc699fab77b12d7f1313360d8.zip
cpython-8793b215253bf69cc699fab77b12d7f1313360d8.tar.gz
cpython-8793b215253bf69cc699fab77b12d7f1313360d8.tar.bz2
Issue #26293: Fixed writing ZIP files that do not start at the beginning of the
file. Offsets in the ZIP file are now relative to the start of the archive, in conformance with the specification.
-rw-r--r--Lib/test/test_zipfile.py43
-rw-r--r--Lib/zipfile.py30
-rw-r--r--Misc/NEWS4
3 files changed, 62 insertions, 15 deletions
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index d278e06..d18a770 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -415,6 +415,49 @@ class StoredTestsWithSourceFile(AbstractTestsWithSourceFile,
f.seek(len(data))
with zipfile.ZipFile(f, "r") as zipfp:
self.assertEqual(zipfp.namelist(), [TESTFN])
+ self.assertEqual(zipfp.read(TESTFN), self.data)
+ with open(TESTFN2, 'rb') as f:
+ self.assertEqual(f.read(len(data)), data)
+ zipfiledata = f.read()
+ with io.BytesIO(zipfiledata) as bio, zipfile.ZipFile(bio) as zipfp:
+ self.assertEqual(zipfp.namelist(), [TESTFN])
+ self.assertEqual(zipfp.read(TESTFN), self.data)
+
+ def test_read_concatenated_zip_file(self):
+ with io.BytesIO() as bio:
+ with zipfile.ZipFile(bio, 'w', zipfile.ZIP_STORED) as zipfp:
+ zipfp.write(TESTFN, TESTFN)
+ zipfiledata = bio.getvalue()
+ data = b'I am not a ZipFile!'*10
+ with open(TESTFN2, 'wb') as f:
+ f.write(data)
+ f.write(zipfiledata)
+
+ with zipfile.ZipFile(TESTFN2) as zipfp:
+ self.assertEqual(zipfp.namelist(), [TESTFN])
+ self.assertEqual(zipfp.read(TESTFN), self.data)
+
+ def test_append_to_concatenated_zip_file(self):
+ with io.BytesIO() as bio:
+ with zipfile.ZipFile(bio, 'w', zipfile.ZIP_STORED) as zipfp:
+ zipfp.write(TESTFN, TESTFN)
+ zipfiledata = bio.getvalue()
+ data = b'I am not a ZipFile!'*1000000
+ with open(TESTFN2, 'wb') as f:
+ f.write(data)
+ f.write(zipfiledata)
+
+ with zipfile.ZipFile(TESTFN2, 'a') as zipfp:
+ self.assertEqual(zipfp.namelist(), [TESTFN])
+ zipfp.writestr('strfile', self.data)
+
+ with open(TESTFN2, 'rb') as f:
+ self.assertEqual(f.read(len(data)), data)
+ zipfiledata = f.read()
+ with io.BytesIO(zipfiledata) as bio, zipfile.ZipFile(bio) as zipfp:
+ self.assertEqual(zipfp.namelist(), [TESTFN, 'strfile'])
+ self.assertEqual(zipfp.read(TESTFN), self.data)
+ self.assertEqual(zipfp.read('strfile'), self.data)
def test_ignores_newline_at_end(self):
with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED) as zipfp:
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 56a2479..2476717 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -1029,10 +1029,10 @@ class ZipFile:
# even if no files are added to the archive
self._didModify = True
try:
- self.start_dir = self.fp.tell()
+ self.start_dir = self._start_disk = self.fp.tell()
except (AttributeError, OSError):
self.fp = _Tellable(self.fp)
- self.start_dir = 0
+ self.start_dir = self._start_disk = 0
self._seekable = False
else:
# Some file-like objects can provide tell() but not seek()
@@ -1053,7 +1053,7 @@ class ZipFile:
# set the modified flag so central directory gets written
# even if no files are added to the archive
self._didModify = True
- self.start_dir = self.fp.tell()
+ self.start_dir = self._start_disk = self.fp.tell()
else:
raise RuntimeError("Mode must be 'r', 'w', 'x', or 'a'")
except:
@@ -1097,17 +1097,18 @@ class ZipFile:
offset_cd = endrec[_ECD_OFFSET] # offset of central directory
self._comment = endrec[_ECD_COMMENT] # archive comment
- # "concat" is zero, unless zip was concatenated to another file
- concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
+ # self._start_disk: Position of the start of ZIP archive
+ # It is zero, unless ZIP was concatenated to another file
+ self._start_disk = endrec[_ECD_LOCATION] - size_cd - offset_cd
if endrec[_ECD_SIGNATURE] == stringEndArchive64:
# If Zip64 extension structures are present, account for them
- concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
+ self._start_disk -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
if self.debug > 2:
- inferred = concat + offset_cd
- print("given, inferred, offset", offset_cd, inferred, concat)
+ inferred = self._start_disk + offset_cd
+ print("given, inferred, offset", offset_cd, inferred, self._start_disk)
# self.start_dir: Position of start of central directory
- self.start_dir = offset_cd + concat
+ self.start_dir = offset_cd + self._start_disk
fp.seek(self.start_dir, 0)
data = fp.read(size_cd)
fp = io.BytesIO(data)
@@ -1147,7 +1148,7 @@ class ZipFile:
t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
x._decodeExtra()
- x.header_offset = x.header_offset + concat
+ x.header_offset = x.header_offset + self._start_disk
self.filelist.append(x)
self.NameToInfo[x.filename] = x
@@ -1627,11 +1628,10 @@ class ZipFile:
file_size = zinfo.file_size
compress_size = zinfo.compress_size
- if zinfo.header_offset > ZIP64_LIMIT:
- extra.append(zinfo.header_offset)
+ header_offset = zinfo.header_offset - self._start_disk
+ if header_offset > ZIP64_LIMIT:
+ extra.append(header_offset)
header_offset = 0xffffffff
- else:
- header_offset = zinfo.header_offset
extra_data = zinfo.extra
min_version = 0
@@ -1678,7 +1678,7 @@ class ZipFile:
# Write end-of-zip-archive record
centDirCount = len(self.filelist)
centDirSize = pos2 - self.start_dir
- centDirOffset = self.start_dir
+ centDirOffset = self.start_dir - self._start_disk
requires_zip64 = None
if centDirCount > ZIP_FILECOUNT_LIMIT:
requires_zip64 = "Files count"
diff --git a/Misc/NEWS b/Misc/NEWS
index 369aa8b..36a7f30 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -92,6 +92,10 @@ Core and Builtins
Library
-------
+- Issue #26293: Fixed writing ZIP files that do not start at the beginning of
+ the file. Offsets in the ZIP file are now relative to the start of the
+ archive, in conformance with the specification.
+
- Issue #28321: Fixed writing non-BMP characters with binary format in plistlib.
- Issue #28322: Fixed possible crashes when unpickle itertools objects from