summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/lib/libzipfile.tex24
-rw-r--r--Lib/test/test_zipfile.py252
-rw-r--r--Lib/test/test_zipfile64.py67
-rw-r--r--Lib/zipfile.py384
-rw-r--r--Misc/NEWS1
5 files changed, 665 insertions, 63 deletions
diff --git a/Doc/lib/libzipfile.tex b/Doc/lib/libzipfile.tex
index 4e06ef6..d7c08f6 100644
--- a/Doc/lib/libzipfile.tex
+++ b/Doc/lib/libzipfile.tex
@@ -17,7 +17,8 @@ understanding of the format, as defined in
Note}.
This module does not currently handle ZIP files which have appended
-comments, or multi-disk ZIP files.
+comments, or multi-disk ZIP files. It can handle ZIP files that use the
+ZIP64 extensions (that is, ZIP files that are more than 4 GByte in size).
The available attributes of this module are:
@@ -25,6 +26,11 @@ The available attributes of this module are:
The error raised for bad ZIP files.
\end{excdesc}
+\begin{excdesc}{LargeZipFile}
+ The error raised when a ZIP file would require ZIP64 functionality but that
+ functionality has not been enabled.
+\end{excdesc}
+
\begin{classdesc*}{ZipFile}
The class for reading and writing ZIP files. See
``\citetitle{ZipFile Objects}'' (section \ref{zipfile-objects}) for
@@ -77,7 +83,7 @@ The available attributes of this module are:
\subsection{ZipFile Objects \label{zipfile-objects}}
-\begin{classdesc}{ZipFile}{file\optional{, mode\optional{, compression}}}
+\begin{classdesc}{ZipFile}{file\optional{, mode\optional{, compression\optional{, allowZip64}}}}
Open a ZIP file, where \var{file} can be either a path to a file
(a string) or a file-like object. The \var{mode} parameter
should be \code{'r'} to read an existing file, \code{'w'} to
@@ -100,6 +106,12 @@ cat myzip.zip >> python.exe
is specified but the \refmodule{zlib} module is not available,
\exception{RuntimeError} is also raised. The default is
\constant{ZIP_STORED}.
+ If \var{allowZip64} is \code{True}, zipfile will create ZIP files that use
+ the ZIP64 extensions when the zipfile is larger than 2 GBytes. If it is
+ false (the default), zipfile will raise \exception{LargeZipFile} when the
+ zipfile would require ZIP64 extensions. ZIP64 extensions are disabled by
+ default because the default zip and unzip commands on Unix (the InfoZIP
+ utilities) don't support these extensions.
\end{classdesc}
\begin{methoddesc}{close}{}
@@ -132,8 +144,8 @@ cat myzip.zip >> python.exe
\end{methoddesc}
\begin{methoddesc}{testzip}{}
- Read all the files in the archive and check their CRC's. Return the
- name of the first bad file, or else return \code{None}.
+ Read all the files in the archive and check their CRC's and file
+ headers. Return the name of the first bad file, or else return \code{None}.
\end{methoddesc}
\begin{methoddesc}{write}{filename\optional{, arcname\optional{,
@@ -284,10 +296,6 @@ Instances have the following attributes:
Byte offset to the file header.
\end{memberdesc}
-\begin{memberdesc}[ZipInfo]{file_offset}
- Byte offset to the start of the file data.
-\end{memberdesc}
-
\begin{memberdesc}[ZipInfo]{CRC}
CRC-32 of the uncompressed file.
\end{memberdesc}
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index 0241348..a409d5c 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -4,7 +4,7 @@ try:
except ImportError:
zlib = None
-import zipfile, os, unittest
+import zipfile, os, unittest, sys, shutil
from StringIO import StringIO
from tempfile import TemporaryFile
@@ -28,14 +28,70 @@ class TestsWithSourceFile(unittest.TestCase):
zipfp = zipfile.ZipFile(f, "w", compression)
zipfp.write(TESTFN, "another"+os.extsep+"name")
zipfp.write(TESTFN, TESTFN)
+ zipfp.writestr("strfile", self.data)
zipfp.close()
# Read the ZIP archive
zipfp = zipfile.ZipFile(f, "r", compression)
self.assertEqual(zipfp.read(TESTFN), self.data)
self.assertEqual(zipfp.read("another"+os.extsep+"name"), self.data)
+ self.assertEqual(zipfp.read("strfile"), self.data)
+
+ # Print the ZIP directory
+ fp = StringIO()
+ stdout = sys.stdout
+ try:
+ sys.stdout = fp
+
+ zipfp.printdir()
+ finally:
+ sys.stdout = stdout
+
+ directory = fp.getvalue()
+ lines = directory.splitlines()
+ self.assertEquals(len(lines), 4) # Number of files + header
+
+ self.assert_('File Name' in lines[0])
+ self.assert_('Modified' in lines[0])
+ self.assert_('Size' in lines[0])
+
+ fn, date, time, size = lines[1].split()
+ self.assertEquals(fn, 'another.name')
+ # XXX: timestamp is not tested
+ self.assertEquals(size, str(len(self.data)))
+
+ # Check the namelist
+ names = zipfp.namelist()
+ self.assertEquals(len(names), 3)
+ self.assert_(TESTFN in names)
+ self.assert_("another"+os.extsep+"name" in names)
+ self.assert_("strfile" in names)
+
+ # Check infolist
+ infos = zipfp.infolist()
+ names = [ i.filename for i in infos ]
+ self.assertEquals(len(names), 3)
+ self.assert_(TESTFN in names)
+ self.assert_("another"+os.extsep+"name" in names)
+ self.assert_("strfile" in names)
+ for i in infos:
+ self.assertEquals(i.file_size, len(self.data))
+
+ # check getinfo
+ for nm in (TESTFN, "another"+os.extsep+"name", "strfile"):
+ info = zipfp.getinfo(nm)
+ self.assertEquals(info.filename, nm)
+ self.assertEquals(info.file_size, len(self.data))
+
+ # Check that testzip doesn't raise an exception
+ zipfp.testzip()
+
+
zipfp.close()
+
+
+
def testStored(self):
for f in (TESTFN2, TemporaryFile(), StringIO()):
self.zipTest(f, zipfile.ZIP_STORED)
@@ -59,6 +115,197 @@ class TestsWithSourceFile(unittest.TestCase):
os.remove(TESTFN)
os.remove(TESTFN2)
+class TestZip64InSmallFiles(unittest.TestCase):
+ # These tests test the ZIP64 functionality without using large files,
+ # see test_zipfile64 for proper tests.
+
+ def setUp(self):
+ self._limit = zipfile.ZIP64_LIMIT
+ zipfile.ZIP64_LIMIT = 5
+
+ line_gen = ("Test of zipfile line %d." % i for i in range(0, 1000))
+ self.data = '\n'.join(line_gen)
+
+ # Make a source file with some lines
+ fp = open(TESTFN, "wb")
+ fp.write(self.data)
+ fp.close()
+
+ def largeFileExceptionTest(self, f, compression):
+ zipfp = zipfile.ZipFile(f, "w", compression)
+ self.assertRaises(zipfile.LargeZipFile,
+ zipfp.write, TESTFN, "another"+os.extsep+"name")
+ zipfp.close()
+
+ def largeFileExceptionTest2(self, f, compression):
+ zipfp = zipfile.ZipFile(f, "w", compression)
+ self.assertRaises(zipfile.LargeZipFile,
+ zipfp.writestr, "another"+os.extsep+"name", self.data)
+ zipfp.close()
+
+ def testLargeFileException(self):
+ for f in (TESTFN2, TemporaryFile(), StringIO()):
+ self.largeFileExceptionTest(f, zipfile.ZIP_STORED)
+ self.largeFileExceptionTest2(f, zipfile.ZIP_STORED)
+
+ def zipTest(self, f, compression):
+ # Create the ZIP archive
+ zipfp = zipfile.ZipFile(f, "w", compression, allowZip64=True)
+ zipfp.write(TESTFN, "another"+os.extsep+"name")
+ zipfp.write(TESTFN, TESTFN)
+ zipfp.writestr("strfile", self.data)
+ zipfp.close()
+
+ # Read the ZIP archive
+ zipfp = zipfile.ZipFile(f, "r", compression)
+ self.assertEqual(zipfp.read(TESTFN), self.data)
+ self.assertEqual(zipfp.read("another"+os.extsep+"name"), self.data)
+ self.assertEqual(zipfp.read("strfile"), self.data)
+
+ # Print the ZIP directory
+ fp = StringIO()
+ stdout = sys.stdout
+ try:
+ sys.stdout = fp
+
+ zipfp.printdir()
+ finally:
+ sys.stdout = stdout
+
+ directory = fp.getvalue()
+ lines = directory.splitlines()
+ self.assertEquals(len(lines), 4) # Number of files + header
+
+ self.assert_('File Name' in lines[0])
+ self.assert_('Modified' in lines[0])
+ self.assert_('Size' in lines[0])
+
+ fn, date, time, size = lines[1].split()
+ self.assertEquals(fn, 'another.name')
+ # XXX: timestamp is not tested
+ self.assertEquals(size, str(len(self.data)))
+
+ # Check the namelist
+ names = zipfp.namelist()
+ self.assertEquals(len(names), 3)
+ self.assert_(TESTFN in names)
+ self.assert_("another"+os.extsep+"name" in names)
+ self.assert_("strfile" in names)
+
+ # Check infolist
+ infos = zipfp.infolist()
+ names = [ i.filename for i in infos ]
+ self.assertEquals(len(names), 3)
+ self.assert_(TESTFN in names)
+ self.assert_("another"+os.extsep+"name" in names)
+ self.assert_("strfile" in names)
+ for i in infos:
+ self.assertEquals(i.file_size, len(self.data))
+
+ # check getinfo
+ for nm in (TESTFN, "another"+os.extsep+"name", "strfile"):
+ info = zipfp.getinfo(nm)
+ self.assertEquals(info.filename, nm)
+ self.assertEquals(info.file_size, len(self.data))
+
+ # Check that testzip doesn't raise an exception
+ zipfp.testzip()
+
+
+ zipfp.close()
+
+ def testStored(self):
+ for f in (TESTFN2, TemporaryFile(), StringIO()):
+ self.zipTest(f, zipfile.ZIP_STORED)
+
+
+ if zlib:
+ def testDeflated(self):
+ for f in (TESTFN2, TemporaryFile(), StringIO()):
+ self.zipTest(f, zipfile.ZIP_DEFLATED)
+
+ def testAbsoluteArcnames(self):
+ zipfp = zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED, allowZip64=True)
+ zipfp.write(TESTFN, "/absolute")
+ zipfp.close()
+
+ zipfp = zipfile.ZipFile(TESTFN2, "r", zipfile.ZIP_STORED)
+ self.assertEqual(zipfp.namelist(), ["absolute"])
+ zipfp.close()
+
+
+ def tearDown(self):
+ zipfile.ZIP64_LIMIT = self._limit
+ os.remove(TESTFN)
+ os.remove(TESTFN2)
+
+class PyZipFileTests(unittest.TestCase):
+ def testWritePyfile(self):
+ zipfp = zipfile.PyZipFile(TemporaryFile(), "w")
+ fn = __file__
+ if fn.endswith('.pyc') or fn.endswith('.pyo'):
+ fn = fn[:-1]
+
+ zipfp.writepy(fn)
+
+ bn = os.path.basename(fn)
+ self.assert_(bn not in zipfp.namelist())
+ self.assert_(bn + 'o' in zipfp.namelist() or bn + 'c' in zipfp.namelist())
+ zipfp.close()
+
+
+ zipfp = zipfile.PyZipFile(TemporaryFile(), "w")
+ fn = __file__
+ if fn.endswith('.pyc') or fn.endswith('.pyo'):
+ fn = fn[:-1]
+
+ zipfp.writepy(fn, "testpackage")
+
+ bn = "%s/%s"%("testpackage", os.path.basename(fn))
+ self.assert_(bn not in zipfp.namelist())
+ self.assert_(bn + 'o' in zipfp.namelist() or bn + 'c' in zipfp.namelist())
+ zipfp.close()
+
+ def testWritePythonPackage(self):
+ import email
+ packagedir = os.path.dirname(email.__file__)
+
+ zipfp = zipfile.PyZipFile(TemporaryFile(), "w")
+ zipfp.writepy(packagedir)
+
+ # Check for a couple of modules at different levels of the hierarchy
+ names = zipfp.namelist()
+ self.assert_('email/__init__.pyo' in names or 'email/__init__.pyc' in names)
+ self.assert_('email/mime/text.pyo' in names or 'email/mime/text.pyc' in names)
+
+ def testWritePythonDirectory(self):
+ os.mkdir(TESTFN2)
+ try:
+ fp = open(os.path.join(TESTFN2, "mod1.py"), "w")
+ fp.write("print 42\n")
+ fp.close()
+
+ fp = open(os.path.join(TESTFN2, "mod2.py"), "w")
+ fp.write("print 42 * 42\n")
+ fp.close()
+
+ fp = open(os.path.join(TESTFN2, "mod2.txt"), "w")
+ fp.write("bla bla bla\n")
+ fp.close()
+
+ zipfp = zipfile.PyZipFile(TemporaryFile(), "w")
+ zipfp.writepy(TESTFN2)
+
+ names = zipfp.namelist()
+ self.assert_('mod1.pyc' in names or 'mod1.pyo' in names)
+ self.assert_('mod2.pyc' in names or 'mod2.pyo' in names)
+ self.assert_('mod2.txt' not in names)
+
+ finally:
+ shutil.rmtree(TESTFN2)
+
+
+
class OtherTests(unittest.TestCase):
def testCloseErroneousFile(self):
# This test checks that the ZipFile constructor closes the file object
@@ -103,7 +350,8 @@ class OtherTests(unittest.TestCase):
self.assertRaises(RuntimeError, zipf.testzip)
def test_main():
- run_unittest(TestsWithSourceFile, OtherTests)
+ run_unittest(TestsWithSourceFile, TestZip64InSmallFiles, OtherTests, PyZipFileTests)
+ #run_unittest(TestZip64InSmallFiles)
if __name__ == "__main__":
test_main()
diff --git a/Lib/test/test_zipfile64.py b/Lib/test/test_zipfile64.py
new file mode 100644
index 0000000..c9807bf
--- /dev/null
+++ b/Lib/test/test_zipfile64.py
@@ -0,0 +1,67 @@
+# Tests of the full ZIP64 functionality of zipfile
+# The test_support.requires call is the only reason for keeping this separate
+# from test_zipfile
+from test import test_support
+test_support.requires(
+ 'largefile',
+ 'test requires loads of disk-space bytes and a long time to run'
+ )
+
+# We can test part of the module without zlib.
+try:
+ import zlib
+except ImportError:
+ zlib = None
+
+import zipfile, os, unittest
+
+from StringIO import StringIO
+from tempfile import TemporaryFile
+
+from test.test_support import TESTFN, run_unittest
+
+TESTFN2 = TESTFN + "2"
+
+class TestsWithSourceFile(unittest.TestCase):
+ def setUp(self):
+ line_gen = ("Test of zipfile line %d." % i for i in range(0, 1000000))
+ self.data = '\n'.join(line_gen)
+
+ # Make a source file with some lines
+ fp = open(TESTFN, "wb")
+ fp.write(self.data)
+ fp.close()
+
+ def zipTest(self, f, compression):
+ # Create the ZIP archive
+ filecount = int(((1 << 32) / len(self.data)) * 1.5)
+ zipfp = zipfile.ZipFile(f, "w", compression, allowZip64=True)
+
+ for num in range(filecount):
+ zipfp.writestr("testfn%d"%(num,), self.data)
+ zipfp.close()
+
+ # Read the ZIP archive
+ zipfp = zipfile.ZipFile(f, "r", compression)
+ for num in range(filecount):
+ self.assertEqual(zipfp.read("testfn%d"%(num,)), self.data)
+ zipfp.close()
+
+ def testStored(self):
+ for f in (TESTFN2, TemporaryFile()):
+ self.zipTest(f, zipfile.ZIP_STORED)
+
+ if zlib:
+ def testDeflated(self):
+ for f in (TESTFN2, TemporaryFile()):
+ self.zipTest(f, zipfile.ZIP_DEFLATED)
+
+ def tearDown(self):
+ os.remove(TESTFN)
+ os.remove(TESTFN2)
+
+def test_main():
+ run_unittest(TestsWithSourceFile)
+
+if __name__ == "__main__":
+ test_main()
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 168d245..2cdbc6f 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -1,7 +1,8 @@
-"Read and write ZIP files."
-
+"""
+Read and write ZIP files.
+"""
import struct, os, time, sys
-import binascii
+import binascii, cStringIO
try:
import zlib # We may need its compression method
@@ -9,12 +10,22 @@ except ImportError:
zlib = None
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
- "ZipInfo", "ZipFile", "PyZipFile"]
+ "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
class BadZipfile(Exception):
pass
+
+
+class LargeZipFile(Exception):
+ """
+ Raised when writing a zipfile that requires ZIP64 extensions
+ while those extensions are disabled.
+ """
+
error = BadZipfile # The exception raised by this module
+ZIP64_LIMIT= (1 << 31) - 1
+
# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
@@ -27,6 +38,11 @@ structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002" # magic number for central directory
structFileHeader = "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004" # magic number for file header
+structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes
+stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
+structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes
+stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
+
# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
@@ -75,6 +91,40 @@ def is_zipfile(filename):
pass
return False
+def _EndRecData64(fpin, offset, endrec):
+ """
+ Read the ZIP64 end-of-archive records and use that to update endrec
+ """
+ locatorSize = struct.calcsize(structEndArchive64Locator)
+ fpin.seek(offset - locatorSize, 2)
+ data = fpin.read(locatorSize)
+ sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
+ if sig != stringEndArchive64Locator:
+ return endrec
+
+ if diskno != 0 or disks != 1:
+ raise BadZipfile("zipfiles that span multiple disks are not supported")
+
+ # Assume no 'zip64 extensible data'
+ endArchiveSize = struct.calcsize(structEndArchive64)
+ fpin.seek(offset - locatorSize - endArchiveSize, 2)
+ data = fpin.read(endArchiveSize)
+ sig, sz, create_version, read_version, disk_num, disk_dir, \
+ dircount, dircount2, dirsize, diroffset = \
+ struct.unpack(structEndArchive64, data)
+ if sig != stringEndArchive64:
+ return endrec
+
+ # Update the original endrec using data from the ZIP64 record
+ endrec[1] = disk_num
+ endrec[2] = disk_dir
+ endrec[3] = dircount
+ endrec[4] = dircount2
+ endrec[5] = dirsize
+ endrec[6] = diroffset
+ return endrec
+
+
def _EndRecData(fpin):
"""Return data from the "End of Central Directory" record, or None.
@@ -88,6 +138,8 @@ def _EndRecData(fpin):
endrec = list(endrec)
endrec.append("") # Append the archive comment
endrec.append(filesize - 22) # Append the record start offset
+ if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
+ return _EndRecData64(fpin, -22, endrec)
return endrec
# Search the last END_BLOCK bytes of the file for the record signature.
# The comment is appended to the ZIP file and has a 16 bit length.
@@ -106,25 +158,50 @@ def _EndRecData(fpin):
# Append the archive comment and start offset
endrec.append(comment)
endrec.append(filesize - END_BLOCK + start)
+ if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
+ return _EndRecData64(fpin, - END_BLOCK + start, endrec)
return endrec
return # Error, return None
-class ZipInfo:
+class ZipInfo (object):
"""Class with attributes describing each file in the ZIP archive."""
+ __slots__ = (
+ 'orig_filename',
+ 'filename',
+ 'date_time',
+ 'compress_type',
+ 'comment',
+ 'extra',
+ 'create_system',
+ 'create_version',
+ 'extract_version',
+ 'reserved',
+ 'flag_bits',
+ 'volume',
+ 'internal_attr',
+ 'external_attr',
+ 'header_offset',
+ 'CRC',
+ 'compress_size',
+ 'file_size',
+ )
+
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
self.orig_filename = filename # Original file name in archive
-# Terminate the file name at the first null byte. Null bytes in file
-# names are used as tricks by viruses in archives.
+
+ # Terminate the file name at the first null byte. Null bytes in file
+ # names are used as tricks by viruses in archives.
null_byte = filename.find(chr(0))
if null_byte >= 0:
filename = filename[0:null_byte]
-# This is used to ensure paths in generated ZIP files always use
-# forward slashes as the directory separator, as required by the
-# ZIP format specification.
- if os.sep != "/":
+ # This is used to ensure paths in generated ZIP files always use
+ # forward slashes as the directory separator, as required by the
+ # ZIP format specification.
+ if os.sep != "/" and os.sep in filename:
filename = filename.replace(os.sep, "/")
+
self.filename = filename # Normalized file name
self.date_time = date_time # year, month, day, hour, min, sec
# Standard values:
@@ -145,7 +222,6 @@ class ZipInfo:
self.external_attr = 0 # External file attributes
# Other attributes are set by class ZipFile:
# header_offset Byte offset to the file header
- # file_offset Byte offset to the start of the file data
# CRC CRC-32 of the uncompressed file
# compress_size Size of the compressed file
# file_size Size of the uncompressed file
@@ -162,29 +238,85 @@ class ZipInfo:
CRC = self.CRC
compress_size = self.compress_size
file_size = self.file_size
+
+ extra = self.extra
+
+ if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
+ # File is larger than what fits into a 4 byte integer,
+ # fall back to the ZIP64 extension
+ fmt = '<hhqq'
+ extra = extra + struct.pack(fmt,
+ 1, struct.calcsize(fmt)-4, file_size, compress_size)
+ file_size = 0xffffffff # -1
+ compress_size = 0xffffffff # -1
+ self.extract_version = max(45, self.extract_version)
+ self.create_version = max(45, self.extract_version)
+
header = struct.pack(structFileHeader, stringFileHeader,
self.extract_version, self.reserved, self.flag_bits,
self.compress_type, dostime, dosdate, CRC,
compress_size, file_size,
- len(self.filename), len(self.extra))
- return header + self.filename + self.extra
+ len(self.filename), len(extra))
+ return header + self.filename + extra
+
+ def _decodeExtra(self):
+ # Try to decode the extra field.
+ extra = self.extra
+ unpack = struct.unpack
+ while extra:
+ tp, ln = unpack('<hh', extra[:4])
+ if tp == 1:
+ if ln >= 24:
+ counts = unpack('<qqq', extra[4:28])
+ elif ln == 16:
+ counts = unpack('<qq', extra[4:20])
+ elif ln == 8:
+ counts = unpack('<q', extra[4:12])
+ elif ln == 0:
+ counts = ()
+ else:
+ raise RuntimeError, "Corrupt extra field %s"%(ln,)
+
+ idx = 0
+
+ # ZIP64 extension (large files and/or large archives)
+ if self.file_size == -1 or self.file_size == 0xFFFFFFFFL:
+ self.file_size = counts[idx]
+ idx += 1
+ if self.compress_size == -1 or self.compress_size == 0xFFFFFFFFL:
+ self.compress_size = counts[idx]
+ idx += 1
+
+ if self.header_offset == -1 or self.header_offset == 0xffffffffL:
+ old = self.header_offset
+ self.header_offset = counts[idx]
+ idx+=1
+
+ extra = extra[ln+4:]
+
class ZipFile:
""" Class with methods to open, read, write, close, list zip files.
- z = ZipFile(file, mode="r", compression=ZIP_STORED)
+ z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
file: Either the path to the file, or a file-like object.
If it is a path, the file will be opened and closed by ZipFile.
mode: The mode can be either read "r", write "w" or append "a".
compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
+ allowZip64: if True ZipFile will create files with ZIP64 extensions when
+ needed, otherwise it will raise an exception when this would
+ be necessary.
+
"""
fp = None # Set here since __del__ checks it
- def __init__(self, file, mode="r", compression=ZIP_STORED):
+ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
"""Open the ZIP file with mode read "r", write "w" or append "a"."""
+ self._allowZip64 = allowZip64
+ self._didModify = False
if compression == ZIP_STORED:
pass
elif compression == ZIP_DEFLATED:
@@ -250,7 +382,10 @@ class ZipFile:
offset_cd = endrec[6] # offset of central directory
self.comment = endrec[8] # archive comment
# endrec[9] is the offset of the "End of Central Dir" record
- x = endrec[9] - size_cd
+ if endrec[9] > ZIP64_LIMIT:
+ x = endrec[9] - size_cd - 56 - 20
+ else:
+ x = endrec[9] - size_cd
# "concat" is zero, unless zip was concatenated to another file
concat = x - offset_cd
if self.debug > 2:
@@ -258,6 +393,8 @@ class ZipFile:
# self.start_dir: Position of start of central directory
self.start_dir = offset_cd + concat
fp.seek(self.start_dir, 0)
+ data = fp.read(size_cd)
+ fp = cStringIO.StringIO(data)
total = 0
while total < size_cd:
centdir = fp.read(46)
@@ -275,8 +412,7 @@ class ZipFile:
total = (total + centdir[_CD_FILENAME_LENGTH]
+ centdir[_CD_EXTRA_FIELD_LENGTH]
+ centdir[_CD_COMMENT_LENGTH])
- x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
- # file_offset must be computed below...
+ x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
(x.create_version, x.create_system, x.extract_version, x.reserved,
x.flag_bits, x.compress_type, t, d,
x.CRC, x.compress_size, x.file_size) = centdir[1:12]
@@ -284,28 +420,14 @@ class ZipFile:
# Convert date/time code to (year, month, day, hour, min, sec)
x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
+
+ x._decodeExtra()
+ x.header_offset = x.header_offset + concat
self.filelist.append(x)
self.NameToInfo[x.filename] = x
if self.debug > 2:
print "total", total
- for data in self.filelist:
- fp.seek(data.header_offset, 0)
- fheader = fp.read(30)
- if fheader[0:4] != stringFileHeader:
- raise BadZipfile, "Bad magic number for file header"
- fheader = struct.unpack(structFileHeader, fheader)
- # file_offset is computed here, since the extra field for
- # the central directory and for the local file header
- # refer to different fields, and they can have different
- # lengths
- data.file_offset = (data.header_offset + 30
- + fheader[_FH_FILENAME_LENGTH]
- + fheader[_FH_EXTRA_FIELD_LENGTH])
- fname = fp.read(fheader[_FH_FILENAME_LENGTH])
- if fname != data.orig_filename:
- raise RuntimeError, \
- 'File name in directory "%s" and header "%s" differ.' % (
- data.orig_filename, fname)
+
def namelist(self):
"""Return a list of file names in the archive."""
@@ -334,6 +456,7 @@ class ZipFile:
except BadZipfile:
return zinfo.filename
+
def getinfo(self, name):
"""Return the instance of ZipInfo given 'name'."""
return self.NameToInfo[name]
@@ -347,7 +470,24 @@ class ZipFile:
"Attempt to read ZIP archive that was already closed"
zinfo = self.getinfo(name)
filepos = self.fp.tell()
- self.fp.seek(zinfo.file_offset, 0)
+
+ self.fp.seek(zinfo.header_offset, 0)
+
+ # Skip the file header:
+ fheader = self.fp.read(30)
+ if fheader[0:4] != stringFileHeader:
+ raise BadZipfile, "Bad magic number for file header"
+
+ fheader = struct.unpack(structFileHeader, fheader)
+ fname = self.fp.read(fheader[_FH_FILENAME_LENGTH])
+ if fheader[_FH_EXTRA_FIELD_LENGTH]:
+ self.fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])
+
+ if fname != zinfo.orig_filename:
+ raise BadZipfile, \
+ 'File name in directory "%s" and header "%s" differ.' % (
+ zinfo.orig_filename, fname)
+
bytes = self.fp.read(zinfo.compress_size)
self.fp.seek(filepos, 0)
if zinfo.compress_type == ZIP_STORED:
@@ -388,6 +528,12 @@ class ZipFile:
if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
raise RuntimeError, \
"That compression method is not supported"
+ if zinfo.file_size > ZIP64_LIMIT:
+ if not self._allowZip64:
+ raise LargeZipFile("Filesize would require ZIP64 extensions")
+ if zinfo.header_offset > ZIP64_LIMIT:
+ if not self._allowZip64:
+ raise LargeZipFile("Zipfile size would require ZIP64 extensions")
def write(self, filename, arcname=None, compress_type=None):
"""Put the bytes from filename into the archive under the name
@@ -407,16 +553,19 @@ class ZipFile:
zinfo.compress_type = self.compression
else:
zinfo.compress_type = compress_type
- self._writecheck(zinfo)
- fp = open(filename, "rb")
+
+ zinfo.file_size = st.st_size
zinfo.flag_bits = 0x00
zinfo.header_offset = self.fp.tell() # Start of header bytes
+
+ self._writecheck(zinfo)
+ self._didModify = True
+ fp = open(filename, "rb")
# Must overwrite CRC and sizes with correct data later
zinfo.CRC = CRC = 0
zinfo.compress_size = compress_size = 0
zinfo.file_size = file_size = 0
self.fp.write(zinfo.FileHeader())
- zinfo.file_offset = self.fp.tell() # Start of file bytes
if zinfo.compress_type == ZIP_DEFLATED:
cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
zlib.DEFLATED, -15)
@@ -461,8 +610,10 @@ class ZipFile:
zinfo.compress_type = self.compression
else:
zinfo = zinfo_or_arcname
- self._writecheck(zinfo)
zinfo.file_size = len(bytes) # Uncompressed size
+ zinfo.header_offset = self.fp.tell() # Start of header bytes
+ self._writecheck(zinfo)
+ self._didModify = True
zinfo.CRC = binascii.crc32(bytes) # CRC-32 checksum
if zinfo.compress_type == ZIP_DEFLATED:
co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
@@ -473,8 +624,8 @@ class ZipFile:
zinfo.compress_size = zinfo.file_size
zinfo.header_offset = self.fp.tell() # Start of header bytes
self.fp.write(zinfo.FileHeader())
- zinfo.file_offset = self.fp.tell() # Start of file bytes
self.fp.write(bytes)
+ self.fp.flush()
if zinfo.flag_bits & 0x08:
# Write CRC and file sizes after the file data
self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
@@ -491,7 +642,8 @@ class ZipFile:
records."""
if self.fp is None:
return
- if self.mode in ("w", "a"): # write ending records
+
+ if self.mode in ("w", "a") and self._didModify: # write ending records
count = 0
pos1 = self.fp.tell()
for zinfo in self.filelist: # write central directory
@@ -499,23 +651,72 @@ class ZipFile:
dt = zinfo.date_time
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
+ extra = []
+ if zinfo.file_size > ZIP64_LIMIT \
+ or zinfo.compress_size > ZIP64_LIMIT:
+ extra.append(zinfo.file_size)
+ extra.append(zinfo.compress_size)
+ file_size = 0xffffffff #-1
+ compress_size = 0xffffffff #-1
+ else:
+ file_size = zinfo.file_size
+ compress_size = zinfo.compress_size
+
+ if zinfo.header_offset > ZIP64_LIMIT:
+ extra.append(zinfo.header_offset)
+ header_offset = 0xffffffff #-1
+ else:
+ header_offset = zinfo.header_offset
+
+ extra_data = zinfo.extra
+ if extra:
+ # Prepend a ZIP64 field to the existing extra data
+ extra_data = struct.pack(
+ '<hh' + 'q'*len(extra),
+ 1, 8*len(extra), *extra) + extra_data
+
+ extract_version = max(45, zinfo.extract_version)
+ create_version = max(45, zinfo.create_version)
+ else:
+ extract_version = zinfo.extract_version
+ create_version = zinfo.create_version
+
centdir = struct.pack(structCentralDir,
- stringCentralDir, zinfo.create_version,
- zinfo.create_system, zinfo.extract_version, zinfo.reserved,
+ stringCentralDir, create_version,
+ zinfo.create_system, extract_version, zinfo.reserved,
zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
- zinfo.CRC, zinfo.compress_size, zinfo.file_size,
- len(zinfo.filename), len(zinfo.extra), len(zinfo.comment),
+ zinfo.CRC, compress_size, file_size,
+ len(zinfo.filename), len(extra_data), len(zinfo.comment),
0, zinfo.internal_attr, zinfo.external_attr,
- zinfo.header_offset)
+ header_offset)
self.fp.write(centdir)
self.fp.write(zinfo.filename)
- self.fp.write(zinfo.extra)
+ self.fp.write(extra_data)
self.fp.write(zinfo.comment)
+
pos2 = self.fp.tell()
# Write end-of-zip-archive record
- endrec = struct.pack(structEndArchive, stringEndArchive,
- 0, 0, count, count, pos2 - pos1, pos1, 0)
- self.fp.write(endrec)
+ if pos1 > ZIP64_LIMIT:
+ # Need to write the ZIP64 end-of-archive records
+ zip64endrec = struct.pack(
+ structEndArchive64, stringEndArchive64,
+ 44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
+ self.fp.write(zip64endrec)
+
+ zip64locrec = struct.pack(
+ structEndArchive64Locator,
+ stringEndArchive64Locator, 0, pos2, 1)
+ self.fp.write(zip64locrec)
+
+ pos3 = self.fp.tell()
+ endrec = struct.pack(structEndArchive, stringEndArchive,
+ 0, 0, count, count, pos2 - pos1, 0xffffffff, 0) # -1, 0)
+ self.fp.write(endrec)
+
+ else:
+ endrec = struct.pack(structEndArchive, stringEndArchive,
+ 0, 0, count, count, pos2 - pos1, pos1, 0)
+ self.fp.write(endrec)
self.fp.flush()
if not self._filePassed:
self.fp.close()
@@ -619,3 +820,80 @@ class PyZipFile(ZipFile):
if basename:
archivename = "%s/%s" % (basename, archivename)
return (fname, archivename)
+
+
+def main(args = None):
+ import textwrap
+ USAGE=textwrap.dedent("""\
+ Usage:
+ zipfile.py -l zipfile.zip # Show listing of a zipfile
+ zipfile.py -t zipfile.zip # Test if a zipfile is valid
+ zipfile.py -e zipfile.zip target # Extract zipfile into target dir
+ zipfile.py -c zipfile.zip src ... # Create zipfile from sources
+ """)
+ if args is None:
+ args = sys.argv[1:]
+
+ if not args or args[0] not in ('-l', '-c', '-e', '-t'):
+ print USAGE
+ sys.exit(1)
+
+ if args[0] == '-l':
+ if len(args) != 2:
+ print USAGE
+ sys.exit(1)
+ zf = ZipFile(args[1], 'r')
+ zf.printdir()
+ zf.close()
+
+ elif args[0] == '-t':
+ if len(args) != 2:
+ print USAGE
+ sys.exit(1)
+ zf = ZipFile(args[1], 'r')
+ zf.testzip()
+ print "Done testing"
+
+ elif args[0] == '-e':
+ if len(args) != 3:
+ print USAGE
+ sys.exit(1)
+
+ zf = ZipFile(args[1], 'r')
+ out = args[2]
+ for path in zf.namelist():
+ if path.startswith('./'):
+ tgt = os.path.join(out, path[2:])
+ else:
+ tgt = os.path.join(out, path)
+
+ tgtdir = os.path.dirname(tgt)
+ if not os.path.exists(tgtdir):
+ os.makedirs(tgtdir)
+ fp = open(tgt, 'wb')
+ fp.write(zf.read(path))
+ fp.close()
+ zf.close()
+
+ elif args[0] == '-c':
+ if len(args) < 3:
+ print USAGE
+ sys.exit(1)
+
+ def addToZip(zf, path, zippath):
+ if os.path.isfile(path):
+ zf.write(path, zippath, ZIP_DEFLATED)
+ elif os.path.isdir(path):
+ for nm in os.listdir(path):
+ addToZip(zf,
+ os.path.join(path, nm), os.path.join(zippath, nm))
+ # else: ignore
+
+ zf = ZipFile(args[1], 'w', allowZip64=True)
+ for src in args[2:]:
+ addToZip(zf, src, os.path.basename(src))
+
+ zf.close()
+
+if __name__ == "__main__":
+ main()
diff --git a/Misc/NEWS b/Misc/NEWS
index a8099f7..c16e0db 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -152,6 +152,7 @@ Extension Modules
aborts the db transaction safely when a modifier callback fails.
Fixes SF python patch/bug #1408584.
+- Patch #1446489: add support for the ZIP64 extensions to zipfile.
Library
-------