summary | refs | log | tree | commit | diff | stats
path: root/Lib/zipfile.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/zipfile.py')
-rw-r--r-- Lib/zipfile.py 444
1 files changed, 251 insertions, 193 deletions
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 2476717..e2ae042 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -371,7 +371,7 @@ class ZipInfo (object):
result.append(' filemode=%r' % stat.filemode(hi))
if lo:
result.append(' external_attr=%#x' % lo)
- isdir = self.filename[-1:] == '/'
+ isdir = self.is_dir()
if not isdir or self.file_size:
result.append(' file_size=%r' % self.file_size)
if ((not isdir or self.compress_size) and
@@ -449,7 +449,7 @@ class ZipInfo (object):
elif ln == 0:
counts = ()
else:
- raise RuntimeError("Corrupt extra field %s"%(ln,))
+ raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
idx = 0
@@ -469,6 +469,42 @@ class ZipInfo (object):
extra = extra[ln+4:]
+ @classmethod
+ def from_file(cls, filename, arcname=None):
+ """Construct an appropriate ZipInfo for a file on the filesystem.
+
+ filename should be the path to a file or directory on the filesystem.
+
+ arcname is the name which it will have within the archive (by default,
+ this will be the same as filename, but without a drive letter and with
+ leading path separators removed).
+
+ The returned instance carries the modification time, the Unix mode bits
+ (in the upper 16 bits of external_attr) and the size reported by
+ os.stat(). Directories get a trailing '/' on the name, file_size 0 and
+ the MS-DOS directory flag. This method itself does not assign
+ compress_type. Errors raised by os.stat() propagate to the caller.
+ """
+ # All metadata below comes from this single stat() call.
+ st = os.stat(filename)
+ isdir = stat.S_ISDIR(st.st_mode)
+ mtime = time.localtime(st.st_mtime)
+ # Keep only the first six struct_time fields (year .. second).
+ date_time = mtime[0:6]
+ # Create ZipInfo instance to store file information
+ if arcname is None:
+ arcname = filename
+ # Drop any drive letter and normalize the path before stripping
+ # leading separators, so archive names are always relative.
+ arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
+ # os.altsep is None on platforms without an alternate separator; the
+ # membership test then simply never matches it.
+ # NOTE(review): arcname[0] raises IndexError if stripping leaves an
+ # empty string (e.g. a bare root path) -- confirm whether callers can
+ # reach this.
+ while arcname[0] in (os.sep, os.altsep):
+ arcname = arcname[1:]
+ if isdir:
+ arcname += '/'
+ zinfo = cls(arcname, date_time)
+ zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes
+ if isdir:
+ zinfo.file_size = 0
+ zinfo.external_attr |= 0x10 # MS-DOS directory flag
+ else:
+ zinfo.file_size = st.st_size
+
+ return zinfo
+
+    def is_dir(self):
+        """Return True if this archive member is a directory.
+
+        A member is a directory when its stored name ends with '/'.
+        endswith() is used rather than indexing the last character so that
+        an empty filename yields False instead of raising IndexError.
+        """
+        return self.filename.endswith('/')
+
class _ZipDecrypter:
"""Class to handle decryption of files stored within a ZIP archive.
@@ -618,7 +654,7 @@ def _check_compression(compression):
raise RuntimeError(
"Compression requires the (missing) lzma module")
else:
- raise RuntimeError("That compression method is not supported")
+ raise NotImplementedError("That compression method is not supported")
def _get_compressor(compress_type):
@@ -651,14 +687,19 @@ def _get_decompressor(compress_type):
class _SharedFile:
- def __init__(self, file, pos, close, lock):
+ def __init__(self, file, pos, close, lock, writing):
self._file = file
self._pos = pos
self._close = close
self._lock = lock
+ self._writing = writing
def read(self, n=-1):
with self._lock:
+ if self._writing():
+ raise ValueError("Can't read from the ZIP file while there "
+ "is an open writing handle on it. "
+ "Close the writing handle before trying to read.")
self._file.seek(self._pos)
data = self._file.read(n)
self._pos = self._file.tell()
@@ -702,9 +743,6 @@ class ZipExtFile(io.BufferedIOBase):
# Read from compressed files in 4k blocks.
MIN_READ_SIZE = 4096
- # Search for universal newlines or line chunks.
- PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
-
def __init__(self, fileobj, mode, zipinfo, decrypter=None,
close_fileobj=False):
self._fileobj = fileobj
@@ -721,7 +759,6 @@ class ZipExtFile(io.BufferedIOBase):
self._readbuffer = b''
self._offset = 0
- self._universal = 'U' in mode
self.newlines = None
# Adjust read size for encrypted files since the first 12 bytes
@@ -758,7 +795,7 @@ class ZipExtFile(io.BufferedIOBase):
If limit is specified, at most limit bytes will be read.
"""
- if not self._universal and limit < 0:
+ if limit < 0:
# Shortcut common case - newline found in buffer.
i = self._readbuffer.find(b'\n', self._offset) + 1
if i > 0:
@@ -766,41 +803,7 @@ class ZipExtFile(io.BufferedIOBase):
self._offset = i
return line
- if not self._universal:
- return io.BufferedIOBase.readline(self, limit)
-
- line = b''
- while limit < 0 or len(line) < limit:
- readahead = self.peek(2)
- if readahead == b'':
- return line
-
- #
- # Search for universal newlines or line chunks.
- #
- # The pattern returns either a line chunk or a newline, but not
- # both. Combined with peek(2), we are assured that the sequence
- # '\r\n' is always retrieved completely and never split into
- # separate newlines - '\r', '\n' due to coincidental readaheads.
- #
- match = self.PATTERN.search(readahead)
- newline = match.group('newline')
- if newline is not None:
- if self.newlines is None:
- self.newlines = []
- if newline not in self.newlines:
- self.newlines.append(newline)
- self._offset += len(newline)
- return line + b'\n'
-
- chunk = match.group('chunk')
- if limit >= 0:
- chunk = chunk[: limit - len(line)]
-
- self._offset += len(chunk)
- line += chunk
-
- return line
+ return io.BufferedIOBase.readline(self, limit)
def peek(self, n=1):
"""Returns buffered bytes without advancing the position."""
@@ -958,6 +961,76 @@ class ZipExtFile(io.BufferedIOBase):
super().close()
+class _ZipWriteFile(io.BufferedIOBase):
+    """Write-only file object for a single new member of a ZipFile.
+
+    Data passed to write() is folded into a running CRC, compressed when
+    the member's compress_type has a compressor, and written to the
+    archive's underlying file.  close() finalizes the member: it flushes
+    the compressor, stores the final CRC and sizes on the ZipInfo, writes
+    them either as a trailing data descriptor (flag bit 0x08) or by
+    rewriting the local header in place, and registers the member with
+    the owning ZipFile.
+    """
+
+    def __init__(self, zf, zinfo, zip64):
+        """Prepare to write one member.
+
+        zf is the owning ZipFile, zinfo the ZipInfo of the member being
+        written, and zip64 tells whether the header/descriptor use the
+        64-bit size format.
+        """
+        self._zinfo = zinfo
+        self._zip64 = zip64
+        self._zipfile = zf
+        self._compressor = _get_compressor(zinfo.compress_type)
+        self._file_size = 0
+        self._compress_size = 0
+        self._crc = 0
+
+    @property
+    def _fileobj(self):
+        # Looked up on every access rather than cached, so the handle
+        # always uses the ZipFile's current fp.
+        return self._zipfile.fp
+
+    def writable(self):
+        return True
+
+    def write(self, data):
+        """Append data to the member and return the number of bytes consumed.
+
+        Raises ValueError if the handle has already been closed.
+        """
+        if self.closed:
+            raise ValueError('I/O operation on closed file.')
+
+        # Accept any bytes-like object.  len() counts items rather than
+        # bytes for buffers with itemsize > 1, so size those via memoryview.
+        if isinstance(data, (bytes, bytearray)):
+            nbytes = len(data)
+        else:
+            data = memoryview(data)
+            nbytes = data.nbytes
+        self._file_size += nbytes
+        self._crc = crc32(data, self._crc)
+        if self._compressor:
+            data = self._compressor.compress(data)
+            self._compress_size += len(data)
+        self._fileobj.write(data)
+        return nbytes
+
+    def close(self):
+        """Finalize the member and release the ZipFile's write lock flag.
+
+        Calling close() more than once is a no-op.  Raises RuntimeError if
+        the sizes exceeded ZIP64_LIMIT although the ZIP64 format was not
+        selected for this member.
+        """
+        # A second close() must not rewrite the header or register the
+        # member in filelist again.
+        if self.closed:
+            return
+        try:
+            super().close()
+            # Flush any data from the compressor, and update header info
+            if self._compressor:
+                buf = self._compressor.flush()
+                self._compress_size += len(buf)
+                self._fileobj.write(buf)
+                self._zinfo.compress_size = self._compress_size
+            else:
+                self._zinfo.compress_size = self._file_size
+            self._zinfo.CRC = self._crc
+            self._zinfo.file_size = self._file_size
+
+            # Write updated header info
+            if self._zinfo.flag_bits & 0x08:
+                # Write CRC and file sizes after the file data
+                fmt = '<LQQ' if self._zip64 else '<LLL'
+                self._fileobj.write(struct.pack(fmt, self._zinfo.CRC,
+                    self._zinfo.compress_size, self._zinfo.file_size))
+                self._zipfile.start_dir = self._fileobj.tell()
+            else:
+                if not self._zip64:
+                    if self._file_size > ZIP64_LIMIT:
+                        raise RuntimeError('File size unexpectedly exceeded ZIP64 '
+                                           'limit')
+                    if self._compress_size > ZIP64_LIMIT:
+                        raise RuntimeError('Compressed size unexpectedly exceeded '
+                                           'ZIP64 limit')
+                # Seek backwards and write file header (which will now include
+                # correct CRC and file sizes)
+
+                # Preserve current position in file
+                self._zipfile.start_dir = self._fileobj.tell()
+                self._fileobj.seek(self._zinfo.header_offset)
+                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
+                self._fileobj.seek(self._zipfile.start_dir)
+
+            # Successfully written: Add file to our caches
+            self._zipfile.filelist.append(self._zinfo)
+            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
+        finally:
+            # Always clear the flag, even when finalizing failed; otherwise
+            # the ZipFile could never be read, written or closed again.
+            self._zipfile._writing = False
+
+
class ZipFile:
""" Class with methods to open, read, write, close, list zip files.
@@ -982,7 +1055,7 @@ class ZipFile:
"""Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
or append 'a'."""
if mode not in ('r', 'w', 'x', 'a'):
- raise RuntimeError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
+ raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
_check_compression(compression)
@@ -1020,6 +1093,7 @@ class ZipFile:
self._fileRefCnt = 1
self._lock = threading.RLock()
self._seekable = True
+ self._writing = False
try:
if mode == 'r':
@@ -1055,7 +1129,7 @@ class ZipFile:
self._didModify = True
self.start_dir = self._start_disk = self.fp.tell()
else:
- raise RuntimeError("Mode must be 'r', 'w', 'x', or 'a'")
+ raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
except:
fp = self.fp
self.fp = None
@@ -1204,7 +1278,7 @@ class ZipFile:
def setpassword(self, pwd):
"""Set default password for encrypted files."""
if pwd and not isinstance(pwd, bytes):
- raise TypeError("pwd: expected bytes, got %s" % type(pwd))
+ raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
if pwd:
self.pwd = pwd
else:
@@ -1218,7 +1292,7 @@ class ZipFile:
@comment.setter
def comment(self, comment):
if not isinstance(comment, bytes):
- raise TypeError("comment: expected bytes, got %s" % type(comment))
+ raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
# check for valid comment length
if len(comment) > ZIP_MAX_COMMENT:
import warnings
@@ -1233,30 +1307,55 @@ class ZipFile:
with self.open(name, "r", pwd) as fp:
return fp.read()
- def open(self, name, mode="r", pwd=None):
- """Return file-like object for 'name'."""
- if mode not in ("r", "U", "rU"):
- raise RuntimeError('open() requires mode "r", "U", or "rU"')
- if 'U' in mode:
- import warnings
- warnings.warn("'U' mode is deprecated",
- DeprecationWarning, 2)
+ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
+ """Return file-like object for 'name'.
+
+ name is a string for the file name within the ZIP file, or a ZipInfo
+ object.
+
+ mode should be 'r' to read a file already in the ZIP file, or 'w' to
+ write to a file newly added to the archive.
+
+ pwd is the password to decrypt files (only used for reading).
+
+ When writing, if the file size is not known in advance but may exceed
+ 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
+ files. If the size is known in advance, it is best to pass a ZipInfo
+ instance for name, with zinfo.file_size set.
+ """
+ if mode not in {"r", "w"}:
+ raise ValueError('open() requires mode "r" or "w"')
if pwd and not isinstance(pwd, bytes):
- raise TypeError("pwd: expected bytes, got %s" % type(pwd))
+ raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
+ if pwd and (mode == "w"):
+ raise ValueError("pwd is only supported for reading files")
if not self.fp:
- raise RuntimeError(
- "Attempt to read ZIP archive that was already closed")
+ raise ValueError(
+ "Attempt to use ZIP archive that was already closed")
# Make sure we have an info object
if isinstance(name, ZipInfo):
# 'name' is already an info object
zinfo = name
+ elif mode == 'w':
+ zinfo = ZipInfo(name)
+ zinfo.compress_type = self.compression
else:
# Get info object for name
zinfo = self.getinfo(name)
+ if mode == 'w':
+ return self._open_to_write(zinfo, force_zip64=force_zip64)
+
+ if self._writing:
+ raise ValueError("Can't read from the ZIP file while there "
+ "is an open writing handle on it. "
+ "Close the writing handle before trying to read.")
+
+ # Open for reading:
self._fileRefCnt += 1
- zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose, self._lock)
+ zef_file = _SharedFile(self.fp, zinfo.header_offset,
+ self._fpclose, self._lock, lambda: self._writing)
try:
# Skip the file header:
fheader = zef_file.read(sizeFileHeader)
@@ -1296,7 +1395,7 @@ class ZipFile:
if not pwd:
pwd = self.pwd
if not pwd:
- raise RuntimeError("File %s is encrypted, password "
+ raise RuntimeError("File %r is encrypted, password "
"required for extraction" % name)
zd = _ZipDecrypter(pwd)
@@ -1314,13 +1413,56 @@ class ZipFile:
# compare against the CRC otherwise
check_byte = (zinfo.CRC >> 24) & 0xff
if h[11] != check_byte:
- raise RuntimeError("Bad password for file", name)
+ raise RuntimeError("Bad password for file %r" % name)
return ZipExtFile(zef_file, mode, zinfo, zd, True)
except:
zef_file.close()
raise
+ def _open_to_write(self, zinfo, force_zip64=False):
+ """Write the local header for zinfo and return a handle for its data.
+
+ zinfo describes the member to add. Its compress_size, CRC and
+ flag_bits are reset here, and file_size is set to 0 if it has none.
+ force_zip64 requests the ZIP64 header format regardless of the
+ declared file_size.
+
+ Raises ValueError if force_zip64 is given on an archive opened without
+ allowZip64, or if another write handle is still open.
+
+ On success the archive is marked as having an open writer
+ (self._writing) and a _ZipWriteFile bound to this archive is returned.
+ """
+ if force_zip64 and not self._allowZip64:
+ raise ValueError(
+ "force_zip64 is True, but allowZip64 was False when opening "
+ "the ZIP file."
+ )
+ # Only one writer at a time: its data is written to the shared
+ # underlying file.
+ if self._writing:
+ raise ValueError("Can't write to the ZIP file while there is "
+ "another write handle open on it. "
+ "Close the first handle before opening another.")
+
+ # Sizes and CRC are overwritten with correct data after processing the file
+ if not hasattr(zinfo, 'file_size'):
+ zinfo.file_size = 0
+ zinfo.compress_size = 0
+ zinfo.CRC = 0
+
+ # Any flag bits previously set on zinfo are discarded here.
+ zinfo.flag_bits = 0x00
+ if zinfo.compress_type == ZIP_LZMA:
+ # Compressed data includes an end-of-stream (EOS) marker
+ zinfo.flag_bits |= 0x02
+ # The header cannot be rewritten on a non-seekable file, so bit 0x08
+ # makes _ZipWriteFile.close() append the CRC and sizes after the data.
+ if not self._seekable:
+ zinfo.flag_bits |= 0x08
+
+ # Default permissions when the caller supplied none.
+ if not zinfo.external_attr:
+ zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
+
+ # Compressed size can be larger than uncompressed size
+ zip64 = self._allowZip64 and \
+ (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
+
+ # Position at self.start_dir before recording the member's header
+ # offset.
+ if self._seekable:
+ self.fp.seek(self.start_dir)
+ zinfo.header_offset = self.fp.tell()
+
+ self._writecheck(zinfo)
+ self._didModify = True
+
+ # Provisional header; _ZipWriteFile.close() supplies the final CRC and
+ # sizes (by rewriting it, or via a trailing descriptor).
+ self.fp.write(zinfo.FileHeader(zip64))
+
+ self._writing = True
+ return _ZipWriteFile(self, zinfo, zip64)
+
def extract(self, member, path=None, pwd=None):
"""Extract a member from the archive to the current working directory,
using its full name. Its file information is extracted as accurately
@@ -1390,7 +1532,7 @@ class ZipFile:
if upperdirs and not os.path.exists(upperdirs):
os.makedirs(upperdirs)
- if member.filename[-1] == '/':
+ if member.is_dir():
if not os.path.isdir(targetpath):
os.mkdir(targetpath)
return targetpath
@@ -1407,9 +1549,9 @@ class ZipFile:
import warnings
warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
if self.mode not in ('w', 'x', 'a'):
- raise RuntimeError("write() requires mode 'w', 'x', or 'a'")
+ raise ValueError("write() requires mode 'w', 'x', or 'a'")
if not self.fp:
- raise RuntimeError(
+ raise ValueError(
"Attempt to write ZIP archive that was already closed")
_check_compression(zinfo.compress_type)
if not self._allowZip64:
@@ -1428,105 +1570,43 @@ class ZipFile:
"""Put the bytes from filename into the archive under the name
arcname."""
if not self.fp:
- raise RuntimeError(
+ raise ValueError(
"Attempt to write to ZIP archive that was already closed")
+ if self._writing:
+ raise ValueError(
+ "Can't write to ZIP archive while an open writing handle exists"
+ )
- st = os.stat(filename)
- isdir = stat.S_ISDIR(st.st_mode)
- mtime = time.localtime(st.st_mtime)
- date_time = mtime[0:6]
- # Create ZipInfo instance to store file information
- if arcname is None:
- arcname = filename
- arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
- while arcname[0] in (os.sep, os.altsep):
- arcname = arcname[1:]
- if isdir:
- arcname += '/'
- zinfo = ZipInfo(arcname, date_time)
- zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes
- if isdir:
- zinfo.compress_type = ZIP_STORED
- elif compress_type is None:
- zinfo.compress_type = self.compression
+ zinfo = ZipInfo.from_file(filename, arcname)
+
+ if zinfo.is_dir():
+ zinfo.compress_size = 0
+ zinfo.CRC = 0
else:
- zinfo.compress_type = compress_type
+ if compress_type is not None:
+ zinfo.compress_type = compress_type
+ else:
+ zinfo.compress_type = self.compression
- zinfo.file_size = st.st_size
- zinfo.flag_bits = 0x00
- with self._lock:
- if self._seekable:
- self.fp.seek(self.start_dir)
- zinfo.header_offset = self.fp.tell() # Start of header bytes
- if zinfo.compress_type == ZIP_LZMA:
+ if zinfo.is_dir():
+ with self._lock:
+ if self._seekable:
+ self.fp.seek(self.start_dir)
+ zinfo.header_offset = self.fp.tell() # Start of header bytes
+ if zinfo.compress_type == ZIP_LZMA:
# Compressed data includes an end-of-stream (EOS) marker
- zinfo.flag_bits |= 0x02
+ zinfo.flag_bits |= 0x02
- self._writecheck(zinfo)
- self._didModify = True
+ self._writecheck(zinfo)
+ self._didModify = True
- if isdir:
- zinfo.file_size = 0
- zinfo.compress_size = 0
- zinfo.CRC = 0
- zinfo.external_attr |= 0x10 # MS-DOS directory flag
self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename] = zinfo
self.fp.write(zinfo.FileHeader(False))
self.start_dir = self.fp.tell()
- return
-
- cmpr = _get_compressor(zinfo.compress_type)
- if not self._seekable:
- zinfo.flag_bits |= 0x08
- with open(filename, "rb") as fp:
- # Must overwrite CRC and sizes with correct data later
- zinfo.CRC = CRC = 0
- zinfo.compress_size = compress_size = 0
- # Compressed size can be larger than uncompressed size
- zip64 = self._allowZip64 and \
- zinfo.file_size * 1.05 > ZIP64_LIMIT
- self.fp.write(zinfo.FileHeader(zip64))
- file_size = 0
- while 1:
- buf = fp.read(1024 * 8)
- if not buf:
- break
- file_size = file_size + len(buf)
- CRC = crc32(buf, CRC)
- if cmpr:
- buf = cmpr.compress(buf)
- compress_size = compress_size + len(buf)
- self.fp.write(buf)
- if cmpr:
- buf = cmpr.flush()
- compress_size = compress_size + len(buf)
- self.fp.write(buf)
- zinfo.compress_size = compress_size
- else:
- zinfo.compress_size = file_size
- zinfo.CRC = CRC
- zinfo.file_size = file_size
- if zinfo.flag_bits & 0x08:
- # Write CRC and file sizes after the file data
- fmt = '<LQQ' if zip64 else '<LLL'
- self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
- zinfo.file_size))
- self.start_dir = self.fp.tell()
- else:
- if not zip64 and self._allowZip64:
- if file_size > ZIP64_LIMIT:
- raise RuntimeError('File size has increased during compressing')
- if compress_size > ZIP64_LIMIT:
- raise RuntimeError('Compressed size larger than uncompressed size')
- # Seek backwards and write file header (which will now include
- # correct CRC and file sizes)
- self.start_dir = self.fp.tell() # Preserve current position in file
- self.fp.seek(zinfo.header_offset)
- self.fp.write(zinfo.FileHeader(zip64))
- self.fp.seek(self.start_dir)
- self.filelist.append(zinfo)
- self.NameToInfo[zinfo.filename] = zinfo
+ else:
+ with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
+ shutil.copyfileobj(src, dest, 1024*8)
def writestr(self, zinfo_or_arcname, data, compress_type=None):
"""Write a file into the archive. The contents is 'data', which
@@ -1549,47 +1629,20 @@ class ZipFile:
zinfo = zinfo_or_arcname
if not self.fp:
- raise RuntimeError(
+ raise ValueError(
"Attempt to write to ZIP archive that was already closed")
+ if self._writing:
+ raise ValueError(
+ "Can't write to ZIP archive while an open writing handle exists."
+ )
+
+ if compress_type is not None:
+ zinfo.compress_type = compress_type
zinfo.file_size = len(data) # Uncompressed size
with self._lock:
- if self._seekable:
- self.fp.seek(self.start_dir)
- zinfo.header_offset = self.fp.tell() # Start of header data
- if compress_type is not None:
- zinfo.compress_type = compress_type
- zinfo.header_offset = self.fp.tell() # Start of header data
- if compress_type is not None:
- zinfo.compress_type = compress_type
- if zinfo.compress_type == ZIP_LZMA:
- # Compressed data includes an end-of-stream (EOS) marker
- zinfo.flag_bits |= 0x02
-
- self._writecheck(zinfo)
- self._didModify = True
- zinfo.CRC = crc32(data) # CRC-32 checksum
- co = _get_compressor(zinfo.compress_type)
- if co:
- data = co.compress(data) + co.flush()
- zinfo.compress_size = len(data) # Compressed size
- else:
- zinfo.compress_size = zinfo.file_size
- zip64 = zinfo.file_size > ZIP64_LIMIT or \
- zinfo.compress_size > ZIP64_LIMIT
- if zip64 and not self._allowZip64:
- raise LargeZipFile("Filesize would require ZIP64 extensions")
- self.fp.write(zinfo.FileHeader(zip64))
- self.fp.write(data)
- if zinfo.flag_bits & 0x08:
- # Write CRC and file sizes after the file data
- fmt = '<LQQ' if zip64 else '<LLL'
- self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
- zinfo.file_size))
- self.fp.flush()
- self.start_dir = self.fp.tell()
- self.filelist.append(zinfo)
- self.NameToInfo[zinfo.filename] = zinfo
+ with self.open(zinfo, mode='w') as dest:
+ dest.write(data)
def __del__(self):
"""Call the "close()" method in case the user forgot."""
@@ -1601,6 +1654,11 @@ class ZipFile:
if self.fp is None:
return
+ if self._writing:
+ raise ValueError("Can't close the ZIP file while there is "
+ "an open writing handle on it. "
+ "Close the writing handle before closing the zip.")
+
try:
if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
with self._lock:
@@ -1745,7 +1803,7 @@ class PyZipFile(ZipFile):
if filterfunc and not filterfunc(pathname):
if self.debug:
label = 'path' if os.path.isdir(pathname) else 'file'
- print('%s "%s" skipped by filterfunc' % (label, pathname))
+ print('%s %r skipped by filterfunc' % (label, pathname))
return
dir, name = os.path.split(pathname)
if os.path.isdir(pathname):
@@ -1776,7 +1834,7 @@ class PyZipFile(ZipFile):
elif ext == ".py":
if filterfunc and not filterfunc(path):
if self.debug:
- print('file "%s" skipped by filterfunc' % path)
+ print('file %r skipped by filterfunc' % path)
continue
fname, arcname = self._get_codename(path[0:-3],
basename)
@@ -1793,7 +1851,7 @@ class PyZipFile(ZipFile):
if ext == ".py":
if filterfunc and not filterfunc(path):
if self.debug:
- print('file "%s" skipped by filterfunc' % path)
+ print('file %r skipped by filterfunc' % path)
continue
fname, arcname = self._get_codename(path[0:-3],
basename)