diff options
Diffstat (limited to 'Lib/zipfile.py')
-rw-r--r-- | Lib/zipfile.py | 444 |
1 files changed, 251 insertions, 193 deletions
diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 2476717..e2ae042 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -371,7 +371,7 @@ class ZipInfo (object): result.append(' filemode=%r' % stat.filemode(hi)) if lo: result.append(' external_attr=%#x' % lo) - isdir = self.filename[-1:] == '/' + isdir = self.is_dir() if not isdir or self.file_size: result.append(' file_size=%r' % self.file_size) if ((not isdir or self.compress_size) and @@ -449,7 +449,7 @@ class ZipInfo (object): elif ln == 0: counts = () else: - raise RuntimeError("Corrupt extra field %s"%(ln,)) + raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln)) idx = 0 @@ -469,6 +469,42 @@ class ZipInfo (object): extra = extra[ln+4:] + @classmethod + def from_file(cls, filename, arcname=None): + """Construct an appropriate ZipInfo for a file on the filesystem. + + filename should be the path to a file or directory on the filesystem. + + arcname is the name which it will have within the archive (by default, + this will be the same as filename, but without a drive letter and with + leading path separators removed). + """ + st = os.stat(filename) + isdir = stat.S_ISDIR(st.st_mode) + mtime = time.localtime(st.st_mtime) + date_time = mtime[0:6] + # Create ZipInfo instance to store file information + if arcname is None: + arcname = filename + arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) + while arcname[0] in (os.sep, os.altsep): + arcname = arcname[1:] + if isdir: + arcname += '/' + zinfo = cls(arcname, date_time) + zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes + if isdir: + zinfo.file_size = 0 + zinfo.external_attr |= 0x10 # MS-DOS directory flag + else: + zinfo.file_size = st.st_size + + return zinfo + + def is_dir(self): + """Return True if this archive member is a directory.""" + return self.filename[-1] == '/' + class _ZipDecrypter: """Class to handle decryption of files stored within a ZIP archive. @@ -618,7 +654,7 @@ def _check_compression(compression): raise RuntimeError( "Compression requires the (missing) lzma module") else: - raise RuntimeError("That compression method is not supported") + raise NotImplementedError("That compression method is not supported") def _get_compressor(compress_type): @@ -651,14 +687,19 @@ def _get_decompressor(compress_type): class _SharedFile: - def __init__(self, file, pos, close, lock): + def __init__(self, file, pos, close, lock, writing): self._file = file self._pos = pos self._close = close self._lock = lock + self._writing = writing def read(self, n=-1): with self._lock: + if self._writing(): + raise ValueError("Can't read from the ZIP file while there " + "is an open writing handle on it. " + "Close the writing handle before trying to read.") self._file.seek(self._pos) data = self._file.read(n) self._pos = self._file.tell() @@ -702,9 +743,6 @@ class ZipExtFile(io.BufferedIOBase): # Read from compressed files in 4k blocks. MIN_READ_SIZE = 4096 - # Search for universal newlines or line chunks. - PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)') - def __init__(self, fileobj, mode, zipinfo, decrypter=None, close_fileobj=False): self._fileobj = fileobj @@ -721,7 +759,6 @@ class ZipExtFile(io.BufferedIOBase): self._readbuffer = b'' self._offset = 0 - self._universal = 'U' in mode self.newlines = None # Adjust read size for encrypted files since the first 12 bytes @@ -758,7 +795,7 @@ class ZipExtFile(io.BufferedIOBase): If limit is specified, at most limit bytes will be read. """ - if not self._universal and limit < 0: + if limit < 0: # Shortcut common case - newline found in buffer. i = self._readbuffer.find(b'\n', self._offset) + 1 if i > 0: @@ -766,41 +803,7 @@ class ZipExtFile(io.BufferedIOBase): self._offset = i return line - if not self._universal: - return io.BufferedIOBase.readline(self, limit) - - line = b'' - while limit < 0 or len(line) < limit: - readahead = self.peek(2) - if readahead == b'': - return line - - # - # Search for universal newlines or line chunks. - # - # The pattern returns either a line chunk or a newline, but not - # both. Combined with peek(2), we are assured that the sequence - # '\r\n' is always retrieved completely and never split into - # separate newlines - '\r', '\n' due to coincidental readaheads. - # - match = self.PATTERN.search(readahead) - newline = match.group('newline') - if newline is not None: - if self.newlines is None: - self.newlines = [] - if newline not in self.newlines: - self.newlines.append(newline) - self._offset += len(newline) - return line + b'\n' - - chunk = match.group('chunk') - if limit >= 0: - chunk = chunk[: limit - len(line)] - - self._offset += len(chunk) - line += chunk - - return line + return io.BufferedIOBase.readline(self, limit) def peek(self, n=1): """Returns buffered bytes without advancing the position.""" @@ -958,6 +961,76 @@ class ZipExtFile(io.BufferedIOBase): super().close() +class _ZipWriteFile(io.BufferedIOBase): + def __init__(self, zf, zinfo, zip64): + self._zinfo = zinfo + self._zip64 = zip64 + self._zipfile = zf + self._compressor = _get_compressor(zinfo.compress_type) + self._file_size = 0 + self._compress_size = 0 + self._crc = 0 + + @property + def _fileobj(self): + return self._zipfile.fp + + def writable(self): + return True + + def write(self, data): + nbytes = len(data) + self._file_size += nbytes + self._crc = crc32(data, self._crc) + if self._compressor: + data = self._compressor.compress(data) + self._compress_size += len(data) + self._fileobj.write(data) + return nbytes + + def close(self): + super().close() + # Flush any data from the compressor, and update header info + if self._compressor: + buf = self._compressor.flush() + self._compress_size += len(buf) + self._fileobj.write(buf) + self._zinfo.compress_size = self._compress_size + else: + self._zinfo.compress_size = self._file_size + self._zinfo.CRC = self._crc + self._zinfo.file_size = self._file_size + + # Write updated header info + if self._zinfo.flag_bits & 0x08: + # Write CRC and file sizes after the file data + fmt = '<LQQ' if self._zip64 else '<LLL' + self._fileobj.write(struct.pack(fmt, self._zinfo.CRC, + self._zinfo.compress_size, self._zinfo.file_size)) + self._zipfile.start_dir = self._fileobj.tell() + else: + if not self._zip64: + if self._file_size > ZIP64_LIMIT: + raise RuntimeError('File size unexpectedly exceeded ZIP64 ' + 'limit') + if self._compress_size > ZIP64_LIMIT: + raise RuntimeError('Compressed size unexpectedly exceeded ' + 'ZIP64 limit') + # Seek backwards and write file header (which will now include + # correct CRC and file sizes) + + # Preserve current position in file + self._zipfile.start_dir = self._fileobj.tell() + self._fileobj.seek(self._zinfo.header_offset) + self._fileobj.write(self._zinfo.FileHeader(self._zip64)) + self._fileobj.seek(self._zipfile.start_dir) + + self._zipfile._writing = False + + # Successfully written: Add file to our caches + self._zipfile.filelist.append(self._zinfo) + self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo + class ZipFile: """ Class with methods to open, read, write, close, list zip files. @@ -982,7 +1055,7 @@ class ZipFile: """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x', or append 'a'.""" if mode not in ('r', 'w', 'x', 'a'): - raise RuntimeError("ZipFile requires mode 'r', 'w', 'x', or 'a'") + raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'") _check_compression(compression) @@ -1020,6 +1093,7 @@ class ZipFile: self._fileRefCnt = 1 self._lock = threading.RLock() self._seekable = True + self._writing = False try: if mode == 'r': @@ -1055,7 +1129,7 @@ class ZipFile: self._didModify = True self.start_dir = self._start_disk = self.fp.tell() else: - raise RuntimeError("Mode must be 'r', 'w', 'x', or 'a'") + raise ValueError("Mode must be 'r', 'w', 'x', or 'a'") except: fp = self.fp self.fp = None @@ -1204,7 +1278,7 @@ class ZipFile: def setpassword(self, pwd): """Set default password for encrypted files.""" if pwd and not isinstance(pwd, bytes): - raise TypeError("pwd: expected bytes, got %s" % type(pwd)) + raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) if pwd: self.pwd = pwd else: @@ -1218,7 +1292,7 @@ class ZipFile: @comment.setter def comment(self, comment): if not isinstance(comment, bytes): - raise TypeError("comment: expected bytes, got %s" % type(comment)) + raise TypeError("comment: expected bytes, got %s" % type(comment).__name__) # check for valid comment length if len(comment) > ZIP_MAX_COMMENT: import warnings @@ -1233,30 +1307,55 @@ class ZipFile: with self.open(name, "r", pwd) as fp: return fp.read() - def open(self, name, mode="r", pwd=None): - """Return file-like object for 'name'.""" - if mode not in ("r", "U", "rU"): - raise RuntimeError('open() requires mode "r", "U", or "rU"') - if 'U' in mode: - import warnings - warnings.warn("'U' mode is deprecated", - DeprecationWarning, 2) + def open(self, name, mode="r", pwd=None, *, force_zip64=False): + """Return file-like object for 'name'. + + name is a string for the file name within the ZIP file, or a ZipInfo + object. + + mode should be 'r' to read a file already in the ZIP file, or 'w' to + write to a file newly added to the archive. + + pwd is the password to decrypt files (only used for reading). + + When writing, if the file size is not known in advance but may exceed + 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large + files. If the size is known in advance, it is best to pass a ZipInfo + instance for name, with zinfo.file_size set. + """ + if mode not in {"r", "w"}: + raise ValueError('open() requires mode "r" or "w"') if pwd and not isinstance(pwd, bytes): - raise TypeError("pwd: expected bytes, got %s" % type(pwd)) + raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) + if pwd and (mode == "w"): + raise ValueError("pwd is only supported for reading files") if not self.fp: - raise RuntimeError( - "Attempt to read ZIP archive that was already closed") + raise ValueError( + "Attempt to use ZIP archive that was already closed") # Make sure we have an info object if isinstance(name, ZipInfo): # 'name' is already an info object zinfo = name + elif mode == 'w': + zinfo = ZipInfo(name) + zinfo.compress_type = self.compression else: # Get info object for name zinfo = self.getinfo(name) + if mode == 'w': + return self._open_to_write(zinfo, force_zip64=force_zip64) + + if self._writing: + raise ValueError("Can't read from the ZIP file while there " + "is an open writing handle on it. " + "Close the writing handle before trying to read.") + + # Open for reading: self._fileRefCnt += 1 - zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose, self._lock) + zef_file = _SharedFile(self.fp, zinfo.header_offset, + self._fpclose, self._lock, lambda: self._writing) try: # Skip the file header: fheader = zef_file.read(sizeFileHeader) @@ -1296,7 +1395,7 @@ class ZipFile: if not pwd: pwd = self.pwd if not pwd: - raise RuntimeError("File %s is encrypted, password " + raise RuntimeError("File %r is encrypted, password " "required for extraction" % name) zd = _ZipDecrypter(pwd) @@ -1314,13 +1413,56 @@ class ZipFile: # compare against the CRC otherwise check_byte = (zinfo.CRC >> 24) & 0xff if h[11] != check_byte: - raise RuntimeError("Bad password for file", name) + raise RuntimeError("Bad password for file %r" % name) return ZipExtFile(zef_file, mode, zinfo, zd, True) except: zef_file.close() raise + def _open_to_write(self, zinfo, force_zip64=False): + if force_zip64 and not self._allowZip64: + raise ValueError( + "force_zip64 is True, but allowZip64 was False when opening " + "the ZIP file." + ) + if self._writing: + raise ValueError("Can't write to the ZIP file while there is " + "another write handle open on it. " + "Close the first handle before opening another.") + + # Sizes and CRC are overwritten with correct data after processing the file + if not hasattr(zinfo, 'file_size'): + zinfo.file_size = 0 + zinfo.compress_size = 0 + zinfo.CRC = 0 + + zinfo.flag_bits = 0x00 + if zinfo.compress_type == ZIP_LZMA: + # Compressed data includes an end-of-stream (EOS) marker + zinfo.flag_bits |= 0x02 + if not self._seekable: + zinfo.flag_bits |= 0x08 + + if not zinfo.external_attr: + zinfo.external_attr = 0o600 << 16 # permissions: ?rw------- + + # Compressed size can be larger than uncompressed size + zip64 = self._allowZip64 and \ + (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT) + + if self._seekable: + self.fp.seek(self.start_dir) + zinfo.header_offset = self.fp.tell() + + self._writecheck(zinfo) + self._didModify = True + + self.fp.write(zinfo.FileHeader(zip64)) + + self._writing = True + return _ZipWriteFile(self, zinfo, zip64) + def extract(self, member, path=None, pwd=None): """Extract a member from the archive to the current working directory, using its full name. Its file information is extracted as accurately @@ -1390,7 +1532,7 @@ class ZipFile: if upperdirs and not os.path.exists(upperdirs): os.makedirs(upperdirs) - if member.filename[-1] == '/': + if member.is_dir(): if not os.path.isdir(targetpath): os.mkdir(targetpath) return targetpath @@ -1407,9 +1549,9 @@ class ZipFile: import warnings warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3) if self.mode not in ('w', 'x', 'a'): - raise RuntimeError("write() requires mode 'w', 'x', or 'a'") + raise ValueError("write() requires mode 'w', 'x', or 'a'") if not self.fp: - raise RuntimeError( + raise ValueError( "Attempt to write ZIP archive that was already closed") _check_compression(zinfo.compress_type) if not self._allowZip64: @@ -1428,105 +1570,43 @@ class ZipFile: """Put the bytes from filename into the archive under the name arcname.""" if not self.fp: - raise RuntimeError( + raise ValueError( "Attempt to write to ZIP archive that was already closed") + if self._writing: + raise ValueError( + "Can't write to ZIP archive while an open writing handle exists" + ) - st = os.stat(filename) - isdir = stat.S_ISDIR(st.st_mode) - mtime = time.localtime(st.st_mtime) - date_time = mtime[0:6] - # Create ZipInfo instance to store file information - if arcname is None: - arcname = filename - arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) - while arcname[0] in (os.sep, os.altsep): - arcname = arcname[1:] - if isdir: - arcname += '/' - zinfo = ZipInfo(arcname, date_time) - zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes - if isdir: - zinfo.compress_type = ZIP_STORED - elif compress_type is None: - zinfo.compress_type = self.compression + zinfo = ZipInfo.from_file(filename, arcname) + + if zinfo.is_dir(): + zinfo.compress_size = 0 + zinfo.CRC = 0 else: - zinfo.compress_type = compress_type + if compress_type is not None: + zinfo.compress_type = compress_type + else: + zinfo.compress_type = self.compression - zinfo.file_size = st.st_size - zinfo.flag_bits = 0x00 - with self._lock: - if self._seekable: - self.fp.seek(self.start_dir) - zinfo.header_offset = self.fp.tell() # Start of header bytes - if zinfo.compress_type == ZIP_LZMA: + if zinfo.is_dir(): + with self._lock: + if self._seekable: + self.fp.seek(self.start_dir) + zinfo.header_offset = self.fp.tell() # Start of header bytes + if zinfo.compress_type == ZIP_LZMA: # Compressed data includes an end-of-stream (EOS) marker - zinfo.flag_bits |= 0x02 + zinfo.flag_bits |= 0x02 - self._writecheck(zinfo) - self._didModify = True + self._writecheck(zinfo) + self._didModify = True - if isdir: - zinfo.file_size = 0 - zinfo.compress_size = 0 - zinfo.CRC = 0 - zinfo.external_attr |= 0x10 # MS-DOS directory flag self.filelist.append(zinfo) self.NameToInfo[zinfo.filename] = zinfo self.fp.write(zinfo.FileHeader(False)) self.start_dir = self.fp.tell() - return - - cmpr = _get_compressor(zinfo.compress_type) - if not self._seekable: - zinfo.flag_bits |= 0x08 - with open(filename, "rb") as fp: - # Must overwrite CRC and sizes with correct data later - zinfo.CRC = CRC = 0 - zinfo.compress_size = compress_size = 0 - # Compressed size can be larger than uncompressed size - zip64 = self._allowZip64 and \ - zinfo.file_size * 1.05 > ZIP64_LIMIT - self.fp.write(zinfo.FileHeader(zip64)) - file_size = 0 - while 1: - buf = fp.read(1024 * 8) - if not buf: - break - file_size = file_size + len(buf) - CRC = crc32(buf, CRC) - if cmpr: - buf = cmpr.compress(buf) - compress_size = compress_size + len(buf) - self.fp.write(buf) - if cmpr: - buf = cmpr.flush() - compress_size = compress_size + len(buf) - self.fp.write(buf) - zinfo.compress_size = compress_size - else: - zinfo.compress_size = file_size - zinfo.CRC = CRC - zinfo.file_size = file_size - if zinfo.flag_bits & 0x08: - # Write CRC and file sizes after the file data - fmt = '<LQQ' if zip64 else '<LLL' - self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size, - zinfo.file_size)) - self.start_dir = self.fp.tell() - else: - if not zip64 and self._allowZip64: - if file_size > ZIP64_LIMIT: - raise RuntimeError('File size has increased during compressing') - if compress_size > ZIP64_LIMIT: - raise RuntimeError('Compressed size larger than uncompressed size') - # Seek backwards and write file header (which will now include - # correct CRC and file sizes) - self.start_dir = self.fp.tell() # Preserve current position in file - self.fp.seek(zinfo.header_offset) - self.fp.write(zinfo.FileHeader(zip64)) - self.fp.seek(self.start_dir) - self.filelist.append(zinfo) - self.NameToInfo[zinfo.filename] = zinfo + else: + with open(filename, "rb") as src, self.open(zinfo, 'w') as dest: + shutil.copyfileobj(src, dest, 1024*8) def writestr(self, zinfo_or_arcname, data, compress_type=None): """Write a file into the archive. The contents is 'data', which @@ -1549,47 +1629,20 @@ class ZipFile: zinfo = zinfo_or_arcname if not self.fp: - raise RuntimeError( + raise ValueError( "Attempt to write to ZIP archive that was already closed") + if self._writing: + raise ValueError( + "Can't write to ZIP archive while an open writing handle exists." + ) + + if compress_type is not None: + zinfo.compress_type = compress_type zinfo.file_size = len(data) # Uncompressed size with self._lock: - if self._seekable: - self.fp.seek(self.start_dir) - zinfo.header_offset = self.fp.tell() # Start of header data - if compress_type is not None: - zinfo.compress_type = compress_type - zinfo.header_offset = self.fp.tell() # Start of header data - if compress_type is not None: - zinfo.compress_type = compress_type - if zinfo.compress_type == ZIP_LZMA: - # Compressed data includes an end-of-stream (EOS) marker - zinfo.flag_bits |= 0x02 - - self._writecheck(zinfo) - self._didModify = True - zinfo.CRC = crc32(data) # CRC-32 checksum - co = _get_compressor(zinfo.compress_type) - if co: - data = co.compress(data) + co.flush() - zinfo.compress_size = len(data) # Compressed size - else: - zinfo.compress_size = zinfo.file_size - zip64 = zinfo.file_size > ZIP64_LIMIT or \ - zinfo.compress_size > ZIP64_LIMIT - if zip64 and not self._allowZip64: - raise LargeZipFile("Filesize would require ZIP64 extensions") - self.fp.write(zinfo.FileHeader(zip64)) - self.fp.write(data) - if zinfo.flag_bits & 0x08: - # Write CRC and file sizes after the file data - fmt = '<LQQ' if zip64 else '<LLL' - self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size, - zinfo.file_size)) - self.fp.flush() - self.start_dir = self.fp.tell() - self.filelist.append(zinfo) - self.NameToInfo[zinfo.filename] = zinfo + with self.open(zinfo, mode='w') as dest: + dest.write(data) def __del__(self): """Call the "close()" method in case the user forgot.""" @@ -1601,6 +1654,11 @@ class ZipFile: if self.fp is None: return + if self._writing: + raise ValueError("Can't close the ZIP file while there is " + "an open writing handle on it. " + "Close the writing handle before closing the zip.") + try: if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records with self._lock: @@ -1745,7 +1803,7 @@ class PyZipFile(ZipFile): if filterfunc and not filterfunc(pathname): if self.debug: label = 'path' if os.path.isdir(pathname) else 'file' - print('%s "%s" skipped by filterfunc' % (label, pathname)) + print('%s %r skipped by filterfunc' % (label, pathname)) return dir, name = os.path.split(pathname) if os.path.isdir(pathname): @@ -1776,7 +1834,7 @@ class PyZipFile(ZipFile): elif ext == ".py": if filterfunc and not filterfunc(path): if self.debug: - print('file "%s" skipped by filterfunc' % path) + print('file %r skipped by filterfunc' % path) continue fname, arcname = self._get_codename(path[0:-3], basename) @@ -1793,7 +1851,7 @@ class PyZipFile(ZipFile): if ext == ".py": if filterfunc and not filterfunc(path): if self.debug: - print('file "%s" skipped by filterfunc' % path) + print('file %r skipped by filterfunc' % path) continue fname, arcname = self._get_codename(path[0:-3], basename) |