diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2015-01-26 11:53:38 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2015-01-26 11:53:38 (GMT) |
commit | f15e52402640284b01860b6f56dfa50d7efda67b (patch) | |
tree | 29d58024caefd2a65406c3e1288746681aaf9a52 | |
parent | b67c516d8beecfde153236ccc24a53a8a6c6c16a (diff) | |
download | cpython-f15e52402640284b01860b6f56dfa50d7efda67b.zip cpython-f15e52402640284b01860b6f56dfa50d7efda67b.tar.gz cpython-f15e52402640284b01860b6f56dfa50d7efda67b.tar.bz2 |
Issue #14099: Writing to ZipFile and reading multiple ZipExtFiles is
now thread-safe.
-rw-r--r-- | Lib/zipfile.py | 407 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
2 files changed, 212 insertions, 198 deletions
diff --git a/Lib/zipfile.py b/Lib/zipfile.py index a1f5b18..d0789b6 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -13,6 +13,7 @@ import stat import shutil import struct import binascii +import threading try: @@ -647,16 +648,18 @@ def _get_decompressor(compress_type): class _SharedFile: - def __init__(self, file, pos, close): + def __init__(self, file, pos, close, lock): self._file = file self._pos = pos self._close = close + self._lock = lock def read(self, n=-1): - self._file.seek(self._pos) - data = self._file.read(n) - self._pos = self._file.tell() - return data + with self._lock: + self._file.seek(self._pos) + data = self._file.read(n) + self._pos = self._file.tell() + return data def close(self): if self._file is not None: @@ -990,6 +993,7 @@ class ZipFile: self.fp = file self.filename = getattr(file, 'name', None) self._fileRefCnt = 1 + self._lock = threading.RLock() try: if mode == 'r': @@ -1214,7 +1218,7 @@ class ZipFile: zinfo = self.getinfo(name) self._fileRefCnt += 1 - zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose) + zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose, self._lock) try: # Skip the file header: fheader = zef_file.read(sizeFileHeader) @@ -1410,68 +1414,69 @@ class ZipFile: zinfo.file_size = st.st_size zinfo.flag_bits = 0x00 - self.fp.seek(self.start_dir, 0) - zinfo.header_offset = self.fp.tell() # Start of header bytes - if zinfo.compress_type == ZIP_LZMA: - # Compressed data includes an end-of-stream (EOS) marker - zinfo.flag_bits |= 0x02 - - self._writecheck(zinfo) - self._didModify = True - - if isdir: - zinfo.file_size = 0 - zinfo.compress_size = 0 - zinfo.CRC = 0 - zinfo.external_attr |= 0x10 # MS-DOS directory flag + with self._lock: + self.fp.seek(self.start_dir, 0) + zinfo.header_offset = self.fp.tell() # Start of header bytes + if zinfo.compress_type == ZIP_LZMA: + # Compressed data includes an end-of-stream (EOS) marker + zinfo.flag_bits |= 0x02 + + self._writecheck(zinfo) + 
self._didModify = True + + if isdir: + zinfo.file_size = 0 + zinfo.compress_size = 0 + zinfo.CRC = 0 + zinfo.external_attr |= 0x10 # MS-DOS directory flag + self.filelist.append(zinfo) + self.NameToInfo[zinfo.filename] = zinfo + self.fp.write(zinfo.FileHeader(False)) + self.start_dir = self.fp.tell() + return + + cmpr = _get_compressor(zinfo.compress_type) + with open(filename, "rb") as fp: + # Must overwrite CRC and sizes with correct data later + zinfo.CRC = CRC = 0 + zinfo.compress_size = compress_size = 0 + # Compressed size can be larger than uncompressed size + zip64 = self._allowZip64 and \ + zinfo.file_size * 1.05 > ZIP64_LIMIT + self.fp.write(zinfo.FileHeader(zip64)) + file_size = 0 + while 1: + buf = fp.read(1024 * 8) + if not buf: + break + file_size = file_size + len(buf) + CRC = crc32(buf, CRC) & 0xffffffff + if cmpr: + buf = cmpr.compress(buf) + compress_size = compress_size + len(buf) + self.fp.write(buf) + if cmpr: + buf = cmpr.flush() + compress_size = compress_size + len(buf) + self.fp.write(buf) + zinfo.compress_size = compress_size + else: + zinfo.compress_size = file_size + zinfo.CRC = CRC + zinfo.file_size = file_size + if not zip64 and self._allowZip64: + if file_size > ZIP64_LIMIT: + raise RuntimeError('File size has increased during compressing') + if compress_size > ZIP64_LIMIT: + raise RuntimeError('Compressed size larger than uncompressed size') + # Seek backwards and write file header (which will now include + # correct CRC and file sizes) + self.start_dir = self.fp.tell() # Preserve current position in file + self.fp.seek(zinfo.header_offset, 0) + self.fp.write(zinfo.FileHeader(zip64)) + self.fp.seek(self.start_dir, 0) self.filelist.append(zinfo) self.NameToInfo[zinfo.filename] = zinfo - self.fp.write(zinfo.FileHeader(False)) - self.start_dir = self.fp.tell() - return - - cmpr = _get_compressor(zinfo.compress_type) - with open(filename, "rb") as fp: - # Must overwrite CRC and sizes with correct data later - zinfo.CRC = CRC = 0 - 
zinfo.compress_size = compress_size = 0 - # Compressed size can be larger than uncompressed size - zip64 = self._allowZip64 and \ - zinfo.file_size * 1.05 > ZIP64_LIMIT - self.fp.write(zinfo.FileHeader(zip64)) - file_size = 0 - while 1: - buf = fp.read(1024 * 8) - if not buf: - break - file_size = file_size + len(buf) - CRC = crc32(buf, CRC) & 0xffffffff - if cmpr: - buf = cmpr.compress(buf) - compress_size = compress_size + len(buf) - self.fp.write(buf) - if cmpr: - buf = cmpr.flush() - compress_size = compress_size + len(buf) - self.fp.write(buf) - zinfo.compress_size = compress_size - else: - zinfo.compress_size = file_size - zinfo.CRC = CRC - zinfo.file_size = file_size - if not zip64 and self._allowZip64: - if file_size > ZIP64_LIMIT: - raise RuntimeError('File size has increased during compressing') - if compress_size > ZIP64_LIMIT: - raise RuntimeError('Compressed size larger than uncompressed size') - # Seek backwards and write file header (which will now include - # correct CRC and file sizes) - self.start_dir = self.fp.tell() # Preserve current position in file - self.fp.seek(zinfo.header_offset, 0) - self.fp.write(zinfo.FileHeader(zip64)) - self.fp.seek(self.start_dir, 0) - self.filelist.append(zinfo) - self.NameToInfo[zinfo.filename] = zinfo def writestr(self, zinfo_or_arcname, data, compress_type=None): """Write a file into the archive. 
The contents is 'data', which @@ -1498,38 +1503,39 @@ class ZipFile: "Attempt to write to ZIP archive that was already closed") zinfo.file_size = len(data) # Uncompressed size - self.fp.seek(self.start_dir, 0) - zinfo.header_offset = self.fp.tell() # Start of header data - if compress_type is not None: - zinfo.compress_type = compress_type - if zinfo.compress_type == ZIP_LZMA: - # Compressed data includes an end-of-stream (EOS) marker - zinfo.flag_bits |= 0x02 - - self._writecheck(zinfo) - self._didModify = True - zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum - co = _get_compressor(zinfo.compress_type) - if co: - data = co.compress(data) + co.flush() - zinfo.compress_size = len(data) # Compressed size - else: - zinfo.compress_size = zinfo.file_size - zip64 = zinfo.file_size > ZIP64_LIMIT or \ - zinfo.compress_size > ZIP64_LIMIT - if zip64 and not self._allowZip64: - raise LargeZipFile("Filesize would require ZIP64 extensions") - self.fp.write(zinfo.FileHeader(zip64)) - self.fp.write(data) - if zinfo.flag_bits & 0x08: - # Write CRC and file sizes after the file data - fmt = '<LQQ' if zip64 else '<LLL' - self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size, - zinfo.file_size)) - self.fp.flush() - self.start_dir = self.fp.tell() - self.filelist.append(zinfo) - self.NameToInfo[zinfo.filename] = zinfo + with self._lock: + self.fp.seek(self.start_dir, 0) + zinfo.header_offset = self.fp.tell() # Start of header data + if compress_type is not None: + zinfo.compress_type = compress_type + if zinfo.compress_type == ZIP_LZMA: + # Compressed data includes an end-of-stream (EOS) marker + zinfo.flag_bits |= 0x02 + + self._writecheck(zinfo) + self._didModify = True + zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum + co = _get_compressor(zinfo.compress_type) + if co: + data = co.compress(data) + co.flush() + zinfo.compress_size = len(data) # Compressed size + else: + zinfo.compress_size = zinfo.file_size + zip64 = zinfo.file_size > ZIP64_LIMIT or \ + 
zinfo.compress_size > ZIP64_LIMIT + if zip64 and not self._allowZip64: + raise LargeZipFile("Filesize would require ZIP64 extensions") + self.fp.write(zinfo.FileHeader(zip64)) + self.fp.write(data) + if zinfo.flag_bits & 0x08: + # Write CRC and file sizes after the file data + fmt = '<LQQ' if zip64 else '<LLL' + self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size, + zinfo.file_size)) + self.fp.flush() + self.start_dir = self.fp.tell() + self.filelist.append(zinfo) + self.NameToInfo[zinfo.filename] = zinfo def __del__(self): """Call the "close()" method in case the user forgot.""" @@ -1543,111 +1549,116 @@ class ZipFile: try: if self.mode in ("w", "a") and self._didModify: # write ending records - self.fp.seek(self.start_dir, 0) - for zinfo in self.filelist: # write central directory - dt = zinfo.date_time - dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] - dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) - extra = [] - if zinfo.file_size > ZIP64_LIMIT \ - or zinfo.compress_size > ZIP64_LIMIT: - extra.append(zinfo.file_size) - extra.append(zinfo.compress_size) - file_size = 0xffffffff - compress_size = 0xffffffff - else: - file_size = zinfo.file_size - compress_size = zinfo.compress_size - - if zinfo.header_offset > ZIP64_LIMIT: - extra.append(zinfo.header_offset) - header_offset = 0xffffffff - else: - header_offset = zinfo.header_offset - - extra_data = zinfo.extra - min_version = 0 - if extra: - # Append a ZIP64 field to the extra's - extra_data = struct.pack( - '<HH' + 'Q'*len(extra), - 1, 8*len(extra), *extra) + extra_data - - min_version = ZIP64_VERSION - - if zinfo.compress_type == ZIP_BZIP2: - min_version = max(BZIP2_VERSION, min_version) - elif zinfo.compress_type == ZIP_LZMA: - min_version = max(LZMA_VERSION, min_version) - - extract_version = max(min_version, zinfo.extract_version) - create_version = max(min_version, zinfo.create_version) - try: - filename, flag_bits = zinfo._encodeFilenameFlags() - centdir = struct.pack(structCentralDir, - 
stringCentralDir, create_version, - zinfo.create_system, extract_version, zinfo.reserved, - flag_bits, zinfo.compress_type, dostime, dosdate, - zinfo.CRC, compress_size, file_size, - len(filename), len(extra_data), len(zinfo.comment), - 0, zinfo.internal_attr, zinfo.external_attr, - header_offset) - except DeprecationWarning: - print((structCentralDir, stringCentralDir, create_version, - zinfo.create_system, extract_version, zinfo.reserved, - zinfo.flag_bits, zinfo.compress_type, dostime, dosdate, - zinfo.CRC, compress_size, file_size, - len(zinfo.filename), len(extra_data), len(zinfo.comment), - 0, zinfo.internal_attr, zinfo.external_attr, - header_offset), file=sys.stderr) - raise - self.fp.write(centdir) - self.fp.write(filename) - self.fp.write(extra_data) - self.fp.write(zinfo.comment) - - pos2 = self.fp.tell() - # Write end-of-zip-archive record - centDirCount = len(self.filelist) - centDirSize = pos2 - self.start_dir - centDirOffset = self.start_dir - requires_zip64 = None - if centDirCount > ZIP_FILECOUNT_LIMIT: - requires_zip64 = "Files count" - elif centDirOffset > ZIP64_LIMIT: - requires_zip64 = "Central directory offset" - elif centDirSize > ZIP64_LIMIT: - requires_zip64 = "Central directory size" - if requires_zip64: - # Need to write the ZIP64 end-of-archive records - if not self._allowZip64: - raise LargeZipFile(requires_zip64 + - " would require ZIP64 extensions") - zip64endrec = struct.pack( - structEndArchive64, stringEndArchive64, - 44, 45, 45, 0, 0, centDirCount, centDirCount, - centDirSize, centDirOffset) - self.fp.write(zip64endrec) - - zip64locrec = struct.pack( - structEndArchive64Locator, - stringEndArchive64Locator, 0, pos2, 1) - self.fp.write(zip64locrec) - centDirCount = min(centDirCount, 0xFFFF) - centDirSize = min(centDirSize, 0xFFFFFFFF) - centDirOffset = min(centDirOffset, 0xFFFFFFFF) - - endrec = struct.pack(structEndArchive, stringEndArchive, - 0, 0, centDirCount, centDirCount, - centDirSize, centDirOffset, len(self._comment)) - 
self.fp.write(endrec) - self.fp.write(self._comment) - self.fp.flush() + with self._lock: + self.fp.seek(self.start_dir, 0) + self._write_end_record() finally: fp = self.fp self.fp = None self._fpclose(fp) + def _write_end_record(self): + self.fp.seek(self.start_dir, 0) + for zinfo in self.filelist: # write central directory + dt = zinfo.date_time + dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] + dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) + extra = [] + if zinfo.file_size > ZIP64_LIMIT \ + or zinfo.compress_size > ZIP64_LIMIT: + extra.append(zinfo.file_size) + extra.append(zinfo.compress_size) + file_size = 0xffffffff + compress_size = 0xffffffff + else: + file_size = zinfo.file_size + compress_size = zinfo.compress_size + + if zinfo.header_offset > ZIP64_LIMIT: + extra.append(zinfo.header_offset) + header_offset = 0xffffffff + else: + header_offset = zinfo.header_offset + + extra_data = zinfo.extra + min_version = 0 + if extra: + # Append a ZIP64 field to the extra's + extra_data = struct.pack( + '<HH' + 'Q'*len(extra), + 1, 8*len(extra), *extra) + extra_data + + min_version = ZIP64_VERSION + + if zinfo.compress_type == ZIP_BZIP2: + min_version = max(BZIP2_VERSION, min_version) + elif zinfo.compress_type == ZIP_LZMA: + min_version = max(LZMA_VERSION, min_version) + + extract_version = max(min_version, zinfo.extract_version) + create_version = max(min_version, zinfo.create_version) + try: + filename, flag_bits = zinfo._encodeFilenameFlags() + centdir = struct.pack(structCentralDir, + stringCentralDir, create_version, + zinfo.create_system, extract_version, zinfo.reserved, + flag_bits, zinfo.compress_type, dostime, dosdate, + zinfo.CRC, compress_size, file_size, + len(filename), len(extra_data), len(zinfo.comment), + 0, zinfo.internal_attr, zinfo.external_attr, + header_offset) + except DeprecationWarning: + print((structCentralDir, stringCentralDir, create_version, + zinfo.create_system, extract_version, zinfo.reserved, + zinfo.flag_bits, 
zinfo.compress_type, dostime, dosdate, + zinfo.CRC, compress_size, file_size, + len(zinfo.filename), len(extra_data), len(zinfo.comment), + 0, zinfo.internal_attr, zinfo.external_attr, + header_offset), file=sys.stderr) + raise + self.fp.write(centdir) + self.fp.write(filename) + self.fp.write(extra_data) + self.fp.write(zinfo.comment) + + pos2 = self.fp.tell() + # Write end-of-zip-archive record + centDirCount = len(self.filelist) + centDirSize = pos2 - self.start_dir + centDirOffset = self.start_dir + requires_zip64 = None + if centDirCount > ZIP_FILECOUNT_LIMIT: + requires_zip64 = "Files count" + elif centDirOffset > ZIP64_LIMIT: + requires_zip64 = "Central directory offset" + elif centDirSize > ZIP64_LIMIT: + requires_zip64 = "Central directory size" + if requires_zip64: + # Need to write the ZIP64 end-of-archive records + if not self._allowZip64: + raise LargeZipFile(requires_zip64 + + " would require ZIP64 extensions") + zip64endrec = struct.pack( + structEndArchive64, stringEndArchive64, + 44, 45, 45, 0, 0, centDirCount, centDirCount, + centDirSize, centDirOffset) + self.fp.write(zip64endrec) + + zip64locrec = struct.pack( + structEndArchive64Locator, + stringEndArchive64Locator, 0, pos2, 1) + self.fp.write(zip64locrec) + centDirCount = min(centDirCount, 0xFFFF) + centDirSize = min(centDirSize, 0xFFFFFFFF) + centDirOffset = min(centDirOffset, 0xFFFFFFFF) + + endrec = struct.pack(structEndArchive, stringEndArchive, + 0, 0, centDirCount, centDirCount, + centDirSize, centDirOffset, len(self._comment)) + self.fp.write(endrec) + self.fp.write(self._comment) + self.fp.flush() + def _fpclose(self, fp): assert self._fileRefCnt > 0 self._fileRefCnt -= 1 @@ -218,6 +218,9 @@ Core and Builtins Library ------- +- Issue #14099: Writing to ZipFile and reading multiple ZipExtFiles is + threadsafe now. + - Issue #19361: JSON decoder now raises JSONDecodeError instead of ValueError. - Issue #18518: timeit now rejects statements which can't be compiled outside |