diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2012-05-13 08:06:36 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2012-05-13 08:06:36 (GMT) |
commit | 7fb79fcb64ad9832c2d616b1b6fe5a93d2ff3288 (patch) | |
tree | 0dce33ace1abe53028aa3722e9cb9db15ddea2ca /Lib/zipfile.py | |
parent | bb54b33cec2d83e8b5f6100361ff57cb22b5105a (diff) | |
download | cpython-7fb79fcb64ad9832c2d616b1b6fe5a93d2ff3288.zip cpython-7fb79fcb64ad9832c2d616b1b6fe5a93d2ff3288.tar.gz cpython-7fb79fcb64ad9832c2d616b1b6fe5a93d2ff3288.tar.bz2 |
Issue #14366: Support lzma compression in zip files.
Patch by Serhiy Storchaka.
Diffstat (limited to 'Lib/zipfile.py')
-rw-r--r-- | Lib/zipfile.py | 121 |
1 file changed, 105 insertions, 16 deletions
```diff
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index c53b127..c127c2c 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -27,8 +27,13 @@ try:
 except ImportError:
     bz2 = None
 
+try:
+    import lzma # We may need its compression method
+except ImportError:
+    lzma = None
+
 __all__ = ["BadZipFile", "BadZipfile", "error",
-           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2",
+           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
            "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
 
 class BadZipFile(Exception):
@@ -52,13 +57,15 @@ ZIP_MAX_COMMENT = (1 << 16) - 1
 ZIP_STORED = 0
 ZIP_DEFLATED = 8
 ZIP_BZIP2 = 12
+ZIP_LZMA = 14
 # Other ZIP compression methods not supported
 
 DEFAULT_VERSION = 20
 ZIP64_VERSION = 45
 BZIP2_VERSION = 46
+LZMA_VERSION = 63
 # we recognize (but not necessarily support) all features up to that version
-MAX_EXTRACT_VERSION = 46
+MAX_EXTRACT_VERSION = 63
 
 # Below are some formats and associated data for reading/writing headers using
 # the struct module. The names and structures of headers/records are those used
@@ -367,6 +374,8 @@ class ZipInfo (object):
 
         if self.compress_type == ZIP_BZIP2:
             min_version = max(BZIP2_VERSION, min_version)
+        elif self.compress_type == ZIP_LZMA:
+            min_version = max(LZMA_VERSION, min_version)
 
         self.extract_version = max(min_version, self.extract_version)
         self.create_version = max(min_version, self.create_version)
@@ -480,6 +489,77 @@ class _ZipDecrypter:
         return c
 
 
+class LZMACompressor:
+
+    def __init__(self):
+        self._comp = None
+
+    def _init(self):
+        props = lzma.encode_filter_properties({'id': lzma.FILTER_LZMA1})
+        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
+                lzma.decode_filter_properties(lzma.FILTER_LZMA1, props)
+        ])
+        return struct.pack('<BBH', 9, 4, len(props)) + props
+
+    def compress(self, data):
+        if self._comp is None:
+            return self._init() + self._comp.compress(data)
+        return self._comp.compress(data)
+
+    def flush(self):
+        if self._comp is None:
+            return self._init() + self._comp.flush()
+        return self._comp.flush()
+
+
+class LZMADecompressor:
+
+    def __init__(self):
+        self._decomp = None
+        self._unconsumed = b''
+        self.eof = False
+
+    def decompress(self, data):
+        if self._decomp is None:
+            self._unconsumed += data
+            if len(self._unconsumed) <= 4:
+                return b''
+            psize, = struct.unpack('<H', self._unconsumed[2:4])
+            if len(self._unconsumed) <= 4 + psize:
+                return b''
+
+            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
+                lzma.decode_filter_properties(lzma.FILTER_LZMA1,
+                                              self._unconsumed[4:4 + psize])
+            ])
+            data = self._unconsumed[4 + psize:]
+            del self._unconsumed
+
+        result = self._decomp.decompress(data)
+        self.eof = self._decomp.eof
+        return result
+
+
+compressor_names = {
+    0: 'store',
+    1: 'shrink',
+    2: 'reduce',
+    3: 'reduce',
+    4: 'reduce',
+    5: 'reduce',
+    6: 'implode',
+    7: 'tokenize',
+    8: 'deflate',
+    9: 'deflate64',
+    10: 'implode',
+    12: 'bzip2',
+    14: 'lzma',
+    18: 'terse',
+    19: 'lz77',
+    97: 'wavpack',
+    98: 'ppmd',
+}
+
 def _check_compression(compression):
     if compression == ZIP_STORED:
         pass
@@ -491,6 +571,10 @@ def _check_compression(compression):
         if not bz2:
             raise RuntimeError(
                 "Compression requires the (missing) bz2 module")
+    elif compression == ZIP_LZMA:
+        if not lzma:
+            raise RuntimeError(
+                "Compression requires the (missing) lzma module")
     else:
         raise RuntimeError("That compression method is not supported")
 
@@ -501,6 +585,8 @@ def _get_compressor(compress_type):
             zlib.DEFLATED, -15)
     elif compress_type == ZIP_BZIP2:
         return bz2.BZ2Compressor()
+    elif compress_type == ZIP_LZMA:
+        return LZMACompressor()
     else:
         return None
 
@@ -512,19 +598,10 @@ def _get_decompressor(compress_type):
         return zlib.decompressobj(-15)
     elif compress_type == ZIP_BZIP2:
         return bz2.BZ2Decompressor()
+    elif compress_type == ZIP_LZMA:
+        return LZMADecompressor()
     else:
-        unknown_compressors = {
-            1: 'shrink',
-            2: 'reduce',
-            3: 'reduce',
-            4: 'reduce',
-            5: 'reduce',
-            6: 'implode',
-            9: 'enhanced deflate',
-            10: 'implode',
-            14: 'lzma',
-        }
-        descr = unknown_compressors.get(compress_type)
+        descr = compressor_names.get(compress_type)
         if descr:
             raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
         else:
@@ -781,8 +858,8 @@ class ZipFile:
     file: Either the path to the file, or a file-like object.
           If it is a path, the file will be opened and closed by ZipFile.
     mode: The mode can be either read "r", write "w" or append "a".
-    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib) or
-                 ZIP_BZIP2 (requires bz2).
+    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
+                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
     allowZip64: if True ZipFile will create files with ZIP64 extensions when
                 needed, otherwise it will raise an exception when this would
                 be necessary.
@@ -1062,6 +1139,10 @@ class ZipFile:
                 # Zip 2.7: compressed patched data
                 raise NotImplementedError("compressed patched data (flag bit 5)")
 
+            if zinfo.flag_bits & 0x40:
+                # strong encryption
+                raise NotImplementedError("strong encryption (flag bit 6)")
+
             if zinfo.flag_bits & 0x800:
                 # UTF-8 filename
                 fname_str = fname.decode("utf-8")
@@ -1220,6 +1301,9 @@ class ZipFile:
         zinfo.file_size = st.st_size
         zinfo.flag_bits = 0x00
         zinfo.header_offset = self.fp.tell() # Start of header bytes
+        if zinfo.compress_type == ZIP_LZMA:
+            # Compressed data includes an end-of-stream (EOS) marker
+            zinfo.flag_bits |= 0x02
 
         self._writecheck(zinfo)
         self._didModify = True
@@ -1292,6 +1376,9 @@ class ZipFile:
         zinfo.header_offset = self.fp.tell() # Start of header data
         if compress_type is not None:
             zinfo.compress_type = compress_type
+        if zinfo.compress_type == ZIP_LZMA:
+            # Compressed data includes an end-of-stream (EOS) marker
+            zinfo.flag_bits |= 0x02
 
         self._writecheck(zinfo)
         self._didModify = True
@@ -1360,6 +1447,8 @@ class ZipFile:
 
                 if zinfo.compress_type == ZIP_BZIP2:
                     min_version = max(BZIP2_VERSION, min_version)
+                elif zinfo.compress_type == ZIP_LZMA:
+                    min_version = max(LZMA_VERSION, min_version)
 
                 extract_version = max(min_version, zinfo.extract_version)
                 create_version = max(min_version, zinfo.create_version)
```