diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2017-03-30 16:09:08 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-03-30 16:09:08 (GMT) |
commit | 06e522521c06671b4559eecf9e2a185c2d62c141 (patch) | |
tree | 62fadf5104aca4268033dd04847ea253e98012fc | |
parent | d4edfc9abffca965e76ebc5957a92031a4d6c4d4 (diff) | |
download | cpython-06e522521c06671b4559eecf9e2a185c2d62c141.zip cpython-06e522521c06671b4559eecf9e2a185c2d62c141.tar.gz cpython-06e522521c06671b4559eecf9e2a185c2d62c141.tar.bz2 |
bpo-10030: Sped up reading encrypted ZIP files by 2 times. (#550)
-rw-r--r-- | Lib/zipfile.py | 112 | ||||
-rw-r--r-- | Misc/NEWS | 2 |
2 files changed, 57 insertions, 57 deletions
diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 8a19ca2..6fdf2c3 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -509,65 +509,63 @@ class ZipInfo (object): return self.filename[-1] == '/' -class _ZipDecrypter: - """Class to handle decryption of files stored within a ZIP archive. +# ZIP encryption uses the CRC32 one-byte primitive for scrambling some +# internal keys. We noticed that a direct implementation is faster than +# relying on binascii.crc32(). + +_crctable = None +def _gen_crc(crc): + for j in range(8): + if crc & 1: + crc = (crc >> 1) ^ 0xEDB88320 + else: + crc >>= 1 + return crc + +# ZIP supports a password-based form of encryption. Even though known +# plaintext attacks have been found against it, it is still useful +# to be able to get data out of such a file. +# +# Usage: +# zd = _ZipDecrypter(mypwd) +# plain_bytes = zd(cypher_bytes) + +def _ZipDecrypter(pwd): + key0 = 305419896 + key1 = 591751049 + key2 = 878082192 + + global _crctable + if _crctable is None: + _crctable = list(map(_gen_crc, range(256))) + crctable = _crctable + + def crc32(ch, crc): + """Compute the CRC32 primitive on one byte.""" + return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF] - ZIP supports a password-based form of encryption. Even though known - plaintext attacks have been found against it, it is still useful - to be able to get data out of such a file. + def update_keys(c): + nonlocal key0, key1, key2 + key0 = crc32(c, key0) + key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF + key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF + key2 = crc32(key1 >> 24, key2) - Usage: - zd = _ZipDecrypter(mypwd) - plain_char = zd(cypher_char) - plain_text = map(zd, cypher_text) - """ + for p in pwd: + update_keys(p) - def _GenerateCRCTable(): - """Generate a CRC-32 table. 
+ def decrypter(data): + """Decrypt a bytes object.""" + result = bytearray() + append = result.append + for c in data: + k = key2 | 2 + c ^= ((k * (k^1)) >> 8) & 0xFF + update_keys(c) + append(c) + return bytes(result) - ZIP encryption uses the CRC32 one-byte primitive for scrambling some - internal keys. We noticed that a direct implementation is faster than - relying on binascii.crc32(). - """ - poly = 0xedb88320 - table = [0] * 256 - for i in range(256): - crc = i - for j in range(8): - if crc & 1: - crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly - else: - crc = ((crc >> 1) & 0x7FFFFFFF) - table[i] = crc - return table - crctable = None - - def _crc32(self, ch, crc): - """Compute the CRC32 primitive on one byte.""" - return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff] - - def __init__(self, pwd): - if _ZipDecrypter.crctable is None: - _ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable() - self.key0 = 305419896 - self.key1 = 591751049 - self.key2 = 878082192 - for p in pwd: - self._UpdateKeys(p) - - def _UpdateKeys(self, c): - self.key0 = self._crc32(c, self.key0) - self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295 - self.key1 = (self.key1 * 134775813 + 1) & 4294967295 - self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2) - - def __call__(self, c): - """Decrypt a single character.""" - assert isinstance(c, int) - k = self.key2 | 2 - c = c ^ (((k * (k^1)) >> 8) & 255) - self._UpdateKeys(c) - return c + return decrypter class LZMACompressor: @@ -953,7 +951,7 @@ class ZipExtFile(io.BufferedIOBase): raise EOFError if self._decrypter is not None: - data = bytes(map(self._decrypter, data)) + data = self._decrypter(data) return data def close(self): @@ -1411,7 +1409,7 @@ class ZipFile: # or the MSB of the file time depending on the header type # and is used to check the correctness of the password. 
header = zef_file.read(12) - h = list(map(zd, header[0:12])) + h = zd(header[0:12]) if zinfo.flag_bits & 0x8: # compare against the file type from extended local headers check_byte = (zinfo._raw_time >> 8) & 0xff @@ -301,6 +301,8 @@ Extension Modules Library ------- +- bpo-10030: Sped up reading encrypted ZIP files by 2 times. + - bpo-29204: Element.getiterator() and the html parameter of XMLParser() were deprecated only in the documentation (since Python 3.2 and 3.4 correspondingly). Now using them emits a deprecation warning. |