summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2017-03-30 16:09:08 (GMT)
committerGitHub <noreply@github.com>2017-03-30 16:09:08 (GMT)
commit06e522521c06671b4559eecf9e2a185c2d62c141 (patch)
tree62fadf5104aca4268033dd04847ea253e98012fc
parentd4edfc9abffca965e76ebc5957a92031a4d6c4d4 (diff)
downloadcpython-06e522521c06671b4559eecf9e2a185c2d62c141.zip
cpython-06e522521c06671b4559eecf9e2a185c2d62c141.tar.gz
cpython-06e522521c06671b4559eecf9e2a185c2d62c141.tar.bz2
bpo-10030: Sped up reading encrypted ZIP files by 2 times. (#550)
-rw-r--r--Lib/zipfile.py112
-rw-r--r--Misc/NEWS2
2 files changed, 57 insertions, 57 deletions
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 8a19ca2..6fdf2c3 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -509,65 +509,63 @@ class ZipInfo (object):
return self.filename[-1] == '/'
-class _ZipDecrypter:
- """Class to handle decryption of files stored within a ZIP archive.
+# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
+# internal keys. We noticed that a direct implementation is faster than
+# relying on binascii.crc32().
+
+_crctable = None
+def _gen_crc(crc):
+ for j in range(8):
+ if crc & 1:
+ crc = (crc >> 1) ^ 0xEDB88320
+ else:
+ crc >>= 1
+ return crc
+
+# ZIP supports a password-based form of encryption. Even though known
+# plaintext attacks have been found against it, it is still useful
+# to be able to get data out of such a file.
+#
+# Usage:
+# zd = _ZipDecrypter(mypwd)
+# plain_bytes = zd(cypher_bytes)
+
+def _ZipDecrypter(pwd):
+ key0 = 305419896
+ key1 = 591751049
+ key2 = 878082192
+
+ global _crctable
+ if _crctable is None:
+ _crctable = list(map(_gen_crc, range(256)))
+ crctable = _crctable
+
+ def crc32(ch, crc):
+ """Compute the CRC32 primitive on one byte."""
+ return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
- ZIP supports a password-based form of encryption. Even though known
- plaintext attacks have been found against it, it is still useful
- to be able to get data out of such a file.
+ def update_keys(c):
+ nonlocal key0, key1, key2
+ key0 = crc32(c, key0)
+ key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
+ key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
+ key2 = crc32(key1 >> 24, key2)
- Usage:
- zd = _ZipDecrypter(mypwd)
- plain_char = zd(cypher_char)
- plain_text = map(zd, cypher_text)
- """
+ for p in pwd:
+ update_keys(p)
- def _GenerateCRCTable():
- """Generate a CRC-32 table.
+ def decrypter(data):
+ """Decrypt a bytes object."""
+ result = bytearray()
+ append = result.append
+ for c in data:
+ k = key2 | 2
+ c ^= ((k * (k^1)) >> 8) & 0xFF
+ update_keys(c)
+ append(c)
+ return bytes(result)
- ZIP encryption uses the CRC32 one-byte primitive for scrambling some
- internal keys. We noticed that a direct implementation is faster than
- relying on binascii.crc32().
- """
- poly = 0xedb88320
- table = [0] * 256
- for i in range(256):
- crc = i
- for j in range(8):
- if crc & 1:
- crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
- else:
- crc = ((crc >> 1) & 0x7FFFFFFF)
- table[i] = crc
- return table
- crctable = None
-
- def _crc32(self, ch, crc):
- """Compute the CRC32 primitive on one byte."""
- return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
-
- def __init__(self, pwd):
- if _ZipDecrypter.crctable is None:
- _ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable()
- self.key0 = 305419896
- self.key1 = 591751049
- self.key2 = 878082192
- for p in pwd:
- self._UpdateKeys(p)
-
- def _UpdateKeys(self, c):
- self.key0 = self._crc32(c, self.key0)
- self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
- self.key1 = (self.key1 * 134775813 + 1) & 4294967295
- self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
-
- def __call__(self, c):
- """Decrypt a single character."""
- assert isinstance(c, int)
- k = self.key2 | 2
- c = c ^ (((k * (k^1)) >> 8) & 255)
- self._UpdateKeys(c)
- return c
+ return decrypter
class LZMACompressor:
@@ -953,7 +951,7 @@ class ZipExtFile(io.BufferedIOBase):
raise EOFError
if self._decrypter is not None:
- data = bytes(map(self._decrypter, data))
+ data = self._decrypter(data)
return data
def close(self):
@@ -1411,7 +1409,7 @@ class ZipFile:
# or the MSB of the file time depending on the header type
# and is used to check the correctness of the password.
header = zef_file.read(12)
- h = list(map(zd, header[0:12]))
+ h = zd(header[0:12])
if zinfo.flag_bits & 0x8:
# compare against the file type from extended local headers
check_byte = (zinfo._raw_time >> 8) & 0xff
diff --git a/Misc/NEWS b/Misc/NEWS
index 9c434a7..8646b8c 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -301,6 +301,8 @@ Extension Modules
Library
-------
+- bpo-10030: Sped up reading encrypted ZIP files by 2 times.
+
- bpo-29204: Element.getiterator() and the html parameter of XMLParser() were
deprecated only in the documentation (since Python 3.2 and 3.4 correspondingly).
Now using them emits a deprecation warning.