summaryrefslogtreecommitdiffstats
path: root/Lib/zipimport.py
diff options
context:
space:
mode:
authorElvis Pranskevichus <elvis@magic.io>2018-11-07 18:34:59 (GMT)
committerSerhiy Storchaka <storchaka@gmail.com>2018-11-07 18:34:59 (GMT)
commita6e956bcb0edbfe7f18af9be2215a5326ea6bf05 (patch)
tree8b3dc9558493973f8e069a76170e4ff8dc6665d4 /Lib/zipimport.py
parentbfe1839aa994f0d84471254418a4ecfa7c7c9b9c (diff)
downloadcpython-a6e956bcb0edbfe7f18af9be2215a5326ea6bf05.zip
cpython-a6e956bcb0edbfe7f18af9be2215a5326ea6bf05.tar.gz
cpython-a6e956bcb0edbfe7f18af9be2215a5326ea6bf05.tar.bz2
bpo-34726: Fix handling of hash-based pycs in zipimport. (GH-10327)
Current support for hash-based bytecode files in `zipimport` is rather sparse, which leads to test failures when the test suite is ran with the ``SOURCE_DATE_EPOCH`` environment variable set. This teaches zipimport to handle hash-based pycs properly.
Diffstat (limited to 'Lib/zipimport.py')
-rw-r--r--Lib/zipimport.py95
1 files changed, 66 insertions, 29 deletions
diff --git a/Lib/zipimport.py b/Lib/zipimport.py
index 2c11f68f..f430abd 100644
--- a/Lib/zipimport.py
+++ b/Lib/zipimport.py
@@ -578,33 +578,53 @@ def _eq_mtime(t1, t2):
# dostime only stores even seconds, so be lenient
return abs(t1 - t2) <= 1
+
# Given the contents of a .py[co] file, unmarshal the data
# and return the code object. Return None if it the magic word doesn't
-# match (we do this instead of raising an exception as we fall back
+# match, or if the recorded .py[co] metadata does not match the source,
+# (we do this instead of raising an exception as we fall back
# to .py if available and we don't want to mask other errors).
-def _unmarshal_code(pathname, data, mtime):
- if len(data) < 16:
- raise ZipImportError('bad pyc data')
-
- if data[:4] != _bootstrap_external.MAGIC_NUMBER:
- _bootstrap._verbose_message('{!r} has bad magic', pathname)
- return None # signal caller to try alternative
-
- flags = _unpack_uint32(data[4:8])
- if flags != 0:
- # Hash-based pyc. We currently refuse to handle checked hash-based
- # pycs. We could validate hash-based pycs against the source, but it
- # seems likely that most people putting hash-based pycs in a zipfile
- # will use unchecked ones.
+def _unmarshal_code(self, pathname, fullpath, fullname, data):
+ exc_details = {
+ 'name': fullname,
+ 'path': fullpath,
+ }
+
+ try:
+ flags = _bootstrap_external._classify_pyc(data, fullname, exc_details)
+ except ImportError:
+ return None
+
+ hash_based = flags & 0b1 != 0
+ if hash_based:
+ check_source = flags & 0b10 != 0
if (_imp.check_hash_based_pycs != 'never' and
- (flags != 0x1 or _imp.check_hash_based_pycs == 'always')):
- return None
- elif mtime != 0 and not _eq_mtime(_unpack_uint32(data[8:12]), mtime):
- _bootstrap._verbose_message('{!r} has bad mtime', pathname)
- return None # signal caller to try alternative
+ (check_source or _imp.check_hash_based_pycs == 'always')):
+ source_bytes = _get_pyc_source(self, fullpath)
+ if source_bytes is not None:
+ source_hash = _imp.source_hash(
+ _bootstrap_external._RAW_MAGIC_NUMBER,
+ source_bytes,
+ )
+
+ try:
+ _boostrap_external._validate_hash_pyc(
+ data, source_hash, fullname, exc_details)
+ except ImportError:
+ return None
+ else:
+ source_mtime, source_size = \
+ _get_mtime_and_size_of_source(self, fullpath)
+
+ if source_mtime:
+ # We don't use _bootstrap_external._validate_timestamp_pyc
+ # to allow for a more lenient timestamp check.
+ if (not _eq_mtime(_unpack_uint32(data[8:12]), source_mtime) or
+ _unpack_uint32(data[12:16]) != source_size):
+ _bootstrap._verbose_message(
+ f'bytecode is stale for {fullname!r}')
+ return None
- # XXX the pyc's size field is ignored; timestamp collisions are probably
- # unimportant with zip files.
code = marshal.loads(data[16:])
if not isinstance(code, _code_type):
raise TypeError(f'compiled module {pathname!r} is not a code object')
@@ -639,9 +659,9 @@ def _parse_dostime(d, t):
-1, -1, -1))
# Given a path to a .pyc file in the archive, return the
-# modification time of the matching .py file, or 0 if no source
-# is available.
-def _get_mtime_of_source(self, path):
+# modification time of the matching .py file and its size,
+# or (0, 0) if no source is available.
+def _get_mtime_and_size_of_source(self, path):
try:
# strip 'c' or 'o' from *.py[co]
assert path[-1:] in ('c', 'o')
@@ -651,9 +671,27 @@ def _get_mtime_of_source(self, path):
# with an embedded pyc time stamp
time = toc_entry[5]
date = toc_entry[6]
- return _parse_dostime(date, time)
+ uncompressed_size = toc_entry[3]
+ return _parse_dostime(date, time), uncompressed_size
except (KeyError, IndexError, TypeError):
- return 0
+ return 0, 0
+
+
+# Given a path to a .pyc file in the archive, return the
+# contents of the matching .py file, or None if no source
+# is available.
+def _get_pyc_source(self, path):
+ # strip 'c' or 'o' from *.py[co]
+ assert path[-1:] in ('c', 'o')
+ path = path[:-1]
+
+ try:
+ toc_entry = self._files[path]
+ except KeyError:
+ return None
+ else:
+ return _get_data(self.archive, toc_entry)
+
# Get the code object associated with the module specified by
# 'fullname'.
@@ -670,8 +708,7 @@ def _get_module_code(self, fullname):
modpath = toc_entry[0]
data = _get_data(self.archive, toc_entry)
if isbytecode:
- mtime = _get_mtime_of_source(self, fullpath)
- code = _unmarshal_code(modpath, data, mtime)
+ code = _unmarshal_code(self, modpath, fullpath, fullname, data)
else:
code = _compile_source(modpath, data)
if code is None: