summary | refs | log | tree | commit | diff | stats
path: root/Lib/test/test_importlib
diff options
context:
space:
mode:
author: Benjamin Peterson <benjamin@python.org>, 2017-12-09 18:26:52 (GMT)
committer: GitHub <noreply@github.com>, 2017-12-09 18:26:52 (GMT)
commit: 42aa93b8ff2f7879282b06efc73a31ec7785e602 (patch)
tree: 92ee301e1f487a7f5aa8ec78a36ebc50d21d6ec9 /Lib/test/test_importlib
parent: 28d8d14013ade0657fed4673f5fa3c08eb2b1944 (diff)
download: cpython-42aa93b8ff2f7879282b06efc73a31ec7785e602.zip
download: cpython-42aa93b8ff2f7879282b06efc73a31ec7785e602.tar.gz
download: cpython-42aa93b8ff2f7879282b06efc73a31ec7785e602.tar.bz2
closes bpo-31650: PEP 552 (Deterministic pycs) implementation (#4575)
Python now supports checking bytecode cache up-to-dateness with a hash of the source contents rather than volatile source metadata. See the PEP for details.

While a fairly straightforward idea, quite a lot of code had to be modified due to the pervasiveness of pyc implementation details in the codebase. Changes in this commit include:

- The core changes to importlib to understand how to read, validate, and regenerate hash-based pycs.
- Support for generating hash-based pycs in py_compile and compileall.
- Modifications to our siphash implementation to support passing a custom key. We then expose it to importlib through _imp.
- Updates to all places in the interpreter, standard library, and tests that manually generate or parse pyc files to grok the new format.
- Support in the interpreter command line code for long options like --check-hash-based-pycs.
- Tests and documentation for all of the above.
Diffstat (limited to 'Lib/test/test_importlib')
-rw-r--r--  Lib/test/test_importlib/source/test_file_loader.py  210
-rw-r--r--  Lib/test/test_importlib/test_abc.py  2
2 files changed, 197 insertions, 15 deletions
diff --git a/Lib/test/test_importlib/source/test_file_loader.py b/Lib/test/test_importlib/source/test_file_loader.py
index a151149..643a02c 100644
--- a/Lib/test/test_importlib/source/test_file_loader.py
+++ b/Lib/test/test_importlib/source/test_file_loader.py
@@ -235,6 +235,123 @@ class SimpleTest(abc.LoaderTests):
warnings.simplefilter('ignore', DeprecationWarning)
loader.load_module('bad name')
+ @util.writes_bytecode_files
+ def test_checked_hash_based_pyc(self):
+ with util.create_modules('_temp') as mapping:
+ source = mapping['_temp']
+ pyc = self.util.cache_from_source(source)
+ with open(source, 'wb') as fp:
+ fp.write(b'state = "old"')
+ os.utime(source, (50, 50))
+ py_compile.compile(
+ source,
+ invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH,
+ )
+ loader = self.machinery.SourceFileLoader('_temp', source)
+ mod = types.ModuleType('_temp')
+ mod.__spec__ = self.util.spec_from_loader('_temp', loader)
+ loader.exec_module(mod)
+ self.assertEqual(mod.state, 'old')
+ # Write a new source with the same mtime and size as before.
+ with open(source, 'wb') as fp:
+ fp.write(b'state = "new"')
+ os.utime(source, (50, 50))
+ loader.exec_module(mod)
+ self.assertEqual(mod.state, 'new')
+ with open(pyc, 'rb') as fp:
+ data = fp.read()
+ self.assertEqual(int.from_bytes(data[4:8], 'little'), 0b11)
+ self.assertEqual(
+ self.util.source_hash(b'state = "new"'),
+ data[8:16],
+ )
+
+ @util.writes_bytecode_files
+ def test_overriden_checked_hash_based_pyc(self):
+ with util.create_modules('_temp') as mapping, \
+ unittest.mock.patch('_imp.check_hash_based_pycs', 'never'):
+ source = mapping['_temp']
+ pyc = self.util.cache_from_source(source)
+ with open(source, 'wb') as fp:
+ fp.write(b'state = "old"')
+ os.utime(source, (50, 50))
+ py_compile.compile(
+ source,
+ invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH,
+ )
+ loader = self.machinery.SourceFileLoader('_temp', source)
+ mod = types.ModuleType('_temp')
+ mod.__spec__ = self.util.spec_from_loader('_temp', loader)
+ loader.exec_module(mod)
+ self.assertEqual(mod.state, 'old')
+ # Write a new source with the same mtime and size as before.
+ with open(source, 'wb') as fp:
+ fp.write(b'state = "new"')
+ os.utime(source, (50, 50))
+ loader.exec_module(mod)
+ self.assertEqual(mod.state, 'old')
+
+ @util.writes_bytecode_files
+ def test_unchecked_hash_based_pyc(self):
+ with util.create_modules('_temp') as mapping:
+ source = mapping['_temp']
+ pyc = self.util.cache_from_source(source)
+ with open(source, 'wb') as fp:
+ fp.write(b'state = "old"')
+ os.utime(source, (50, 50))
+ py_compile.compile(
+ source,
+ invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH,
+ )
+ loader = self.machinery.SourceFileLoader('_temp', source)
+ mod = types.ModuleType('_temp')
+ mod.__spec__ = self.util.spec_from_loader('_temp', loader)
+ loader.exec_module(mod)
+ self.assertEqual(mod.state, 'old')
+ # Update the source file, which should be ignored.
+ with open(source, 'wb') as fp:
+ fp.write(b'state = "new"')
+ loader.exec_module(mod)
+ self.assertEqual(mod.state, 'old')
+ with open(pyc, 'rb') as fp:
+ data = fp.read()
+ self.assertEqual(int.from_bytes(data[4:8], 'little'), 0b1)
+ self.assertEqual(
+ self.util.source_hash(b'state = "old"'),
+ data[8:16],
+ )
+
+ @util.writes_bytecode_files
+ def test_overiden_unchecked_hash_based_pyc(self):
+ with util.create_modules('_temp') as mapping, \
+ unittest.mock.patch('_imp.check_hash_based_pycs', 'always'):
+ source = mapping['_temp']
+ pyc = self.util.cache_from_source(source)
+ with open(source, 'wb') as fp:
+ fp.write(b'state = "old"')
+ os.utime(source, (50, 50))
+ py_compile.compile(
+ source,
+ invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH,
+ )
+ loader = self.machinery.SourceFileLoader('_temp', source)
+ mod = types.ModuleType('_temp')
+ mod.__spec__ = self.util.spec_from_loader('_temp', loader)
+ loader.exec_module(mod)
+ self.assertEqual(mod.state, 'old')
+ # Update the source file; with 'always', the change must be picked up.
+ with open(source, 'wb') as fp:
+ fp.write(b'state = "new"')
+ loader.exec_module(mod)
+ self.assertEqual(mod.state, 'new')
+ with open(pyc, 'rb') as fp:
+ data = fp.read()
+ self.assertEqual(int.from_bytes(data[4:8], 'little'), 0b1)
+ self.assertEqual(
+ self.util.source_hash(b'state = "new"'),
+ data[8:16],
+ )
+
(Frozen_SimpleTest,
Source_SimpleTest
@@ -247,15 +364,17 @@ class BadBytecodeTest:
def import_(self, file, module_name):
raise NotImplementedError
- def manipulate_bytecode(self, name, mapping, manipulator, *,
- del_source=False):
+ def manipulate_bytecode(self,
+ name, mapping, manipulator, *,
+ del_source=False,
+ invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP):
"""Manipulate the bytecode of a module by passing it into a callable
that returns what to use as the new bytecode."""
try:
del sys.modules['_temp']
except KeyError:
pass
- py_compile.compile(mapping[name])
+ py_compile.compile(mapping[name], invalidation_mode=invalidation_mode)
if not del_source:
bytecode_path = self.util.cache_from_source(mapping[name])
else:
@@ -294,24 +413,51 @@ class BadBytecodeTest:
del_source=del_source)
test('_temp', mapping, bc_path)
+ def _test_partial_flags(self, test, *, del_source=False):
+ with util.create_modules('_temp') as mapping:
+ bc_path = self.manipulate_bytecode('_temp', mapping,
+ lambda bc: bc[:7],
+ del_source=del_source)
+ test('_temp', mapping, bc_path)
+
+ def _test_partial_hash(self, test, *, del_source=False):
+ with util.create_modules('_temp') as mapping:
+ bc_path = self.manipulate_bytecode(
+ '_temp',
+ mapping,
+ lambda bc: bc[:13],
+ del_source=del_source,
+ invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH,
+ )
+ test('_temp', mapping, bc_path)
+ with util.create_modules('_temp') as mapping:
+ bc_path = self.manipulate_bytecode(
+ '_temp',
+ mapping,
+ lambda bc: bc[:13],
+ del_source=del_source,
+ invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH,
+ )
+ test('_temp', mapping, bc_path)
+
def _test_partial_timestamp(self, test, *, del_source=False):
with util.create_modules('_temp') as mapping:
bc_path = self.manipulate_bytecode('_temp', mapping,
- lambda bc: bc[:7],
+ lambda bc: bc[:11],
del_source=del_source)
test('_temp', mapping, bc_path)
def _test_partial_size(self, test, *, del_source=False):
with util.create_modules('_temp') as mapping:
bc_path = self.manipulate_bytecode('_temp', mapping,
- lambda bc: bc[:11],
+ lambda bc: bc[:15],
del_source=del_source)
test('_temp', mapping, bc_path)
def _test_no_marshal(self, *, del_source=False):
with util.create_modules('_temp') as mapping:
bc_path = self.manipulate_bytecode('_temp', mapping,
- lambda bc: bc[:12],
+ lambda bc: bc[:16],
del_source=del_source)
file_path = mapping['_temp'] if not del_source else bc_path
with self.assertRaises(EOFError):
@@ -320,7 +466,7 @@ class BadBytecodeTest:
def _test_non_code_marshal(self, *, del_source=False):
with util.create_modules('_temp') as mapping:
bytecode_path = self.manipulate_bytecode('_temp', mapping,
- lambda bc: bc[:12] + marshal.dumps(b'abcd'),
+ lambda bc: bc[:16] + marshal.dumps(b'abcd'),
del_source=del_source)
file_path = mapping['_temp'] if not del_source else bytecode_path
with self.assertRaises(ImportError) as cm:
@@ -331,7 +477,7 @@ class BadBytecodeTest:
def _test_bad_marshal(self, *, del_source=False):
with util.create_modules('_temp') as mapping:
bytecode_path = self.manipulate_bytecode('_temp', mapping,
- lambda bc: bc[:12] + b'<test>',
+ lambda bc: bc[:16] + b'<test>',
del_source=del_source)
file_path = mapping['_temp'] if not del_source else bytecode_path
with self.assertRaises(EOFError):
@@ -376,7 +522,7 @@ class SourceLoaderBadBytecodeTest:
def test(name, mapping, bytecode_path):
self.import_(mapping[name], name)
with open(bytecode_path, 'rb') as file:
- self.assertGreater(len(file.read()), 12)
+ self.assertGreater(len(file.read()), 16)
self._test_empty_file(test)
@@ -384,7 +530,7 @@ class SourceLoaderBadBytecodeTest:
def test(name, mapping, bytecode_path):
self.import_(mapping[name], name)
with open(bytecode_path, 'rb') as file:
- self.assertGreater(len(file.read()), 12)
+ self.assertGreater(len(file.read()), 16)
self._test_partial_magic(test)
@@ -395,7 +541,7 @@ class SourceLoaderBadBytecodeTest:
def test(name, mapping, bytecode_path):
self.import_(mapping[name], name)
with open(bytecode_path, 'rb') as file:
- self.assertGreater(len(file.read()), 12)
+ self.assertGreater(len(file.read()), 16)
self._test_magic_only(test)
@@ -418,18 +564,38 @@ class SourceLoaderBadBytecodeTest:
def test(name, mapping, bc_path):
self.import_(mapping[name], name)
with open(bc_path, 'rb') as file:
- self.assertGreater(len(file.read()), 12)
+ self.assertGreater(len(file.read()), 16)
self._test_partial_timestamp(test)
@util.writes_bytecode_files
+ def test_partial_flags(self):
+ # When the flags field is partial, regenerate the .pyc, else raise EOFError.
+ def test(name, mapping, bc_path):
+ self.import_(mapping[name], name)
+ with open(bc_path, 'rb') as file:
+ self.assertGreater(len(file.read()), 16)
+
+ self._test_partial_flags(test)
+
+ @util.writes_bytecode_files
+ def test_partial_hash(self):
+ # When the hash is partial, regenerate the .pyc, else raise EOFError.
+ def test(name, mapping, bc_path):
+ self.import_(mapping[name], name)
+ with open(bc_path, 'rb') as file:
+ self.assertGreater(len(file.read()), 16)
+
+ self._test_partial_hash(test)
+
+ @util.writes_bytecode_files
def test_partial_size(self):
# When the size is partial, regenerate the .pyc, else
# raise EOFError.
def test(name, mapping, bc_path):
self.import_(mapping[name], name)
with open(bc_path, 'rb') as file:
- self.assertGreater(len(file.read()), 12)
+ self.assertGreater(len(file.read()), 16)
self._test_partial_size(test)
@@ -459,13 +625,13 @@ class SourceLoaderBadBytecodeTest:
py_compile.compile(mapping['_temp'])
bytecode_path = self.util.cache_from_source(mapping['_temp'])
with open(bytecode_path, 'r+b') as bytecode_file:
- bytecode_file.seek(4)
+ bytecode_file.seek(8)
bytecode_file.write(zeros)
self.import_(mapping['_temp'], '_temp')
source_mtime = os.path.getmtime(mapping['_temp'])
source_timestamp = self.importlib._w_long(source_mtime)
with open(bytecode_path, 'rb') as bytecode_file:
- bytecode_file.seek(4)
+ bytecode_file.seek(8)
self.assertEqual(bytecode_file.read(4), source_timestamp)
# [bytecode read-only]
@@ -560,6 +726,20 @@ class SourcelessLoaderBadBytecodeTest:
self._test_partial_timestamp(test, del_source=True)
+ def test_partial_flags(self):
+ def test(name, mapping, bytecode_path):
+ with self.assertRaises(EOFError):
+ self.import_(bytecode_path, name)
+
+ self._test_partial_flags(test, del_source=True)
+
+ def test_partial_hash(self):
+ def test(name, mapping, bytecode_path):
+ with self.assertRaises(EOFError):
+ self.import_(bytecode_path, name)
+
+ self._test_partial_hash(test, del_source=True)
+
def test_partial_size(self):
def test(name, mapping, bytecode_path):
with self.assertRaises(EOFError):
diff --git a/Lib/test/test_importlib/test_abc.py b/Lib/test/test_importlib/test_abc.py
index 54b2da6..4ba28c6 100644
--- a/Lib/test/test_importlib/test_abc.py
+++ b/Lib/test/test_importlib/test_abc.py
@@ -673,6 +673,7 @@ class SourceLoader(SourceOnlyLoader):
if magic is None:
magic = self.util.MAGIC_NUMBER
data = bytearray(magic)
+ data.extend(self.init._w_long(0))
data.extend(self.init._w_long(self.source_mtime))
data.extend(self.init._w_long(self.source_size))
code_object = compile(self.source, self.path, 'exec',
@@ -836,6 +837,7 @@ class SourceLoaderBytecodeTests(SourceLoaderTestHarness):
if bytecode_written:
self.assertIn(self.cached, self.loader.written)
data = bytearray(self.util.MAGIC_NUMBER)
+ data.extend(self.init._w_long(0))
data.extend(self.init._w_long(self.loader.source_mtime))
data.extend(self.init._w_long(self.loader.source_size))
data.extend(marshal.dumps(code_object))