diff options
author | Benjamin Peterson <benjamin@python.org> | 2017-12-09 18:26:52 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-12-09 18:26:52 (GMT) |
commit | 42aa93b8ff2f7879282b06efc73a31ec7785e602 (patch) | |
tree | 92ee301e1f487a7f5aa8ec78a36ebc50d21d6ec9 /Lib/test/test_importlib | |
parent | 28d8d14013ade0657fed4673f5fa3c08eb2b1944 (diff) | |
download | cpython-42aa93b8ff2f7879282b06efc73a31ec7785e602.zip cpython-42aa93b8ff2f7879282b06efc73a31ec7785e602.tar.gz cpython-42aa93b8ff2f7879282b06efc73a31ec7785e602.tar.bz2 |
closes bpo-31650: PEP 552 (Deterministic pycs) implementation (#4575)
Python now supports checking bytecode cache up-to-dateness with a hash of the
source contents rather than volatile source metadata. See the PEP for details.
While a fairly straightforward idea, quite a lot of code had to be modified due
to the pervasiveness of pyc implementation details in the codebase. Changes in
this commit include:
- The core changes to importlib to understand how to read, validate, and
regenerate hash-based pycs.
- Support for generating hash-based pycs in py_compile and compileall.
- Modifications to our siphash implementation to support passing a custom
key. We then expose it to importlib through _imp.
- Updates to all places in the interpreter, standard library, and tests that
manually generate or parse pyc files to grok the new format.
- Support in the interpreter command line code for long options like
--check-hash-based-pycs.
- Tests and documentation for all of the above.
Diffstat (limited to 'Lib/test/test_importlib')
-rw-r--r-- | Lib/test/test_importlib/source/test_file_loader.py | 210 | ||||
-rw-r--r-- | Lib/test/test_importlib/test_abc.py | 2 |
2 files changed, 197 insertions, 15 deletions
diff --git a/Lib/test/test_importlib/source/test_file_loader.py b/Lib/test/test_importlib/source/test_file_loader.py index a151149..643a02c 100644 --- a/Lib/test/test_importlib/source/test_file_loader.py +++ b/Lib/test/test_importlib/source/test_file_loader.py @@ -235,6 +235,123 @@ class SimpleTest(abc.LoaderTests): warnings.simplefilter('ignore', DeprecationWarning) loader.load_module('bad name') + @util.writes_bytecode_files + def test_checked_hash_based_pyc(self): + with util.create_modules('_temp') as mapping: + source = mapping['_temp'] + pyc = self.util.cache_from_source(source) + with open(source, 'wb') as fp: + fp.write(b'state = "old"') + os.utime(source, (50, 50)) + py_compile.compile( + source, + invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH, + ) + loader = self.machinery.SourceFileLoader('_temp', source) + mod = types.ModuleType('_temp') + mod.__spec__ = self.util.spec_from_loader('_temp', loader) + loader.exec_module(mod) + self.assertEqual(mod.state, 'old') + # Write a new source with the same mtime and size as before. + with open(source, 'wb') as fp: + fp.write(b'state = "new"') + os.utime(source, (50, 50)) + loader.exec_module(mod) + self.assertEqual(mod.state, 'new') + with open(pyc, 'rb') as fp: + data = fp.read() + self.assertEqual(int.from_bytes(data[4:8], 'little'), 0b11) + self.assertEqual( + self.util.source_hash(b'state = "new"'), + data[8:16], + ) + + @util.writes_bytecode_files + def test_overriden_checked_hash_based_pyc(self): + with util.create_modules('_temp') as mapping, \ + unittest.mock.patch('_imp.check_hash_based_pycs', 'never'): + source = mapping['_temp'] + pyc = self.util.cache_from_source(source) + with open(source, 'wb') as fp: + fp.write(b'state = "old"') + os.utime(source, (50, 50)) + py_compile.compile( + source, + invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH, + ) + loader = self.machinery.SourceFileLoader('_temp', source) + mod = types.ModuleType('_temp') + mod.__spec__ = self.util.spec_from_loader('_temp', loader) + loader.exec_module(mod) + self.assertEqual(mod.state, 'old') + # Write a new source with the same mtime and size as before. + with open(source, 'wb') as fp: + fp.write(b'state = "new"') + os.utime(source, (50, 50)) + loader.exec_module(mod) + self.assertEqual(mod.state, 'old') + + @util.writes_bytecode_files + def test_unchecked_hash_based_pyc(self): + with util.create_modules('_temp') as mapping: + source = mapping['_temp'] + pyc = self.util.cache_from_source(source) + with open(source, 'wb') as fp: + fp.write(b'state = "old"') + os.utime(source, (50, 50)) + py_compile.compile( + source, + invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH, + ) + loader = self.machinery.SourceFileLoader('_temp', source) + mod = types.ModuleType('_temp') + mod.__spec__ = self.util.spec_from_loader('_temp', loader) + loader.exec_module(mod) + self.assertEqual(mod.state, 'old') + # Update the source file, which should be ignored. + with open(source, 'wb') as fp: + fp.write(b'state = "new"') + loader.exec_module(mod) + self.assertEqual(mod.state, 'old') + with open(pyc, 'rb') as fp: + data = fp.read() + self.assertEqual(int.from_bytes(data[4:8], 'little'), 0b1) + self.assertEqual( + self.util.source_hash(b'state = "old"'), + data[8:16], + ) + + @util.writes_bytecode_files + def test_overiden_unchecked_hash_based_pyc(self): + with util.create_modules('_temp') as mapping, \ + unittest.mock.patch('_imp.check_hash_based_pycs', 'always'): + source = mapping['_temp'] + pyc = self.util.cache_from_source(source) + with open(source, 'wb') as fp: + fp.write(b'state = "old"') + os.utime(source, (50, 50)) + py_compile.compile( + source, + invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH, + ) + loader = self.machinery.SourceFileLoader('_temp', source) + mod = types.ModuleType('_temp') + mod.__spec__ = self.util.spec_from_loader('_temp', loader) + loader.exec_module(mod) + self.assertEqual(mod.state, 'old') + # Update the source file, which should be ignored. + with open(source, 'wb') as fp: + fp.write(b'state = "new"') + loader.exec_module(mod) + self.assertEqual(mod.state, 'new') + with open(pyc, 'rb') as fp: + data = fp.read() + self.assertEqual(int.from_bytes(data[4:8], 'little'), 0b1) + self.assertEqual( + self.util.source_hash(b'state = "new"'), + data[8:16], + ) + (Frozen_SimpleTest, Source_SimpleTest @@ -247,15 +364,17 @@ class BadBytecodeTest: def import_(self, file, module_name): raise NotImplementedError - def manipulate_bytecode(self, name, mapping, manipulator, *, - del_source=False): + def manipulate_bytecode(self, + name, mapping, manipulator, *, + del_source=False, + invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP): """Manipulate the bytecode of a module by passing it into a callable that returns what to use as the new bytecode.""" try: del sys.modules['_temp'] except KeyError: pass - py_compile.compile(mapping[name]) + py_compile.compile(mapping[name], invalidation_mode=invalidation_mode) if not del_source: bytecode_path = self.util.cache_from_source(mapping[name]) else: @@ -294,24 +413,51 @@ class BadBytecodeTest: del_source=del_source) test('_temp', mapping, bc_path) + def _test_partial_flags(self, test, *, del_source=False): + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: bc[:7], + del_source=del_source) + test('_temp', mapping, bc_path) + + def _test_partial_hash(self, test, *, del_source=False): + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode( + '_temp', + mapping, + lambda bc: bc[:13], + del_source=del_source, + invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH, + ) + test('_temp', mapping, bc_path) + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode( + '_temp', + mapping, + lambda bc: bc[:13], + del_source=del_source, + invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH, + ) + test('_temp', mapping, bc_path) + def _test_partial_timestamp(self, test, *, del_source=False): with util.create_modules('_temp') as mapping: bc_path = self.manipulate_bytecode('_temp', mapping, - lambda bc: bc[:7], + lambda bc: bc[:11], del_source=del_source) test('_temp', mapping, bc_path) def _test_partial_size(self, test, *, del_source=False): with util.create_modules('_temp') as mapping: bc_path = self.manipulate_bytecode('_temp', mapping, - lambda bc: bc[:11], + lambda bc: bc[:15], del_source=del_source) test('_temp', mapping, bc_path) def _test_no_marshal(self, *, del_source=False): with util.create_modules('_temp') as mapping: bc_path = self.manipulate_bytecode('_temp', mapping, - lambda bc: bc[:12], + lambda bc: bc[:16], del_source=del_source) file_path = mapping['_temp'] if not del_source else bc_path with self.assertRaises(EOFError): @@ -320,7 +466,7 @@ class BadBytecodeTest: def _test_non_code_marshal(self, *, del_source=False): with util.create_modules('_temp') as mapping: bytecode_path = self.manipulate_bytecode('_temp', mapping, - lambda bc: bc[:12] + marshal.dumps(b'abcd'), + lambda bc: bc[:16] + marshal.dumps(b'abcd'), del_source=del_source) file_path = mapping['_temp'] if not del_source else bytecode_path with self.assertRaises(ImportError) as cm: @@ -331,7 +477,7 @@ class BadBytecodeTest: def _test_bad_marshal(self, *, del_source=False): with util.create_modules('_temp') as mapping: bytecode_path = self.manipulate_bytecode('_temp', mapping, - lambda bc: bc[:12] + b'<test>', + lambda bc: bc[:16] + b'<test>', del_source=del_source) file_path = mapping['_temp'] if not del_source else bytecode_path with self.assertRaises(EOFError): @@ -376,7 +522,7 @@ class SourceLoaderBadBytecodeTest: def test(name, mapping, bytecode_path): self.import_(mapping[name], name) with open(bytecode_path, 'rb') as file: - self.assertGreater(len(file.read()), 12) + self.assertGreater(len(file.read()), 16) self._test_empty_file(test) @@ -384,7 +530,7 @@ class SourceLoaderBadBytecodeTest: def test(name, mapping, bytecode_path): self.import_(mapping[name], name) with open(bytecode_path, 'rb') as file: - self.assertGreater(len(file.read()), 12) + self.assertGreater(len(file.read()), 16) self._test_partial_magic(test) @@ -395,7 +541,7 @@ class SourceLoaderBadBytecodeTest: def test(name, mapping, bytecode_path): self.import_(mapping[name], name) with open(bytecode_path, 'rb') as file: - self.assertGreater(len(file.read()), 12) + self.assertGreater(len(file.read()), 16) self._test_magic_only(test) @@ -418,18 +564,38 @@ class SourceLoaderBadBytecodeTest: def test(name, mapping, bc_path): self.import_(mapping[name], name) with open(bc_path, 'rb') as file: - self.assertGreater(len(file.read()), 12) + self.assertGreater(len(file.read()), 16) self._test_partial_timestamp(test) @util.writes_bytecode_files + def test_partial_flags(self): + # When the flags is partial, regenerate the .pyc, else raise EOFError. + def test(name, mapping, bc_path): + self.import_(mapping[name], name) + with open(bc_path, 'rb') as file: + self.assertGreater(len(file.read()), 16) + + self._test_partial_flags(test) + + @util.writes_bytecode_files + def test_partial_hash(self): + # When the hash is partial, regenerate the .pyc, else raise EOFError. + def test(name, mapping, bc_path): + self.import_(mapping[name], name) + with open(bc_path, 'rb') as file: + self.assertGreater(len(file.read()), 16) + + self._test_partial_hash(test) + + @util.writes_bytecode_files def test_partial_size(self): # When the size is partial, regenerate the .pyc, else # raise EOFError. def test(name, mapping, bc_path): self.import_(mapping[name], name) with open(bc_path, 'rb') as file: - self.assertGreater(len(file.read()), 12) + self.assertGreater(len(file.read()), 16) self._test_partial_size(test) @@ -459,13 +625,13 @@ class SourceLoaderBadBytecodeTest: py_compile.compile(mapping['_temp']) bytecode_path = self.util.cache_from_source(mapping['_temp']) with open(bytecode_path, 'r+b') as bytecode_file: - bytecode_file.seek(4) + bytecode_file.seek(8) bytecode_file.write(zeros) self.import_(mapping['_temp'], '_temp') source_mtime = os.path.getmtime(mapping['_temp']) source_timestamp = self.importlib._w_long(source_mtime) with open(bytecode_path, 'rb') as bytecode_file: - bytecode_file.seek(4) + bytecode_file.seek(8) self.assertEqual(bytecode_file.read(4), source_timestamp) # [bytecode read-only] @@ -560,6 +726,20 @@ class SourcelessLoaderBadBytecodeTest: self._test_partial_timestamp(test, del_source=True) + def test_partial_flags(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(EOFError): + self.import_(bytecode_path, name) + + self._test_partial_flags(test, del_source=True) + + def test_partial_hash(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(EOFError): + self.import_(bytecode_path, name) + + self._test_partial_hash(test, del_source=True) + def test_partial_size(self): def test(name, mapping, bytecode_path): with self.assertRaises(EOFError): diff --git a/Lib/test/test_importlib/test_abc.py b/Lib/test/test_importlib/test_abc.py index 54b2da6..4ba28c6 100644 --- a/Lib/test/test_importlib/test_abc.py +++ b/Lib/test/test_importlib/test_abc.py @@ -673,6 +673,7 @@ class SourceLoader(SourceOnlyLoader): if magic is None: magic = self.util.MAGIC_NUMBER data = bytearray(magic) + data.extend(self.init._w_long(0)) data.extend(self.init._w_long(self.source_mtime)) data.extend(self.init._w_long(self.source_size)) code_object = compile(self.source, self.path, 'exec', @@ -836,6 +837,7 @@ class SourceLoaderBytecodeTests(SourceLoaderTestHarness): if bytecode_written: self.assertIn(self.cached, self.loader.written) data = bytearray(self.util.MAGIC_NUMBER) + data.extend(self.init._w_long(0)) data.extend(self.init._w_long(self.loader.source_mtime)) data.extend(self.init._w_long(self.loader.source_size)) data.extend(marshal.dumps(code_object)) |