diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2012-01-13 17:52:16 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2012-01-13 17:52:16 (GMT) |
commit | 5136ac0ca21a05691978df8d0650f902c8ca3463 (patch) | |
tree | 4b5569dad3f1b36f115c673602dde6ff49eae5e0 /Lib | |
parent | 1f918c1480a1566b774391bbc4ddf1d4153965a1 (diff) | |
download | cpython-5136ac0ca21a05691978df8d0650f902c8ca3463.zip cpython-5136ac0ca21a05691978df8d0650f902c8ca3463.tar.gz cpython-5136ac0ca21a05691978df8d0650f902c8ca3463.tar.bz2 |
Issue #13645: pyc files now contain the size of the corresponding source
code, to avoid timestamp collisions (especially on filesystems with a low
timestamp resolution) when checking for freshness of the bytecode.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/importlib/_bootstrap.py | 57 | ||||
-rw-r--r-- | Lib/importlib/abc.py | 15 | ||||
-rw-r--r-- | Lib/importlib/test/source/test_abc_loader.py | 10 | ||||
-rw-r--r-- | Lib/importlib/test/source/test_file_loader.py | 44 | ||||
-rw-r--r-- | Lib/pkgutil.py | 2 | ||||
-rw-r--r-- | Lib/py_compile.py | 7 | ||||
-rw-r--r-- | Lib/test/test_import.py | 12 | ||||
-rw-r--r-- | Lib/test/test_zipimport.py | 10 |
8 files changed, 117 insertions, 40 deletions
diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py index 520c10a..5bdd111 100644 --- a/Lib/importlib/_bootstrap.py +++ b/Lib/importlib/_bootstrap.py @@ -331,25 +331,40 @@ class _LoaderBasics: filename = self.get_filename(fullname).rpartition(path_sep)[2] return filename.rsplit('.', 1)[0] == '__init__' - def _bytes_from_bytecode(self, fullname, data, source_mtime): + def _bytes_from_bytecode(self, fullname, data, source_stats): """Return the marshalled bytes from bytecode, verifying the magic - number and timestamp along the way. + number, timestamp and source size along the way. - If source_mtime is None then skip the timestamp check. + If source_stats is None then skip the timestamp check. """ magic = data[:4] raw_timestamp = data[4:8] + raw_size = data[8:12] if len(magic) != 4 or magic != imp.get_magic(): raise ImportError("bad magic number in {}".format(fullname)) elif len(raw_timestamp) != 4: raise EOFError("bad timestamp in {}".format(fullname)) - elif source_mtime is not None: - if marshal._r_long(raw_timestamp) != source_mtime: - raise ImportError("bytecode is stale for {}".format(fullname)) + elif len(raw_size) != 4: + raise EOFError("bad size in {}".format(fullname)) + if source_stats is not None: + try: + source_mtime = int(source_stats['mtime']) + except KeyError: + pass + else: + if marshal._r_long(raw_timestamp) != source_mtime: + raise ImportError("bytecode is stale for {}".format(fullname)) + try: + source_size = source_stats['size'] & 0xFFFFFFFF + except KeyError: + pass + else: + if marshal._r_long(raw_size) != source_size: + raise ImportError("bytecode is stale for {}".format(fullname)) # Can't return the code object as errors from marshal loading need to # propagate even when source is available. - return data[8:] + return data[12:] @module_for_loader def _load_module(self, module, *, sourceless=False): @@ -377,11 +392,20 @@ class SourceLoader(_LoaderBasics): def path_mtime(self, path): """Optional method that returns the modification time (an int) for the specified path, where path is a str. + """ + raise NotImplementedError - Implementing this method allows the loader to read bytecode files. + def path_stats(self, path): + """Optional method returning a metadata dict for the specified path + to by the path (str). + Possible keys: + - 'mtime' (mandatory) is the numeric timestamp of last source + code modification; + - 'size' (optional) is the size in bytes of the source code. + Implementing this method allows the loader to read bytecode files. """ - raise NotImplementedError + return {'mtime': self.path_mtime(path)} def set_data(self, path, data): """Optional method which writes data (bytes) to a file path (a str). @@ -407,7 +431,7 @@ class SourceLoader(_LoaderBasics): def get_code(self, fullname): """Concrete implementation of InspectLoader.get_code. - Reading of bytecode requires path_mtime to be implemented. To write + Reading of bytecode requires path_stats to be implemented. To write bytecode, set_data must also be implemented. """ @@ -416,10 +440,11 @@ class SourceLoader(_LoaderBasics): source_mtime = None if bytecode_path is not None: try: - source_mtime = self.path_mtime(source_path) + st = self.path_stats(source_path) except NotImplementedError: pass else: + source_mtime = int(st['mtime']) try: data = self.get_data(bytecode_path) except IOError: @@ -427,7 +452,7 @@ class SourceLoader(_LoaderBasics): else: try: bytes_data = self._bytes_from_bytecode(fullname, data, - source_mtime) + st) except (ImportError, EOFError): pass else: @@ -448,6 +473,7 @@ class SourceLoader(_LoaderBasics): # throw an exception. data = bytearray(imp.get_magic()) data.extend(marshal._w_long(source_mtime)) + data.extend(marshal._w_long(len(source_bytes))) data.extend(marshal.dumps(code_object)) try: self.set_data(bytecode_path, data) @@ -492,9 +518,10 @@ class _SourceFileLoader(_FileLoader, SourceLoader): """Concrete implementation of SourceLoader using the file system.""" - def path_mtime(self, path): - """Return the modification time for the path.""" - return int(_os.stat(path).st_mtime) + def path_stats(self, path): + """Return the metadat for the path.""" + st = _os.stat(path) + return {'mtime': st.st_mtime, 'size': st.st_size} def set_data(self, path, data): """Write bytes data to a file.""" diff --git a/Lib/importlib/abc.py b/Lib/importlib/abc.py index df8cd93..0d37629 100644 --- a/Lib/importlib/abc.py +++ b/Lib/importlib/abc.py @@ -123,7 +123,20 @@ class SourceLoader(_bootstrap.SourceLoader, ResourceLoader, ExecutionLoader): def path_mtime(self, path): """Return the (int) modification time for the path (str).""" - raise NotImplementedError + if self.path_stats.__func__ is SourceLoader.path_stats: + raise NotImplementedError + return int(self.path_stats(path)['mtime']) + + def path_stats(self, path): + """Return a metadata dict for the source pointed to by the path (str). + Possible keys: + - 'mtime' (mandatory) is the numeric timestamp of last source + code modification; + - 'size' (optional) is the size in bytes of the source code. + """ + if self.path_mtime.__func__ is SourceLoader.path_mtime: + raise NotImplementedError + return {'mtime': self.path_mtime(path)} def set_data(self, path, data): """Write the bytes to the path (if possible). diff --git a/Lib/importlib/test/source/test_abc_loader.py b/Lib/importlib/test/source/test_abc_loader.py index 3c19b0b..74ca2e1 100644 --- a/Lib/importlib/test/source/test_abc_loader.py +++ b/Lib/importlib/test/source/test_abc_loader.py @@ -5,6 +5,7 @@ from .. import abc as testing_abc from .. import util from . import util as source_util +import collections import imp import inspect import io @@ -40,8 +41,10 @@ class SourceLoaderMock(SourceOnlyLoaderMock): def __init__(self, path, magic=imp.get_magic()): super().__init__(path) self.bytecode_path = imp.cache_from_source(self.path) + self.source_size = len(self.source) data = bytearray(magic) data.extend(marshal._w_long(self.source_mtime)) + data.extend(marshal._w_long(self.source_size)) code_object = compile(self.source, self.path, 'exec', dont_inherit=True) data.extend(marshal.dumps(code_object)) @@ -56,9 +59,9 @@ class SourceLoaderMock(SourceOnlyLoaderMock): else: raise IOError - def path_mtime(self, path): + def path_stats(self, path): assert path == self.path - return self.source_mtime + return {'mtime': self.source_mtime, 'size': self.source_size} def set_data(self, path, data): self.written[path] = bytes(data) @@ -657,6 +660,7 @@ class SourceLoaderBytecodeTests(SourceLoaderTestHarness): self.assertIn(self.cached, self.loader.written) data = bytearray(imp.get_magic()) data.extend(marshal._w_long(self.loader.source_mtime)) + data.extend(marshal._w_long(self.loader.source_size)) data.extend(marshal.dumps(code_object)) self.assertEqual(self.loader.written[self.cached], bytes(data)) @@ -847,7 +851,7 @@ class AbstractMethodImplTests(unittest.TestCase): # Required abstractmethods. self.raises_NotImplementedError(ins, 'get_filename', 'get_data') # Optional abstractmethods. - self.raises_NotImplementedError(ins,'path_mtime', 'set_data') + self.raises_NotImplementedError(ins,'path_stats', 'set_data') def test_PyLoader(self): self.raises_NotImplementedError(self.PyLoader(), 'source_path', diff --git a/Lib/importlib/test/source/test_file_loader.py b/Lib/importlib/test/source/test_file_loader.py index de1c4d8..0809077 100644 --- a/Lib/importlib/test/source/test_file_loader.py +++ b/Lib/importlib/test/source/test_file_loader.py @@ -70,11 +70,6 @@ class SimpleTest(unittest.TestCase): module_dict_id = id(module.__dict__) with open(mapping['_temp'], 'w') as file: file.write("testing_var = 42\n") - # For filesystems where the mtime is only to a second granularity, - # everything that has happened above can be too fast; - # force an mtime on the source that is guaranteed to be different - # than the original mtime. - loader.path_mtime = self.fake_mtime(loader.path_mtime) module = loader.load_module('_temp') self.assertTrue('testing_var' in module.__dict__, "'testing_var' not in " @@ -190,10 +185,17 @@ class BadBytecodeTest(unittest.TestCase): del_source=del_source) test('_temp', mapping, bc_path) + def _test_partial_size(self, test, *, del_source=False): + with source_util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: bc[:11], + del_source=del_source) + test('_temp', mapping, bc_path) + def _test_no_marshal(self, *, del_source=False): with source_util.create_modules('_temp') as mapping: bc_path = self.manipulate_bytecode('_temp', mapping, - lambda bc: bc[:8], + lambda bc: bc[:12], del_source=del_source) file_path = mapping['_temp'] if not del_source else bc_path with self.assertRaises(EOFError): @@ -202,7 +204,7 @@ class BadBytecodeTest(unittest.TestCase): def _test_non_code_marshal(self, *, del_source=False): with source_util.create_modules('_temp') as mapping: bytecode_path = self.manipulate_bytecode('_temp', mapping, - lambda bc: bc[:8] + marshal.dumps(b'abcd'), + lambda bc: bc[:12] + marshal.dumps(b'abcd'), del_source=del_source) file_path = mapping['_temp'] if not del_source else bytecode_path with self.assertRaises(ImportError): @@ -211,7 +213,7 @@ class BadBytecodeTest(unittest.TestCase): def _test_bad_marshal(self, *, del_source=False): with source_util.create_modules('_temp') as mapping: bytecode_path = self.manipulate_bytecode('_temp', mapping, - lambda bc: bc[:8] + b'<test>', + lambda bc: bc[:12] + b'<test>', del_source=del_source) file_path = mapping['_temp'] if not del_source else bytecode_path with self.assertRaises(EOFError): @@ -235,7 +237,7 @@ class SourceLoaderBadBytecodeTest(BadBytecodeTest): def test(name, mapping, bytecode_path): self.import_(mapping[name], name) with open(bytecode_path, 'rb') as file: - self.assertGreater(len(file.read()), 8) + self.assertGreater(len(file.read()), 12) self._test_empty_file(test) @@ -243,7 +245,7 @@ class SourceLoaderBadBytecodeTest(BadBytecodeTest): def test(name, mapping, bytecode_path): self.import_(mapping[name], name) with open(bytecode_path, 'rb') as file: - self.assertGreater(len(file.read()), 8) + self.assertGreater(len(file.read()), 12) self._test_partial_magic(test) @@ -254,7 +256,7 @@ class SourceLoaderBadBytecodeTest(BadBytecodeTest): def test(name, mapping, bytecode_path): self.import_(mapping[name], name) with open(bytecode_path, 'rb') as file: - self.assertGreater(len(file.read()), 8) + self.assertGreater(len(file.read()), 12) self._test_magic_only(test) @@ -276,11 +278,22 @@ class SourceLoaderBadBytecodeTest(BadBytecodeTest): def test(name, mapping, bc_path): self.import_(mapping[name], name) with open(bc_path, 'rb') as file: - self.assertGreater(len(file.read()), 8) + self.assertGreater(len(file.read()), 12) self._test_partial_timestamp(test) @source_util.writes_bytecode_files + def test_partial_size(self): + # When the size is partial, regenerate the .pyc, else + # raise EOFError. + def test(name, mapping, bc_path): + self.import_(mapping[name], name) + with open(bc_path, 'rb') as file: + self.assertGreater(len(file.read()), 12) + + self._test_partial_size(test) + + @source_util.writes_bytecode_files def test_no_marshal(self): # When there is only the magic number and timestamp, raise EOFError. self._test_no_marshal() @@ -375,6 +388,13 @@ class SourcelessLoaderBadBytecodeTest(BadBytecodeTest): self._test_partial_timestamp(test, del_source=True) + def test_partial_size(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(EOFError): + self.import_(bytecode_path, name) + + self._test_partial_size(test, del_source=True) + def test_no_marshal(self): self._test_no_marshal(del_source=True) diff --git a/Lib/pkgutil.py b/Lib/pkgutil.py index bdea23e..ef027be 100644 --- a/Lib/pkgutil.py +++ b/Lib/pkgutil.py @@ -21,7 +21,7 @@ def read_code(stream): if magic != imp.get_magic(): return None - stream.read(4) # Skip timestamp + stream.read(8) # Skip timestamp and size return marshal.load(stream) diff --git a/Lib/py_compile.py b/Lib/py_compile.py index 5adb70a..62d69ad 100644 --- a/Lib/py_compile.py +++ b/Lib/py_compile.py @@ -110,9 +110,11 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1): """ with tokenize.open(file) as f: try: - timestamp = int(os.fstat(f.fileno()).st_mtime) + st = os.fstat(f.fileno()) except AttributeError: - timestamp = int(os.stat(file).st_mtime) + st = os.stat(file) + timestamp = int(st.st_mtime) + size = st.st_size & 0xFFFFFFFF codestring = f.read() try: codeobject = builtins.compile(codestring, dfile or file, 'exec', @@ -139,6 +141,7 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1): with open(cfile, 'wb') as fc: fc.write(b'\0\0\0\0') wr_long(fc, timestamp) + wr_long(fc, size) marshal.dump(codeobject, fc) fc.flush() fc.seek(0, 0) diff --git a/Lib/test/test_import.py b/Lib/test/test_import.py index 9f80b70..e4d5332 100644 --- a/Lib/test/test_import.py +++ b/Lib/test/test_import.py @@ -380,7 +380,7 @@ func_filename = func.__code__.co_filename def test_foreign_code(self): py_compile.compile(self.file_name) with open(self.compiled_name, "rb") as f: - header = f.read(8) + header = f.read(12) code = marshal.load(f) constants = list(code.co_consts) foreign_code = test_main.__code__ @@ -644,6 +644,16 @@ class PycacheTests(unittest.TestCase): self.assertEqual(sys.modules['pep3147.foo'].__cached__, os.path.join(os.curdir, foo_pyc)) + def test_recompute_pyc_same_second(self): + # Even when the source file doesn't change timestamp, a change in + # source size is enough to trigger recomputation of the pyc file. + __import__(TESTFN) + unload(TESTFN) + with open(self.source, 'a') as fp: + print("x = 5", file=fp) + m = __import__(TESTFN) + self.assertEqual(m.x, 5) + class RelativeImportFromImportlibTests(test_relative_imports.RelativeImports): diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py index 56141ef..358910b 100644 --- a/Lib/test/test_zipimport.py +++ b/Lib/test/test_zipimport.py @@ -19,7 +19,7 @@ import io from traceback import extract_tb, extract_stack, print_tb raise_src = 'def do_raise(): raise TypeError\n' -def make_pyc(co, mtime): +def make_pyc(co, mtime, size): data = marshal.dumps(co) if type(mtime) is type(0.0): # Mac mtimes need a bit of special casing @@ -27,14 +27,14 @@ def make_pyc(co, mtime): mtime = int(mtime) else: mtime = int(-0x100000000 + int(mtime)) - pyc = imp.get_magic() + struct.pack("<i", int(mtime)) + data + pyc = imp.get_magic() + struct.pack("<ii", int(mtime), size & 0xFFFFFFFF) + data return pyc def module_path_to_dotted_name(path): return path.replace(os.sep, '.') NOW = time.time() -test_pyc = make_pyc(test_co, NOW) +test_pyc = make_pyc(test_co, NOW, len(test_src)) TESTMOD = "ziptestmodule" @@ -293,7 +293,7 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase): return __file__ if __loader__.get_data("some.data") != b"some data": raise AssertionError("bad data")\n""" - pyc = make_pyc(compile(src, "<???>", "exec"), NOW) + pyc = make_pyc(compile(src, "<???>", "exec"), NOW, len(src)) files = {TESTMOD + pyc_ext: (NOW, pyc), "some.data": (NOW, "some data")} self.doTest(pyc_ext, files, TESTMOD) @@ -313,7 +313,7 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase): self.doTest(".py", files, TESTMOD, call=self.assertModuleSource) def testGetCompiledSource(self): - pyc = make_pyc(compile(test_src, "<???>", "exec"), NOW) + pyc = make_pyc(compile(test_src, "<???>", "exec"), NOW, len(test_src)) files = {TESTMOD + ".py": (NOW, test_src), TESTMOD + pyc_ext: (NOW, pyc)} self.doTest(pyc_ext, files, TESTMOD, call=self.assertModuleSource) |