summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
authorAntoine Pitrou <solipsis@pitrou.net>2012-01-13 17:52:16 (GMT)
committerAntoine Pitrou <solipsis@pitrou.net>2012-01-13 17:52:16 (GMT)
commit5136ac0ca21a05691978df8d0650f902c8ca3463 (patch)
tree4b5569dad3f1b36f115c673602dde6ff49eae5e0 /Lib
parent1f918c1480a1566b774391bbc4ddf1d4153965a1 (diff)
downloadcpython-5136ac0ca21a05691978df8d0650f902c8ca3463.zip
cpython-5136ac0ca21a05691978df8d0650f902c8ca3463.tar.gz
cpython-5136ac0ca21a05691978df8d0650f902c8ca3463.tar.bz2
Issue #13645: pyc files now contain the size of the corresponding source
code, to avoid timestamp collisions (especially on filesystems with a low timestamp resolution) when checking for freshness of the bytecode.
Diffstat (limited to 'Lib')
-rw-r--r--Lib/importlib/_bootstrap.py57
-rw-r--r--Lib/importlib/abc.py15
-rw-r--r--Lib/importlib/test/source/test_abc_loader.py10
-rw-r--r--Lib/importlib/test/source/test_file_loader.py44
-rw-r--r--Lib/pkgutil.py2
-rw-r--r--Lib/py_compile.py7
-rw-r--r--Lib/test/test_import.py12
-rw-r--r--Lib/test/test_zipimport.py10
8 files changed, 117 insertions, 40 deletions
diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py
index 520c10a..5bdd111 100644
--- a/Lib/importlib/_bootstrap.py
+++ b/Lib/importlib/_bootstrap.py
@@ -331,25 +331,40 @@ class _LoaderBasics:
filename = self.get_filename(fullname).rpartition(path_sep)[2]
return filename.rsplit('.', 1)[0] == '__init__'
- def _bytes_from_bytecode(self, fullname, data, source_mtime):
+ def _bytes_from_bytecode(self, fullname, data, source_stats):
"""Return the marshalled bytes from bytecode, verifying the magic
- number and timestamp along the way.
+ number, timestamp and source size along the way.
- If source_mtime is None then skip the timestamp check.
+ If source_stats is None then skip the timestamp check.
"""
magic = data[:4]
raw_timestamp = data[4:8]
+ raw_size = data[8:12]
if len(magic) != 4 or magic != imp.get_magic():
raise ImportError("bad magic number in {}".format(fullname))
elif len(raw_timestamp) != 4:
raise EOFError("bad timestamp in {}".format(fullname))
- elif source_mtime is not None:
- if marshal._r_long(raw_timestamp) != source_mtime:
- raise ImportError("bytecode is stale for {}".format(fullname))
+ elif len(raw_size) != 4:
+ raise EOFError("bad size in {}".format(fullname))
+ if source_stats is not None:
+ try:
+ source_mtime = int(source_stats['mtime'])
+ except KeyError:
+ pass
+ else:
+ if marshal._r_long(raw_timestamp) != source_mtime:
+ raise ImportError("bytecode is stale for {}".format(fullname))
+ try:
+ source_size = source_stats['size'] & 0xFFFFFFFF
+ except KeyError:
+ pass
+ else:
+ if marshal._r_long(raw_size) != source_size:
+ raise ImportError("bytecode is stale for {}".format(fullname))
# Can't return the code object as errors from marshal loading need to
# propagate even when source is available.
- return data[8:]
+ return data[12:]
@module_for_loader
def _load_module(self, module, *, sourceless=False):
@@ -377,11 +392,20 @@ class SourceLoader(_LoaderBasics):
def path_mtime(self, path):
"""Optional method that returns the modification time (an int) for the
specified path, where path is a str.
+ """
+ raise NotImplementedError
- Implementing this method allows the loader to read bytecode files.
+ def path_stats(self, path):
+ """Optional method returning a metadata dict for the specified path
+ to by the path (str).
+ Possible keys:
+ - 'mtime' (mandatory) is the numeric timestamp of last source
+ code modification;
+ - 'size' (optional) is the size in bytes of the source code.
+ Implementing this method allows the loader to read bytecode files.
"""
- raise NotImplementedError
+ return {'mtime': self.path_mtime(path)}
def set_data(self, path, data):
"""Optional method which writes data (bytes) to a file path (a str).
@@ -407,7 +431,7 @@ class SourceLoader(_LoaderBasics):
def get_code(self, fullname):
"""Concrete implementation of InspectLoader.get_code.
- Reading of bytecode requires path_mtime to be implemented. To write
+ Reading of bytecode requires path_stats to be implemented. To write
bytecode, set_data must also be implemented.
"""
@@ -416,10 +440,11 @@ class SourceLoader(_LoaderBasics):
source_mtime = None
if bytecode_path is not None:
try:
- source_mtime = self.path_mtime(source_path)
+ st = self.path_stats(source_path)
except NotImplementedError:
pass
else:
+ source_mtime = int(st['mtime'])
try:
data = self.get_data(bytecode_path)
except IOError:
@@ -427,7 +452,7 @@ class SourceLoader(_LoaderBasics):
else:
try:
bytes_data = self._bytes_from_bytecode(fullname, data,
- source_mtime)
+ st)
except (ImportError, EOFError):
pass
else:
@@ -448,6 +473,7 @@ class SourceLoader(_LoaderBasics):
# throw an exception.
data = bytearray(imp.get_magic())
data.extend(marshal._w_long(source_mtime))
+ data.extend(marshal._w_long(len(source_bytes)))
data.extend(marshal.dumps(code_object))
try:
self.set_data(bytecode_path, data)
@@ -492,9 +518,10 @@ class _SourceFileLoader(_FileLoader, SourceLoader):
"""Concrete implementation of SourceLoader using the file system."""
- def path_mtime(self, path):
- """Return the modification time for the path."""
- return int(_os.stat(path).st_mtime)
+ def path_stats(self, path):
+ """Return the metadat for the path."""
+ st = _os.stat(path)
+ return {'mtime': st.st_mtime, 'size': st.st_size}
def set_data(self, path, data):
"""Write bytes data to a file."""
diff --git a/Lib/importlib/abc.py b/Lib/importlib/abc.py
index df8cd93..0d37629 100644
--- a/Lib/importlib/abc.py
+++ b/Lib/importlib/abc.py
@@ -123,7 +123,20 @@ class SourceLoader(_bootstrap.SourceLoader, ResourceLoader, ExecutionLoader):
def path_mtime(self, path):
"""Return the (int) modification time for the path (str)."""
- raise NotImplementedError
+ if self.path_stats.__func__ is SourceLoader.path_stats:
+ raise NotImplementedError
+ return int(self.path_stats(path)['mtime'])
+
+ def path_stats(self, path):
+ """Return a metadata dict for the source pointed to by the path (str).
+ Possible keys:
+ - 'mtime' (mandatory) is the numeric timestamp of last source
+ code modification;
+ - 'size' (optional) is the size in bytes of the source code.
+ """
+ if self.path_mtime.__func__ is SourceLoader.path_mtime:
+ raise NotImplementedError
+ return {'mtime': self.path_mtime(path)}
def set_data(self, path, data):
"""Write the bytes to the path (if possible).
diff --git a/Lib/importlib/test/source/test_abc_loader.py b/Lib/importlib/test/source/test_abc_loader.py
index 3c19b0b..74ca2e1 100644
--- a/Lib/importlib/test/source/test_abc_loader.py
+++ b/Lib/importlib/test/source/test_abc_loader.py
@@ -5,6 +5,7 @@ from .. import abc as testing_abc
from .. import util
from . import util as source_util
+import collections
import imp
import inspect
import io
@@ -40,8 +41,10 @@ class SourceLoaderMock(SourceOnlyLoaderMock):
def __init__(self, path, magic=imp.get_magic()):
super().__init__(path)
self.bytecode_path = imp.cache_from_source(self.path)
+ self.source_size = len(self.source)
data = bytearray(magic)
data.extend(marshal._w_long(self.source_mtime))
+ data.extend(marshal._w_long(self.source_size))
code_object = compile(self.source, self.path, 'exec',
dont_inherit=True)
data.extend(marshal.dumps(code_object))
@@ -56,9 +59,9 @@ class SourceLoaderMock(SourceOnlyLoaderMock):
else:
raise IOError
- def path_mtime(self, path):
+ def path_stats(self, path):
assert path == self.path
- return self.source_mtime
+ return {'mtime': self.source_mtime, 'size': self.source_size}
def set_data(self, path, data):
self.written[path] = bytes(data)
@@ -657,6 +660,7 @@ class SourceLoaderBytecodeTests(SourceLoaderTestHarness):
self.assertIn(self.cached, self.loader.written)
data = bytearray(imp.get_magic())
data.extend(marshal._w_long(self.loader.source_mtime))
+ data.extend(marshal._w_long(self.loader.source_size))
data.extend(marshal.dumps(code_object))
self.assertEqual(self.loader.written[self.cached], bytes(data))
@@ -847,7 +851,7 @@ class AbstractMethodImplTests(unittest.TestCase):
# Required abstractmethods.
self.raises_NotImplementedError(ins, 'get_filename', 'get_data')
# Optional abstractmethods.
- self.raises_NotImplementedError(ins,'path_mtime', 'set_data')
+ self.raises_NotImplementedError(ins,'path_stats', 'set_data')
def test_PyLoader(self):
self.raises_NotImplementedError(self.PyLoader(), 'source_path',
diff --git a/Lib/importlib/test/source/test_file_loader.py b/Lib/importlib/test/source/test_file_loader.py
index de1c4d8..0809077 100644
--- a/Lib/importlib/test/source/test_file_loader.py
+++ b/Lib/importlib/test/source/test_file_loader.py
@@ -70,11 +70,6 @@ class SimpleTest(unittest.TestCase):
module_dict_id = id(module.__dict__)
with open(mapping['_temp'], 'w') as file:
file.write("testing_var = 42\n")
- # For filesystems where the mtime is only to a second granularity,
- # everything that has happened above can be too fast;
- # force an mtime on the source that is guaranteed to be different
- # than the original mtime.
- loader.path_mtime = self.fake_mtime(loader.path_mtime)
module = loader.load_module('_temp')
self.assertTrue('testing_var' in module.__dict__,
"'testing_var' not in "
@@ -190,10 +185,17 @@ class BadBytecodeTest(unittest.TestCase):
del_source=del_source)
test('_temp', mapping, bc_path)
+ def _test_partial_size(self, test, *, del_source=False):
+ with source_util.create_modules('_temp') as mapping:
+ bc_path = self.manipulate_bytecode('_temp', mapping,
+ lambda bc: bc[:11],
+ del_source=del_source)
+ test('_temp', mapping, bc_path)
+
def _test_no_marshal(self, *, del_source=False):
with source_util.create_modules('_temp') as mapping:
bc_path = self.manipulate_bytecode('_temp', mapping,
- lambda bc: bc[:8],
+ lambda bc: bc[:12],
del_source=del_source)
file_path = mapping['_temp'] if not del_source else bc_path
with self.assertRaises(EOFError):
@@ -202,7 +204,7 @@ class BadBytecodeTest(unittest.TestCase):
def _test_non_code_marshal(self, *, del_source=False):
with source_util.create_modules('_temp') as mapping:
bytecode_path = self.manipulate_bytecode('_temp', mapping,
- lambda bc: bc[:8] + marshal.dumps(b'abcd'),
+ lambda bc: bc[:12] + marshal.dumps(b'abcd'),
del_source=del_source)
file_path = mapping['_temp'] if not del_source else bytecode_path
with self.assertRaises(ImportError):
@@ -211,7 +213,7 @@ class BadBytecodeTest(unittest.TestCase):
def _test_bad_marshal(self, *, del_source=False):
with source_util.create_modules('_temp') as mapping:
bytecode_path = self.manipulate_bytecode('_temp', mapping,
- lambda bc: bc[:8] + b'<test>',
+ lambda bc: bc[:12] + b'<test>',
del_source=del_source)
file_path = mapping['_temp'] if not del_source else bytecode_path
with self.assertRaises(EOFError):
@@ -235,7 +237,7 @@ class SourceLoaderBadBytecodeTest(BadBytecodeTest):
def test(name, mapping, bytecode_path):
self.import_(mapping[name], name)
with open(bytecode_path, 'rb') as file:
- self.assertGreater(len(file.read()), 8)
+ self.assertGreater(len(file.read()), 12)
self._test_empty_file(test)
@@ -243,7 +245,7 @@ class SourceLoaderBadBytecodeTest(BadBytecodeTest):
def test(name, mapping, bytecode_path):
self.import_(mapping[name], name)
with open(bytecode_path, 'rb') as file:
- self.assertGreater(len(file.read()), 8)
+ self.assertGreater(len(file.read()), 12)
self._test_partial_magic(test)
@@ -254,7 +256,7 @@ class SourceLoaderBadBytecodeTest(BadBytecodeTest):
def test(name, mapping, bytecode_path):
self.import_(mapping[name], name)
with open(bytecode_path, 'rb') as file:
- self.assertGreater(len(file.read()), 8)
+ self.assertGreater(len(file.read()), 12)
self._test_magic_only(test)
@@ -276,11 +278,22 @@ class SourceLoaderBadBytecodeTest(BadBytecodeTest):
def test(name, mapping, bc_path):
self.import_(mapping[name], name)
with open(bc_path, 'rb') as file:
- self.assertGreater(len(file.read()), 8)
+ self.assertGreater(len(file.read()), 12)
self._test_partial_timestamp(test)
@source_util.writes_bytecode_files
+ def test_partial_size(self):
+ # When the size is partial, regenerate the .pyc, else
+ # raise EOFError.
+ def test(name, mapping, bc_path):
+ self.import_(mapping[name], name)
+ with open(bc_path, 'rb') as file:
+ self.assertGreater(len(file.read()), 12)
+
+ self._test_partial_size(test)
+
+ @source_util.writes_bytecode_files
def test_no_marshal(self):
# When there is only the magic number and timestamp, raise EOFError.
self._test_no_marshal()
@@ -375,6 +388,13 @@ class SourcelessLoaderBadBytecodeTest(BadBytecodeTest):
self._test_partial_timestamp(test, del_source=True)
+ def test_partial_size(self):
+ def test(name, mapping, bytecode_path):
+ with self.assertRaises(EOFError):
+ self.import_(bytecode_path, name)
+
+ self._test_partial_size(test, del_source=True)
+
def test_no_marshal(self):
self._test_no_marshal(del_source=True)
diff --git a/Lib/pkgutil.py b/Lib/pkgutil.py
index bdea23e..ef027be 100644
--- a/Lib/pkgutil.py
+++ b/Lib/pkgutil.py
@@ -21,7 +21,7 @@ def read_code(stream):
if magic != imp.get_magic():
return None
- stream.read(4) # Skip timestamp
+ stream.read(8) # Skip timestamp and size
return marshal.load(stream)
diff --git a/Lib/py_compile.py b/Lib/py_compile.py
index 5adb70a..62d69ad 100644
--- a/Lib/py_compile.py
+++ b/Lib/py_compile.py
@@ -110,9 +110,11 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1):
"""
with tokenize.open(file) as f:
try:
- timestamp = int(os.fstat(f.fileno()).st_mtime)
+ st = os.fstat(f.fileno())
except AttributeError:
- timestamp = int(os.stat(file).st_mtime)
+ st = os.stat(file)
+ timestamp = int(st.st_mtime)
+ size = st.st_size & 0xFFFFFFFF
codestring = f.read()
try:
codeobject = builtins.compile(codestring, dfile or file, 'exec',
@@ -139,6 +141,7 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1):
with open(cfile, 'wb') as fc:
fc.write(b'\0\0\0\0')
wr_long(fc, timestamp)
+ wr_long(fc, size)
marshal.dump(codeobject, fc)
fc.flush()
fc.seek(0, 0)
diff --git a/Lib/test/test_import.py b/Lib/test/test_import.py
index 9f80b70..e4d5332 100644
--- a/Lib/test/test_import.py
+++ b/Lib/test/test_import.py
@@ -380,7 +380,7 @@ func_filename = func.__code__.co_filename
def test_foreign_code(self):
py_compile.compile(self.file_name)
with open(self.compiled_name, "rb") as f:
- header = f.read(8)
+ header = f.read(12)
code = marshal.load(f)
constants = list(code.co_consts)
foreign_code = test_main.__code__
@@ -644,6 +644,16 @@ class PycacheTests(unittest.TestCase):
self.assertEqual(sys.modules['pep3147.foo'].__cached__,
os.path.join(os.curdir, foo_pyc))
+ def test_recompute_pyc_same_second(self):
+ # Even when the source file doesn't change timestamp, a change in
+ # source size is enough to trigger recomputation of the pyc file.
+ __import__(TESTFN)
+ unload(TESTFN)
+ with open(self.source, 'a') as fp:
+ print("x = 5", file=fp)
+ m = __import__(TESTFN)
+ self.assertEqual(m.x, 5)
+
class RelativeImportFromImportlibTests(test_relative_imports.RelativeImports):
diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py
index 56141ef..358910b 100644
--- a/Lib/test/test_zipimport.py
+++ b/Lib/test/test_zipimport.py
@@ -19,7 +19,7 @@ import io
from traceback import extract_tb, extract_stack, print_tb
raise_src = 'def do_raise(): raise TypeError\n'
-def make_pyc(co, mtime):
+def make_pyc(co, mtime, size):
data = marshal.dumps(co)
if type(mtime) is type(0.0):
# Mac mtimes need a bit of special casing
@@ -27,14 +27,14 @@ def make_pyc(co, mtime):
mtime = int(mtime)
else:
mtime = int(-0x100000000 + int(mtime))
- pyc = imp.get_magic() + struct.pack("<i", int(mtime)) + data
+ pyc = imp.get_magic() + struct.pack("<ii", int(mtime), size & 0xFFFFFFFF) + data
return pyc
def module_path_to_dotted_name(path):
return path.replace(os.sep, '.')
NOW = time.time()
-test_pyc = make_pyc(test_co, NOW)
+test_pyc = make_pyc(test_co, NOW, len(test_src))
TESTMOD = "ziptestmodule"
@@ -293,7 +293,7 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
return __file__
if __loader__.get_data("some.data") != b"some data":
raise AssertionError("bad data")\n"""
- pyc = make_pyc(compile(src, "<???>", "exec"), NOW)
+ pyc = make_pyc(compile(src, "<???>", "exec"), NOW, len(src))
files = {TESTMOD + pyc_ext: (NOW, pyc),
"some.data": (NOW, "some data")}
self.doTest(pyc_ext, files, TESTMOD)
@@ -313,7 +313,7 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
self.doTest(".py", files, TESTMOD, call=self.assertModuleSource)
def testGetCompiledSource(self):
- pyc = make_pyc(compile(test_src, "<???>", "exec"), NOW)
+ pyc = make_pyc(compile(test_src, "<???>", "exec"), NOW, len(test_src))
files = {TESTMOD + ".py": (NOW, test_src),
TESTMOD + pyc_ext: (NOW, pyc)}
self.doTest(pyc_ext, files, TESTMOD, call=self.assertModuleSource)