From 42a9a7eab3c231b4cc3e5c6761b7c921694769c6 Mon Sep 17 00:00:00 2001 From: Mats Wichmann Date: Thu, 31 Aug 2023 06:36:45 -0600 Subject: msvc config cache: add locking on cachefile Following on from #4402, which tries to recover from races that might either cause a corrupt msvc cache file, or an incomplete read happening while a write is still in process, add a simple-minded locking protocol to try to prevent the problem in the first place. For writing, a lockfile is created with exclusive access. For reading, the same is done but immediately released: we only wanted to wait that it's not currently locked, we don't need to keep it locked at this point. This is addressing what's mainly an issue for testing, when there can be many concurrent SCons instance each running a test - we don't think this is likely in normal developer usage. Signed-off-by: Mats Wichmann --- CHANGES.txt | 5 +- SCons/Tool/MSCommon/common.py | 8 ++- SCons/Util/__init__.py | 1 + SCons/Util/filelock.py | 149 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 159 insertions(+), 4 deletions(-) create mode 100644 SCons/Util/filelock.py diff --git a/CHANGES.txt b/CHANGES.txt index e63255b..ccf024e 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -148,7 +148,10 @@ RELEASE VERSION/DATE TO BE FILLED IN LATER - Added more error handling while reading msvc config cache. (Enabled/specified by SCONS_CACHE_MSVC_CONFIG). The existing cache will be discarded if there's a decode error reading it. - It's possible there's a race condition creating this issue it in certain CI builds. + It's possible there's a race condition creating this issue in certain CI + builds. + Also add a simple filesystem-based locking protocol to try to + avoide the problem occuring. From Jonathon Reinhart: - Fix another instance of `int main()` in CheckLib() causing failures diff --git a/SCons/Tool/MSCommon/common.py b/SCons/Tool/MSCommon/common.py index 9ab7179..b6e7218 100644 --- a/SCons/Tool/MSCommon/common.py +++ b/SCons/Tool/MSCommon/common.py @@ -119,7 +119,7 @@ def read_script_env_cache() -> dict: p = Path(CONFIG_CACHE) if not CONFIG_CACHE or not p.is_file(): return envcache - with p.open('r') as f: + with SCons.Util.FileLock(CONFIG_CACHE, timeout=5), p.open('r') as f: # Convert the list of cache entry dictionaries read from # json to the cache dictionary. Reconstruct the cache key # tuple from the key list written to json. @@ -151,11 +151,13 @@ def write_script_env_cache(cache) -> None: p = Path(CONFIG_CACHE) try: - with p.open('w') as f: + with SCons.Util.FileLock(CONFIG_CACHE, timeout=5, writer=True), p.open('w') as f: # Convert the cache dictionary to a list of cache entry # dictionaries. The cache key is converted from a tuple to # a list for compatibility with json. - envcache_list = [{'key': list(key), 'data': data} for key, data in cache.items()] + envcache_list = [ + {'key': list(key), 'data': data} for key, data in cache.items() + ] json.dump(envcache_list, f, indent=2) except TypeError: # data can't serialize to json, don't leave partial file diff --git a/SCons/Util/__init__.py b/SCons/Util/__init__.py index 655c5ad..078414f 100644 --- a/SCons/Util/__init__.py +++ b/SCons/Util/__init__.py @@ -81,6 +81,7 @@ from .envs import ( AddPathIfNotExists, AddMethod, ) +from .filelock import FileLock, SConsLockFailure # Note: the Util package cannot import other parts of SCons globally without diff --git a/SCons/Util/filelock.py b/SCons/Util/filelock.py new file mode 100644 index 0000000..7e915af --- /dev/null +++ b/SCons/Util/filelock.py @@ -0,0 +1,149 @@ +# MIT License +# +# Copyright The SCons Foundation + +"""SCons file locking functions. + +Simple-minded filesystem-based locking. Provides a context manager +which acquires a lock (or at least, permission) on entry and +releases it on exit. + +Usage:: + + from SCons.Util.filelock import FileLock + + with FileLock("myfile.txt", writer=True) as lock: + print(f"Lock on {lock.file} acquired.") + # work with the file as it is now locked +""" + +# TODO: things to consider. +# Is raising an exception the right thing for failing to get lock? +# Is a filesystem lockfile scheme sufficient for our needs? +# - or is it better to put locks on the actual file (fcntl/windows-based)? +# ... Is that even viable in the case of a remote (network) file? +# Is this safe enough? Or do we risk dangling lockfiles? +# Permission issues in case of multi-user. This *should* be okay, +# the cache usually goes in user's homedir, plus you already have +# enough rights for the lockfile if the dir lets you create the cache. +# Need a forced break-lock method? +# The lock attributes could probably be made opaque. Showed one visible +# in the example above, but not sure the benefit of that. + +import os +import time +from typing import Optional + + +class SConsLockFailure(Exception): + """Lock failure exception.""" + + +class FileLock: + """Lock a file using a lockfile. + + Locking for the case where multiple processes try to hit an externally + shared resource that cannot depend on locking within a single SCons + process. SCons does not have a lot of those, but caches come to mind. + + Cross-platform safe, does not use any OS-specific features. Provides + context manager support, or can be called with :meth:`acquire_lock` + and :meth:`release_lock`. + + Lock can be a write lock, which is held until released, or a read + lock, which releases immediately upon aquisition - the interesting + thing being to not read a file which somebody else may be writing, + + TODO: Should default timeout be None (non-blocking), or 0 (block forever), + or some arbitrary number? + + Arguments: + file: name of file to lock. + timeout: optional time (sec) to give up trying. + If ``None``, quit now if we failed to get the lock (non-blocking). + If 0, block forever (well, a long time). + delay: optional delay between tries [default 0.05s] + writer: if True, obtain the lock for safe writing. If False (default), + just wait till the lock is available, give it back right away. + + Raises: + SConsLockFailure: if the operation "timed out", including the + non-blocking mode. + """ + + def __init__( + self, + file: str, + timeout: Optional[int] = None, + delay: Optional[float] = 0.05, + writer: bool = False, + ) -> None: + if timeout is not None and delay is None: + raise ValueError("delay cannot be None if timeout is None.") + # It isn't completely obvious where to put the lockfile. + # This scheme depends on diffrent processes using the same path + # to the lockfile, since the lockfile is the magic resource, + # not the file itself. getcwd() is no good for testcases, each of + # which run in a unique test directory. tempfile is no good, + # as those are (intentionally) unique per process. + # Our simple first guess is just put it where the file is. + self.file = file + self.lockfile = f"{file}.lock" + self.lock: Optional[int] = None + self.timeout = 999999 if timeout == 0 else timeout + self.delay = 0.0 if delay is None else delay + self.writer = writer + + def acquire_lock(self) -> None: + """Acquire the lock, if possible. + + If the lock is in use, check again every *delay* seconds. + Continue until lock acquired or *timeout* expires. + """ + start_time = time.perf_counter() + while True: + try: + self.lock = os.open(self.lockfile, os.O_CREAT|os.O_EXCL|os.O_RDWR) + except FileExistsError as exc: + if self.timeout is None: + raise SConsLockFailure( + f"Could not acquire lock on {self.file!r}" + ) from exc + if (time.perf_counter() - start_time) > self.timeout: + raise SConsLockFailure( + f"Timeout waiting for lock on {self.file!r}." + ) from exc + time.sleep(self.delay) + else: + if not self.writer: + # reader: waits to get lock, but doesn't hold it + self.release_lock() + break + + def release_lock(self) -> None: + """Release the lock by deleting the lockfile.""" + if self.lock: + os.close(self.lock) + os.unlink(self.lockfile) + self.lock = None + + def __enter__(self) -> "FileLock": + """Context manager entry: acquire lock if not holding.""" + if not self.lock: + self.acquire_lock() + return self + + def __exit__(self, exc_type, exc_value, exc_tb) -> None: + """Context manager exit: release lock if holding.""" + if self.lock: + self.release_lock() + + def __repr__(self) -> str: + """Nicer display if someone repr's the lock class.""" + return ( + f"FileLock(" + f"file={self.file!r}, " + f"timeout={self.timeout!r}, " + f"delay={self.delay!r}, " + f"writer={self.writer!r})" + ) -- cgit v0.12 From 015222353680b82cf621cd485bf146b7a4c42361 Mon Sep 17 00:00:00 2001 From: Mats Wichmann Date: Mon, 4 Sep 2023 12:57:00 -0600 Subject: Adjust a test that tripped over locking Signed-off-by: Mats Wichmann --- test/ParseDepends.py | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/test/ParseDepends.py b/test/ParseDepends.py index 2de2105..b699017 100644 --- a/test/ParseDepends.py +++ b/test/ParseDepends.py @@ -1,6 +1,8 @@ #!/usr/bin/env python # -# __COPYRIGHT__ +# SPDX-License-Identifier: MIT +# +# Copyright The SCons Foundation # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -20,9 +22,6 @@ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# - -__revision__ = "__FILE__ __REVISION__ __DATE__ __DEVELOPER__" import os.path @@ -40,18 +39,19 @@ with open(sys.argv[1], 'wb') as f, open(sys.argv[2], 'rb') as afp2, open(sys.arg f.write(afp2.read() + afp3.read()) """) -test.write('SConstruct', """ -Foo = Builder(action = r'%(_python_)s build.py $TARGET $SOURCES subdir/foo.dep') -Bar = Builder(action = r'%(_python_)s build.py $TARGET $SOURCES subdir/bar.dep') -env = Environment(BUILDERS = { 'Foo' : Foo, 'Bar' : Bar }, SUBDIR='subdir') +test.write('SConstruct', """\ +Foo = Builder(action=r'%(_python_)s build.py $TARGET $SOURCES subdir/foo.dep') +Bar = Builder(action=r'%(_python_)s build.py $TARGET $SOURCES subdir/bar.dep') +DefaultEnvironment(tools=[]) +env = Environment(tools=[], BUILDERS={'Foo': Foo, 'Bar': Bar}, SUBDIR='subdir') env.ParseDepends('foo.d') env.ParseDepends('bar.d') -env.Foo(target = 'f1.out', source = 'f1.in') -env.Foo(target = 'f2.out', source = 'f2.in') -env.Bar(target = 'subdir/f3.out', source = 'f3.in') +env.Foo(target='f1.out', source='f1.in') +env.Foo(target='f2.out', source='f2.in') +env.Bar(target='subdir/f3.out', source='f3.in') SConscript('subdir/SConscript', "env") -env.Foo(target = 'f5.out', source = 'f5.in') -env.Bar(target = 'sub2/f6.out', source = 'f6.in') +env.Foo(target='f5.out', source='f5.in') +env.Bar(target='sub2/f6.out', source='f6.in') """ % locals()) test.write('foo.d', "f1.out f2.out: %s\n" % os.path.join('subdir', 'foo.dep')) @@ -137,14 +137,16 @@ test.must_match(['sub2', 'f6.out'], "f6.in 3\nsubdir/bar.dep 3\n") -test.write('SConstruct', """ +test.write('SConstruct', """\ +DefaultEnvironment(tools=[]) ParseDepends('nonexistent_file') """) test.run() -test.write('SConstruct', """ -ParseDepends('nonexistent_file', must_exist=1) +test.write('SConstruct', """\ +DefaultEnvironment(tools=[]) +ParseDepends('nonexistent_file', must_exist=True) """) test.run(status=2, stderr=None) -- cgit v0.12 From dd8cbdbe2738a56e5dff34162057c4e58a36674f Mon Sep 17 00:00:00 2001 From: Mats Wichmann Date: Tue, 5 Sep 2023 16:04:47 -0600 Subject: filellock: also recognize PermissionError Saw an instance where acquiring the lock didn't spin because the lockfile access gave PermissionError instead of FileExistsError. Rather than trying to sort that out, just accept both. Signed-off-by: Mats Wichmann --- SCons/Util/filelock.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SCons/Util/filelock.py b/SCons/Util/filelock.py index 7e915af..4b825a6 100644 --- a/SCons/Util/filelock.py +++ b/SCons/Util/filelock.py @@ -58,7 +58,7 @@ class FileLock: or some arbitrary number? Arguments: - file: name of file to lock. + file: name of file to lock. Only used to build the lockfile name. timeout: optional time (sec) to give up trying. If ``None``, quit now if we failed to get the lock (non-blocking). If 0, block forever (well, a long time). @@ -104,7 +104,7 @@ class FileLock: while True: try: self.lock = os.open(self.lockfile, os.O_CREAT|os.O_EXCL|os.O_RDWR) - except FileExistsError as exc: + except (FileExistsError, PermissionError) as exc: if self.timeout is None: raise SConsLockFailure( f"Could not acquire lock on {self.file!r}" -- cgit v0.12