From 50c6d9c6698a79ec2c0299c61b6ffdd2b27e921b Mon Sep 17 00:00:00 2001 From: Steven Knight Date: Tue, 21 Sep 2004 17:38:54 +0000 Subject: Performance optimization when caching include files for a search path. (Eric Frias) --- SConstruct | 2 +- src/CHANGES.txt | 6 + src/engine/MANIFEST.in | 1 + src/engine/SCons/Scanner/ScannerTests.py | 2 +- src/engine/SCons/Scanner/__init__.py | 48 ++++++-- src/engine/SCons/UserTuple.py | 185 +++++++++++++++++++++++++++++++ 6 files changed, 234 insertions(+), 10 deletions(-) create mode 100644 src/engine/SCons/UserTuple.py diff --git a/SConstruct b/SConstruct index 0e231dd..559f49e 100644 --- a/SConstruct +++ b/SConstruct @@ -900,7 +900,7 @@ if change: cmd = "aegis -list -terse pf 2>/dev/null" pf = map(lambda x: x[:-1], os.popen(cmd, "r").readlines()) - cmd = "aegis -list -terse cf 2>/dev/null" + cmd = "aegis -list -terse -c %s cf 2>/dev/null" % change cf = map(lambda x: x[:-1], os.popen(cmd, "r").readlines()) u = {} for f in pf + cf: diff --git a/src/CHANGES.txt b/src/CHANGES.txt index 000c943..9ba813b 100644 --- a/src/CHANGES.txt +++ b/src/CHANGES.txt @@ -19,6 +19,12 @@ RELEASE 0.97 - XXX - Handle exceptions from Python functions a build actions. + From Eric Frias: + + - Huge performance improvement: wrap the tuples representing an + include path in an object, so that the time it takes to hash the + path doesn't grow porportionally to the length of the path. + From Gottfried Ganssauge: - Fix SCons on SuSE/AMD-64 Linux by having the wrapper script also diff --git a/src/engine/MANIFEST.in b/src/engine/MANIFEST.in index 0fcac9c..c93ac21 100644 --- a/src/engine/MANIFEST.in +++ b/src/engine/MANIFEST.in @@ -126,5 +126,6 @@ SCons/Tool/tex.py SCons/Tool/tlib.py SCons/Tool/yacc.py SCons/Tool/zip.py +SCons/UserTuple.py SCons/Util.py SCons/Warnings.py diff --git a/src/engine/SCons/Scanner/ScannerTests.py b/src/engine/SCons/Scanner/ScannerTests.py index a0c6f01..e0c1f58 100644 --- a/src/engine/SCons/Scanner/ScannerTests.py +++ b/src/engine/SCons/Scanner/ScannerTests.py @@ -155,7 +155,7 @@ class BaseTestCase(unittest.TestCase): s = SCons.Scanner.Base(self.func, "Hash") dict = {} dict[s] = 777 - self.failUnless(hash(dict.keys()[0]) == hash(repr(s)), + self.failUnless(hash(dict.keys()[0]) == id(s), "did not hash Scanner base class as expected") def test_scan_check(self): diff --git a/src/engine/SCons/Scanner/__init__.py b/src/engine/SCons/Scanner/__init__.py index 8ea4223..f865efc 100644 --- a/src/engine/SCons/Scanner/__init__.py +++ b/src/engine/SCons/Scanner/__init__.py @@ -33,6 +33,7 @@ import re import SCons.Node.FS import SCons.Sig +import SCons.UserTuple import SCons.Util @@ -52,6 +53,29 @@ def Scanner(function, *args, **kw): else: return apply(Base, (function,) + args, kw) +# Important, important, important performance optimization: +# +# The paths of Nodes returned from a FindPathDirs will be used to index +# a dictionary that caches the values, so we don't have to look up the +# same path over and over and over. If FindPathDir returns just a tuple, +# though, then the time it takes to compute the hash of the tuple grows +# proportionally to the length of the tuple itself--and some people can +# have very, very long strings of include directories... +# +# The solution is to wrap the tuple in an object, a UserTuple class +# whose *id()* our caller can use to cache the appropriate value. +# This means we have to guarantee that these ids genuinely represent +# unique values, which we do by maintaining our own cache that maps the +# expensive-to-hash tuple values to the easy-to-hash UniqueUserTuple +# values that our caller uses. +# +# *Major* kudos to Eric Frias and his colleagues for finding this. +class UniqueUserTuple(SCons.UserTuple.UserTuple): + def __hash__(self): + return id(self) + +PathCache = {} + class FindPathDirs: """A class to bind a specific *PATH variable name and the fs object to a function that will return all of the *path directories.""" @@ -64,10 +88,16 @@ class FindPathDirs: except KeyError: return () - return tuple(self.fs.Rsearchall(env.subst_path(path), - must_exist = 0, - clazz = SCons.Node.FS.Dir, - cwd = dir)) + path_tuple = tuple(self.fs.Rsearchall(env.subst_path(path), + must_exist = 0, + clazz = SCons.Node.FS.Dir, + cwd = dir)) + try: + return PathCache[path_tuple] + except KeyError: + path_UserTuple = UniqueUserTuple(path_tuple) + PathCache[path_tuple] = path_UserTuple + return path_UserTuple class Base: """ @@ -188,7 +218,7 @@ class Base: return cmp(self.__dict__, other.__dict__) def __hash__(self): - return hash(repr(self)) + return id(self) def add_skey(self, skey): """Add a skey to the list of skeys""" @@ -269,7 +299,9 @@ class Classic(Current): apply(Current.__init__, (self,) + args, kw) def find_include(self, include, source_dir, path): - n = SCons.Node.FS.find_file(include, (source_dir,) + path, self.fs.File) + n = SCons.Node.FS.find_file(include, + (source_dir,) + tuple(path), + self.fs.File) return n, include def scan(self, node, env, path=()): @@ -330,10 +362,10 @@ class ClassicCPP(Classic): def find_include(self, include, source_dir, path): if include[0] == '"': n = SCons.Node.FS.find_file(include[1], - (source_dir,) + path, + (source_dir,) + tuple(path), self.fs.File) else: n = SCons.Node.FS.find_file(include[1], - path + (source_dir,), + tuple(path) + (source_dir,), self.fs.File) return n, include[1] diff --git a/src/engine/SCons/UserTuple.py b/src/engine/SCons/UserTuple.py new file mode 100644 index 0000000..8682783 --- /dev/null +++ b/src/engine/SCons/UserTuple.py @@ -0,0 +1,185 @@ +"""SCons.UserTuple + +A more or less complete user-defined wrapper around tuple objects. + +This is basically cut-and-pasted from UserList, but it wraps an immutable +tuple instead of a mutable list, primarily so that the wrapper object can +be used as the hash of a dictionary. The time it takes to compute the +hash value of a builtin tuple grows as the length of the tuple grows, but +the time it takes to compute hash value of an object can stay constant. + +""" + +# +# __COPYRIGHT__ +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY +# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# + +__revision__ = "__FILE__ __REVISION__ __DATE__ __DEVELOPER__" + +class UserTuple: + def __init__(self, inittuple=None): + self.data = () + if inittuple is not None: + # XXX should this accept an arbitrary sequence? + if type(inittuple) == type(self.data): + self.data = inittuple[:] + elif isinstance(inittuple, UserTuple): + self.data = tuple(inittuple.data[:]) + else: + self.data = tuple(inittuple) + def __str__(self): return str(self.data) + def __lt__(self, other): return self.data < self.__cast(other) + def __le__(self, other): return self.data <= self.__cast(other) + def __eq__(self, other): return self.data == self.__cast(other) + def __ne__(self, other): return self.data != self.__cast(other) + def __gt__(self, other): return self.data > self.__cast(other) + def __ge__(self, other): return self.data >= self.__cast(other) + def __cast(self, other): + if isinstance(other, UserTuple): return other.data + else: return other + def __cmp__(self, other): + return cmp(self.data, self.__cast(other)) + def __contains__(self, item): return item in self.data + def __len__(self): return len(self.data) + def __getitem__(self, i): return self.data[i] + def __setitem__(self, i, item): + raise TypeError, "object doesn't support item assignment" + def __delitem__(self, i): + raise TypeError, "object doesn't support item deletion" + def __getslice__(self, i, j): + i = max(i, 0); j = max(j, 0) + return self.__class__(self.data[i:j]) + def __setslice__(self, i, j, other): + raise TypeError, "object doesn't support slice assignment" + def __delslice__(self, i, j): + raise TypeError, "object doesn't support slice deletion" + def __add__(self, other): + if isinstance(other, UserTuple): + return self.__class__(self.data + other.data) + elif isinstance(other, type(self.data)): + return self.__class__(self.data + other) + else: + return self.__class__(self.data + tuple(other)) + def __radd__(self, other): + if isinstance(other, UserTuple): + return self.__class__(other.data + self.data) + elif isinstance(other, type(self.data)): + return self.__class__(other + self.data) + else: + return self.__class__(tuple(other) + self.data) + def __mul__(self, n): + return self.__class__(self.data*n) + __rmul__ = __mul__ + def __iter__(self): + return iter(self.data) + def __hash__(self): + return hash(self.data) + +if (__name__ == "__main__"): + t = UserTuple((1, 2, 3)) + assert isinstance(t, UserTuple) + t2 = UserTuple(t) + assert isinstance(t2, UserTuple) + t3 = UserTuple([1, 2, 3]) + assert isinstance(t3, UserTuple) + assert t == t2 + assert t == t3 + assert str(t) == '(1, 2, 3)', str(t) + assert t < UserTuple((2, 2, 3)) + assert t <= UserTuple((2, 2, 3)) + assert t == UserTuple((1, 2, 3)) + assert t != UserTuple((3, 2, 1)) + assert t > UserTuple((0, 2, 3)) + assert t >= UserTuple((0, 2, 3)) + assert cmp(t, UserTuple((0,))) == 1 + assert cmp(t, UserTuple((1, 2, 3))) == 0 + assert cmp(t, UserTuple((2,))) == -1 + assert t < (2, 2, 3) + assert t <= (2, 2, 3) + assert t == (1, 2, 3) + assert t != (3, 2, 1) + assert t > (0, 2, 3) + assert t >= (0, 2, 3) + assert cmp(t, (0,)) == 1 + assert cmp(t, (1, 2, 3)) == 0 + assert cmp(t, (2,)) == -1 + assert 3 in t + assert len(t) == 3 + assert t[0] == 1 + assert t[1] == 2 + assert t[2] == 3 + try: + t[0] = 4 + except TypeError, e: + assert str(e) == "object doesn't support item assignment" + else: + raise "Did not catch expected TypeError" + try: + del t[0] + except TypeError, e: + assert str(e) == "object doesn't support item deletion" + else: + raise "Did not catch expected TypeError" + assert t[1:2] == (2,) + try: + t[0:2] = (4, 5) + except TypeError, e: + assert str(e) == "object doesn't support slice assignment", e + else: + raise "Did not catch expected TypeError" + try: + del t[0:2] + except TypeError, e: + assert str(e) == "object doesn't support slice deletion" + else: + raise "Did not catch expected TypeError" + assert t + UserTuple((4, 5)) == (1, 2, 3, 4, 5) + assert t + (4, 5) == (1, 2, 3, 4, 5) + assert t + [4, 5] == (1, 2, 3, 4, 5) + assert UserTuple((-1, 0)) + t == (-1, 0, 1, 2, 3) + assert (-1, 0) + t == (-1, 0, 1, 2, 3) + assert [-1, 0] + t == (-1, 0, 1, 2, 3) + assert t * 2 == (1, 2, 3, 1, 2, 3) + assert 2 * t == (1, 2, 3, 1, 2, 3) + + t1 = UserTuple((1,)) + t1a = UserTuple((1,)) + t1b = UserTuple((1,)) + t2 = UserTuple((2,)) + t3 = UserTuple((3,)) + d = {} + d[t1] = 't1' + d[t2] = 't2' + d[t3] = 't3' + assert d[t1] == 't1' + assert d[t1a] == 't1' + assert d[t1b] == 't1' + assert d[t2] == 't2' + assert d[t3] == 't3' + d[t1a] = 't1a' + assert d[t1] == 't1a' + assert d[t1a] == 't1a' + assert d[t1b] == 't1a' + d[t1b] = 't1b' + assert d[t1] == 't1b' + assert d[t1a] == 't1b' + assert d[t1b] == 't1b' -- cgit v0.12