Performance optimization when caching include files for a search path. (Eric Frias)

author: Steven Knight <knight@baldmt.com> 2004-09-21 17:38:54 (GMT)
committer: Steven Knight <knight@baldmt.com> 2004-09-21 17:38:54 (GMT)
commit: 50c6d9c6698a79ec2c0299c61b6ffdd2b27e921b (patch)
tree: b49818b9f0745e7613d12de400205d889baa3224
parent: e2995450dd47b5e106c16ee5768de7f6964a9408 (diff)
download: SCons-50c6d9c6698a79ec2c0299c61b6ffdd2b27e921b.zip
SCons-50c6d9c6698a79ec2c0299c61b6ffdd2b27e921b.tar.gz
SCons-50c6d9c6698a79ec2c0299c61b6ffdd2b27e921b.tar.bz2
6 files changed, 234 insertions, 10 deletions
diff --git a/SConstruct b/SConstruct
index 0e231dd..559f49e 100644
--- a/SConstruct
+++ b/SConstruct
@@ -900,7 +900,7 @@ if change:
 
     cmd = "aegis -list -terse pf 2>/dev/null"
     pf = map(lambda x: x[:-1], os.popen(cmd, "r").readlines())
-    cmd = "aegis -list -terse cf 2>/dev/null"
+    cmd = "aegis -list -terse -c %s cf 2>/dev/null" % change
     cf = map(lambda x: x[:-1], os.popen(cmd, "r").readlines())
     u = {}
     for f in pf + cf:
diff --git a/src/CHANGES.txt b/src/CHANGES.txt
index 000c943..9ba813b 100644
--- a/src/CHANGES.txt
+++ b/src/CHANGES.txt
@@ -19,6 +19,12 @@ RELEASE 0.97 - XXX
 
   - Handle exceptions from Python functions a build actions.
 
+  From Eric Frias:
+
+  - Huge performance improvement:  wrap the tuples representing an
+    include path in an object, so that the time it takes to hash the
+    path doesn't grow porportionally to the length of the path.
+
   From Gottfried Ganssauge:
 
   - Fix SCons on SuSE/AMD-64 Linux by having the wrapper script also
diff --git a/src/engine/MANIFEST.in b/src/engine/MANIFEST.in
index 0fcac9c..c93ac21 100644
--- a/src/engine/MANIFEST.in
+++ b/src/engine/MANIFEST.in
@@ -126,5 +126,6 @@ SCons/Tool/tex.py
 SCons/Tool/tlib.py
 SCons/Tool/yacc.py
 SCons/Tool/zip.py
+SCons/UserTuple.py
 SCons/Util.py
 SCons/Warnings.py
diff --git a/src/engine/SCons/Scanner/ScannerTests.py b/src/engine/SCons/Scanner/ScannerTests.py
index a0c6f01..e0c1f58 100644
--- a/src/engine/SCons/Scanner/ScannerTests.py
+++ b/src/engine/SCons/Scanner/ScannerTests.py
@@ -155,7 +155,7 @@ class BaseTestCase(unittest.TestCase):
         s = SCons.Scanner.Base(self.func, "Hash")
         dict = {}
         dict[s] = 777
-        self.failUnless(hash(dict.keys()[0]) == hash(repr(s)),
+        self.failUnless(hash(dict.keys()[0]) == id(s),
                         "did not hash Scanner base class as expected")
 
     def test_scan_check(self):
diff --git a/src/engine/SCons/Scanner/__init__.py b/src/engine/SCons/Scanner/__init__.py
index 8ea4223..f865efc 100644
--- a/src/engine/SCons/Scanner/__init__.py
+++ b/src/engine/SCons/Scanner/__init__.py
@@ -33,6 +33,7 @@ import re
 
 import SCons.Node.FS
 import SCons.Sig
+import SCons.UserTuple
 import SCons.Util
 
 
@@ -52,6 +53,29 @@ def Scanner(function, *args, **kw):
     else:
         return apply(Base, (function,) + args, kw)
 
+# Important, important, important performance optimization:
+#
+# The paths of Nodes returned from a FindPathDirs will be used to index
+# a dictionary that caches the values, so we don't have to look up the
+# same path over and over and over.  If FindPathDir returns just a tuple,
+# though, then the time it takes to compute the hash of the tuple grows
+# proportionally to the length of the tuple itself--and some people can
+# have very, very long strings of include directories...
+#
+# The solution is to wrap the tuple in an object, a UserTuple class
+# whose *id()* our caller can use to cache the appropriate value.
+# This means we have to guarantee that these ids genuinely represent
+# unique values, which we do by maintaining our own cache that maps the
+# expensive-to-hash tuple values to the easy-to-hash UniqueUserTuple
+# values that our caller uses.
+#
+# *Major* kudos to Eric Frias and his colleagues for finding this.
+class UniqueUserTuple(SCons.UserTuple.UserTuple):
+    def __hash__(self):
+        return id(self)
+
+PathCache = {}
+
 class FindPathDirs:
     """A class to bind a specific *PATH variable name and the fs object
     to a function that will return all of the *path directories."""
@@ -64,10 +88,16 @@ class FindPathDirs:
         except KeyError:
             return ()
 
-        return tuple(self.fs.Rsearchall(env.subst_path(path),
-                                        must_exist = 0,
-                                        clazz = SCons.Node.FS.Dir,
-                                        cwd = dir))
+        path_tuple = tuple(self.fs.Rsearchall(env.subst_path(path),
+                                              must_exist = 0,
+                                              clazz = SCons.Node.FS.Dir,
+                                              cwd = dir))
+        try:
+            return PathCache[path_tuple]
+        except KeyError:
+            path_UserTuple = UniqueUserTuple(path_tuple)
+            PathCache[path_tuple] = path_UserTuple
+            return path_UserTuple
 
 class Base:
     """
@@ -188,7 +218,7 @@ class Base:
         return cmp(self.__dict__, other.__dict__)
 
     def __hash__(self):
-        return hash(repr(self))
+        return id(self)
 
     def add_skey(self, skey):
         """Add a skey to the list of skeys"""
@@ -269,7 +299,9 @@ class Classic(Current):
         apply(Current.__init__, (self,) + args, kw)
 
     def find_include(self, include, source_dir, path):
-        n = SCons.Node.FS.find_file(include, (source_dir,) + path, self.fs.File)
+        n = SCons.Node.FS.find_file(include,
+                                    (source_dir,) + tuple(path),
+                                    self.fs.File)
         return n, include
 
     def scan(self, node, env, path=()):
@@ -330,10 +362,10 @@ class ClassicCPP(Classic):
     def find_include(self, include, source_dir, path):
         if include[0] == '"':
             n = SCons.Node.FS.find_file(include[1],
-                                        (source_dir,) + path,
+                                        (source_dir,) + tuple(path),
                                         self.fs.File)
         else:
             n = SCons.Node.FS.find_file(include[1],
-                                        path + (source_dir,),
+                                        tuple(path) + (source_dir,),
                                         self.fs.File)
         return n, include[1]
diff --git a/src/engine/SCons/UserTuple.py b/src/engine/SCons/UserTuple.py
new file mode 100644
index 0000000..8682783
--- /dev/null
+++ b/src/engine/SCons/UserTuple.py
@@ -0,0 +1,185 @@
+"""SCons.UserTuple
+
+A more or less complete user-defined wrapper around tuple objects.
+
+This is basically cut-and-pasted from UserList, but it wraps an immutable
+tuple instead of a mutable list, primarily so that the wrapper object can
+be used as the hash of a dictionary.  The time it takes to compute the
+hash value of a builtin tuple grows as the length of the tuple grows, but
+the time it takes to compute hash value of an object can stay constant.
+
+"""
+
+#
+# __COPYRIGHT__
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+
+__revision__ = "__FILE__ __REVISION__ __DATE__ __DEVELOPER__"
+
+class UserTuple:
+    def __init__(self, inittuple=None):
+        self.data = ()
+        if inittuple is not None:
+            # XXX should this accept an arbitrary sequence?
+            if type(inittuple) == type(self.data):
+                self.data = inittuple[:]
+            elif isinstance(inittuple, UserTuple):
+                self.data = tuple(inittuple.data[:])
+            else:
+                self.data = tuple(inittuple)
+    def __str__(self): return str(self.data)
+    def __lt__(self, other): return self.data <  self.__cast(other)
+    def __le__(self, other): return self.data <= self.__cast(other)
+    def __eq__(self, other): return self.data == self.__cast(other)
+    def __ne__(self, other): return self.data != self.__cast(other)
+    def __gt__(self, other): return self.data >  self.__cast(other)
+    def __ge__(self, other): return self.data >= self.__cast(other)
+    def __cast(self, other):
+        if isinstance(other, UserTuple): return other.data
+        else: return other
+    def __cmp__(self, other):
+        return cmp(self.data, self.__cast(other))
+    def __contains__(self, item): return item in self.data
+    def __len__(self): return len(self.data)
+    def __getitem__(self, i): return self.data[i]
+    def __setitem__(self, i, item):
+        raise TypeError, "object doesn't support item assignment"
+    def __delitem__(self, i):
+        raise TypeError, "object doesn't support item deletion"
+    def __getslice__(self, i, j):
+        i = max(i, 0); j = max(j, 0)
+        return self.__class__(self.data[i:j])
+    def __setslice__(self, i, j, other):
+        raise TypeError, "object doesn't support slice assignment"
+    def __delslice__(self, i, j):
+        raise TypeError, "object doesn't support slice deletion"
+    def __add__(self, other):
+        if isinstance(other, UserTuple):
+            return self.__class__(self.data + other.data)
+        elif isinstance(other, type(self.data)):
+            return self.__class__(self.data + other)
+        else:
+            return self.__class__(self.data + tuple(other))
+    def __radd__(self, other):
+        if isinstance(other, UserTuple):
+            return self.__class__(other.data + self.data)
+        elif isinstance(other, type(self.data)):
+            return self.__class__(other + self.data)
+        else:
+            return self.__class__(tuple(other) + self.data)
+    def __mul__(self, n):
+        return self.__class__(self.data*n)
+    __rmul__ = __mul__
+    def __iter__(self):
+        return iter(self.data)
+    def __hash__(self):
+        return hash(self.data)
+
+if (__name__ == "__main__"):
+    t = UserTuple((1, 2, 3))
+    assert isinstance(t, UserTuple)
+    t2 = UserTuple(t)
+    assert isinstance(t2, UserTuple)
+    t3 = UserTuple([1, 2, 3])
+    assert isinstance(t3, UserTuple)
+    assert t == t2
+    assert t == t3
+    assert str(t) == '(1, 2, 3)', str(t)
+    assert t < UserTuple((2, 2, 3))
+    assert t <= UserTuple((2, 2, 3))
+    assert t == UserTuple((1, 2, 3))
+    assert t != UserTuple((3, 2, 1))
+    assert t > UserTuple((0, 2, 3))
+    assert t >= UserTuple((0, 2, 3))
+    assert cmp(t, UserTuple((0,))) == 1
+    assert cmp(t, UserTuple((1, 2, 3))) == 0
+    assert cmp(t, UserTuple((2,))) == -1
+    assert t < (2, 2, 3)
+    assert t <= (2, 2, 3)
+    assert t == (1, 2, 3)
+    assert t != (3, 2, 1)
+    assert t > (0, 2, 3)
+    assert t >= (0, 2, 3)
+    assert cmp(t, (0,)) == 1
+    assert cmp(t, (1, 2, 3)) == 0
+    assert cmp(t, (2,)) == -1
+    assert 3 in t
+    assert len(t) == 3
+    assert t[0] == 1
+    assert t[1] == 2
+    assert t[2] == 3
+    try:
+        t[0] = 4
+    except TypeError, e:
+        assert str(e) == "object doesn't support item assignment"
+    else:
+        raise "Did not catch expected TypeError"
+    try:
+        del t[0]
+    except TypeError, e:
+        assert str(e) == "object doesn't support item deletion"
+    else:
+        raise "Did not catch expected TypeError"
+    assert t[1:2] == (2,)
+    try:
+        t[0:2] = (4, 5)
+    except TypeError, e:
+        assert str(e) == "object doesn't support slice assignment", e
+    else:
+        raise "Did not catch expected TypeError"
+    try:
+        del t[0:2]
+    except TypeError, e:
+        assert str(e) == "object doesn't support slice deletion"
+    else:
+        raise "Did not catch expected TypeError"
+    assert t + UserTuple((4, 5)) == (1, 2, 3, 4, 5)
+    assert t + (4, 5) == (1, 2, 3, 4, 5)
+    assert t + [4, 5] == (1, 2, 3, 4, 5)
+    assert UserTuple((-1, 0)) + t == (-1, 0, 1, 2, 3)
+    assert (-1, 0) + t == (-1, 0, 1, 2, 3)
+    assert [-1, 0] + t == (-1, 0, 1, 2, 3)
+    assert t * 2 == (1, 2, 3, 1, 2, 3)
+    assert 2 * t == (1, 2, 3, 1, 2, 3)
+
+    t1 = UserTuple((1,))
+    t1a = UserTuple((1,))
+    t1b = UserTuple((1,))
+    t2 = UserTuple((2,))
+    t3 = UserTuple((3,))
+    d = {}
+    d[t1] = 't1'
+    d[t2] = 't2'
+    d[t3] = 't3'
+    assert d[t1] == 't1'
+    assert d[t1a] == 't1'
+    assert d[t1b] == 't1'
+    assert d[t2] == 't2'
+    assert d[t3] == 't3'
+    d[t1a] = 't1a'
+    assert d[t1] == 't1a'
+    assert d[t1a] == 't1a'
+    assert d[t1b] == 't1a'
+    d[t1b] = 't1b'
+    assert d[t1] == 't1b'
+    assert d[t1a] == 't1b'
+    assert d[t1b] == 't1b'
author	Steven Knight <knight@baldmt.com>	2004-09-21 17:38:54 (GMT)
committer	Steven Knight <knight@baldmt.com>	2004-09-21 17:38:54 (GMT)
commit	50c6d9c6698a79ec2c0299c61b6ffdd2b27e921b (patch)
tree	b49818b9f0745e7613d12de400205d889baa3224
parent	e2995450dd47b5e106c16ee5768de7f6964a9408 (diff)
download	SCons-50c6d9c6698a79ec2c0299c61b6ffdd2b27e921b.zip SCons-50c6d9c6698a79ec2c0299c61b6ffdd2b27e921b.tar.gz SCons-50c6d9c6698a79ec2c0299c61b6ffdd2b27e921b.tar.bz2