summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteven Knight <knight@baldmt.com>2005-09-25 18:12:01 (GMT)
committerSteven Knight <knight@baldmt.com>2005-09-25 18:12:01 (GMT)
commita267d937e2def923035642e43ce8a851ddd48cc6 (patch)
tree8f3f2d46b0ac09fb08f7dc9d9e9729a92fa2e7d0
parent1c240103ecdd81e43d2bd645128bc812e6e8ab9c (diff)
downloadSCons-a267d937e2def923035642e43ce8a851ddd48cc6.zip
SCons-a267d937e2def923035642e43ce8a851ddd48cc6.tar.gz
SCons-a267d937e2def923035642e43ce8a851ddd48cc6.tar.bz2
Optimize is_List et al. Add a script harness and scripts for benchmarking Python code snippets.
-rw-r--r--README7
-rw-r--r--bench/README.txt40
-rw-r--r--bench/bench.py123
-rw-r--r--bench/is_types.py331
-rw-r--r--bench/lvars-gvars.py74
-rw-r--r--bin/bench.py129
-rw-r--r--src/engine/SCons/Util.py65
7 files changed, 629 insertions, 140 deletions
diff --git a/README b/README
index f34c9c2..e178bc2 100644
--- a/README
+++ b/README
@@ -316,6 +316,13 @@ CONTENTS OF THIS PACKAGE
Not guaranteed to be up-to-date (but better than nothing):
+bench/
+ A subdirectory for benchmarking scripts, used to perform timing
+ tests to decide what specific idioms are most efficient for
+ various parts of the code base. We check these in so they're
+ available in case we have to revisit any of these decisions in
+ the future.
+
bin/
Miscellaneous utilities used in SCons development. Right now,
some of the stuff here includes:
diff --git a/bench/README.txt b/bench/README.txt
new file mode 100644
index 0000000..0393107
--- /dev/null
+++ b/bench/README.txt
@@ -0,0 +1,40 @@
+# __COPYRIGHT__
+
+This subdirectory contains a harness and various timing tests that we've
+used to decide on the most efficient implementation of various pieces
+of the code base. We're checking these in here so that they're always
+available in case we have to revisit these decisions.
+
+NOTE: This harness is for horse-racing specific snippets of Python
+code to select the best implementation to use within a given function
+or subsystem. It's not intended for end-to-end testing of SCons itself.
+
+Contents of the directory:
+
+ README.txt
+
+ What you're reading right now.
+
+ bench.py
+
+ The harness for running the timing tests that make up
+ the rest of the directory's contents. Use it to run
+ one of the timing tests as follows:
+
+ python bench.py FILE
+
+ Various command-line options control the number of runs, the
+ number of iterations on each run, etc. Help for the command-line
+ options is available:
+
+ python bench.py -h
+
+ is_types.py
+ lvars-gvars.py
+ [etc.]
+
+ The rest of the files in this directory should each contain a
+ specific timing test, consisting of various functions to be run
+ against each other, and test data to be passed to the functions.
+
+ Yes, this list of files will get out of date.
diff --git a/bench/bench.py b/bench/bench.py
new file mode 100644
index 0000000..3c5dd50
--- /dev/null
+++ b/bench/bench.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+#
+# __COPYRIGHT__
+#
+# A script for timing snippets of Python code.
+#
+# By default, this script will execute a single Python file specified on
+# the command line and time any functions in a list named "FunctionList"
+# set by the Python file under test, or (by default) time any functions
+# in the file whose names begin with "Func".
+#
+# All functions are assumed to get passed the same arguments, and the
+# inputs are specified in a list named "Data," each element of which
+# is a list consisting of a tag name, a list of positional arguments,
+# and a dictionary of keyword arguments.
+#
+# Each function is expected to test a single, comparable snippet
+# of Python code. IMPORTANT: We want to test the timing of the code
+# itself, not Python function call overhead, so every function should
+# put its code under test within the following block:
+#
+# for i in IterationList:
+#
+# This will allow (as much as possible) us to time just the code itself,
+# not Python function call overhead.
+
+import getopt
+import sys
+import time
+import types
+
+Usage = """\
+Usage: bench.py OPTIONS file.py
+ --clock Use the time.clock function
+ --func PREFIX Test functions whose names begin with PREFIX
+ -h, --help Display this help and exit
+ -i ITER, --iterations ITER Run each code snippet ITER times
+ --time Use the time.time function
+    -r RUNS, --runs RUNS        Average times for RUNS invocations of each function
+"""
+
+# How many times each snippet of code will be (or should be) run by the
+# functions under test to gather the time (the "inner loop").
+
+Iterations = 1000
+
+# How many times we'll run each function to collect its aggregate time
+# and try to average out timing differences induced by system performance
+# (the "outer loop").
+
+Runs = 10
+
+# The prefix of the functions under test. This will be used if
+# there's no explicit list defined in FunctionList.
+
+FunctionPrefix = 'Func'
+
+# The function used to get the current time. The default of time.time is
+# good on most UNIX systems, but time.clock (selectable via the --clock
+# option) is better on Windows and some other UNIX systems.
+
+Now = time.time
+
+
+opts, args = getopt.getopt(sys.argv[1:], 'hi:r:',
+ ['clock', 'func=', 'help',
+ 'iterations=', 'time', 'runs='])
+
+for o, a in opts:
+ if o in ['--clock']:
+ Now = time.clock
+ elif o in ['--func']:
+ FunctionPrefix = a
+ elif o in ['-h', '--help']:
+ sys.stdout.write(Usage)
+ sys.exit(0)
+ elif o in ['-i', '--iterations']:
+ Iterations = int(a)
+ elif o in ['--time']:
+ Now = time.time
+ elif o in ['-r', '--runs']:
+ Runs = int(a)
+
+if len(args) != 1:
+ sys.stderr.write("bench.py: only one file argument must be specified\n")
+ sys.stderr.write(Usage)
+ sys.exit(1)
+
+
+execfile(args[0])
+
+
+try:
+ FunctionList
+except NameError:
+ function_names = filter(lambda x: x[:4] == FunctionPrefix, locals().keys())
+ function_names.sort()
+ l = map(lambda f, l=locals(): l[f], function_names)
+ FunctionList = filter(lambda f: type(f) == types.FunctionType, l)
+
+IterationList = [None] * Iterations
+
+def timer(func, *args, **kw):
+ results = []
+ for i in range(Runs):
+ start = Now()
+ apply(func, args, kw)
+ finish = Now()
+ results.append((finish - start) / Iterations)
+ return results
+
+def display(label, results):
+ total = reduce(lambda x, y: x+y, results, 0.0)
+ print " %8.3f" % ((total * 1e6) / len(results)), ':', label
+
+for func in FunctionList:
+ if func.__doc__: d = ' (' + func.__doc__ + ')'
+ else: d = ''
+ print func.__name__ + d + ':'
+
+ for label, args, kw in Data:
+ r = apply(timer, (func,)+args, kw)
+ display(label, r)
diff --git a/bench/is_types.py b/bench/is_types.py
new file mode 100644
index 0000000..1f4805b
--- /dev/null
+++ b/bench/is_types.py
@@ -0,0 +1,331 @@
+# __COPYRIGHT__
+#
+# Benchmarks for testing various possible implementations
+# of the is_Dict(), is_List() and is_String() functions in
+# src/engine/SCons/Util.py.
+
+import types
+from UserDict import UserDict
+from UserList import UserList
+
+try:
+ from UserString import UserString
+except ImportError:
+ # "Borrowed" from the Python 2.2 UserString module
+ # and modified slightly for use with SCons.
+ class UserString:
+ def __init__(self, seq):
+ if type(seq) == type(''):
+ self.data = seq
+ elif isinstance(seq, UserString):
+ self.data = seq.data[:]
+ else:
+ self.data = str(seq)
+ def __str__(self): return str(self.data)
+ def __repr__(self): return repr(self.data)
+ def __int__(self): return int(self.data)
+ def __long__(self): return long(self.data)
+ def __float__(self): return float(self.data)
+ def __complex__(self): return complex(self.data)
+ def __hash__(self): return hash(self.data)
+
+ def __cmp__(self, string):
+ if isinstance(string, UserString):
+ return cmp(self.data, string.data)
+ else:
+ return cmp(self.data, string)
+ def __contains__(self, char):
+ return char in self.data
+
+ def __len__(self): return len(self.data)
+ def __getitem__(self, index): return self.__class__(self.data[index])
+ def __getslice__(self, start, end):
+ start = max(start, 0); end = max(end, 0)
+ return self.__class__(self.data[start:end])
+
+ def __add__(self, other):
+ if isinstance(other, UserString):
+ return self.__class__(self.data + other.data)
+ elif is_String(other):
+ return self.__class__(self.data + other)
+ else:
+ return self.__class__(self.data + str(other))
+ def __radd__(self, other):
+ if is_String(other):
+ return self.__class__(other + self.data)
+ else:
+ return self.__class__(str(other) + self.data)
+ def __mul__(self, n):
+ return self.__class__(self.data*n)
+ __rmul__ = __mul__
+
+InstanceType = types.InstanceType
+DictType = types.DictType
+ListType = types.ListType
+StringType = types.StringType
+if hasattr(types, 'UnicodeType'):
+ UnicodeType = types.UnicodeType
+
+
+# The original implementations, pretty straightforward checks for the
+# type of the object and whether it's an instance of the corresponding
+# User* type.
+
+def original_is_Dict(e):
+ return type(e) is types.DictType or isinstance(e, UserDict)
+
+def original_is_List(e):
+ return type(e) is types.ListType or isinstance(e, UserList)
+
+if hasattr(types, 'UnicodeType'):
+ def original_is_String(e):
+ return type(e) is types.StringType \
+ or type(e) is types.UnicodeType \
+ or isinstance(e, UserString)
+else:
+ def original_is_String(e):
+ return type(e) is types.StringType or isinstance(e, UserString)
+
+
+
+# New candidates that explicitly check for whether the object is an
+# InstanceType before calling isinstance() on the corresponding User*
+# type.
+
+def checkInstanceType_is_Dict(e):
+ return type(e) is types.DictType or \
+ (type(e) is types.InstanceType and isinstance(e, UserDict))
+
+def checkInstanceType_is_List(e):
+ return type(e) is types.ListType \
+ or (type(e) is types.InstanceType and isinstance(e, UserList))
+
+if hasattr(types, 'UnicodeType'):
+ def checkInstanceType_is_String(e):
+ return type(e) is types.StringType \
+ or type(e) is types.UnicodeType \
+ or (type(e) is types.InstanceType and isinstance(e, UserString))
+else:
+ def checkInstanceType_is_String(e):
+ return type(e) is types.StringType \
+ or (type(e) is types.InstanceType and isinstance(e, UserString))
+
+
+
+# Improved candidates that cache the type(e) result in a variable
+# before doing any checks.
+
+def cache_type_e_is_Dict(e):
+ t = type(e)
+ return t is types.DictType or \
+ (t is types.InstanceType and isinstance(e, UserDict))
+
+def cache_type_e_is_List(e):
+ t = type(e)
+ return t is types.ListType \
+ or (t is types.InstanceType and isinstance(e, UserList))
+
+if hasattr(types, 'UnicodeType'):
+ def cache_type_e_is_String(e):
+ t = type(e)
+ return t is types.StringType \
+ or t is types.UnicodeType \
+ or (t is types.InstanceType and isinstance(e, UserString))
+else:
+ def cache_type_e_is_String(e):
+ t = type(e)
+ return t is types.StringType \
+ or (t is types.InstanceType and isinstance(e, UserString))
+
+
+
+# Improved candidates that cache the type(e) result in a variable
+# before doing any checks, but using the global names for
+# DictType, ListType and StringType.
+
+def global_cache_type_e_is_Dict(e):
+ t = type(e)
+ return t is DictType or \
+ (t is InstanceType and isinstance(e, UserDict))
+
+def global_cache_type_e_is_List(e):
+ t = type(e)
+ return t is ListType \
+ or (t is InstanceType and isinstance(e, UserList))
+
+if hasattr(types, 'UnicodeType'):
+ def global_cache_type_e_is_String(e):
+ t = type(e)
+ return t is StringType \
+ or t is UnicodeType \
+ or (t is InstanceType and isinstance(e, UserString))
+else:
+ def global_cache_type_e_is_String(e):
+ t = type(e)
+ return t is StringType \
+ or (t is InstanceType and isinstance(e, UserString))
+
+
+
+# Alternative that uses a myType() function to map the User* objects
+# to their corresponding underlying types.
+
+instanceTypeMap = {
+ UserDict : types.DictType,
+ UserList : types.ListType,
+ UserString : types.StringType,
+}
+
+if hasattr(types, 'UnicodeType'):
+ def myType(obj):
+ t = type(obj)
+ if t is types.InstanceType:
+ t = instanceTypeMap.get(obj.__class__, t)
+ elif t is types.UnicodeType:
+ t = types.StringType
+ return t
+else:
+ def myType(obj):
+ t = type(obj)
+ if t is types.InstanceType:
+ t = instanceTypeMap.get(obj.__class__, t)
+ return t
+
+def myType_is_Dict(e):
+ return myType(e) is types.DictType
+
+def myType_is_List(e):
+ return myType(e) is types.ListType
+
+def myType_is_String(e):
+ return myType(e) is types.StringType
+
+
+
+
+def Func01(obj):
+ """original_is_String"""
+ for i in IterationList:
+ original_is_String(obj)
+
+def Func02(obj):
+ """original_is_List"""
+ for i in IterationList:
+ original_is_List(obj)
+
+def Func03(obj):
+ """original_is_Dict"""
+ for i in IterationList:
+ original_is_Dict(obj)
+
+def Func04(obj):
+ """checkInstanceType_is_String"""
+ for i in IterationList:
+ checkInstanceType_is_String(obj)
+
+def Func05(obj):
+ """checkInstanceType_is_List"""
+ for i in IterationList:
+ checkInstanceType_is_List(obj)
+
+def Func06(obj):
+ """checkInstanceType_is_Dict"""
+ for i in IterationList:
+ checkInstanceType_is_Dict(obj)
+
+def Func07(obj):
+ """cache_type_e_is_String"""
+ for i in IterationList:
+ cache_type_e_is_String(obj)
+
+def Func08(obj):
+ """cache_type_e_is_List"""
+ for i in IterationList:
+ cache_type_e_is_List(obj)
+
+def Func09(obj):
+ """cache_type_e_is_Dict"""
+ for i in IterationList:
+ cache_type_e_is_Dict(obj)
+
+def Func10(obj):
+ """global_cache_type_e_is_String"""
+ for i in IterationList:
+ global_cache_type_e_is_String(obj)
+
+def Func11(obj):
+ """global_cache_type_e_is_List"""
+ for i in IterationList:
+ global_cache_type_e_is_List(obj)
+
+def Func12(obj):
+ """global_cache_type_e_is_Dict"""
+ for i in IterationList:
+ global_cache_type_e_is_Dict(obj)
+
+#def Func13(obj):
+# """myType_is_String"""
+# for i in IterationList:
+# myType_is_String(obj)
+#
+#def Func14(obj):
+# """myType_is_List"""
+# for i in IterationList:
+# myType_is_List(obj)
+#
+#def Func15(obj):
+# """myType_is_Dict"""
+# for i in IterationList:
+# myType_is_Dict(obj)
+
+
+
+# Data to pass to the functions on each run. Each entry is a
+# three-element tuple:
+#
+# (
+# "Label to print describing this data run",
+# ('positional', 'arguments'),
+# {'keyword' : 'arguments'},
+# ),
+
+class A:
+ pass
+
+Data = [
+ (
+ "String",
+ ('',),
+ {},
+ ),
+ (
+ "List",
+ ([],),
+ {},
+ ),
+ (
+ "Dict",
+ ({},),
+ {},
+ ),
+ (
+ "UserString",
+ (UserString(''),),
+ {},
+ ),
+ (
+ "UserList",
+ (UserList([]),),
+ {},
+ ),
+ (
+ "UserDict",
+ (UserDict({}),),
+ {},
+ ),
+ (
+ "Object",
+ (A(),),
+ {},
+ ),
+]
diff --git a/bench/lvars-gvars.py b/bench/lvars-gvars.py
new file mode 100644
index 0000000..efcef2a
--- /dev/null
+++ b/bench/lvars-gvars.py
@@ -0,0 +1,74 @@
+# __COPYRIGHT__
+#
+# Functions and data for timing different idioms for fetching a keyword
+# value from a pair of dictionaries for local and global values. This was
+# used to select how to most efficiently expand single $KEYWORD strings
+# in src/engine/SCons/Subst.py.
+
+def Func1(var, gvars, lvars):
+ """lvars try:-except:, gvars try:-except:"""
+ for i in IterationList:
+ try:
+ x = lvars[var]
+ except KeyError:
+ try:
+ x = gvars[var]
+ except KeyError:
+ x = ''
+
+def Func2(var, gvars, lvars):
+ """lvars has_key(), gvars try:-except:"""
+ for i in IterationList:
+ if lvars.has_key(var):
+ x = lvars[var]
+ else:
+ try:
+ x = gvars[var]
+ except KeyError:
+ x = ''
+
+def Func3(var, gvars, lvars):
+ """lvars has_key(), gvars has_key()"""
+ for i in IterationList:
+ if lvars.has_key(var):
+ x = lvars[var]
+ elif gvars.has_key(var):
+ x = gvars[var]
+ else:
+ x = ''
+
+def Func4(var, gvars, lvars):
+ """eval()"""
+ for i in IterationList:
+ try:
+ x = eval(var, gvars, lvars)
+ except NameError:
+ x = ''
+
+# Data to pass to the functions on each run. Each entry is a
+# three-element tuple:
+#
+# (
+# "Label to print describing this data run",
+# ('positional', 'arguments'),
+# {'keyword' : 'arguments'},
+# ),
+
+Data = [
+ (
+ "Neither in gvars or lvars",
+ ('x', {}, {}),
+ {},
+ ),
+ (
+ "Missing from lvars, found in gvars",
+ ('x', {'x':1}, {}),
+ {},
+ ),
+ (
+ "Found in lvars",
+ ('x', {'x':1}, {'x':2}),
+ {},
+ ),
+]
+
diff --git a/bin/bench.py b/bin/bench.py
deleted file mode 100644
index 7d9e2ba..0000000
--- a/bin/bench.py
+++ /dev/null
@@ -1,129 +0,0 @@
-#!/usr/bin/env python
-#
-# A script for timing snippets of Python code.
-
-import time
-
-Now = time.time
-#Now = time.clock # better on Windows, some UNIX/Linux systems
-
-# How many times each snippet of code will be run by the functions
-# to gather the time (the "inner loop").
-
-Iterations = 10000
-
-# How many times we'll run each function to collect its aggregate time
-# and try to average out timing differences induced by system performance
-# (the "outer loop").
-
-Runs = 10
-
-# The functions containing the code snippets to test and compare.
-# This script will test any function whose name begins with the string
-# "Func" and assumes that they all get passed the same arguments.
-# Each function should put its snippet within a block:
-#
-# for i in IterationList:
-#
-# So that (as much as possible) we're testing just the code itself,
-# not Python function call overhead.
-
-def Func1(var, gvars, lvars):
- for i in IterationList:
- try:
- x = lvars[var]
- except KeyError:
- try:
- x = gvars[var]
- except KeyError:
- x = ''
-
-def Func2(var, gvars, lvars):
- for i in IterationList:
- if lvars.has_key(var):
- x = lvars[var]
- else:
- try:
- x = gvars[var]
- except KeyError:
- x = ''
-
-def Func3(var, gvars, lvars):
- for i in IterationList:
- if lvars.has_key(var):
- x = lvars[var]
- elif gvars.has_key(var):
- x = gvars[var]
- else:
- x = ''
-
-def Func4(var, gvars, lvars):
- for i in IterationList:
- try:
- x = eval(var, gvars, lvars)
- except NameError:
- x = ''
-
-# Data to pass to the functions on each run. Each entry is a
-# three-element tuple:
-#
-# (
-# "Label to print describing this data run",
-# ('positional', 'arguments'),
-# {'keyword' : 'arguments'},
-# ),
-
-Data = [
- (
- "Neither in gvars or lvars",
- ('x', {}, {}),
- {},
- ),
- (
- "Missing from lvars, found in gvars",
- ('x', {'x':1}, {}),
- {},
- ),
- (
- "Found in lvars",
- ('x', {'x':1}, {'x':2}),
- {},
- ),
-]
-
-
-
-IterationList = [None]
-
-def timer(func, *args, **kw):
- global IterationList
- IterationList = [None] * Iterations
- results = []
- for i in range(Runs):
- start = Now()
- func(*args, **kw)
- finish = Now()
- results.append((finish - start) / Iterations)
- return results
-
-def display(label, results):
- print ' ' + label + ':'
- print ' ',
- for r in results:
- print "%.3f" % (r * 1e6),
- print
-
-def display(label, results):
- total = reduce(lambda x, y: x+y, results, 0.0)
- print " %8.3f" % ((total * 1e6) / len(results)), ':', label
-
-func_names = filter(lambda x: x.startswith('Func'), locals().keys())
-func_names.sort()
-
-for f in func_names:
- func = locals()[f]
- print f + ':'
-
- for label, args, kw in Data:
- r = apply(timer, (func,)+args, kw)
- display(label, r)
diff --git a/src/engine/SCons/Util.py b/src/engine/SCons/Util.py
index 958b8c7..b7fcec4 100644
--- a/src/engine/SCons/Util.py
+++ b/src/engine/SCons/Util.py
@@ -42,6 +42,13 @@ import types
from UserDict import UserDict
from UserList import UserList
+# Don't "from types import ..." these because we need to get at the
+# types module later to look for UnicodeType.
+DictType = types.DictType
+InstanceType = types.InstanceType
+ListType = types.ListType
+StringType = types.StringType
+
try:
from UserString import UserString
except ImportError:
@@ -164,12 +171,13 @@ def updrive(path):
# specified object has one.
#
if hasattr(types, 'UnicodeType'):
+ UnicodeType = types.UnicodeType
def to_String(s):
if isinstance(s, UserString):
t = type(s.data)
else:
t = type(s)
- if t is types.UnicodeType:
+ if t is UnicodeType:
return unicode(s)
else:
return str(s)
@@ -378,20 +386,55 @@ def print_tree(root, child_func, prune=0, showtags=0, margin=[0], visited={}):
print_tree(children[-1], child_func, prune, IDX(showtags), margin, visited)
margin.pop()
-def is_Dict(e):
- return type(e) is types.DictType or isinstance(e, UserDict)
-def is_List(e):
- return type(e) is types.ListType or isinstance(e, UserList)
+
+# Functions for deciding if things are like various types, mainly to
+# handle UserDict, UserList and UserString like their underlying types.
+#
+# Yes, all of this manual testing breaks polymorphism, and the real
+# Pythonic way to do all of this would be to just try it and handle the
+# exception, but handling the exception when it's not the right type is
+# too slow.
+#
+# The actual implementations here have been selected after timings
+# coded up in bench/is_types.py (from the SCons source tree, see the
+# scons-src distribution). Key results from those timings:
+#
+# -- Storing the type of the object in a variable (t = type(obj))
+# slows down the case where it's a native type and the first
+# comparison will match, but nicely speeds up the case where
+# it's a different native type. Since that's going to be common,
+# it's a good tradeoff.
+#
+# -- The data show that calling isinstance() on an object that's
+# a native type (dict, list or string) is expensive enough that
+# checking up front for whether the object is of type InstanceType
+# is a pretty big win, even though it does slow down the case
+# where it really *is* an object instance a little bit.
+
+def is_Dict(obj):
+ t = type(obj)
+ return t is DictType or \
+ (t is InstanceType and isinstance(obj, UserDict))
+
+def is_List(obj):
+ t = type(obj)
+ return t is ListType \
+ or (t is InstanceType and isinstance(obj, UserList))
if hasattr(types, 'UnicodeType'):
- def is_String(e):
- return type(e) is types.StringType \
- or type(e) is types.UnicodeType \
- or isinstance(e, UserString)
+ def is_String(obj):
+ t = type(obj)
+ return t is StringType \
+ or t is UnicodeType \
+ or (t is InstanceType and isinstance(obj, UserString))
else:
- def is_String(e):
- return type(e) is types.StringType or isinstance(e, UserString)
+ def is_String(obj):
+ t = type(obj)
+ return t is StringType \
+ or (t is InstanceType and isinstance(obj, UserString))
+
+
def is_Scalar(e):
return is_String(e) or not is_List(e)