Optimize is_List et al. Add a script harness and scripts for benchmarking Python code snippets.

author: Steven Knight <knight@baldmt.com> 2005-09-25 18:12:01 (GMT)
committer: Steven Knight <knight@baldmt.com> 2005-09-25 18:12:01 (GMT)
commit: 781ba4618da6ab099b213fef0a73960459089558 (patch)
tree: 8f3f2d46b0ac09fb08f7dc9d9e9729a92fa2e7d0 /bench
parent: 83d2c54302835180d4f130d75cd69a92b2d7fd9b (diff)
download: SCons-781ba4618da6ab099b213fef0a73960459089558.zip
SCons-781ba4618da6ab099b213fef0a73960459089558.tar.gz
SCons-781ba4618da6ab099b213fef0a73960459089558.tar.bz2
4 files changed, 568 insertions, 0 deletions
diff --git a/bench/README.txt b/bench/README.txt
new file mode 100644
index 0000000..0393107
--- /dev/null
+++ b/bench/README.txt
@@ -0,0 +1,40 @@
+# __COPYRIGHT__
+
+This subdirectory contains a harness and various timing tests that we've
+used to decide on the most efficient implementation of various pieces
+of the code base.  We're checking these in here so that they're always
+available in case we have to revisit these decisions.
+
+NOTE:  This harness is for horse-racing specific snippets of Python
+code to select the best implementation to use within a given function
+or subsystem.  It's not intended for end-to-end testing of SCons itself.
+
+Contents of the directory:
+
+    README.txt
+
+        What you're reading right now.
+
+    bench.py
+
+        The harness for running the timing tests that make up
+        the rest of the directory's contents.  Use it to run
+        one of the timing tests as follows:
+
+                python bench.py FILE
+
+        Various command-line options control the number of runs, the
+        number of iterations on each run, etc.  Help for the command-line
+        options is available:
+
+                python bench.py -h
+
+    is_types.py
+    lvars-gvars.py
+    [etc.]
+
+        The rest of the files in this directory should each contain a
+        specific timing test, consisting of various functions to be run
+        against each other, and test data to be passed to the functions.
+
+        Yes, this list of files will get out of date.
diff --git a/bench/bench.py b/bench/bench.py
new file mode 100644
index 0000000..3c5dd50
--- /dev/null
+++ b/bench/bench.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+#
+# __COPYRIGHT__
+#
+# A script for timing snippets of Python code.
+#
+# By default, this script will execute a single Python file specified on
+# the command line and time any functions in a list named "FunctionList"
+# set by the Python file under test, or (by default) time any functions
+# in the file whose names begin with "Func".
+#
+# All functions are assumed to get passed the same arguments, and the
+# inputs are specified in a list named "Data," each element of which
+# is a list consisting of a tag name, a list of positional arguments,
+# and a dictionary of keyword arguments.
+#
+# Each function is expected to test a single, comparable snippet of
+# of Python code.  IMPORTANT:  We want to test the timing of the code
+# itself, not Python function call overhead, so every function should
+# put its code under test within the following block:
+#
+#       for i in IterationList:
+#
+# This will allow (as much as possible) us to time just the code itself,
+# not Python function call overhead.
+
+import getopt
+import sys
+import time
+import types
+
+Usage = """\
+Usage:  bench.py OPTIONS file.py
+  --clock                       Use the time.clock function
+  --func PREFIX                 Test functions whose names begin with PREFIX
+  -h, --help                    Display this help and exit
+  -i ITER, --iterations ITER    Run each code snippet ITER times
+  --time                        Use the time.time function
+  -r RUNS, --runs RUNS          Average times for RUNS invocations of 
+"""
+
+# How many times each snippet of code will be (or should be) run by the 
+# functions under test to gather the time (the "inner loop").
+
+Iterations = 1000
+
+# How many times we'll run each function to collect its aggregate time
+# and try to average out timing differences induced by system performance
+# (the "outer loop").
+
+Runs = 10
+
+# The prefix of the functions under test.  This will be used if
+# there's no explicit list defined in FunctionList.
+
+FunctionPrefix = 'Func'
+
+# The function used to get the current time.  The default of time.time is
+# good on most UNIX systems, but time.clock (selectable via the --clock
+# option) is better on Windows and some other UNIX systems.
+
+Now = time.time
+
+
+opts, args = getopt.getopt(sys.argv[1:], 'hi:r:',
+                           ['clock', 'func=', 'help',
+                            'iterations=', 'time', 'runs='])
+
+for o, a in opts:
+    if o in ['--clock']:
+        Now = time.clock
+    elif o in ['--func']:
+        FunctionPrefix = a
+    elif o in ['-h', '--help']:
+        sys.stdout.write(Usage)
+        sys.exit(0)
+    elif o in ['-i', '--iterations']:
+        Iterations = int(a)
+    elif o in ['--time']:
+        Now = time.time
+    elif o in ['-r', '--runs']:
+        Runs = int(a)
+
+if len(args) != 1:
+    sys.stderr.write("bench.py:  only one file argument must be specified\n")
+    sys.stderr.write(Usage)
+    sys.exit(1)
+
+
+execfile(args[0])
+
+
+try:
+    FunctionList
+except NameError:
+    function_names = filter(lambda x: x[:4] == FunctionPrefix, locals().keys())
+    function_names.sort()
+    l = map(lambda f, l=locals(): l[f], function_names)
+    FunctionList = filter(lambda f: type(f) == types.FunctionType, l)
+
+IterationList = [None] * Iterations
+
+def timer(func, *args, **kw):
+    results = []
+    for i in range(Runs):
+        start = Now()
+        apply(func, args, kw)
+        finish = Now()
+        results.append((finish - start) / Iterations)
+    return results
+
+def display(label, results):
+    total = reduce(lambda x, y: x+y, results, 0.0)
+    print "    %8.3f" % ((total * 1e6) / len(results)), ':', label
+
+for func in FunctionList:
+    if func.__doc__: d = ' (' + func.__doc__ + ')'
+    else: d = ''
+    print func.__name__ + d + ':'
+
+    for label, args, kw in Data:
+        r = apply(timer, (func,)+args, kw)
+        display(label, r)
diff --git a/bench/is_types.py b/bench/is_types.py
new file mode 100644
index 0000000..1f4805b
--- /dev/null
+++ b/bench/is_types.py
@@ -0,0 +1,331 @@
+# __COPYRIGHT__
+#
+# Benchmarks for testing various possible implementations
+# of the is_Dict(), is_List() and is_String() functions in
+# src/engine/SCons/Util.py.
+
+import types
+from UserDict import UserDict
+from UserList import UserList
+
+try:
+    from UserString import UserString
+except ImportError:
+    # "Borrowed" from the Python 2.2 UserString module
+    # and modified slightly for use with SCons.
+    class UserString:
+        def __init__(self, seq):
+            if type(seq) == type(''):
+                self.data = seq
+            elif isinstance(seq, UserString):
+                self.data = seq.data[:]
+            else:
+                self.data = str(seq)
+        def __str__(self): return str(self.data)
+        def __repr__(self): return repr(self.data)
+        def __int__(self): return int(self.data)
+        def __long__(self): return long(self.data)
+        def __float__(self): return float(self.data)
+        def __complex__(self): return complex(self.data)
+        def __hash__(self): return hash(self.data)
+
+        def __cmp__(self, string):
+            if isinstance(string, UserString):
+                return cmp(self.data, string.data)
+            else:
+                return cmp(self.data, string)
+        def __contains__(self, char):
+            return char in self.data
+
+        def __len__(self): return len(self.data)
+        def __getitem__(self, index): return self.__class__(self.data[index])
+        def __getslice__(self, start, end):
+            start = max(start, 0); end = max(end, 0)
+            return self.__class__(self.data[start:end])
+
+        def __add__(self, other):
+            if isinstance(other, UserString):
+                return self.__class__(self.data + other.data)
+            elif is_String(other):
+                return self.__class__(self.data + other)
+            else:
+                return self.__class__(self.data + str(other))
+        def __radd__(self, other):
+            if is_String(other):
+                return self.__class__(other + self.data)
+            else:
+                return self.__class__(str(other) + self.data)
+        def __mul__(self, n):
+            return self.__class__(self.data*n)
+        __rmul__ = __mul__
+
+InstanceType = types.InstanceType
+DictType = types.DictType
+ListType = types.ListType
+StringType = types.StringType
+if hasattr(types, 'UnicodeType'):
+    UnicodeType = types.UnicodeType
+
+
+# The original implementations, pretty straightforward checks for the
+# type of the object and whether it's an instance of the corresponding
+# User* type.
+
+def original_is_Dict(e):
+    return type(e) is types.DictType or isinstance(e, UserDict)
+
+def original_is_List(e):
+    return type(e) is types.ListType or isinstance(e, UserList)
+
+if hasattr(types, 'UnicodeType'):
+    def original_is_String(e):
+        return type(e) is types.StringType \
+            or type(e) is types.UnicodeType \
+            or isinstance(e, UserString)
+else:
+    def original_is_String(e):
+        return type(e) is types.StringType or isinstance(e, UserString)
+
+
+
+# New candidates that explicitly check for whether the object is an
+# InstanceType before calling isinstance() on the corresponding User*
+# type.
+
+def checkInstanceType_is_Dict(e):
+    return type(e) is types.DictType or \
+           (type(e) is types.InstanceType and isinstance(e, UserDict))
+
+def checkInstanceType_is_List(e):
+    return type(e) is types.ListType \
+        or (type(e) is types.InstanceType and isinstance(e, UserList))
+
+if hasattr(types, 'UnicodeType'):
+    def checkInstanceType_is_String(e):
+        return type(e) is types.StringType \
+            or type(e) is types.UnicodeType \
+            or (type(e) is types.InstanceType and isinstance(e, UserString))
+else:
+    def checkInstanceType_is_String(e):
+        return type(e) is types.StringType \
+            or (type(e) is types.InstanceType and isinstance(e, UserString))
+
+
+
+# Improved candidates that cache the type(e) result in a variable
+# before doing any checks.
+
+def cache_type_e_is_Dict(e):
+    t = type(e)
+    return t is types.DictType or \
+           (t is types.InstanceType and isinstance(e, UserDict))
+
+def cache_type_e_is_List(e):
+    t = type(e)
+    return t is types.ListType \
+        or (t is types.InstanceType and isinstance(e, UserList))
+
+if hasattr(types, 'UnicodeType'):
+    def cache_type_e_is_String(e):
+        t = type(e)
+        return t is types.StringType \
+            or t is types.UnicodeType \
+            or (t is types.InstanceType and isinstance(e, UserString))
+else:
+    def cache_type_e_is_String(e):
+        t = type(e)
+        return t is types.StringType \
+            or (t is types.InstanceType and isinstance(e, UserString))
+
+
+
+# Improved candidates that cache the type(e) result in a variable
+# before doing any checks, but using the global names for
+# DictType, ListType and StringType.
+
+def global_cache_type_e_is_Dict(e):
+    t = type(e)
+    return t is DictType or \
+           (t is InstanceType and isinstance(e, UserDict))
+
+def global_cache_type_e_is_List(e):
+    t = type(e)
+    return t is ListType \
+        or (t is InstanceType and isinstance(e, UserList))
+
+if hasattr(types, 'UnicodeType'):
+    def global_cache_type_e_is_String(e):
+        t = type(e)
+        return t is StringType \
+            or t is UnicodeType \
+            or (t is InstanceType and isinstance(e, UserString))
+else:
+    def global_cache_type_e_is_String(e):
+        t = type(e)
+        return t is StringType \
+            or (t is InstanceType and isinstance(e, UserString))
+
+
+
+# Alternative that uses a myType() function to map the User* objects
+# to their corresponding underlying types.
+
+instanceTypeMap = {
+    UserDict : types.DictType,
+    UserList : types.ListType,
+    UserString : types.StringType,
+}
+
+if hasattr(types, 'UnicodeType'):
+    def myType(obj):
+        t = type(obj)
+        if t is types.InstanceType:
+            t = instanceTypeMap.get(obj.__class__, t)
+        elif t is types.UnicodeType:
+            t = types.StringType
+        return t
+else:
+    def myType(obj):
+        t = type(obj)
+        if t is types.InstanceType:
+            t = instanceTypeMap.get(obj.__class__, t)
+        return t
+
+def myType_is_Dict(e):
+    return myType(e) is types.DictType
+
+def myType_is_List(e):
+    return myType(e) is types.ListType
+
+def myType_is_String(e):
+    return myType(e) is types.StringType
+
+
+
+
+def Func01(obj):
+    """original_is_String"""
+    for i in IterationList:
+        original_is_String(obj)
+
+def Func02(obj):
+    """original_is_List"""
+    for i in IterationList:
+        original_is_List(obj)
+
+def Func03(obj):
+    """original_is_Dict"""
+    for i in IterationList:
+        original_is_Dict(obj)
+
+def Func04(obj):
+    """checkInstanceType_is_String"""
+    for i in IterationList:
+        checkInstanceType_is_String(obj)
+
+def Func05(obj):
+    """checkInstanceType_is_List"""
+    for i in IterationList:
+        checkInstanceType_is_List(obj)
+
+def Func06(obj):
+    """checkInstanceType_is_Dict"""
+    for i in IterationList:
+        checkInstanceType_is_Dict(obj)
+
+def Func07(obj):
+    """cache_type_e_is_String"""
+    for i in IterationList:
+        cache_type_e_is_String(obj)
+
+def Func08(obj):
+    """cache_type_e_is_List"""
+    for i in IterationList:
+        cache_type_e_is_List(obj)
+
+def Func09(obj):
+    """cache_type_e_is_Dict"""
+    for i in IterationList:
+        cache_type_e_is_Dict(obj)
+
+def Func10(obj):
+    """global_cache_type_e_is_String"""
+    for i in IterationList:
+        global_cache_type_e_is_String(obj)
+
+def Func11(obj):
+    """global_cache_type_e_is_List"""
+    for i in IterationList:
+        global_cache_type_e_is_List(obj)
+
+def Func12(obj):
+    """global_cache_type_e_is_Dict"""
+    for i in IterationList:
+        global_cache_type_e_is_Dict(obj)
+
+#def Func13(obj):
+#    """myType_is_String"""
+#    for i in IterationList:
+#        myType_is_String(obj)
+#
+#def Func14(obj):
+#    """myType_is_List"""
+#    for i in IterationList:
+#        myType_is_List(obj)
+#
+#def Func15(obj):
+#    """myType_is_Dict"""
+#    for i in IterationList:
+#        myType_is_Dict(obj)
+
+
+
+# Data to pass to the functions on each run.  Each entry is a
+# three-element tuple:
+#
+#   (
+#       "Label to print describing this data run",
+#       ('positional', 'arguments'),
+#       {'keyword' : 'arguments'},
+#   ),
+
+class A:
+    pass
+
+Data = [
+    (
+        "String",
+        ('',),
+        {},
+    ),
+    (
+        "List",
+        ([],),
+        {},
+    ),
+    (
+        "Dict",
+        ({},),
+        {},
+    ),
+    (
+        "UserString",
+        (UserString(''),),
+        {},
+    ),
+    (
+        "UserList",
+        (UserList([]),),
+        {},
+    ),
+    (
+        "UserDict",
+        (UserDict({}),),
+        {},
+    ),
+    (
+        "Object",
+        (A(),),
+        {},
+    ),
+]
diff --git a/bench/lvars-gvars.py b/bench/lvars-gvars.py
new file mode 100644
index 0000000..efcef2a
--- /dev/null
+++ b/bench/lvars-gvars.py
@@ -0,0 +1,74 @@
+# __COPYRIGHT__
+#
+# Functions and data for timing different idioms for fetching a keyword
+# value from a pair of dictionaries for localand global values.  This was
+# used to select how to most efficiently expand single $KEYWORD strings
+# in src/engine/SCons/Subst.py.
+
+def Func1(var, gvars, lvars):
+    """lvars try:-except:, gvars try:-except:"""
+    for i in IterationList:
+        try:
+            x = lvars[var]
+        except KeyError:
+            try:
+                x = gvars[var]
+            except KeyError:
+                x = ''
+
+def Func2(var, gvars, lvars):
+    """lvars has_key(), gvars try:-except:"""
+    for i in IterationList:
+        if lvars.has_key(var):
+            x = lvars[var]
+        else:
+            try:
+                x = gvars[var]
+            except KeyError:
+                x = ''
+
+def Func3(var, gvars, lvars):
+    """lvars has_key(), gvars has_key()"""
+    for i in IterationList:
+        if lvars.has_key(var):
+            x = lvars[var]
+        elif gvars.has_key(var):
+            x = gvars[var]
+        else:
+            x = ''
+
+def Func4(var, gvars, lvars):
+    """eval()"""
+    for i in IterationList:
+        try:
+            x = eval(var, gvars, lvars)
+        except NameError:
+            x = ''
+
+# Data to pass to the functions on each run.  Each entry is a
+# three-element tuple:
+#
+#   (
+#       "Label to print describing this data run",
+#       ('positional', 'arguments'),
+#       {'keyword' : 'arguments'},
+#   ),
+
+Data = [
+    (
+        "Neither in gvars or lvars",
+        ('x', {}, {}),
+        {},
+    ),
+    (
+        "Missing from lvars, found in gvars",
+        ('x', {'x':1}, {}),
+        {},
+    ),
+    (
+        "Found in lvars",
+        ('x', {'x':1}, {'x':2}),
+        {},
+    ),
+]
+
author	Steven Knight <knight@baldmt.com>	2005-09-25 18:12:01 (GMT)
committer	Steven Knight <knight@baldmt.com>	2005-09-25 18:12:01 (GMT)
commit	781ba4618da6ab099b213fef0a73960459089558 (patch)
tree	8f3f2d46b0ac09fb08f7dc9d9e9729a92fa2e7d0 /bench
parent	83d2c54302835180d4f130d75cd69a92b2d7fd9b (diff)
download	SCons-781ba4618da6ab099b213fef0a73960459089558.zip SCons-781ba4618da6ab099b213fef0a73960459089558.tar.gz SCons-781ba4618da6ab099b213fef0a73960459089558.tar.bz2