diff options
Diffstat (limited to 'bench')
-rw-r--r-- | bench/env.__setitem__.py | 362 | ||||
-rw-r--r-- | bench/timeit.py | 297 |
2 files changed, 659 insertions, 0 deletions
# diff --git a/bench/env.__setitem__.py b/bench/env.__setitem__.py
# new file mode 100644
# index 0000000..3826176
# --- /dev/null
# +++ b/bench/env.__setitem__.py
# @@ -0,0 +1,362 @@

# __COPYRIGHT__
#
# Benchmarks for testing various possible implementations of the
# env.__setitem__() method(s) in the src/engine/SCons/Environment.py
# module.

import os.path
import re
import string
import sys
import timeit

# Utility Timing class and function from:
#   ASPN: Python Cookbook : Timing various python statements
#   http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/544297
#
# These wrap the basic timeit function to make it a little more
# convenient to do side-by-side tests of code.

class Timing:
    """One named statement paired with the timer that measures it.

    name      -- label reported next to the measurement
    num       -- number of executions requested from the timer
    init      -- setup code handed to timeit.Timer
    statement -- the code being timed
    """

    def __init__(self, name, num, init, statement):
        self.name = name
        self.statement = statement
        self.__num = num
        self.__result = None
        self.__timer = timeit.Timer(statement, init)

    def timeit(self):
        """Run the measurement and remember it for getResult()."""
        elapsed = self.__timer.timeit(self.__num)
        self.__result = elapsed

    def getResult(self):
        """Return the last measurement, or None if timeit() has not run."""
        return self.__result

def times(num=1000000, init='', title='Results:', **statements):
    """Time every keyword-supplied statement and print a sorted report.

    num        -- number of executions per statement
    init       -- setup code shared by all statements
    title      -- heading printed above the results
    statements -- label=code pairs; each code string is timed once

    All statements are timed before anything is printed.  The report is
    a blank line, the title, then one line per statement ordered by
    (elapsed time, label), smallest first.
    """
    measured = []
    for label, code in statements.items():
        timing = Timing(label, num, init, code)
        timing.timeit()
        measured.append((timing.getResult(), timing.name))
    measured.sort()

    emit = sys.stdout.write
    emit("\n")
    emit(str(title) + "\n")
    for row in measured:
        emit(" %9.3f s %s" % row + "\n")

# Import the necessary local SCons.* modules used by some of our
# alternative implementations below, first manipulating sys.path so
# we pull in the right local modules without forcing the user to set
# PYTHONPATH.
import __main__
# Find the path of the running script: use __main__.__file__ when it
# exists, otherwise fall back to sys.argv[0].
try:
    filename = __main__.__file__
except AttributeError:
    filename = sys.argv[0]
script_dir = os.path.split(filename)[0]
# Only add the separator when there is a directory part; an empty
# script_dir leaves the path below relative to the current directory.
if script_dir:
    script_dir = script_dir + '/'
# Put ../src/engine (relative to this script) at the front of sys.path
# so the SCons imports below are resolved from there first.
sys.path = [os.path.abspath(script_dir + '../src/engine')] + sys.path

import SCons.Errors
import SCons.Environment

# Module-level names for the implementations below that test a global
# function or a global regex instead of an attribute lookup.
is_valid_construction_var = SCons.Environment.is_valid_construction_var
global_valid_var = re.compile(r'[_a-zA-Z]\w*$')

# The classes with different __setitem__() implementations that we're
# going to horse-race.
#
# The base class (Environment) should contain *all* class initialization
# of anything that will be used by any of the competing sub-class
# implementations.  Each timing run will create an instance of the class,
# and all competing sub-classes should share the same initialization
# overhead so our timing focuses on just the __setitem__() performance.
#
# All subclasses should be prefixed with env_, in which case they'll be
# picked up automatically by the code below for testing.
#
# The env_Original subclass contains the original implementation (which
# actually had the is_valid_construction_var() function in SCons.Util
# originally).
#
# The other subclasses (except for env_Best) each contain *one*
# significant change from the env_Original implementation.  The doc string
# describes the change, and is what gets displayed in the final timing.
# The doc strings of these other subclasses are "grouped" informally
# by a prefix that kind of indicates what specific aspect of __setitem__()
# is being varied and tested.
#
# The env_Best subclass contains the "best practices" from each of
# the different "groups" of techniques tested in the other subclasses,
# and is where to experiment with different combinations of techniques.
# After we're done, it should be the one that shows up at the top of the
# list as we run our timings.
# Shared base for every competing implementation.  It only stores the
# keyword arguments as the variable dictionary and defines the class-level
# tables the subclasses consult.
#
# NOTE: every handler in _special_set is None.  The variants that use
# _special_set.get() therefore always fall through to the validation
# branch, while the membership-based variants would try to call None if
# one of these keys were ever assigned.
#
# NOTE: each env_* class docstring is used at runtime as the label of that
# implementation in the timing report (via ec.__doc__ further down), so
# the docstrings must not be reworded casually.
class Environment:
    _special_set = {
        'BUILDERS' : None,
        'SCANNERS' : None,
        'TARGET' : None,
        'TARGETS' : None,
        'SOURCE' : None,
        'SOURCES' : None,
    }
    # Key list captured once at class-creation time, for the
    # "key in attribute" variants.
    _special_set_keys = _special_set.keys()
    # Same pattern as the module-level global_valid_var, but reached
    # through an attribute lookup.
    _valid_var = re.compile(r'[_a-zA-Z]\w*$')
    def __init__(self, **kw):
        self._dict = kw

# Baseline: dict .get() for the special keys, fully qualified
# SCons.Environment.is_valid_construction_var() for validation.
class env_Original(Environment):
    """Original __setitem__()"""
    def __setitem__(self, key, value):
        special = self._special_set.get(key)
        if special:
            special(self, key, value)
        else:
            if not SCons.Environment.is_valid_construction_var(key):
                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
            self._dict[key] = value

# Validation through the module-level alias instead of the dotted name.
class env_Global_is_valid(Environment):
    """is_valid_construction_var(): use a global function"""
    def __setitem__(self, key, value):
        special = self._special_set.get(key)
        if special:
            special(self, key, value)
        else:
            if not is_valid_construction_var(key):
                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
            self._dict[key] = value

# Validation through a method that matches against the class regex.
class env_Method_is_valid(Environment):
    """is_valid_construction_var(): use a method"""
    def is_valid_construction_var(self, varstr):
        """Return if the specified string is a legitimate construction
        variable.
        """
        return self._valid_var.match(varstr)

    def __setitem__(self, key, value):
        special = self._special_set.get(key)
        if special:
            special(self, key, value)
        else:
            if not self.is_valid_construction_var(key):
                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
            self._dict[key] = value

# Validation by matching the class-attribute regex inline.
class env_regex_attribute_is_valid(Environment):
    """is_valid_construction_var(): use a regex attribute"""
    def __setitem__(self, key, value):
        special = self._special_set.get(key)
        if special:
            special(self, key, value)
        else:
            if not self._valid_var.match(key):
                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
            self._dict[key] = value

# Validation by matching the module-level regex inline.
class env_global_regex_is_valid(Environment):
    """is_valid_construction_var(): use a global regex"""
    def __setitem__(self, key, value):
        special = self._special_set.get(key)
        if special:
            special(self, key, value)
        else:
            if not global_valid_var.match(key):
                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
            self._dict[key] = value

# Special-key test via has_key() followed by a second dictionary lookup.
class env_special_set_has_key(Environment):
    """_special_set.get(): use _special_set.has_key() instead"""
    def __setitem__(self, key, value):
        if self._special_set.has_key(key):
            self._special_set[key](self, key, value)
        else:
            if not SCons.Environment.is_valid_construction_var(key):
                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
            self._dict[key] = value

# Special-key test via membership in a literal tuple.
class env_key_in_tuple(Environment):
    """_special_set.get(): use "key in tuple" instead"""
    def __setitem__(self, key, value):
        if key in ('BUILDERS', 'SCANNERS', 'TARGET', 'TARGETS', 'SOURCE', 'SOURCES'):
            self._special_set[key](self, key, value)
        else:
            if not SCons.Environment.is_valid_construction_var(key):
                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
            self._dict[key] = value

# Special-key test via membership in a literal list.
class env_key_in_list(Environment):
    """_special_set.get(): use "key in list" instead"""
    def __setitem__(self, key, value):
        if key in ['BUILDERS', 'SCANNERS', 'TARGET', 'TARGETS', 'SOURCE', 'SOURCES']:
            self._special_set[key](self, key, value)
        else:
            if not SCons.Environment.is_valid_construction_var(key):
                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
            self._dict[key] = value

# Special-key test via membership in the precomputed key list.
class env_key_in_attribute(Environment):
    """_special_set.get(): use "key in attribute" instead"""
    def __setitem__(self, key, value):
        if key in self._special_set_keys:
            self._special_set[key](self, key, value)
        else:
            if not SCons.Environment.is_valid_construction_var(key):
                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
            self._dict[key] = value

# Skip validation for keys already present: probe the dictionary and
# validate only when the probe raises KeyError.
class env_try_except(Environment):
    """avoid is_valid_construction_var(): use try:-except:"""
    def __setitem__(self, key, value):
        special = self._special_set.get(key)
        if special:
            special(self, key, value)
        else:
            try:
                self._dict[key]
            except KeyError:
                if not SCons.Environment.is_valid_construction_var(key):
                    raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
            self._dict[key] = value

# Skip validation for keys already present: has_key() short-circuits the
# validity check.
class env_not_has_key(Environment):
    """avoid is_valid_construction_var(): use not .has_key()"""
    def __setitem__(self, key, value):
        special = self._special_set.get(key)
        if special:
            special(self, key, value)
        else:
            if not self._dict.has_key(key) \
               and not SCons.Environment.is_valid_construction_var(key):
                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
            self._dict[key] = value

# Combination: key-list membership + has_key() short-circuit + global regex.
class env_Best_attribute(Environment):
    """Best __setitem__(), with an attribute"""
    def __setitem__(self, key, value):
        if key in self._special_set_keys:
            self._special_set[key](self, key, value)
        else:
            if not self._dict.has_key(key) \
               and not global_valid_var.match(key):
                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
            self._dict[key] = value

# Combination: _special_set.has_key() + has_key() short-circuit + global regex.
class env_Best_has_key(Environment):
    """Best __setitem__(), with has_key"""
    def __setitem__(self, key, value):
        if self._special_set.has_key(key):
            self._special_set[key](self, key, value)
        else:
            if not self._dict.has_key(key) \
               and not global_valid_var.match(key):
                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
            self._dict[key] = value

# Combination: literal-list membership + has_key() short-circuit + global regex.
class env_Best_list(Environment):
    """Best __setitem__(), with a list"""
    def __setitem__(self, key, value):
        if key in ['BUILDERS', 'SCANNERS', 'TARGET', 'TARGETS', 'SOURCE', 'SOURCES']:
            self._special_set[key](self, key, value)
        else:
            if not self._dict.has_key(key) \
               and not global_valid_var.match(key):
                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
            self._dict[key] = value

# Only define the isalnum() variant when strings have that method; on an
# interpreter without it the class is simply left out of the race.
try:
    ''.isalnum
except AttributeError:
    pass
else:
    class env_isalnum(Environment):
        """Greg's Folly: isalnum instead of probe"""
        def __setitem__(self, key, value):
            if self._special_set.has_key(key):
                self._special_set[key](self, key, value)
            else:
                # The regex is consulted only for keys that are not
                # purely alphanumeric.
                if not key.isalnum() and not global_valid_var.match(key):
                    raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
                self._dict[key] = value

# We'll use the names of all the env_* classes we find later to build
# the dictionary of statements to be timed, and the import statement
# that the timer will use to get at these classes.

class_names = []
for n in locals().keys():
    # Slice comparison instead of the string method; presumably kept for
    # interpreters without str.startswith() -- TODO confirm.
    #if n.startswith('env_'):
    if n[:4] == 'env_':
        class_names.append(n)

# This is *the* function that gets timed.  It will get called for the
# specified number of iterations for the cross product of the number of
# classes we're testing and the number of data sets (defined below).

iterations = 10000

def do_it(names, env_class):
    """Create a fresh env_class instance and assign 1 to every key in names."""
    e = env_class()
    for key in names:
        e[key] = 1

# Build the list of "statements" that will be tested.
For each class +# we're testing, the doc string describing the class is the key, and +# the statement we test is a simple "doit(names, {class})" call. + +statements = {} + +for class_name in class_names: + ec = eval(class_name) + statements[ec.__doc__] = 'do_it(names, %s)' % class_name + +# The common_imports string is used in the initialization of each +# test run. The timeit module insulates the test snippets from the +# global namespace, so we have to import these explicitly from __main__. + +common_import_variables = ['do_it'] + class_names + +common_imports = """ +from __main__ import %s +""" % string.join(common_import_variables, ', ') + +# The test data (lists of variable names) that we'll use for the runs. + +same_variable_names = ['XXX'] * 100 +uniq_variable_names = [] +for i in range(100): uniq_variable_names.append('X%05d' % i) +mixed_variable_names = uniq_variable_names[:50] + same_variable_names[:50] + +# Lastly, put it all together... + +def run_it(title, init): + s = statements.copy() + s['num'] = iterations + s['title'] = title + s['init'] = init + apply(times,(),s) + +print 'Environment __setitem__ benchmark using', +print 'Python', string.split(sys.version)[0], +print 'on', sys.platform, os.name + +run_it('Results for re-adding an existing variable name 100 times:', + common_imports + """ +import __main__ ; names = __main__.same_variable_names +""") + +run_it('Results for adding 100 variable names, 50 existing and 50 new:', + common_imports + """ +import __main__ ; names = __main__.mixed_variable_names +""") + +run_it('Results for adding 100 new, unique variable names:', + common_imports + """ +import __main__ ; names = __main__.uniq_variable_names +""") diff --git a/bench/timeit.py b/bench/timeit.py new file mode 100644 index 0000000..d5e33bb --- /dev/null +++ b/bench/timeit.py @@ -0,0 +1,297 @@ +#! /usr/bin/env python + +"""Tool for measuring execution time of small code snippets. 
+ +This module avoids a number of common traps for measuring execution +times. See also Tim Peters' introduction to the Algorithms chapter in +the Python Cookbook, published by O'Reilly. + +Library usage: see the Timer class. + +Command line usage: + python timeit.py [-n N] [-r N] [-s S] [-t] [-c] [-h] [statement] + +Options: + -n/--number N: how many times to execute 'statement' (default: see below) + -r/--repeat N: how many times to repeat the timer (default 3) + -s/--setup S: statement to be executed once initially (default 'pass') + -t/--time: use time.time() (default on Unix) + -c/--clock: use time.clock() (default on Windows) + -v/--verbose: print raw timing results; repeat for more digits precision + -h/--help: print this usage message and exit + statement: statement to be timed (default 'pass') + +A multi-line statement may be given by specifying each line as a +separate argument; indented lines are possible by enclosing an +argument in quotes and using leading spaces. Multiple -s options are +treated similarly. + +If -n is not given, a suitable number of loops is calculated by trying +successive powers of 10 until the total time is at least 0.2 seconds. + +The difference in default timer function is because on Windows, +clock() has microsecond granularity but time()'s granularity is 1/60th +of a second; on Unix, clock() has 1/100th of a second granularity and +time() is much more precise. On either platform, the default timer +functions measure wall clock time, not the CPU time. This means that +other processes running on the same computer may interfere with the +timing. The best thing to do when accurate timing is necessary is to +repeat the timing a few times and use the best time. The -r option is +good for this; the default of 3 repetitions is probably enough in most +cases. On Unix, you can use clock() to measure CPU time. + +Note: there is a certain baseline overhead associated with executing a +pass statement. 
The code here doesn't try to hide it, but you should +be aware of it. The baseline overhead can be measured by invoking the +program without arguments. + +The baseline overhead differs between Python versions! Also, to +fairly compare older Python versions to Python 2.3, you may want to +use python -O for the older versions to avoid timing SET_LINENO +instructions. +""" + +try: + import gc +except ImportError: + class _fake_gc: + def isenabled(self): + return None + def enable(self): + pass + def disable(self): + pass + gc = _fake_gc() +import sys +import time +try: + import itertools +except ImportError: + # Must be an older Python version (see timeit() below) + itertools = None + +import string + +__all__ = ["Timer"] + +dummy_src_name = "<timeit-src>" +default_number = 1000000 +default_repeat = 3 + +if sys.platform == "win32": + # On Windows, the best timer is time.clock() + default_timer = time.clock +else: + # On most other platforms the best timer is time.time() + default_timer = time.time + +# Don't change the indentation of the template; the reindent() calls +# in Timer.__init__() depend on setup being indented 4 spaces and stmt +# being indented 8 spaces. +template = """ +def inner(_it, _timer): + %(setup)s + _t0 = _timer() + for _i in _it: + %(stmt)s + _t1 = _timer() + return _t1 - _t0 +""" + +def reindent(src, indent): + """Helper to reindent a multi-line statement.""" + return string.replace(src, "\n", "\n" + " "*indent) + +class Timer: + """Class for timing execution speed of small code snippets. + + The constructor takes a statement to be timed, an additional + statement used for setup, and a timer function. Both statements + default to 'pass'; the timer function is platform-dependent (see + module doc string). + + To measure the execution time of the first statement, use the + timeit() method. The repeat() method is a convenience to call + timeit() multiple times and return a list of results. 
+ + The statements may contain newlines, as long as they don't contain + multi-line string literals. + """ + + def __init__(self, stmt="pass", setup="pass", timer=default_timer): + """Constructor. See class doc string.""" + self.timer = timer + stmt = reindent(stmt, 8) + setup = reindent(setup, 4) + src = template % {'stmt': stmt, 'setup': setup} + self.src = src # Save for traceback display + code = compile(src, dummy_src_name, "exec") + ns = {} + exec code in globals(), ns + self.inner = ns["inner"] + + def print_exc(self, file=None): + """Helper to print a traceback from the timed code. + + Typical use: + + t = Timer(...) # outside the try/except + try: + t.timeit(...) # or t.repeat(...) + except: + t.print_exc() + + The advantage over the standard traceback is that source lines + in the compiled template will be displayed. + + The optional file argument directs where the traceback is + sent; it defaults to sys.stderr. + """ + import linecache, traceback + linecache.cache[dummy_src_name] = (len(self.src), + None, + self.src.split("\n"), + dummy_src_name) + traceback.print_exc(file=file) + + def timeit(self, number=default_number): + """Time 'number' executions of the main statement. + + To be precise, this executes the setup statement once, and + then returns the time it takes to execute the main statement + a number of times, as a float measured in seconds. The + argument is the number of times through the loop, defaulting + to one million. The main statement, the setup statement and + the timer function to be used are passed to the constructor. + """ + if itertools: + it = itertools.repeat(None, number) + else: + it = [None] * number + gcold = gc.isenabled() + gc.disable() + timing = self.inner(it, self.timer) + if gcold: + gc.enable() + return timing + + def repeat(self, repeat=default_repeat, number=default_number): + """Call timeit() a few times. + + This is a convenience function that calls the timeit() + repeatedly, returning a list of results. 
The first argument + specifies how many times to call timeit(), defaulting to 3; + the second argument specifies the timer argument, defaulting + to one million. + + Note: it's tempting to calculate mean and standard deviation + from the result vector and report these. However, this is not + very useful. In a typical case, the lowest value gives a + lower bound for how fast your machine can run the given code + snippet; higher values in the result vector are typically not + caused by variability in Python's speed, but by other + processes interfering with your timing accuracy. So the min() + of the result is probably the only number you should be + interested in. After that, you should look at the entire + vector and apply common sense rather than statistics. + """ + r = [] + for i in range(repeat): + t = self.timeit(number) + r.append(t) + return r + +def main(args=None): + """Main program, used when run as a script. + + The optional argument specifies the command line to be parsed, + defaulting to sys.argv[1:]. + + The return value is an exit code to be passed to sys.exit(); it + may be None to indicate success. + + When an exception happens during timing, a traceback is printed to + stderr and the return value is 1. Exceptions at other times + (including the template compilation) are not caught. 
+ """ + if args is None: + args = sys.argv[1:] + import getopt + try: + opts, args = getopt.getopt(args, "n:s:r:tcvh", + ["number=", "setup=", "repeat=", + "time", "clock", "verbose", "help"]) + except getopt.error, err: + print err + print "use -h/--help for command line help" + return 2 + timer = default_timer + stmt = string.join(args, "\n") or "pass" + number = 0 # auto-determine + setup = [] + repeat = default_repeat + verbose = 0 + precision = 3 + for o, a in opts: + if o in ("-n", "--number"): + number = int(a) + if o in ("-s", "--setup"): + setup.append(a) + if o in ("-r", "--repeat"): + repeat = int(a) + if repeat <= 0: + repeat = 1 + if o in ("-t", "--time"): + timer = time.time + if o in ("-c", "--clock"): + timer = time.clock + if o in ("-v", "--verbose"): + if verbose: + precision = precision + 1 + verbose = precision + 1 + if o in ("-h", "--help"): + print __doc__, + return 0 + setup = string.join(setup, "\n") or "pass" + # Include the current directory, so that local imports work (sys.path + # contains the directory of this script, rather than the current + # directory) + import os + sys.path.insert(0, os.curdir) + t = Timer(stmt, setup, timer) + if number == 0: + # determine number so that 0.2 <= total time < 2.0 + for i in range(1, 10): + number = 10**i + try: + x = t.timeit(number) + except: + t.print_exc() + return 1 + if verbose: + print "%d loops -> %.*g secs" % (number, precision, x) + if x >= 0.2: + break + try: + r = t.repeat(repeat, number) + except: + t.print_exc() + return 1 + best = min(r) + if verbose: + print "raw times:", string.join(map(lambda x, p=precision: "%.*g" % (p, x), r)) + print "%d loops," % number, + usec = best * 1e6 / number + if usec < 1000: + print "best of %d: %.*g usec per loop" % (repeat, precision, usec) + else: + msec = usec / 1000 + if msec < 1000: + print "best of %d: %.*g msec per loop" % (repeat, precision, msec) + else: + sec = msec / 1000 + print "best of %d: %.*g sec per loop" % (repeat, precision, sec) + 
return None + +if __name__ == "__main__": + sys.exit(main()) |