diff options
author | Mats Wichmann <mats@linux.com> | 2024-06-15 18:53:50 (GMT) |
---|---|---|
committer | Mats Wichmann <mats@linux.com> | 2024-06-15 18:53:50 (GMT) |
commit | 2303fb98830df908322edce751717c51ef82062d (patch) | |
tree | cf5f16545d0ff90d8414f1d1a7c75a7d49fb2bad | |
parent | a6f498306730b333c81cef8bbf135e26e3acb360 (diff) | |
download | SCons-2303fb98830df908322edce751717c51ef82062d.zip SCons-2303fb98830df908322edce751717c51ef82062d.tar.gz SCons-2303fb98830df908322edce751717c51ef82062d.tar.bz2 |
Update to current fastest env setitem
Updated and re-ran benchmark tests for SubstitutionEnvironment.__setitem__.
Added test for new (Python 3.0) string.isidentifier() method.
That's actually exactly what we want, and it times out as the fastest
method when combined with a membership test if the variable is
already defined.
Tweaked some comments about this performance consideration,
and did other updates in bench/.
Signed-off-by: Mats Wichmann <mats@linux.com>
-rw-r--r-- | CHANGES.txt | 3 | ||||
-rw-r--r-- | RELEASE.txt | 3 | ||||
-rw-r--r-- | SCons/Environment.py | 37 | ||||
-rw-r--r-- | SCons/Util/envs.py | 5 | ||||
-rw-r--r-- | SCons/Variables/__init__.py | 2 | ||||
-rw-r--r-- | bench/bench.py | 20 | ||||
-rw-r--r-- | bench/env.__setitem__.py | 32 | ||||
-rw-r--r-- | bench/is_types.py | 85 | ||||
-rw-r--r-- | bench/lvars-gvars.py | 10 |
9 files changed, 97 insertions, 100 deletions
diff --git a/CHANGES.txt b/CHANGES.txt index d0eb245..5efbe84 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -99,6 +99,9 @@ RELEASE VERSION/DATE TO BE FILLED IN LATER SCons.Environment to SCons.Util to avoid the chance of import loops. Variables and Environment both use the routine and Environment() uses a Variables() object so better to move to a safer location. + - Performance tweak: the __setitem__ method of an Environment, used for + setting construction variables, now uses the string method isidentifier + to validate the name (updated from microbenchmark results). RELEASE 4.7.0 - Sun, 17 Mar 2024 17:22:20 -0700 diff --git a/RELEASE.txt b/RELEASE.txt index 8955e46..df6f7c1 100644 --- a/RELEASE.txt +++ b/RELEASE.txt @@ -69,6 +69,9 @@ IMPROVEMENTS - Make the testing framework a little more resilient: the temporary directory for tests now includes a component named "scons" which can be given to antivirus software to exclude. +- Performance tweak: the __setitem__ method of an Environment, used for + setting construction variables, now uses the string method isidentifier + to validate the name (updated from microbenchmark results). PACKAGING --------- diff --git a/SCons/Environment.py b/SCons/Environment.py index 6669bf8..9322450 100644 --- a/SCons/Environment.py +++ b/SCons/Environment.py @@ -76,7 +76,6 @@ from SCons.Util import ( to_String_for_subst, uniquer_hashables, ) -from SCons.Util.envs import is_valid_construction_var from SCons.Util.sctyping import ExecutorType class _Null: @@ -581,26 +580,20 @@ class SubstitutionEnvironment: return self._dict[key] def __setitem__(self, key, value): - # This is heavily used. This implementation is the best we have - # according to the timings in bench/env.__setitem__.py. - # - # The "key in self._special_set_keys" test here seems to perform - # pretty well for the number of keys we have. A hard-coded - # list worked a little better in Python 2.5, but that has the - # disadvantage of maybe getting out of sync if we ever add more - # variable names. - # So right now it seems like a good trade-off, but feel free to - # revisit this with bench/env.__setitem__.py as needed (and - # as newer versions of Python come out). if key in self._special_set_keys: self._special_set[key](self, key, value) else: - # If we already have the entry, then it's obviously a valid - # key and we don't need to check. If we do check, using a - # global, pre-compiled regular expression directly is more - # efficient than calling another function or a method. - if key not in self._dict and not is_valid_construction_var(key): - raise UserError("Illegal construction variable `%s'" % key) + # Performance: since this is heavily used, try to avoid checking + # if the variable is valid unless necessary. bench/__setitem__.py + # times a bunch of different approaches. Based the most recent + # run, against Python 3.6-3.13(beta), the best we can do across + # different combinations of actions is to use a membership test + # to see if we already have the variable, if so it must already + # have been checked, so skip; if we do check, "isidentifier()" + # (new in Python 3 so wasn't in benchmark until recently) + # on the key is the best. + if key not in self._dict and not key.isidentifier(): + raise UserError(f"Illegal construction variable {key!r}") self._dict[key] = value def get(self, key, default=None): @@ -2579,8 +2572,12 @@ class OverrideEnvironment(Base): return self.__dict__['__subject'].__getitem__(key) def __setitem__(self, key, value): - if not is_valid_construction_var(key): - raise UserError("Illegal construction variable `%s'" % key) + # This doesn't have the same performance equation as a "real" + # environment: in an override you're basically just writing + # new stuff; it's not a common case to be changing values already + # set in the override dict, so don't spend time checking for existance. + if not key.isidentifier(): + raise UserError(f"Illegal construction variable {key!r}") self.__dict__['overrides'][key] = value def __delitem__(self, key): diff --git a/SCons/Util/envs.py b/SCons/Util/envs.py index 68a4048..2640ef5 100644 --- a/SCons/Util/envs.py +++ b/SCons/Util/envs.py @@ -330,10 +330,13 @@ def AddMethod(obj, function: Callable, name: Optional[str] = None) -> None: setattr(obj, name, method) +# This routine is used to validate that a construction var name can be used +# as a Python identifier, which we require. However, Python 3 introduced an +# isidentifier() string method so there's really not any need for it now. _is_valid_var_re = re.compile(r'[_a-zA-Z]\w*$') def is_valid_construction_var(varstr: str) -> bool: - """Return True if *varstr* is a legitimate construction variable.""" + """Return True if *varstr* is a legitimate name of a construction variable.""" return bool(_is_valid_var_re.match(varstr)) # Local Variables: diff --git a/SCons/Variables/__init__.py b/SCons/Variables/__init__.py index 1c41130..1337d14 100644 --- a/SCons/Variables/__init__.py +++ b/SCons/Variables/__init__.py @@ -131,7 +131,7 @@ class Variables: option.key = key # TODO: normalize to not include key in aliases. Currently breaks tests. option.aliases = [key,] - if not SCons.Util.is_valid_construction_var(option.key): + if not option.key.isidentifier(): raise SCons.Errors.UserError(f"Illegal Variables key {option.key!r}") option.help = help option.default = default diff --git a/bench/bench.py b/bench/bench.py index 5adac23..bafe5a2 100644 --- a/bench/bench.py +++ b/bench/bench.py @@ -62,10 +62,10 @@ Usage: bench.py OPTIONS file.py -h, --help Display this help and exit -i ITER, --iterations ITER Run each code snippet ITER times --time Use the time.time function - -r RUNS, --runs RUNS Average times for RUNS invocations of + -r RUNS, --runs RUNS Average times for RUNS invocations of """ -# How many times each snippet of code will be (or should be) run by the +# How many times each snippet of code will be (or should be) run by the # functions under test to gather the time (the "inner loop"). Iterations = 1000 @@ -191,19 +191,17 @@ py_ver_string = "%d.%d"%(sys.version_info.major, sys.version_info.minor) # pprint(results_dict) -# breakpoint() + tests = [label for label, args, kw in Data] -columns = ['Python Version', 'Implementation', 'Test'] + tests +columns = ['Python Version', 'Implementation'] + tests with open(results_filename, 'a') as r: - print("Python Version,%s" % ".".join(columns), file=r) - # print("Python Version,%s" % ".".join(columns)) + print(",".join(columns), file=r) - for implementation in results_dict.keys(): + for implementation in results_dict: + print(f'{py_ver_string},"{implementation}"', file=r, end='') for test in tests: - print(f'{py_ver_string},"{implementation}","{test}",%8.3f' % results_dict[implementation][test], file=r) - # print(f'{py_ver_string},"{implementation}","{test}",%8.3f' % results_dict[implementation][test]) - - + print(',%8.3f' % results_dict[implementation][test], file=r, end='') + print(file=r) # Local Variables: # tab-width:4 diff --git a/bench/env.__setitem__.py b/bench/env.__setitem__.py index 2cd0da8..e915e46 100644 --- a/bench/env.__setitem__.py +++ b/bench/env.__setitem__.py @@ -57,14 +57,14 @@ except AttributeError: script_dir = os.path.split(filename)[0] if script_dir: script_dir = script_dir + '/' -sys.path = [os.path.abspath(script_dir + '../src/engine')] + sys.path +sys.path = [os.path.abspath(script_dir + '..')] + sys.path import SCons.Errors import SCons.Environment import SCons.Util is_valid_construction_var = SCons.Util.is_valid_construction_var -global_valid_var = re.compile(r'[_a-zA-Z]\w*$') +global_valid_var = SCons.Util.envs._is_valid_var_re # The classes with different __setitem__() implementations that we're # going to horse-race. @@ -105,7 +105,7 @@ class Environment: 'SOURCES' : None, } _special_set_keys = list(_special_set.keys()) - _valid_var = re.compile(r'[_a-zA-Z]\w*$') + _valid_var = global_valid_var def __init__(self, **kw): self._dict = kw @@ -247,8 +247,8 @@ class env_Best_attribute(Environment): raise SCons.Errors.UserError("Illegal construction variable `%s'" % key) self._dict[key] = value -class env_Best_has_key(Environment): - """Best __setitem__(), with has_key""" +class env_Best_has_key_global_regex(Environment): + """Best __setitem__(), with membership test and global regex""" def __setitem__(self, key, value): if key in self._special_set: self._special_set[key](self, key, value) @@ -258,6 +258,18 @@ class env_Best_has_key(Environment): raise SCons.Errors.UserError("Illegal construction variable `%s'" % key) self._dict[key] = value +class env_Best_has_key_function(Environment): + """Best __setitem__(), with membership_test and validator function""" + def __setitem__(self, key, value): + if key in self._special_set: + self._special_set[key](self, key, value) + else: + if key not in self._dict \ + and not is_valid_construction_var(key): + raise SCons.Errors.UserError("Illegal construction variable `%s'" % key) + self._dict[key] = value + + class env_Best_list(Environment): """Best __setitem__(), with a list""" def __setitem__(self, key, value): @@ -284,6 +296,16 @@ else: raise SCons.Errors.UserError("Illegal construction variable `%s'" % key) self._dict[key] = value +class env_Best_isidentifier(Environment): + """Best __setitem__(), with membership test and isidentifier""" + def __setitem__(self, key, value): + if key in self._special_set: + self._special_set[key](self, key, value) + else: + if key not in self._dict and not key.isidentifier(): + raise SCons.Errors.UserError("Illegal construction variable `%s'" % key) + self._dict[key] = value + # We'll use the names of all the env_* classes we find later to build # the dictionary of statements to be timed, and the import statement # that the timer will use to get at these classes. diff --git a/bench/is_types.py b/bench/is_types.py index 01db768..257fa91 100644 --- a/bench/is_types.py +++ b/bench/is_types.py @@ -2,10 +2,11 @@ # # Benchmarks for testing various possible implementations # of the is_Dict(), is_List() and is_String() functions in -# src/engine/SCons/Util.py. +# SCons/Util.py. import types -from collections import UserDict, UserList, UserString +from collections import UserDict, UserList, UserString, deque +from collections.abc import Iterable, MappingView, MutableMapping, MutableSequence DictType = dict ListType = list @@ -28,19 +29,20 @@ def original_is_String(e): # New candidates that explicitly check for whether the object is an # InstanceType before calling isinstance() on the corresponding User* -# type. -# InstanceType was only for old-style classes, so absent in Python 3 -# this this is no different than the previous +# type. Update: meaningless in Python 3, InstanceType was only for +# old-style classes, so these are just removed. +# XXX -def checkInstanceType_is_Dict(e): - return isinstance(e, (dict, UserDict)) +# New candidates that try more generic names from collections: -def checkInstanceType_is_List(e): - return isinstance(e, (list, UserList)) +def new_is_Dict(e): + return isinstance(e, MutableMapping) -def checkInstanceType_is_String(e): - return isinstance(e, (str, UserString)) +def new_is_List(e): + return isinstance(e, MutableSequence) +def new_is_String(e): + return isinstance(e, (str, UserString)) # Improved candidates that cache the type(e) result in a variable # before doing any checks. @@ -51,7 +53,7 @@ def cache_type_e_is_Dict(e): def cache_type_e_is_List(e): t = type(e) - return t is list or isinstance(e, UserList) + return t is list or isinstance(e, UserList or isinstance(e, deque)) def cache_type_e_is_String(e): t = type(e) @@ -68,7 +70,7 @@ def global_cache_type_e_is_Dict(e): def global_cache_type_e_is_List(e): t = type(e) - return t is ListType or isinstance(e, UserList) + return t is ListType or isinstance(e, (UserList, deque)) def global_cache_type_e_is_String(e): t = type(e) @@ -77,30 +79,10 @@ def global_cache_type_e_is_String(e): # Alternative that uses a myType() function to map the User* objects # to their corresponding underlying types. - -instanceTypeMap = { - UserDict : dict, - UserList : list, - UserString : str, -} - -def myType(obj): - t = type(obj) - if t is types.InstanceType: - t = instanceTypeMap.get(obj.__class__, t) - return t - -def myType_is_Dict(e): - return myType(e) is dict - -def myType_is_List(e): - return myType(e) is list - -def myType_is_String(e): - return myType(e) is str - +# Again, since this used InstanceType, it's not useful for Python 3. +# These are the actual test entry points def Func01(obj): """original_is_String""" @@ -118,19 +100,19 @@ def Func03(obj): original_is_Dict(obj) def Func04(obj): - """checkInstanceType_is_String""" + """new_is_String""" for i in IterationList: - checkInstanceType_is_String(obj) + new_is_String(obj) def Func05(obj): - """checkInstanceType_is_List""" + """new_is_List""" for i in IterationList: - checkInstanceType_is_List(obj) + new_is_List(obj) def Func06(obj): - """checkInstanceType_is_Dict""" + """new_is_Dict""" for i in IterationList: - checkInstanceType_is_Dict(obj) + new_is_Dict(obj) def Func07(obj): """cache_type_e_is_String""" @@ -162,22 +144,6 @@ def Func12(obj): for i in IterationList: global_cache_type_e_is_Dict(obj) -#def Func13(obj): -# """myType_is_String""" -# for i in IterationList: -# myType_is_String(obj) -# -#def Func14(obj): -# """myType_is_List""" -# for i in IterationList: -# myType_is_List(obj) -# -#def Func15(obj): -# """myType_is_Dict""" -# for i in IterationList: -# myType_is_Dict(obj) - - # Data to pass to the functions on each run. Each entry is a # three-element tuple: @@ -218,6 +184,11 @@ Data = [ {}, ), ( + "deque", + (deque([]),), + {}, + ), + ( "UserDict", (UserDict({}),), {}, diff --git a/bench/lvars-gvars.py b/bench/lvars-gvars.py index 0a81337..91d76a0 100644 --- a/bench/lvars-gvars.py +++ b/bench/lvars-gvars.py @@ -23,9 +23,9 @@ """ Functions and data for timing different idioms for fetching a keyword -value from a pair of dictionaries for localand global values. This was +value from a pair of dictionaries for local and global values. This was used to select how to most efficiently expand single $KEYWORD strings -in src/engine/SCons/Subst.py. +in SCons/Subst.py (StringSubber and ListSubber). """ def Func1(var, gvars, lvars): @@ -40,7 +40,7 @@ def Func1(var, gvars, lvars): x = '' def Func2(var, gvars, lvars): - """lvars has_key(), gvars try:-except:""" + """lvars membership test, gvars try:-except:""" for i in IterationList: if var in lvars: x = lvars[var] @@ -51,7 +51,7 @@ def Func2(var, gvars, lvars): x = '' def Func3(var, gvars, lvars): - """lvars has_key(), gvars has_key()""" + """lvars membership test, gvars membership test)""" for i in IterationList: if var in lvars: x = lvars[var] @@ -71,7 +71,7 @@ def Func4(var, gvars, lvars): def Func5(var, gvars, lvars): """Chained get with default values""" for i in IterationList: - x = lvars.get(var,gvars.get(var,'')) + x = lvars.get(var, gvars.get(var, '')) # Data to pass to the functions on each run. Each entry is a |