summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMats Wichmann <mats@linux.com>2024-06-15 18:53:50 (GMT)
committerMats Wichmann <mats@linux.com>2024-06-15 18:53:50 (GMT)
commit2303fb98830df908322edce751717c51ef82062d (patch)
treecf5f16545d0ff90d8414f1d1a7c75a7d49fb2bad
parenta6f498306730b333c81cef8bbf135e26e3acb360 (diff)
downloadSCons-2303fb98830df908322edce751717c51ef82062d.zip
SCons-2303fb98830df908322edce751717c51ef82062d.tar.gz
SCons-2303fb98830df908322edce751717c51ef82062d.tar.bz2
Update to current fastest env setitem
Updated and re-ran benchmark tests for SubstitutionEnvironment.__setitem__. Added test for new (Python 3.0) string.isidentifier() method. That's actually exactly what we want, and it times out as the fastest method when combined with a membership test if the variable is already defined. Tweaked some comments about this performance consideration, and did other updates in bench/. Signed-off-by: Mats Wichmann <mats@linux.com>
-rw-r--r--CHANGES.txt3
-rw-r--r--RELEASE.txt3
-rw-r--r--SCons/Environment.py37
-rw-r--r--SCons/Util/envs.py5
-rw-r--r--SCons/Variables/__init__.py2
-rw-r--r--bench/bench.py20
-rw-r--r--bench/env.__setitem__.py32
-rw-r--r--bench/is_types.py85
-rw-r--r--bench/lvars-gvars.py10
9 files changed, 97 insertions, 100 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index d0eb245..5efbe84 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -99,6 +99,9 @@ RELEASE VERSION/DATE TO BE FILLED IN LATER
SCons.Environment to SCons.Util to avoid the chance of import loops. Variables
and Environment both use the routine and Environment() uses a Variables()
object so better to move to a safer location.
+ - Performance tweak: the __setitem__ method of an Environment, used for
+ setting construction variables, now uses the string method isidentifier
+ to validate the name (updated from microbenchmark results).
RELEASE 4.7.0 - Sun, 17 Mar 2024 17:22:20 -0700
diff --git a/RELEASE.txt b/RELEASE.txt
index 8955e46..df6f7c1 100644
--- a/RELEASE.txt
+++ b/RELEASE.txt
@@ -69,6 +69,9 @@ IMPROVEMENTS
- Make the testing framework a little more resilient: the temporary
directory for tests now includes a component named "scons" which can
be given to antivirus software to exclude.
+- Performance tweak: the __setitem__ method of an Environment, used for
+ setting construction variables, now uses the string method isidentifier
+ to validate the name (updated from microbenchmark results).
PACKAGING
---------
diff --git a/SCons/Environment.py b/SCons/Environment.py
index 6669bf8..9322450 100644
--- a/SCons/Environment.py
+++ b/SCons/Environment.py
@@ -76,7 +76,6 @@ from SCons.Util import (
to_String_for_subst,
uniquer_hashables,
)
-from SCons.Util.envs import is_valid_construction_var
from SCons.Util.sctyping import ExecutorType
class _Null:
@@ -581,26 +580,20 @@ class SubstitutionEnvironment:
return self._dict[key]
def __setitem__(self, key, value):
- # This is heavily used. This implementation is the best we have
- # according to the timings in bench/env.__setitem__.py.
- #
- # The "key in self._special_set_keys" test here seems to perform
- # pretty well for the number of keys we have. A hard-coded
- # list worked a little better in Python 2.5, but that has the
- # disadvantage of maybe getting out of sync if we ever add more
- # variable names.
- # So right now it seems like a good trade-off, but feel free to
- # revisit this with bench/env.__setitem__.py as needed (and
- # as newer versions of Python come out).
if key in self._special_set_keys:
self._special_set[key](self, key, value)
else:
- # If we already have the entry, then it's obviously a valid
- # key and we don't need to check. If we do check, using a
- # global, pre-compiled regular expression directly is more
- # efficient than calling another function or a method.
- if key not in self._dict and not is_valid_construction_var(key):
- raise UserError("Illegal construction variable `%s'" % key)
+ # Performance: since this is heavily used, try to avoid checking
+ # if the variable is valid unless necessary. bench/__setitem__.py
+ # times a bunch of different approaches. Based the most recent
+ # run, against Python 3.6-3.13(beta), the best we can do across
+ # different combinations of actions is to use a membership test
+ # to see if we already have the variable, if so it must already
+ # have been checked, so skip; if we do check, "isidentifier()"
+ # (new in Python 3 so wasn't in benchmark until recently)
+ # on the key is the best.
+ if key not in self._dict and not key.isidentifier():
+ raise UserError(f"Illegal construction variable {key!r}")
self._dict[key] = value
def get(self, key, default=None):
@@ -2579,8 +2572,12 @@ class OverrideEnvironment(Base):
return self.__dict__['__subject'].__getitem__(key)
def __setitem__(self, key, value):
- if not is_valid_construction_var(key):
- raise UserError("Illegal construction variable `%s'" % key)
+ # This doesn't have the same performance equation as a "real"
+ # environment: in an override you're basically just writing
+ # new stuff; it's not a common case to be changing values already
+ # set in the override dict, so don't spend time checking for existance.
+ if not key.isidentifier():
+ raise UserError(f"Illegal construction variable {key!r}")
self.__dict__['overrides'][key] = value
def __delitem__(self, key):
diff --git a/SCons/Util/envs.py b/SCons/Util/envs.py
index 68a4048..2640ef5 100644
--- a/SCons/Util/envs.py
+++ b/SCons/Util/envs.py
@@ -330,10 +330,13 @@ def AddMethod(obj, function: Callable, name: Optional[str] = None) -> None:
setattr(obj, name, method)
+# This routine is used to validate that a construction var name can be used
+# as a Python identifier, which we require. However, Python 3 introduced an
+# isidentifier() string method so there's really not any need for it now.
_is_valid_var_re = re.compile(r'[_a-zA-Z]\w*$')
def is_valid_construction_var(varstr: str) -> bool:
- """Return True if *varstr* is a legitimate construction variable."""
+ """Return True if *varstr* is a legitimate name of a construction variable."""
return bool(_is_valid_var_re.match(varstr))
# Local Variables:
diff --git a/SCons/Variables/__init__.py b/SCons/Variables/__init__.py
index 1c41130..1337d14 100644
--- a/SCons/Variables/__init__.py
+++ b/SCons/Variables/__init__.py
@@ -131,7 +131,7 @@ class Variables:
option.key = key
# TODO: normalize to not include key in aliases. Currently breaks tests.
option.aliases = [key,]
- if not SCons.Util.is_valid_construction_var(option.key):
+ if not option.key.isidentifier():
raise SCons.Errors.UserError(f"Illegal Variables key {option.key!r}")
option.help = help
option.default = default
diff --git a/bench/bench.py b/bench/bench.py
index 5adac23..bafe5a2 100644
--- a/bench/bench.py
+++ b/bench/bench.py
@@ -62,10 +62,10 @@ Usage: bench.py OPTIONS file.py
-h, --help Display this help and exit
-i ITER, --iterations ITER Run each code snippet ITER times
--time Use the time.time function
- -r RUNS, --runs RUNS Average times for RUNS invocations of
+ -r RUNS, --runs RUNS Average times for RUNS invocations of
"""
-# How many times each snippet of code will be (or should be) run by the
+# How many times each snippet of code will be (or should be) run by the
# functions under test to gather the time (the "inner loop").
Iterations = 1000
@@ -191,19 +191,17 @@ py_ver_string = "%d.%d"%(sys.version_info.major, sys.version_info.minor)
# pprint(results_dict)
-# breakpoint()
+
tests = [label for label, args, kw in Data]
-columns = ['Python Version', 'Implementation', 'Test'] + tests
+columns = ['Python Version', 'Implementation'] + tests
with open(results_filename, 'a') as r:
- print("Python Version,%s" % ".".join(columns), file=r)
- # print("Python Version,%s" % ".".join(columns))
+ print(",".join(columns), file=r)
- for implementation in results_dict.keys():
+ for implementation in results_dict:
+ print(f'{py_ver_string},"{implementation}"', file=r, end='')
for test in tests:
- print(f'{py_ver_string},"{implementation}","{test}",%8.3f' % results_dict[implementation][test], file=r)
- # print(f'{py_ver_string},"{implementation}","{test}",%8.3f' % results_dict[implementation][test])
-
-
+ print(',%8.3f' % results_dict[implementation][test], file=r, end='')
+ print(file=r)
# Local Variables:
# tab-width:4
diff --git a/bench/env.__setitem__.py b/bench/env.__setitem__.py
index 2cd0da8..e915e46 100644
--- a/bench/env.__setitem__.py
+++ b/bench/env.__setitem__.py
@@ -57,14 +57,14 @@ except AttributeError:
script_dir = os.path.split(filename)[0]
if script_dir:
script_dir = script_dir + '/'
-sys.path = [os.path.abspath(script_dir + '../src/engine')] + sys.path
+sys.path = [os.path.abspath(script_dir + '..')] + sys.path
import SCons.Errors
import SCons.Environment
import SCons.Util
is_valid_construction_var = SCons.Util.is_valid_construction_var
-global_valid_var = re.compile(r'[_a-zA-Z]\w*$')
+global_valid_var = SCons.Util.envs._is_valid_var_re
# The classes with different __setitem__() implementations that we're
# going to horse-race.
@@ -105,7 +105,7 @@ class Environment:
'SOURCES' : None,
}
_special_set_keys = list(_special_set.keys())
- _valid_var = re.compile(r'[_a-zA-Z]\w*$')
+ _valid_var = global_valid_var
def __init__(self, **kw):
self._dict = kw
@@ -247,8 +247,8 @@ class env_Best_attribute(Environment):
raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
self._dict[key] = value
-class env_Best_has_key(Environment):
- """Best __setitem__(), with has_key"""
+class env_Best_has_key_global_regex(Environment):
+ """Best __setitem__(), with membership test and global regex"""
def __setitem__(self, key, value):
if key in self._special_set:
self._special_set[key](self, key, value)
@@ -258,6 +258,18 @@ class env_Best_has_key(Environment):
raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
self._dict[key] = value
+class env_Best_has_key_function(Environment):
+ """Best __setitem__(), with membership_test and validator function"""
+ def __setitem__(self, key, value):
+ if key in self._special_set:
+ self._special_set[key](self, key, value)
+ else:
+ if key not in self._dict \
+ and not is_valid_construction_var(key):
+ raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
+ self._dict[key] = value
+
+
class env_Best_list(Environment):
"""Best __setitem__(), with a list"""
def __setitem__(self, key, value):
@@ -284,6 +296,16 @@ else:
raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
self._dict[key] = value
+class env_Best_isidentifier(Environment):
+ """Best __setitem__(), with membership test and isidentifier"""
+ def __setitem__(self, key, value):
+ if key in self._special_set:
+ self._special_set[key](self, key, value)
+ else:
+ if key not in self._dict and not key.isidentifier():
+ raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
+ self._dict[key] = value
+
# We'll use the names of all the env_* classes we find later to build
# the dictionary of statements to be timed, and the import statement
# that the timer will use to get at these classes.
diff --git a/bench/is_types.py b/bench/is_types.py
index 01db768..257fa91 100644
--- a/bench/is_types.py
+++ b/bench/is_types.py
@@ -2,10 +2,11 @@
#
# Benchmarks for testing various possible implementations
# of the is_Dict(), is_List() and is_String() functions in
-# src/engine/SCons/Util.py.
+# SCons/Util.py.
import types
-from collections import UserDict, UserList, UserString
+from collections import UserDict, UserList, UserString, deque
+from collections.abc import Iterable, MappingView, MutableMapping, MutableSequence
DictType = dict
ListType = list
@@ -28,19 +29,20 @@ def original_is_String(e):
# New candidates that explicitly check for whether the object is an
# InstanceType before calling isinstance() on the corresponding User*
-# type.
-# InstanceType was only for old-style classes, so absent in Python 3
-# this this is no different than the previous
+# type. Update: meaningless in Python 3, InstanceType was only for
+# old-style classes, so these are just removed.
+# XXX
-def checkInstanceType_is_Dict(e):
- return isinstance(e, (dict, UserDict))
+# New candidates that try more generic names from collections:
-def checkInstanceType_is_List(e):
- return isinstance(e, (list, UserList))
+def new_is_Dict(e):
+ return isinstance(e, MutableMapping)
-def checkInstanceType_is_String(e):
- return isinstance(e, (str, UserString))
+def new_is_List(e):
+ return isinstance(e, MutableSequence)
+def new_is_String(e):
+ return isinstance(e, (str, UserString))
# Improved candidates that cache the type(e) result in a variable
# before doing any checks.
@@ -51,7 +53,7 @@ def cache_type_e_is_Dict(e):
def cache_type_e_is_List(e):
t = type(e)
- return t is list or isinstance(e, UserList)
+ return t is list or isinstance(e, UserList or isinstance(e, deque))
def cache_type_e_is_String(e):
t = type(e)
@@ -68,7 +70,7 @@ def global_cache_type_e_is_Dict(e):
def global_cache_type_e_is_List(e):
t = type(e)
- return t is ListType or isinstance(e, UserList)
+ return t is ListType or isinstance(e, (UserList, deque))
def global_cache_type_e_is_String(e):
t = type(e)
@@ -77,30 +79,10 @@ def global_cache_type_e_is_String(e):
# Alternative that uses a myType() function to map the User* objects
# to their corresponding underlying types.
-
-instanceTypeMap = {
- UserDict : dict,
- UserList : list,
- UserString : str,
-}
-
-def myType(obj):
- t = type(obj)
- if t is types.InstanceType:
- t = instanceTypeMap.get(obj.__class__, t)
- return t
-
-def myType_is_Dict(e):
- return myType(e) is dict
-
-def myType_is_List(e):
- return myType(e) is list
-
-def myType_is_String(e):
- return myType(e) is str
-
+# Again, since this used InstanceType, it's not useful for Python 3.
+# These are the actual test entry points
def Func01(obj):
"""original_is_String"""
@@ -118,19 +100,19 @@ def Func03(obj):
original_is_Dict(obj)
def Func04(obj):
- """checkInstanceType_is_String"""
+ """new_is_String"""
for i in IterationList:
- checkInstanceType_is_String(obj)
+ new_is_String(obj)
def Func05(obj):
- """checkInstanceType_is_List"""
+ """new_is_List"""
for i in IterationList:
- checkInstanceType_is_List(obj)
+ new_is_List(obj)
def Func06(obj):
- """checkInstanceType_is_Dict"""
+ """new_is_Dict"""
for i in IterationList:
- checkInstanceType_is_Dict(obj)
+ new_is_Dict(obj)
def Func07(obj):
"""cache_type_e_is_String"""
@@ -162,22 +144,6 @@ def Func12(obj):
for i in IterationList:
global_cache_type_e_is_Dict(obj)
-#def Func13(obj):
-# """myType_is_String"""
-# for i in IterationList:
-# myType_is_String(obj)
-#
-#def Func14(obj):
-# """myType_is_List"""
-# for i in IterationList:
-# myType_is_List(obj)
-#
-#def Func15(obj):
-# """myType_is_Dict"""
-# for i in IterationList:
-# myType_is_Dict(obj)
-
-
# Data to pass to the functions on each run. Each entry is a
# three-element tuple:
@@ -218,6 +184,11 @@ Data = [
{},
),
(
+ "deque",
+ (deque([]),),
+ {},
+ ),
+ (
"UserDict",
(UserDict({}),),
{},
diff --git a/bench/lvars-gvars.py b/bench/lvars-gvars.py
index 0a81337..91d76a0 100644
--- a/bench/lvars-gvars.py
+++ b/bench/lvars-gvars.py
@@ -23,9 +23,9 @@
"""
Functions and data for timing different idioms for fetching a keyword
-value from a pair of dictionaries for localand global values. This was
+value from a pair of dictionaries for local and global values. This was
used to select how to most efficiently expand single $KEYWORD strings
-in src/engine/SCons/Subst.py.
+in SCons/Subst.py (StringSubber and ListSubber).
"""
def Func1(var, gvars, lvars):
@@ -40,7 +40,7 @@ def Func1(var, gvars, lvars):
x = ''
def Func2(var, gvars, lvars):
- """lvars has_key(), gvars try:-except:"""
+ """lvars membership test, gvars try:-except:"""
for i in IterationList:
if var in lvars:
x = lvars[var]
@@ -51,7 +51,7 @@ def Func2(var, gvars, lvars):
x = ''
def Func3(var, gvars, lvars):
- """lvars has_key(), gvars has_key()"""
+ """lvars membership test, gvars membership test)"""
for i in IterationList:
if var in lvars:
x = lvars[var]
@@ -71,7 +71,7 @@ def Func4(var, gvars, lvars):
def Func5(var, gvars, lvars):
"""Chained get with default values"""
for i in IterationList:
- x = lvars.get(var,gvars.get(var,''))
+ x = lvars.get(var, gvars.get(var, ''))
# Data to pass to the functions on each run. Each entry is a