diff options
author | Eric Snow <ericsnowcurrently@gmail.com> | 2019-10-19 02:00:04 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-10-19 02:00:04 (GMT) |
commit | e4c431ecf50def40eb93c3969c1e4eeaf7bf32f1 (patch) | |
tree | 071224bbded262901b9742eb82c5d82d2f744fe1 /Tools/c-analyzer/c_analyzer/variables | |
parent | ea55c51bd937f6019c35b39b87029644e469c059 (diff) | |
download | cpython-e4c431ecf50def40eb93c3969c1e4eeaf7bf32f1.zip cpython-e4c431ecf50def40eb93c3969c1e4eeaf7bf32f1.tar.gz cpython-e4c431ecf50def40eb93c3969c1e4eeaf7bf32f1.tar.bz2 |
bpo-36876: Re-organize the c-analyzer tool code. (gh-16841)
This is partly a cleanup of the code. It also is preparation for getting the variables from the source (cross-platform) rather than from the symbols.
The change only touches the tool (and its tests).
Diffstat (limited to 'Tools/c-analyzer/c_analyzer/variables')
-rw-r--r-- | Tools/c-analyzer/c_analyzer/variables/__init__.py | 0 | ||||
-rw-r--r-- | Tools/c-analyzer/c_analyzer/variables/find.py | 75 | ||||
-rw-r--r-- | Tools/c-analyzer/c_analyzer/variables/info.py | 93 | ||||
-rw-r--r-- | Tools/c-analyzer/c_analyzer/variables/known.py | 91 |
4 files changed, 259 insertions, 0 deletions
diff --git a/Tools/c-analyzer/c_analyzer/variables/__init__.py b/Tools/c-analyzer/c_analyzer/variables/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/Tools/c-analyzer/c_analyzer/variables/__init__.py diff --git a/Tools/c-analyzer/c_analyzer/variables/find.py b/Tools/c-analyzer/c_analyzer/variables/find.py new file mode 100644 index 0000000..3fe7284 --- /dev/null +++ b/Tools/c-analyzer/c_analyzer/variables/find.py @@ -0,0 +1,75 @@ +from ..common import files +from ..common.info import UNKNOWN +from ..parser import ( + find as p_find, + ) +from ..symbols import ( + info as s_info, + find as s_find, + ) +from .info import Variable + +# XXX need tests: +# * vars_from_source + + +def _remove_cached(cache, var): + if not cache: + return + try: + cached = cache[var.filename] + cached.remove(var) + except (KeyError, IndexError): + pass + + +def vars_from_binary(binfile, *, + known=None, + filenames=None, + handle_id=None, + check_filename=None, + handle_var=Variable.from_id, + _iter_vars=s_find.variables, + _get_symbol_resolver=s_find.get_resolver, + ): + """Yield a Variable for each found Symbol. + + Details are filled in from the given "known" variables and types. + """ + cache = {} + resolve = _get_symbol_resolver(filenames, known, + handle_var=handle_var, + check_filename=check_filename, + perfilecache=cache, + ) + for var, symbol in _iter_vars(binfile, + resolve=resolve, + handle_id=handle_id, + ): + if var is None: + var = Variable(symbol.id, UNKNOWN, UNKNOWN) + yield var + _remove_cached(cache, var) + + +def vars_from_source(filenames, *, + preprocessed=None, + known=None, + handle_id=None, + handle_var=Variable.from_id, + iter_vars=p_find.variables, + ): + """Yield a Variable for each declaration in the raw source code. + + Details are filled in from the given "known" variables and types. + """ + cache = {} + for varid, decl in iter_vars(filenames or (), + perfilecache=cache, + preprocessed=preprocessed, + known=known, + handle_id=handle_id, + ): + var = handle_var(varid, decl) + yield var + _remove_cached(cache, var) diff --git a/Tools/c-analyzer/c_analyzer/variables/info.py b/Tools/c-analyzer/c_analyzer/variables/info.py new file mode 100644 index 0000000..336a523 --- /dev/null +++ b/Tools/c-analyzer/c_analyzer/variables/info.py @@ -0,0 +1,93 @@ +from collections import namedtuple + +from ..common.info import ID, UNKNOWN +from ..common.util import classonly, _NTBase + + +def normalize_vartype(vartype): + """Return the canonical form for a variable type (or func signature).""" + # We allow empty strring through for semantic reasons. + if vartype is None: + return None + + # XXX finish! + # XXX Return (modifiers, type, pointer)? + return str(vartype) + + +# XXX Variable.vartype -> decl (Declaration). + +class Variable(_NTBase, + namedtuple('Variable', 'id storage vartype')): + """Information about a single variable declaration.""" + + __slots__ = () + + STORAGE = ( + 'static', + 'extern', + 'implicit', + 'local', + ) + + @classonly + def from_parts(cls, filename, funcname, name, decl, storage=None): + varid = ID(filename, funcname, name) + if storage is None: + self = cls.from_id(varid, decl) + else: + self = cls(varid, storage, decl) + return self + + @classonly + def from_id(cls, varid, decl): + from ..parser.declarations import extract_storage + storage = extract_storage(decl, infunc=varid.funcname) + return cls(varid, storage, decl) + + def __new__(cls, id, storage, vartype): + self = super().__new__( + cls, + id=ID.from_raw(id), + storage=str(storage) if storage else None, + vartype=normalize_vartype(vartype) if vartype else None, + ) + return self + + def __hash__(self): + return hash(self.id) + + def __getattr__(self, name): + return getattr(self.id, name) + + def _validate_id(self): + if not self.id: + raise TypeError('missing id') + + if not self.filename or self.filename == UNKNOWN: + raise TypeError(f'id missing filename ({self.id})') + + if self.funcname and self.funcname == UNKNOWN: + raise TypeError(f'id missing funcname ({self.id})') + + self.id.validate() + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + self._validate_id() + + if self.storage is None or self.storage == UNKNOWN: + raise TypeError('missing storage') + elif self.storage not in self.STORAGE: + raise ValueError(f'unsupported storage {self.storage:r}') + + if self.vartype is None or self.vartype == UNKNOWN: + raise TypeError('missing vartype') + + @property + def isglobal(self): + return self.storage != 'local' + + @property + def isconst(self): + return 'const' in self.vartype.split() diff --git a/Tools/c-analyzer/c_analyzer/variables/known.py b/Tools/c-analyzer/c_analyzer/variables/known.py new file mode 100644 index 0000000..aa2934a --- /dev/null +++ b/Tools/c-analyzer/c_analyzer/variables/known.py @@ -0,0 +1,91 @@ +import csv + +from ..common.info import ID, UNKNOWN +from ..common.util import read_tsv +from .info import Variable + + +# XXX need tests: +# * read_file() +# * look_up_variable() + + +COLUMNS = ('filename', 'funcname', 'name', 'kind', 'declaration') +HEADER = '\t'.join(COLUMNS) + + +def read_file(infile, *, + _read_tsv=read_tsv, + ): + """Yield (kind, id, decl) for each row in the data file. + + The caller is responsible for validating each row. + """ + for row in _read_tsv(infile, HEADER): + filename, funcname, name, kind, declaration = row + if not funcname or funcname == '-': + funcname = None + id = ID(filename, funcname, name) + yield kind, id, declaration + + +def from_file(infile, *, + handle_var=Variable.from_id, + _read_file=read_file, + ): + """Return the info for known declarations in the given file.""" + known = { + 'variables': {}, + #'types': {}, + #'constants': {}, + #'macros': {}, + } + for kind, id, decl in _read_file(infile): + if kind == 'variable': + values = known['variables'] + value = handle_var(id, decl) + else: + raise ValueError(f'unsupported kind in row {row}') + value.validate() + values[id] = value + return known + + +def look_up_variable(varid, knownvars, *, + match_files=(lambda f1, f2: f1 == f2), + ): + """Return the known Variable matching the given ID. + + "knownvars" is a mapping of ID to Variable. + + "match_files" is used to verify if two filenames point to + the same file. + + If no match is found then None is returned. + """ + if not knownvars: + return None + + if varid.funcname == UNKNOWN: + if not varid.filename or varid.filename == UNKNOWN: + for varid in knownvars: + if not varid.funcname: + continue + if varid.name == varid.name: + return knownvars[varid] + else: + return None + else: + for varid in knownvars: + if not varid.funcname: + continue + if not match_files(varid.filename, varid.filename): + continue + if varid.name == varid.name: + return knownvars[varid] + else: + return None + elif not varid.filename or varid.filename == UNKNOWN: + raise NotImplementedError + else: + return knownvars.get(varid.id) |