summary refs log tree commit diff stats
path: root/Tools
diff options
context:
space:
mode:
Diffstat (limited to 'Tools')
-rw-r--r-- Tools/c-analyzer/c-globals.py | 2
-rw-r--r-- Tools/c-analyzer/c_analyzer/__init__.py (renamed from Tools/c-analyzer/c_symbols/__init__.py) | 0
-rw-r--r-- Tools/c-analyzer/c_analyzer/common/__init__.py (renamed from Tools/c-analyzer/c_parser/__init__.py) | 0
-rw-r--r-- Tools/c-analyzer/c_analyzer/common/files.py (renamed from Tools/c-analyzer/c_analyzer_common/files.py) | 26
-rw-r--r-- Tools/c-analyzer/c_analyzer/common/info.py | 138
-rw-r--r-- Tools/c-analyzer/c_analyzer/common/show.py | 11
-rw-r--r-- Tools/c-analyzer/c_analyzer/common/util.py (renamed from Tools/c-analyzer/c_analyzer_common/util.py) | 0
-rw-r--r-- Tools/c-analyzer/c_analyzer/parser/__init__.py (renamed from Tools/c-analyzer/c_globals/__init__.py) | 0
-rw-r--r-- Tools/c-analyzer/c_analyzer/parser/declarations.py (renamed from Tools/c-analyzer/c_parser/declarations.py) | 70
-rw-r--r-- Tools/c-analyzer/c_analyzer/parser/find.py | 107
-rw-r--r-- Tools/c-analyzer/c_analyzer/parser/naive.py (renamed from Tools/c-analyzer/c_parser/naive.py) | 41
-rw-r--r-- Tools/c-analyzer/c_analyzer/parser/preprocessor.py (renamed from Tools/c-analyzer/c_parser/preprocessor.py) | 3
-rw-r--r-- Tools/c-analyzer/c_analyzer/parser/source.py (renamed from Tools/c-analyzer/c_parser/source.py) | 0
-rw-r--r-- Tools/c-analyzer/c_analyzer/symbols/__init__.py | 0
-rw-r--r-- Tools/c-analyzer/c_analyzer/symbols/_nm.py (renamed from Tools/c-analyzer/c_symbols/binary.py) | 104
-rw-r--r-- Tools/c-analyzer/c_analyzer/symbols/find.py | 175
-rw-r--r-- Tools/c-analyzer/c_analyzer/symbols/info.py (renamed from Tools/c-analyzer/c_symbols/info.py) | 4
-rw-r--r-- Tools/c-analyzer/c_analyzer/variables/__init__.py | 0
-rw-r--r-- Tools/c-analyzer/c_analyzer/variables/find.py | 75
-rw-r--r-- Tools/c-analyzer/c_analyzer/variables/info.py (renamed from Tools/c-analyzer/c_parser/info.py) | 49
-rw-r--r-- Tools/c-analyzer/c_analyzer/variables/known.py | 91
-rw-r--r-- Tools/c-analyzer/c_analyzer_common/__init__.py | 19
-rw-r--r-- Tools/c-analyzer/c_analyzer_common/info.py | 69
-rw-r--r-- Tools/c-analyzer/c_analyzer_common/known.py | 74
-rw-r--r-- Tools/c-analyzer/c_globals/find.py | 95
-rw-r--r-- Tools/c-analyzer/c_globals/show.py | 16
-rw-r--r-- Tools/c-analyzer/c_symbols/resolve.py | 147
-rw-r--r-- Tools/c-analyzer/c_symbols/source.py | 58
-rw-r--r-- Tools/c-analyzer/cpython/README (renamed from Tools/c-analyzer/c_globals/README) | 0
-rw-r--r-- Tools/c-analyzer/cpython/__init__.py | 29
-rw-r--r-- Tools/c-analyzer/cpython/__main__.py (renamed from Tools/c-analyzer/c_globals/__main__.py) | 109
-rw-r--r-- Tools/c-analyzer/cpython/_generate.py (renamed from Tools/c-analyzer/c_analyzer_common/_generate.py) | 13
-rw-r--r-- Tools/c-analyzer/cpython/files.py | 29
-rw-r--r-- Tools/c-analyzer/cpython/find.py | 101
-rw-r--r-- Tools/c-analyzer/cpython/known.py | 66
-rw-r--r-- Tools/c-analyzer/cpython/supported.py (renamed from Tools/c-analyzer/c_globals/supported.py) | 15
36 files changed, 1030 insertions, 706 deletions
diff --git a/Tools/c-analyzer/c-globals.py b/Tools/c-analyzer/c-globals.py
index 9afe059..b36b791 100644
--- a/Tools/c-analyzer/c-globals.py
+++ b/Tools/c-analyzer/c-globals.py
@@ -1,6 +1,6 @@
# This is a script equivalent of running "python -m test.test_c_globals.cg".
-from c_globals.__main__ import parse_args, main
+from cpython.__main__ import parse_args, main
# This is effectively copied from cg/__main__.py:
diff --git a/Tools/c-analyzer/c_symbols/__init__.py b/Tools/c-analyzer/c_analyzer/__init__.py
index e69de29..e69de29 100644
--- a/Tools/c-analyzer/c_symbols/__init__.py
+++ b/Tools/c-analyzer/c_analyzer/__init__.py
diff --git a/Tools/c-analyzer/c_parser/__init__.py b/Tools/c-analyzer/c_analyzer/common/__init__.py
index e69de29..e69de29 100644
--- a/Tools/c-analyzer/c_parser/__init__.py
+++ b/Tools/c-analyzer/c_analyzer/common/__init__.py
diff --git a/Tools/c-analyzer/c_analyzer_common/files.py b/Tools/c-analyzer/c_analyzer/common/files.py
index b3cd16c..ab551a8 100644
--- a/Tools/c-analyzer/c_analyzer_common/files.py
+++ b/Tools/c-analyzer/c_analyzer/common/files.py
@@ -2,7 +2,10 @@ import glob
import os
import os.path
-from . import SOURCE_DIRS, REPO_ROOT
+# XXX need tests:
+# * walk_tree()
+# * glob_tree()
+# * iter_files_by_suffix()
C_SOURCE_SUFFIXES = ('.c', '.h')
@@ -115,24 +118,3 @@ def iter_files_by_suffix(root, suffixes, relparent=None, *,
# XXX Ignore repeated suffixes?
for suffix in suffixes:
yield from _iter_files(root, suffix, relparent)
-
-
-def iter_cpython_files(*,
- walk=walk_tree,
- _files=iter_files_by_suffix,
- ):
- """Yield each file in the tree for each of the given directory names."""
- excludedtrees = [
- os.path.join('Include', 'cpython', ''),
- ]
- def is_excluded(filename):
- for root in excludedtrees:
- if filename.startswith(root):
- return True
- return False
- for filename in _files(SOURCE_DIRS, C_SOURCE_SUFFIXES, REPO_ROOT,
- walk=walk,
- ):
- if is_excluded(filename):
- continue
- yield filename
diff --git a/Tools/c-analyzer/c_analyzer/common/info.py b/Tools/c-analyzer/c_analyzer/common/info.py
new file mode 100644
index 0000000..3f3f8c5
--- /dev/null
+++ b/Tools/c-analyzer/c_analyzer/common/info.py
@@ -0,0 +1,138 @@
+from collections import namedtuple
+import re
+
+from .util import classonly, _NTBase
+
+# XXX need tests:
+# * ID.match()
+
+
+UNKNOWN = '???'
+
+NAME_RE = re.compile(r'^([a-zA-Z]|_\w*[a-zA-Z]\w*|[a-zA-Z]\w*)$')
+
+
+class ID(_NTBase, namedtuple('ID', 'filename funcname name')):
+ """A unique ID for a single symbol or declaration."""
+
+ __slots__ = ()
+ # XXX Add optional conditions (tuple of strings) field.
+ #conditions = Slot()
+
+ @classonly
+ def from_raw(cls, raw):
+ if not raw:
+ return None
+ if isinstance(raw, str):
+ return cls(None, None, raw)
+ try:
+ name, = raw
+ filename = None
+ except ValueError:
+ try:
+ filename, name = raw
+ except ValueError:
+ return super().from_raw(raw)
+ return cls(filename, None, name)
+
+ def __new__(cls, filename, funcname, name):
+ self = super().__new__(
+ cls,
+ filename=str(filename) if filename else None,
+ funcname=str(funcname) if funcname else None,
+ name=str(name) if name else None,
+ )
+ #cls.conditions.set(self, tuple(str(s) if s else None
+ # for s in conditions or ()))
+ return self
+
+ def validate(self):
+ """Fail if the object is invalid (i.e. init with bad data)."""
+ if not self.name:
+ raise TypeError('missing name')
+ else:
+ if not NAME_RE.match(self.name):
+ raise ValueError(
+ f'name must be an identifier, got {self.name!r}')
+
+ # Symbols from a binary might not have filename/funcname info.
+
+ if self.funcname:
+ if not self.filename:
+ raise TypeError('missing filename')
+ if not NAME_RE.match(self.funcname) and self.funcname != UNKNOWN:
+ raise ValueError(
+ f'name must be an identifier, got {self.funcname!r}')
+
+ # XXX Require the filename (at least UNKNOWN)?
+ # XXX Check the filename?
+
+ @property
+ def islocal(self):
+ return self.funcname is not None
+
+ def match(self, other, *,
+ match_files=(lambda f1, f2: f1 == f2),
+ ):
+ """Return True if the two match.
+
+ At least one of the two must be completely valid (no UNKNOWN
+ anywhere). Otherwise False is returned. The remaining one
+ *may* have UNKNOWN for both funcname and filename. It must
+ have a valid name though.
+
+ The caller is responsible for knowing which of the two is valid
+ (and which to use if both are valid).
+ """
+ # First check the name.
+ if self.name is None:
+ return False
+ if other.name != self.name:
+ return False
+
+ # Then check the filename.
+ if self.filename is None:
+ return False
+ if other.filename is None:
+ return False
+ if self.filename == UNKNOWN:
+ # "other" must be the valid one.
+ if other.funcname == UNKNOWN:
+ return False
+ elif self.funcname != UNKNOWN:
+ # XXX Try matching funcname even though we don't
+ # know the filename?
+ raise NotImplementedError
+ else:
+ return True
+ elif other.filename == UNKNOWN:
+ # "self" must be the valid one.
+ if self.funcname == UNKNOWN:
+ return False
+ elif other.funcname != UNKNOWN:
+ # XXX Try matching funcname even though we don't
+ # know the filename?
+ raise NotImplementedError
+ else:
+ return True
+ elif not match_files(self.filename, other.filename):
+ return False
+
+ # Finally, check the funcname.
+ if self.funcname == UNKNOWN:
+ # "other" must be the valid one.
+ if other.funcname == UNKNOWN:
+ return False
+ else:
+ return other.funcname is not None
+ elif other.funcname == UNKNOWN:
+ # "self" must be the valid one.
+ if self.funcname == UNKNOWN:
+ return False
+ else:
+ return self.funcname is not None
+ elif self.funcname == other.funcname:
+ # Both are valid.
+ return True
+
+ return False
diff --git a/Tools/c-analyzer/c_analyzer/common/show.py b/Tools/c-analyzer/c_analyzer/common/show.py
new file mode 100644
index 0000000..5f3cb1c
--- /dev/null
+++ b/Tools/c-analyzer/c_analyzer/common/show.py
@@ -0,0 +1,11 @@
+
+def basic(variables, *,
+ _print=print):
+ """Print each row simply."""
+ for var in variables:
+ if var.funcname:
+ line = f'{var.filename}:{var.funcname}():{var.name}'
+ else:
+ line = f'{var.filename}:{var.name}'
+ line = f'{line:<64} {var.vartype}'
+ _print(line)
diff --git a/Tools/c-analyzer/c_analyzer_common/util.py b/Tools/c-analyzer/c_analyzer/common/util.py
index 43d0bb6..43d0bb6 100644
--- a/Tools/c-analyzer/c_analyzer_common/util.py
+++ b/Tools/c-analyzer/c_analyzer/common/util.py
diff --git a/Tools/c-analyzer/c_globals/__init__.py b/Tools/c-analyzer/c_analyzer/parser/__init__.py
index e69de29..e69de29 100644
--- a/Tools/c-analyzer/c_globals/__init__.py
+++ b/Tools/c-analyzer/c_analyzer/parser/__init__.py
diff --git a/Tools/c-analyzer/c_parser/declarations.py b/Tools/c-analyzer/c_analyzer/parser/declarations.py
index 19fa3ff..f37072c 100644
--- a/Tools/c-analyzer/c_parser/declarations.py
+++ b/Tools/c-analyzer/c_analyzer/parser/declarations.py
@@ -2,6 +2,8 @@ import re
import shlex
import subprocess
+from ..common.info import UNKNOWN
+
from . import source
@@ -194,7 +196,28 @@ def parse_func(stmt, body):
return name, signature
-def parse_var(stmt):
+#TYPE_SPEC = rf'''(?:
+# )'''
+#VAR_DECLARATOR = rf'''(?:
+# )'''
+#VAR_DECL = rf'''(?:
+# {TYPE_SPEC}+
+# {VAR_DECLARATOR}
+# \s*
+# )'''
+#VAR_DECLARATION = rf'''(?:
+# {VAR_DECL}
+# (?: = [^=] [^;]* )?
+# ;
+# )'''
+#
+#
+#def parse_variable(decl, *, inFunc=False):
+# """Return [(name, storage, vartype)] for the given variable declaration."""
+# ...
+
+
+def _parse_var(stmt):
"""Return (name, vartype) for the given variable declaration."""
stmt = stmt.rstrip(';')
m = LOCAL_STMT_START_RE.match(stmt)
@@ -220,6 +243,27 @@ def parse_var(stmt):
return name, vartype
+def extract_storage(decl, *, infunc=None):
+ """Return the storage class for the given declaration.
+
+ The default storage is "implicit" (or "local" if infunc is True).
+ """
+ if decl == UNKNOWN:
+ return decl
+ if decl.startswith('static '):
+ return 'static'
+ #return 'static', decl.partition(' ')[2].strip()
+ elif decl.startswith('extern '):
+ return 'extern'
+ #return 'extern', decl.partition(' ')[2].strip()
+ elif re.match('.*\b(static|extern)\b', decl):
+ raise NotImplementedError
+ elif infunc:
+ return 'local'
+ else:
+ return 'implicit'
+
+
def parse_compound(stmt, blocks):
"""Return (headers, bodies) for the given compound statement."""
# XXX Identify declarations inside compound statements
@@ -228,14 +272,17 @@ def parse_compound(stmt, blocks):
def iter_variables(filename, *,
+ preprocessed=False,
_iter_source_lines=source.iter_lines,
_iter_global=iter_global_declarations,
_iter_local=iter_local_statements,
_parse_func=parse_func,
- _parse_var=parse_var,
+ _parse_var=_parse_var,
_parse_compound=parse_compound,
):
"""Yield (funcname, name, vartype) for every variable in the given file."""
+ if preprocessed:
+ raise NotImplementedError
lines = _iter_source_lines(filename)
for stmt, body in _iter_global(lines):
# At the file top-level we only have to worry about vars & funcs.
@@ -256,7 +303,7 @@ def iter_variables(filename, *,
def _iter_locals(lines, *,
_iter_statements=iter_local_statements,
- _parse_var=parse_var,
+ _parse_var=_parse_var,
_parse_compound=parse_compound,
):
compound = [lines]
@@ -278,18 +325,15 @@ def _iter_locals(lines, *,
compound.extend(bodies)
-def iter_all(dirnames):
+def iter_all(filename, *,
+ preprocessed=False,
+ ):
"""Yield a Declaration for each one found.
If there are duplicates, due to preprocessor conditionals, then
they are checked to make sure they are the same.
"""
- raise NotImplementedError
-
-
-def iter_preprocessed(dirnames):
- """Yield a Declaration for each one found.
-
- All source files are run through the preprocessor first.
- """
- raise NotImplementedError
+ # XXX For the moment we cheat.
+ for funcname, name, decl in iter_variables(filename,
+ preprocessed=preprocessed):
+ yield 'variable', funcname, name, decl
diff --git a/Tools/c-analyzer/c_analyzer/parser/find.py b/Tools/c-analyzer/c_analyzer/parser/find.py
new file mode 100644
index 0000000..3860d3d
--- /dev/null
+++ b/Tools/c-analyzer/c_analyzer/parser/find.py
@@ -0,0 +1,107 @@
+from ..common.info import UNKNOWN, ID
+
+from . import declarations
+
+# XXX need tests:
+# * variables
+# * variable
+# * variable_from_id
+
+
+def _iter_vars(filenames, preprocessed, *,
+ handle_id=None,
+ _iter_decls=declarations.iter_all,
+ ):
+ if handle_id is None:
+ handle_id = ID
+
+ for filename in filenames or ():
+ for kind, funcname, name, decl in _iter_decls(filename,
+ preprocessed=preprocessed,
+ ):
+ if kind != 'variable':
+ continue
+ varid = handle_id(filename, funcname, name)
+ yield varid, decl
+
+
+# XXX Add a "handle_var" arg like we did for get_resolver()?
+
+def variables(*filenames,
+ perfilecache=None,
+ preprocessed=False,
+ known=None, # for types
+ handle_id=None,
+ _iter_vars=_iter_vars,
+ ):
+ """Yield (varid, decl) for each variable found in the given files.
+
+ If "preprocessed" is provided (and not False/None) then it is used
+ to decide which tool to use to parse the source code after it runs
+ through the C preprocessor. Otherwise the raw source is parsed.
+ """
+ if len(filenames) == 1 and not (filenames[0], str):
+ filenames, = filenames
+
+ if perfilecache is None:
+ yield from _iter_vars(filenames, preprocessed)
+ else:
+ # XXX Cache per-file variables (e.g. `{filename: [(varid, decl)]}`).
+ raise NotImplementedError
+
+
+def variable(name, filenames, *,
+ local=False,
+ perfilecache=None,
+ preprocessed=False,
+ handle_id=None,
+ _iter_vars=variables,
+ ):
+ """Return (varid, decl) for the first found variable that matches.
+
+ If "local" is True then the first matching local variable in the
+ file will always be returned. To avoid that, pass perfilecache and
+ pop each variable from the cache after using it.
+ """
+ for varid, decl in _iter_vars(filenames,
+ perfilecache=perfilecache,
+ preprocessed=preprocessed,
+ ):
+ if varid.name != name:
+ continue
+ if local:
+ if varid.funcname:
+ if varid.funcname == UNKNOWN:
+ raise NotImplementedError
+ return varid, decl
+ elif not varid.funcname:
+ return varid, decl
+ else:
+ return None, None # No matching variable was found.
+
+
+def variable_from_id(id, filenames, *,
+ perfilecache=None,
+ preprocessed=False,
+ handle_id=None,
+ _get_var=variable,
+ ):
+ """Return (varid, decl) for the first found variable that matches."""
+ local = False
+ if isinstance(id, str):
+ name = id
+ else:
+ if id.funcname == UNKNOWN:
+ local = True
+ elif id.funcname:
+ raise NotImplementedError
+
+ name = id.name
+ if id.filename and id.filename != UNKNOWN:
+ filenames = [id.filename]
+ return _get_var(name, filenames,
+ local=local,
+ perfilecache=perfilecache,
+ preprocessed=preprocessed,
+ handle_id=handle_id,
+ )
diff --git a/Tools/c-analyzer/c_parser/naive.py b/Tools/c-analyzer/c_analyzer/parser/naive.py
index 160f96c..4a4822d 100644
--- a/Tools/c-analyzer/c_parser/naive.py
+++ b/Tools/c-analyzer/c_analyzer/parser/naive.py
@@ -1,8 +1,7 @@
import re
-from c_analyzer_common.info import UNKNOWN
+from ..common.info import UNKNOWN, ID
-from .info import Variable
from .preprocessor import _iter_clean_lines
@@ -55,7 +54,7 @@ def parse_variable_declaration(srcline):
def parse_variable(srcline, funcname=None):
- """Return a Variable for the variable declared on the line (or None)."""
+ """Return (varid, decl) for the variable declared on the line (or None)."""
line = srcline.strip()
# XXX Handle more than just static variables.
@@ -74,7 +73,7 @@ def iter_variables(filename, *,
_get_srclines=get_srclines,
_default_parse_variable=parse_variable,
):
- """Yield a Variable for each in the given source file."""
+ """Yield (varid, decl) for each variable in the given source file."""
if parse_variable is None:
parse_variable = _default_parse_variable
@@ -99,13 +98,13 @@ def iter_variables(filename, *,
info = parse_variable(line, funcname)
if isinstance(info, list):
for name, _funcname, decl in info:
- yield Variable.from_parts(filename, _funcname, name, decl)
+ yield ID(filename, _funcname, name), decl
continue
name, decl = info
if name is None:
continue
- yield Variable.from_parts(filename, funcname, name, decl)
+ yield ID(filename, funcname, name), decl
def _match_varid(variable, name, funcname, ignored=None):
@@ -134,12 +133,12 @@ def find_variable(filename, funcname, name, *,
Return None if the variable is not found.
"""
- for variable in _iter_variables(filename,
+ for varid, decl in _iter_variables(filename,
srccache=srccache,
parse_variable=parse_variable,
):
- if _match_varid(variable, name, funcname, ignored):
- return variable
+ if _match_varid(varid, name, funcname, ignored):
+ return varid, decl
else:
return None
@@ -149,10 +148,10 @@ def find_variables(varids, filenames=None, *,
parse_variable=None,
_find_symbol=find_variable,
):
- """Yield a Variable for each ID.
+ """Yield (varid, decl) for each ID.
If the variable is not found then its decl will be UNKNOWN. That
- way there will be one resulting Variable per given ID.
+ way there will be one resulting variable per given ID.
"""
if srccache is _NOT_SET:
srccache = {}
@@ -163,18 +162,18 @@ def find_variables(varids, filenames=None, *,
srcfiles = [varid.filename]
else:
if not filenames:
- yield Variable(varid, UNKNOWN, UNKNOWN)
+ yield varid, UNKNOWN
continue
srcfiles = filenames
for filename in srcfiles:
- found = _find_varid(filename, varid.funcname, varid.name,
- ignored=used,
- srccache=srccache,
- parse_variable=parse_variable,
- )
- if found:
- yield found
- used.add(found)
+ varid, decl = _find_varid(filename, varid.funcname, varid.name,
+ ignored=used,
+ srccache=srccache,
+ parse_variable=parse_variable,
+ )
+ if varid:
+ yield varid, decl
+ used.add(varid)
break
else:
- yield Variable(varid, UNKNOWN, UNKNOWN)
+ yield varid, UNKNOWN
diff --git a/Tools/c-analyzer/c_parser/preprocessor.py b/Tools/c-analyzer/c_analyzer/parser/preprocessor.py
index 0e2866e..41f306e 100644
--- a/Tools/c-analyzer/c_parser/preprocessor.py
+++ b/Tools/c-analyzer/c_analyzer/parser/preprocessor.py
@@ -3,8 +3,7 @@ import shlex
import os
import re
-from c_analyzer_common import util
-from . import info
+from ..common import util, info
CONTINUATION = '\\' + os.linesep
diff --git a/Tools/c-analyzer/c_parser/source.py b/Tools/c-analyzer/c_analyzer/parser/source.py
index f8998c8..f8998c8 100644
--- a/Tools/c-analyzer/c_parser/source.py
+++ b/Tools/c-analyzer/c_analyzer/parser/source.py
diff --git a/Tools/c-analyzer/c_analyzer/symbols/__init__.py b/Tools/c-analyzer/c_analyzer/symbols/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Tools/c-analyzer/c_analyzer/symbols/__init__.py
diff --git a/Tools/c-analyzer/c_symbols/binary.py b/Tools/c-analyzer/c_analyzer/symbols/_nm.py
index e125dbd5..f3a75a6 100644
--- a/Tools/c-analyzer/c_symbols/binary.py
+++ b/Tools/c-analyzer/c_analyzer/symbols/_nm.py
@@ -1,46 +1,24 @@
-import os
import os.path
import shutil
-import sys
-
-from c_analyzer_common import util, info
-from . import source
-from .info import Symbol
-
-
-#PYTHON = os.path.join(REPO_ROOT, 'python')
-PYTHON = sys.executable
+from c_analyzer.common import util, info
-def iter_symbols(binary=PYTHON, dirnames=None, *,
- # Alternately, use look_up_known_symbol()
- # from c_globals.supported.
- find_local_symbol=source.find_symbol,
- _file_exists=os.path.exists,
- _iter_symbols_nm=(lambda b, *a: _iter_symbols_nm(b, *a)),
- ):
- """Yield a Symbol for each symbol found in the binary."""
- if not _file_exists(binary):
- raise Exception('executable missing (need to build it first?)')
-
- if find_local_symbol:
- cache = {}
- def find_local_symbol(name, *, _find=find_local_symbol):
- return _find(name, dirnames, _perfilecache=cache)
- else:
- find_local_symbol = None
+from .info import Symbol
- if os.name == 'nt':
- # XXX Support this.
- raise NotImplementedError
- else:
- yield from _iter_symbols_nm(binary, find_local_symbol)
+# XXX need tests:
+# * iter_symbols
-#############################
-# binary format (e.g. ELF)
+NM_KINDS = {
+ 'b': Symbol.KIND.VARIABLE, # uninitialized
+ 'd': Symbol.KIND.VARIABLE, # initialized
+ #'g': Symbol.KIND.VARIABLE, # uninitialized
+ #'s': Symbol.KIND.VARIABLE, # initialized
+ 't': Symbol.KIND.FUNCTION,
+ }
SPECIAL_SYMBOLS = {
+ # binary format (e.g. ELF)
'__bss_start',
'__data_start',
'__dso_handle',
@@ -63,29 +41,23 @@ def _is_special_symbol(name):
return False
-#############################
-# "nm"
-
-NM_KINDS = {
- 'b': Symbol.KIND.VARIABLE, # uninitialized
- 'd': Symbol.KIND.VARIABLE, # initialized
- #'g': Symbol.KIND.VARIABLE, # uninitialized
- #'s': Symbol.KIND.VARIABLE, # initialized
- 't': Symbol.KIND.FUNCTION,
- }
-
+def iter_symbols(binfile, *,
+ nm=None,
+ handle_id=None,
+ _which=shutil.which,
+ _run=util.run_cmd,
+ ):
+ """Yield a Symbol for each relevant entry reported by the "nm" command."""
+ if nm is None:
+ nm = _which('nm')
+ if not nm:
+ raise NotImplementedError
+ if handle_id is None:
+ handle_id = info.ID
-def _iter_symbols_nm(binary, find_local_symbol=None,
- *,
- _which=shutil.which,
- _run=util.run_cmd,
- ):
- nm = _which('nm')
- if not nm:
- raise NotImplementedError
argv = [nm,
'--line-numbers',
- binary,
+ binfile,
]
try:
output = _run(argv)
@@ -95,23 +67,20 @@ def _iter_symbols_nm(binary, find_local_symbol=None,
raise NotImplementedError
raise
for line in output.splitlines():
- (name, kind, external, filename, funcname, vartype,
- ) = _parse_nm_line(line,
- _find_local_symbol=find_local_symbol,
- )
+ (name, kind, external, filename, funcname,
+ ) = _parse_nm_line(line)
if kind != Symbol.KIND.VARIABLE:
continue
elif _is_special_symbol(name):
continue
- assert vartype is None
yield Symbol(
- id=(filename, funcname, name),
+ id=handle_id(filename, funcname, name),
kind=kind,
external=external,
)
-def _parse_nm_line(line, *, _find_local_symbol=None):
+def _parse_nm_line(line):
_origline = line
_, _, line = line.partition(' ') # strip off the address
line = line.strip()
@@ -128,18 +97,9 @@ def _parse_nm_line(line, *, _find_local_symbol=None):
else:
filename = info.UNKNOWN
- vartype = None
name, islocal = _parse_nm_name(name, kind)
- if islocal:
- funcname = info.UNKNOWN
- if _find_local_symbol is not None:
- filename, funcname, vartype = _find_local_symbol(name)
- filename = filename or info.UNKNOWN
- funcname = funcname or info.UNKNOWN
- else:
- funcname = None
- # XXX fine filename and vartype?
- return name, kind, external, filename, funcname, vartype
+ funcname = info.UNKNOWN if islocal else None
+ return name, kind, external, filename, funcname
def _parse_nm_name(name, kind):
diff --git a/Tools/c-analyzer/c_analyzer/symbols/find.py b/Tools/c-analyzer/c_analyzer/symbols/find.py
new file mode 100644
index 0000000..8564652
--- /dev/null
+++ b/Tools/c-analyzer/c_analyzer/symbols/find.py
@@ -0,0 +1,175 @@
+import os
+import os.path
+import shutil
+
+from ..common import files
+from ..common.info import UNKNOWN, ID
+from ..parser import find as p_find
+
+from . import _nm
+from .info import Symbol
+
+# XXX need tests:
+# * get_resolver()
+# * get_resolver_from_dirs()
+# * symbol()
+# * symbols()
+# * variables()
+
+
+def _resolve_known(symbol, knownvars):
+ for varid in knownvars:
+ if symbol.match(varid):
+ break
+ else:
+ return None
+ return knownvars.pop(varid)
+
+
+def get_resolver(filenames=None, known=None, *,
+ handle_var,
+ check_filename=None,
+ perfilecache=None,
+ preprocessed=False,
+ _from_source=p_find.variable_from_id,
+ ):
+ """Return a "resolver" func for the given known vars/types and filenames.
+
+ "handle_var" is a callable that takes (ID, decl) and returns a
+ Variable. Variable.from_id is a suitable callable.
+
+ The returned func takes a single Symbol and returns a corresponding
+ Variable. If the symbol was located then the variable will be
+ valid, populated with the corresponding information. Otherwise None
+ is returned.
+ """
+ knownvars = (known or {}).get('variables')
+ if knownvars:
+ knownvars = dict(knownvars) # a copy
+ if filenames:
+ if check_filename is None:
+ filenames = list(filenames)
+ def check_filename(filename):
+ return filename in filenames
+ def resolve(symbol):
+ # XXX Check "found" instead?
+ if not check_filename(symbol.filename):
+ return None
+ found = _resolve_known(symbol, knownvars)
+ if found is None:
+ #return None
+ varid, decl = _from_source(symbol, filenames,
+ perfilecache=perfilecache,
+ preprocessed=preprocessed,
+ )
+ found = handle_var(varid, decl)
+ return found
+ else:
+ def resolve(symbol):
+ return _resolve_known(symbol, knownvars)
+ elif filenames:
+ def resolve(symbol):
+ varid, decl = _from_source(symbol, filenames,
+ perfilecache=perfilecache,
+ preprocessed=preprocessed,
+ )
+ return handle_var(varid, decl)
+ else:
+ def resolve(symbol):
+ return None
+ return resolve
+
+
+def get_resolver_from_dirs(dirnames, known=None, *,
+ handle_var,
+ suffixes=('.c',),
+ perfilecache=None,
+ preprocessed=False,
+ _iter_files=files.iter_files_by_suffix,
+ _get_resolver=get_resolver,
+ ):
+ """Return a "resolver" func for the given known vars/types and filenames.
+
+ "dirnames" should be absolute paths. If not then they will be
+ resolved relative to CWD.
+
+ See get_resolver().
+ """
+ dirnames = [d if d.endswith(os.path.sep) else d + os.path.sep
+ for d in dirnames]
+ filenames = _iter_files(dirnames, suffixes)
+ def check_filename(filename):
+ for dirname in dirnames:
+ if filename.startswith(dirname):
+ return True
+ else:
+ return False
+ return _get_resolver(filenames, known,
+ handle_var=handle_var,
+ check_filename=check_filename,
+ perfilecache=perfilecache,
+ preprocessed=preprocessed,
+ )
+
+
+def symbol(symbol, filenames, known=None, *,
+ perfilecache=None,
+ preprocessed=False,
+ handle_id=None,
+ _get_resolver=get_resolver,
+ ):
+ """Return a Variable for the one matching the given symbol.
+
+ "symbol" can be one of several objects:
+
+ * Symbol - use the contained info
+ * name (str) - look for a global variable with that name
+ * (filename, name) - look for named global in file
+ * (filename, funcname, name) - look for named local in file
+
+ A name is always required. If the filename is None, "", or
+ "UNKNOWN" then all files will be searched. If the funcname is
+ "" or "UNKNOWN" then only local variables will be searched for.
+ """
+ resolve = _get_resolver(known, filenames,
+ handle_id=handle_id,
+ perfilecache=perfilecache,
+ preprocessed=preprocessed,
+ )
+ return resolve(symbol)
+
+
+def _get_platform_tool():
+ if os.name == 'nt':
+ # XXX Support this.
+ raise NotImplementedError
+ elif nm := shutil.which('nm'):
+ return lambda b, hi: _nm.iter_symbols(b, nm=nm, handle_id=hi)
+ else:
+ raise NotImplementedError
+
+
+def symbols(binfile, *,
+ handle_id=None,
+ _file_exists=os.path.exists,
+ _get_platform_tool=_get_platform_tool,
+ ):
+ """Yield a Symbol for each one found in the binary."""
+ if not _file_exists(binfile):
+ raise Exception('executable missing (need to build it first?)')
+
+ _iter_symbols = _get_platform_tool()
+ yield from _iter_symbols(binfile, handle_id)
+
+
+def variables(binfile, *,
+ resolve,
+ handle_id=None,
+ _iter_symbols=symbols,
+ ):
+ """Yield (Variable, Symbol) for each found symbol."""
+ for symbol in _iter_symbols(binfile, handle_id=handle_id):
+ if symbol.kind != Symbol.KIND.VARIABLE:
+ continue
+ var = resolve(symbol) or None
+ yield var, symbol
diff --git a/Tools/c-analyzer/c_symbols/info.py b/Tools/c-analyzer/c_analyzer/symbols/info.py
index f6ed52c..96a251a 100644
--- a/Tools/c-analyzer/c_symbols/info.py
+++ b/Tools/c-analyzer/c_analyzer/symbols/info.py
@@ -1,7 +1,7 @@
from collections import namedtuple
-from c_analyzer_common.info import ID
-from c_analyzer_common.util import classonly, _NTBase
+from c_analyzer.common.info import ID
+from c_analyzer.common.util import classonly, _NTBase
class Symbol(_NTBase, namedtuple('Symbol', 'id kind external')):
diff --git a/Tools/c-analyzer/c_analyzer/variables/__init__.py b/Tools/c-analyzer/c_analyzer/variables/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Tools/c-analyzer/c_analyzer/variables/__init__.py
diff --git a/Tools/c-analyzer/c_analyzer/variables/find.py b/Tools/c-analyzer/c_analyzer/variables/find.py
new file mode 100644
index 0000000..3fe7284
--- /dev/null
+++ b/Tools/c-analyzer/c_analyzer/variables/find.py
@@ -0,0 +1,75 @@
+from ..common import files
+from ..common.info import UNKNOWN
+from ..parser import (
+ find as p_find,
+ )
+from ..symbols import (
+ info as s_info,
+ find as s_find,
+ )
+from .info import Variable
+
+# XXX need tests:
+# * vars_from_source
+
+
+def _remove_cached(cache, var):
+ if not cache:
+ return
+ try:
+ cached = cache[var.filename]
+ cached.remove(var)
+ except (KeyError, IndexError):
+ pass
+
+
+def vars_from_binary(binfile, *,
+ known=None,
+ filenames=None,
+ handle_id=None,
+ check_filename=None,
+ handle_var=Variable.from_id,
+ _iter_vars=s_find.variables,
+ _get_symbol_resolver=s_find.get_resolver,
+ ):
+ """Yield a Variable for each found Symbol.
+
+ Details are filled in from the given "known" variables and types.
+ """
+ cache = {}
+ resolve = _get_symbol_resolver(filenames, known,
+ handle_var=handle_var,
+ check_filename=check_filename,
+ perfilecache=cache,
+ )
+ for var, symbol in _iter_vars(binfile,
+ resolve=resolve,
+ handle_id=handle_id,
+ ):
+ if var is None:
+ var = Variable(symbol.id, UNKNOWN, UNKNOWN)
+ yield var
+ _remove_cached(cache, var)
+
+
+def vars_from_source(filenames, *,
+ preprocessed=None,
+ known=None,
+ handle_id=None,
+ handle_var=Variable.from_id,
+ iter_vars=p_find.variables,
+ ):
+ """Yield a Variable for each declaration in the raw source code.
+
+ Details are filled in from the given "known" variables and types.
+ """
+ cache = {}
+ for varid, decl in iter_vars(filenames or (),
+ perfilecache=cache,
+ preprocessed=preprocessed,
+ known=known,
+ handle_id=handle_id,
+ ):
+ var = handle_var(varid, decl)
+ yield var
+ _remove_cached(cache, var)
diff --git a/Tools/c-analyzer/c_parser/info.py b/Tools/c-analyzer/c_analyzer/variables/info.py
index a4e32d7..336a523 100644
--- a/Tools/c-analyzer/c_parser/info.py
+++ b/Tools/c-analyzer/c_analyzer/variables/info.py
@@ -1,8 +1,7 @@
from collections import namedtuple
-import re
-from c_analyzer_common import info, util
-from c_analyzer_common.util import classonly, _NTBase
+from ..common.info import ID, UNKNOWN
+from ..common.util import classonly, _NTBase
def normalize_vartype(vartype):
@@ -16,26 +15,7 @@ def normalize_vartype(vartype):
return str(vartype)
-def extract_storage(decl, *, isfunc=False):
- """Return (storage, vartype) based on the given declaration.
-
- The default storage is "implicit" or "local".
- """
- if decl == info.UNKNOWN:
- return decl, decl
- if decl.startswith('static '):
- return 'static', decl
- #return 'static', decl.partition(' ')[2].strip()
- elif decl.startswith('extern '):
- return 'extern', decl
- #return 'extern', decl.partition(' ')[2].strip()
- elif re.match('.*\b(static|extern)\b', decl):
- raise NotImplementedError
- elif isfunc:
- return 'local', decl
- else:
- return 'implicit', decl
-
+# XXX Variable.vartype -> decl (Declaration).
class Variable(_NTBase,
namedtuple('Variable', 'id storage vartype')):
@@ -52,16 +32,23 @@ class Variable(_NTBase,
@classonly
def from_parts(cls, filename, funcname, name, decl, storage=None):
+ varid = ID(filename, funcname, name)
if storage is None:
- storage, decl = extract_storage(decl, isfunc=funcname)
- id = info.ID(filename, funcname, name)
- self = cls(id, storage, decl)
+ self = cls.from_id(varid, decl)
+ else:
+ self = cls(varid, storage, decl)
return self
+ @classonly
+ def from_id(cls, varid, decl):
+ from ..parser.declarations import extract_storage
+ storage = extract_storage(decl, infunc=varid.funcname)
+ return cls(varid, storage, decl)
+
def __new__(cls, id, storage, vartype):
self = super().__new__(
cls,
- id=info.ID.from_raw(id),
+ id=ID.from_raw(id),
storage=str(storage) if storage else None,
vartype=normalize_vartype(vartype) if vartype else None,
)
@@ -77,10 +64,10 @@ class Variable(_NTBase,
if not self.id:
raise TypeError('missing id')
- if not self.filename or self.filename == info.UNKNOWN:
+ if not self.filename or self.filename == UNKNOWN:
raise TypeError(f'id missing filename ({self.id})')
- if self.funcname and self.funcname == info.UNKNOWN:
+ if self.funcname and self.funcname == UNKNOWN:
raise TypeError(f'id missing funcname ({self.id})')
self.id.validate()
@@ -89,12 +76,12 @@ class Variable(_NTBase,
"""Fail if the object is invalid (i.e. init with bad data)."""
self._validate_id()
- if self.storage is None or self.storage == info.UNKNOWN:
+ if self.storage is None or self.storage == UNKNOWN:
raise TypeError('missing storage')
elif self.storage not in self.STORAGE:
raise ValueError(f'unsupported storage {self.storage:r}')
- if self.vartype is None or self.vartype == info.UNKNOWN:
+ if self.vartype is None or self.vartype == UNKNOWN:
raise TypeError('missing vartype')
@property
diff --git a/Tools/c-analyzer/c_analyzer/variables/known.py b/Tools/c-analyzer/c_analyzer/variables/known.py
new file mode 100644
index 0000000..aa2934a
--- /dev/null
+++ b/Tools/c-analyzer/c_analyzer/variables/known.py
@@ -0,0 +1,91 @@
+import csv
+
+from ..common.info import ID, UNKNOWN
+from ..common.util import read_tsv
+from .info import Variable
+
+
+# XXX need tests:
+# * read_file()
+# * look_up_variable()
+
+
+COLUMNS = ('filename', 'funcname', 'name', 'kind', 'declaration')
+HEADER = '\t'.join(COLUMNS)
+
+
def read_file(infile, *,
              _read_tsv=read_tsv,
              ):
    """Yield (kind, id, decl) for each row in the data file.

    Every row must hold exactly the five COLUMNS fields, in order.
    A funcname that is empty or "-" is treated as "no function" (None).

    The caller is responsible for validating each row.
    """
    for filename, funcname, name, kind, declaration in _read_tsv(infile, HEADER):
        nofunc = (not funcname) or funcname == '-'
        varid = ID(filename, None if nofunc else funcname, name)
        yield kind, varid, declaration
+
+
def from_file(infile, *,
              handle_var=Variable.from_id,
              _read_file=read_file,
              ):
    """Return the info for known declarations in the given file.

    The result is a dict with a single "variables" entry that maps each
    row's ID to the value produced by handle_var(id, decl).  Every value
    is checked with its validate() method before it is stored.

    Raises ValueError for any row whose kind is not "variable".
    """
    known = {
        'variables': {},
        #'types': {},
        #'constants': {},
        #'macros': {},
    }
    for kind, varid, decl in _read_file(infile):
        if kind != 'variable':
            # Only (kind, id, decl) are available here, so report those
            # rather than a raw row.
            raise ValueError(
                f'unsupported kind {kind!r} in row for {varid} ({decl!r})')
        value = handle_var(varid, decl)
        value.validate()
        known['variables'][varid] = value
    return known
+
+
def look_up_variable(varid, knownvars, *,
                     match_files=(lambda f1, f2: f1 == f2),
                     ):
    """Return the known Variable matching the given ID.

    "knownvars" is a mapping of ID to Variable.

    "match_files" is used to verify if two filenames point to
    the same file.  It is called as match_files(known, wanted).

    When the ID's funcname is UNKNOWN, the known *local* variables are
    searched by name (restricted to the ID's file, if that is known) and
    the first match wins.  Otherwise the ID must have a usable filename
    and is looked up directly; NotImplementedError is raised if it has
    a funcname but no usable filename.

    If no match is found then None is returned.
    """
    if not knownvars:
        return None

    if varid.funcname == UNKNOWN:
        anyfile = not varid.filename or varid.filename == UNKNOWN
        # Use a distinct loop name: reusing "varid" here would compare
        # each known ID against itself and match the first local found.
        for knownid in knownvars:
            if not knownid.funcname:
                continue
            if not anyfile and not match_files(knownid.filename,
                                               varid.filename):
                continue
            if knownid.name == varid.name:
                return knownvars[knownid]
        return None
    elif not varid.filename or varid.filename == UNKNOWN:
        raise NotImplementedError
    else:
        # Accept either a bare ID or an object exposing one as ".id".
        return knownvars.get(getattr(varid, 'id', varid))
diff --git a/Tools/c-analyzer/c_analyzer_common/__init__.py b/Tools/c-analyzer/c_analyzer_common/__init__.py
deleted file mode 100644
index 888b16f..0000000
--- a/Tools/c-analyzer/c_analyzer_common/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import os.path
-
-
-PKG_ROOT = os.path.dirname(__file__)
-DATA_DIR = os.path.dirname(PKG_ROOT)
-REPO_ROOT = os.path.dirname(
- os.path.dirname(DATA_DIR))
-
-SOURCE_DIRS = [os.path.join(REPO_ROOT, name) for name in [
- 'Include',
- 'Python',
- 'Parser',
- 'Objects',
- 'Modules',
- ]]
-
-
-# Clean up the namespace.
-del os
diff --git a/Tools/c-analyzer/c_analyzer_common/info.py b/Tools/c-analyzer/c_analyzer_common/info.py
deleted file mode 100644
index e217380..0000000
--- a/Tools/c-analyzer/c_analyzer_common/info.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from collections import namedtuple
-import re
-
-from .util import classonly, _NTBase
-
-
-UNKNOWN = '???'
-
-NAME_RE = re.compile(r'^([a-zA-Z]|_\w*[a-zA-Z]\w*|[a-zA-Z]\w*)$')
-
-
-class ID(_NTBase, namedtuple('ID', 'filename funcname name')):
- """A unique ID for a single symbol or declaration."""
-
- __slots__ = ()
- # XXX Add optional conditions (tuple of strings) field.
- #conditions = Slot()
-
- @classonly
- def from_raw(cls, raw):
- if not raw:
- return None
- if isinstance(raw, str):
- return cls(None, None, raw)
- try:
- name, = raw
- filename = None
- except ValueError:
- try:
- filename, name = raw
- except ValueError:
- return super().from_raw(raw)
- return cls(filename, None, name)
-
- def __new__(cls, filename, funcname, name):
- self = super().__new__(
- cls,
- filename=str(filename) if filename else None,
- funcname=str(funcname) if funcname else None,
- name=str(name) if name else None,
- )
- #cls.conditions.set(self, tuple(str(s) if s else None
- # for s in conditions or ()))
- return self
-
- def validate(self):
- """Fail if the object is invalid (i.e. init with bad data)."""
- if not self.name:
- raise TypeError('missing name')
- else:
- if not NAME_RE.match(self.name):
- raise ValueError(
- f'name must be an identifier, got {self.name!r}')
-
- # Symbols from a binary might not have filename/funcname info.
-
- if self.funcname:
- if not self.filename:
- raise TypeError('missing filename')
- if not NAME_RE.match(self.funcname) and self.funcname != UNKNOWN:
- raise ValueError(
- f'name must be an identifier, got {self.funcname!r}')
-
- # XXX Require the filename (at least UNKONWN)?
- # XXX Check the filename?
-
- @property
- def islocal(self):
- return self.funcname is not None
diff --git a/Tools/c-analyzer/c_analyzer_common/known.py b/Tools/c-analyzer/c_analyzer_common/known.py
deleted file mode 100644
index dec1e1d..0000000
--- a/Tools/c-analyzer/c_analyzer_common/known.py
+++ /dev/null
@@ -1,74 +0,0 @@
-import csv
-import os.path
-
-from c_parser.info import Variable
-
-from . import DATA_DIR
-from .info import ID, UNKNOWN
-from .util import read_tsv
-
-
-DATA_FILE = os.path.join(DATA_DIR, 'known.tsv')
-
-COLUMNS = ('filename', 'funcname', 'name', 'kind', 'declaration')
-HEADER = '\t'.join(COLUMNS)
-
-
-# XXX need tests:
-# * from_file()
-
-def from_file(infile, *,
- _read_tsv=read_tsv,
- ):
- """Return the info for known declarations in the given file."""
- known = {
- 'variables': {},
- #'types': {},
- #'constants': {},
- #'macros': {},
- }
- for row in _read_tsv(infile, HEADER):
- filename, funcname, name, kind, declaration = row
- if not funcname or funcname == '-':
- funcname = None
- id = ID(filename, funcname, name)
- if kind == 'variable':
- values = known['variables']
- if funcname:
- storage = _get_storage(declaration) or 'local'
- else:
- storage = _get_storage(declaration) or 'implicit'
- value = Variable(id, storage, declaration)
- else:
- raise ValueError(f'unsupported kind in row {row}')
- value.validate()
-# if value.name == 'id' and declaration == UNKNOWN:
-# # None of these are variables.
-# declaration = 'int id';
-# else:
-# value.validate()
- values[id] = value
- return known
-
-
-def _get_storage(decl):
- # statics
- if decl.startswith('static '):
- return 'static'
- if decl.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
- return 'static'
- if decl.startswith(('_Py_IDENTIFIER(', '_Py_static_string(')):
- return 'static'
- if decl.startswith('PyDoc_VAR('):
- return 'static'
- if decl.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
- return 'static'
- if decl.startswith('WRAP_METHOD('):
- return 'static'
- # public extern
- if decl.startswith('extern '):
- return 'extern'
- if decl.startswith('PyAPI_DATA('):
- return 'extern'
- # implicit or local
- return None
diff --git a/Tools/c-analyzer/c_globals/find.py b/Tools/c-analyzer/c_globals/find.py
deleted file mode 100644
index a51b947..0000000
--- a/Tools/c-analyzer/c_globals/find.py
+++ /dev/null
@@ -1,95 +0,0 @@
-from c_analyzer_common import SOURCE_DIRS
-from c_analyzer_common.info import UNKNOWN
-from c_symbols import (
- info as s_info,
- binary as b_symbols,
- source as s_symbols,
- resolve,
- )
-from c_parser import info, declarations
-
-
-# XXX needs tests:
-# * iter_variables
-
-def globals_from_binary(binfile=b_symbols.PYTHON, *,
- knownvars=None,
- dirnames=None,
- _iter_symbols=b_symbols.iter_symbols,
- _resolve=resolve.symbols_to_variables,
- _get_symbol_resolver=resolve.get_resolver,
- ):
- """Yield a Variable for each found Symbol.
-
- Details are filled in from the given "known" variables and types.
- """
- symbols = _iter_symbols(binfile, find_local_symbol=None)
- #symbols = list(symbols)
- for variable in _resolve(symbols,
- resolve=_get_symbol_resolver(knownvars, dirnames),
- ):
- # Skip each non-global variable (unless we couldn't find it).
- # XXX Drop the "UNKNOWN" condition?
- if not variable.isglobal and variable.vartype != UNKNOWN:
- continue
- yield variable
-
-
-def globals_from_declarations(dirnames=SOURCE_DIRS, *,
- known=None,
- ):
- """Yield a Variable for each found declaration.
-
- Details are filled in from the given "known" variables and types.
- """
- raise NotImplementedError
-
-
-def iter_variables(kind='platform', *,
- known=None,
- dirnames=None,
- _resolve_symbols=resolve.symbols_to_variables,
- _get_symbol_resolver=resolve.get_resolver,
- _symbols_from_binary=b_symbols.iter_symbols,
- _symbols_from_source=s_symbols.iter_symbols,
- _iter_raw=declarations.iter_all,
- _iter_preprocessed=declarations.iter_preprocessed,
- ):
- """Yield a Variable for each one found (e.g. in files)."""
- kind = kind or 'platform'
-
- if kind == 'symbols':
- knownvars = (known or {}).get('variables')
- yield from _resolve_symbols(
- _symbols_from_source(dirnames, known),
- resolve=_get_symbol_resolver(knownvars, dirnames),
- )
- elif kind == 'platform':
- knownvars = (known or {}).get('variables')
- yield from _resolve_symbols(
- _symbols_from_binary(find_local_symbol=None),
- resolve=_get_symbol_resolver(knownvars, dirnames),
- )
- elif kind == 'declarations':
- for decl in _iter_raw(dirnames):
- if not isinstance(decl, info.Variable):
- continue
- yield decl
- elif kind == 'preprocessed':
- for decl in _iter_preprocessed(dirnames):
- if not isinstance(decl, info.Variable):
- continue
- yield decl
- else:
- raise ValueError(f'unsupported kind {kind!r}')
-
-
-def globals(dirnames, known, *,
- kind=None, # Use the default.
- _iter_variables=iter_variables,
- ):
- """Return a list of (StaticVar, <supported>) for each found global var."""
- for found in _iter_variables(kind, known=known, dirnames=dirnames):
- if not found.isglobal:
- continue
- yield found
diff --git a/Tools/c-analyzer/c_globals/show.py b/Tools/c-analyzer/c_globals/show.py
deleted file mode 100644
index f4298b1..0000000
--- a/Tools/c-analyzer/c_globals/show.py
+++ /dev/null
@@ -1,16 +0,0 @@
-
-def basic(globals, *,
- _print=print):
- """Print each row simply."""
- for variable in globals:
- if variable.funcname:
- line = f'{variable.filename}:{variable.funcname}():{variable.name}'
- else:
- line = f'{variable.filename}:{variable.name}'
- vartype = variable.vartype
- #if vartype.startswith('static '):
- # vartype = vartype.partition(' ')[2]
- #else:
- # vartype = '=' + vartype
- line = f'{line:<64} {vartype}'
- _print(line)
diff --git a/Tools/c-analyzer/c_symbols/resolve.py b/Tools/c-analyzer/c_symbols/resolve.py
deleted file mode 100644
index 56210ce..0000000
--- a/Tools/c-analyzer/c_symbols/resolve.py
+++ /dev/null
@@ -1,147 +0,0 @@
-import os.path
-
-from c_analyzer_common import files
-from c_analyzer_common.info import UNKNOWN
-from c_parser import declarations, info
-from .info import Symbol
-from .source import _find_symbol
-
-
-# XXX need tests:
-# * look_up_known_symbol()
-# * symbol_from_source()
-# * get_resolver()
-# * symbols_to_variables()
-
-def look_up_known_symbol(symbol, knownvars, *,
- match_files=(lambda f1, f2: f1 == f2),
- ):
- """Return the known variable matching the given symbol.
-
- "knownvars" is a mapping of common.ID to parser.Variable.
-
- "match_files" is used to verify if two filenames point to
- the same file.
- """
- if not knownvars:
- return None
-
- if symbol.funcname == UNKNOWN:
- if not symbol.filename or symbol.filename == UNKNOWN:
- for varid in knownvars:
- if not varid.funcname:
- continue
- if varid.name == symbol.name:
- return knownvars[varid]
- else:
- return None
- else:
- for varid in knownvars:
- if not varid.funcname:
- continue
- if not match_files(varid.filename, symbol.filename):
- continue
- if varid.name == symbol.name:
- return knownvars[varid]
- else:
- return None
- elif not symbol.filename or symbol.filename == UNKNOWN:
- raise NotImplementedError
- else:
- return knownvars.get(symbol.id)
-
-
-def find_in_source(symbol, dirnames, *,
- _perfilecache={},
- _find_symbol=_find_symbol,
- _iter_files=files.iter_files_by_suffix,
- ):
- """Return the Variable matching the given Symbol.
-
- If there is no match then return None.
- """
- if symbol.filename and symbol.filename != UNKNOWN:
- filenames = [symbol.filename]
- else:
- filenames = _iter_files(dirnames, ('.c', '.h'))
-
- if symbol.funcname and symbol.funcname != UNKNOWN:
- raise NotImplementedError
-
- (filename, funcname, decl
- ) = _find_symbol(symbol.name, filenames, _perfilecache)
- if filename == UNKNOWN:
- return None
- return info.Variable.from_parts(filename, funcname, symbol.name, decl)
-
-
-def get_resolver(knownvars=None, dirnames=None, *,
- _look_up_known=look_up_known_symbol,
- _from_source=find_in_source,
- ):
- """Return a "resolver" func for the given known vars and dirnames.
-
- The func takes a single Symbol and returns a corresponding Variable.
- If the symbol was located then the variable will be valid, populated
- with the corresponding information. Otherwise None is returned.
- """
- if knownvars:
- knownvars = dict(knownvars) # a copy
- def resolve_known(symbol):
- found = _look_up_known(symbol, knownvars)
- if found is None:
- return None
- elif symbol.funcname == UNKNOWN:
- knownvars.pop(found.id)
- elif not symbol.filename or symbol.filename == UNKNOWN:
- knownvars.pop(found.id)
- return found
- if dirnames:
- def resolve(symbol):
- found = resolve_known(symbol)
- if found is None:
- return None
- #return _from_source(symbol, dirnames)
- else:
- for dirname in dirnames:
- if not dirname.endswith(os.path.sep):
- dirname += os.path.sep
- if found.filename.startswith(dirname):
- break
- else:
- return None
- return found
- else:
- resolve = resolve_known
- elif dirnames:
- def resolve(symbol):
- return _from_source(symbol, dirnames)
- else:
- def resolve(symbol):
- return None
- return resolve
-
-
-def symbols_to_variables(symbols, *,
- resolve=(lambda s: look_up_known_symbol(s, None)),
- ):
- """Yield the variable the matches each given symbol.
-
- Use get_resolver() for a "resolve" func to use.
- """
- for symbol in symbols:
- if isinstance(symbol, info.Variable):
- # XXX validate?
- yield symbol
- continue
- if symbol.kind != Symbol.KIND.VARIABLE:
- continue
- resolved = resolve(symbol)
- if resolved is None:
- #raise NotImplementedError(symbol)
- resolved = info.Variable(
- id=symbol.id,
- storage=UNKNOWN,
- vartype=UNKNOWN,
- )
- yield resolved
diff --git a/Tools/c-analyzer/c_symbols/source.py b/Tools/c-analyzer/c_symbols/source.py
deleted file mode 100644
index a724810..0000000
--- a/Tools/c-analyzer/c_symbols/source.py
+++ /dev/null
@@ -1,58 +0,0 @@
-from c_analyzer_common import files
-from c_analyzer_common.info import UNKNOWN
-from c_parser import declarations
-
-
-# XXX need tests:
-# * find_symbol()
-
-def find_symbol(name, dirnames, *,
- _perfilecache,
- _iter_files=files.iter_files_by_suffix,
- **kwargs
- ):
- """Return (filename, funcname, vartype) for the matching Symbol."""
- filenames = _iter_files(dirnames, ('.c', '.h'))
- return _find_symbol(name, filenames, _perfilecache, **kwargs)
-
-
-def _get_symbols(filename, *,
- _iter_variables=declarations.iter_variables,
- ):
- """Return the list of Symbols found in the given file."""
- symbols = {}
- for funcname, name, vartype in _iter_variables(filename):
- if not funcname:
- continue
- try:
- instances = symbols[name]
- except KeyError:
- instances = symbols[name] = []
- instances.append((funcname, vartype))
- return symbols
-
-
-def _find_symbol(name, filenames, _perfilecache, *,
- _get_local_symbols=_get_symbols,
- ):
- for filename in filenames:
- try:
- symbols = _perfilecache[filename]
- except KeyError:
- symbols = _perfilecache[filename] = _get_local_symbols(filename)
-
- try:
- instances = symbols[name]
- except KeyError:
- continue
-
- funcname, vartype = instances.pop(0)
- if not instances:
- symbols.pop(name)
- return filename, funcname, vartype
- else:
- return UNKNOWN, UNKNOWN, UNKNOWN
-
-
-def iter_symbols():
- raise NotImplementedError
diff --git a/Tools/c-analyzer/c_globals/README b/Tools/c-analyzer/cpython/README
index 772b8be..772b8be 100644
--- a/Tools/c-analyzer/c_globals/README
+++ b/Tools/c-analyzer/cpython/README
diff --git a/Tools/c-analyzer/cpython/__init__.py b/Tools/c-analyzer/cpython/__init__.py
new file mode 100644
index 0000000..ae45b42
--- /dev/null
+++ b/Tools/c-analyzer/cpython/__init__.py
@@ -0,0 +1,29 @@
+import os.path
+import sys
+
+
# This package lives at <repo>/Tools/c-analyzer/cpython/, so the tool
# root (c-analyzer/) is two dirname() steps up from this file.
TOOL_ROOT = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
DATA_DIR = TOOL_ROOT
# The repo root is two more levels up (past c-analyzer/ and Tools/).
REPO_ROOT = os.path.dirname(os.path.dirname(TOOL_ROOT))

INCLUDE_DIRS = [os.path.join(REPO_ROOT, 'Include')]
SOURCE_DIRS = [
    os.path.join(REPO_ROOT, subdir)
    for subdir in ('Python', 'Parser', 'Objects', 'Modules')
]

#PYTHON = os.path.join(REPO_ROOT, 'python')
PYTHON = sys.executable


# Clean up the namespace.
del sys
del os
diff --git a/Tools/c-analyzer/c_globals/__main__.py b/Tools/c-analyzer/cpython/__main__.py
index 9570fb6..6b0f9bc 100644
--- a/Tools/c-analyzer/c_globals/__main__.py
+++ b/Tools/c-analyzer/cpython/__main__.py
@@ -1,42 +1,42 @@
import argparse
-import os.path
import re
import sys
-from c_analyzer_common import SOURCE_DIRS, REPO_ROOT
-from c_analyzer_common.info import UNKNOWN
-from c_analyzer_common.known import (
+from c_analyzer.common import show
+from c_analyzer.common.info import UNKNOWN
+
+from . import SOURCE_DIRS
+from .find import supported_vars
+from .known import (
from_file as known_from_file,
DATA_FILE as KNOWN_FILE,
)
-from . import find, show
-from .supported import is_supported, ignored_from_file, IGNORED_FILE, _is_object
+from .supported import IGNORED_FILE
-def _match_unused_global(variable, knownvars, used):
- found = []
- for varid in knownvars:
- if varid in used:
- continue
- if varid.funcname is not None:
- continue
- if varid.name != variable.name:
- continue
- if variable.filename and variable.filename != UNKNOWN:
- if variable.filename == varid.filename:
+def _check_results(unknown, knownvars, used):
+ def _match_unused_global(variable):
+ found = []
+ for varid in knownvars:
+ if varid in used:
+ continue
+ if varid.funcname is not None:
+ continue
+ if varid.name != variable.name:
+ continue
+ if variable.filename and variable.filename != UNKNOWN:
+ if variable.filename == varid.filename:
+ found.append(varid)
+ else:
found.append(varid)
- else:
- found.append(varid)
- return found
-
+ return found
-def _check_results(unknown, knownvars, used):
badknown = set()
for variable in sorted(unknown):
msg = None
if variable.funcname != UNKNOWN:
msg = f'could not find global symbol {variable.id}'
- elif m := _match_unused_global(variable, knownvars, used):
+ elif m := _match_unused_global(variable):
assert isinstance(m, list)
badknown.update(m)
elif variable.name in ('completed', 'id'): # XXX Figure out where these variables are.
@@ -65,32 +65,29 @@ def _check_results(unknown, knownvars, used):
raise Exception('could not find all symbols')
-def _find_globals(dirnames, known, ignored):
- if dirnames == SOURCE_DIRS:
- dirnames = [os.path.relpath(d, REPO_ROOT) for d in dirnames]
-
- ignored = ignored_from_file(ignored)
- known = known_from_file(known)
+# XXX Move this check to its own command.
+def cmd_check_cache(cmd, *,
+ known=KNOWN_FILE,
+ ignored=IGNORED_FILE,
+ _known_from_file=known_from_file,
+ _find=supported_vars,
+ ):
+ known = _known_from_file(known)
used = set()
unknown = set()
- knownvars = (known or {}).get('variables')
- for variable in find.globals_from_binary(knownvars=knownvars,
- dirnames=dirnames):
- #for variable in find.globals(dirnames, known, kind='platform'):
- if variable.vartype == UNKNOWN:
- unknown.add(variable)
+ for var, supported in _find(known=known, ignored=ignored):
+ if supported is None:
+ unknown.add(var)
continue
- yield variable, is_supported(variable, ignored, known)
- used.add(variable.id)
-
- #_check_results(unknown, knownvars, used)
+ used.add(var.id)
+ _check_results(unknown, known['variables'], used)
-def cmd_check(cmd, dirs=SOURCE_DIRS, *,
- ignored=IGNORED_FILE,
+def cmd_check(cmd, *,
known=KNOWN_FILE,
- _find=_find_globals,
+ ignored=IGNORED_FILE,
+ _find=supported_vars,
_show=show.basic,
_print=print,
):
@@ -100,7 +97,11 @@ def cmd_check(cmd, dirs=SOURCE_DIRS, *,
In the failure case, the list of unsupported variables
will be printed out.
"""
- unsupported = [v for v, s in _find(dirs, known, ignored) if not s]
+ unsupported = []
+ for var, supported in _find(known=known, ignored=ignored):
+ if not supported:
+ unsupported.append(var)
+
if not unsupported:
#_print('okay')
return
@@ -112,11 +113,11 @@ def cmd_check(cmd, dirs=SOURCE_DIRS, *,
sys.exit(1)
-def cmd_show(cmd, dirs=SOURCE_DIRS, *,
- ignored=IGNORED_FILE,
+def cmd_show(cmd, *,
known=KNOWN_FILE,
+ ignored=IGNORED_FILE,
skip_objects=False,
- _find=_find_globals,
+ _find=supported_vars,
_show=show.basic,
_print=print,
):
@@ -127,10 +128,12 @@ def cmd_show(cmd, dirs=SOURCE_DIRS, *,
"""
allsupported = []
allunsupported = []
- for found, supported in _find(dirs, known, ignored):
- if skip_objects: # XXX Support proper filters instead.
- if _is_object(found.vartype):
- continue
+ for found, supported in _find(known=known,
+ ignored=ignored,
+ skip_objects=skip_objects,
+ ):
+ if supported is None:
+ continue
(allsupported if supported else allunsupported
).append(found)
@@ -165,9 +168,9 @@ def parse_args(prog=PROG, argv=sys.argv[1:], *, _fail=None):
common.add_argument('--known', metavar='FILE',
default=KNOWN_FILE,
help='path to file that lists known types')
- common.add_argument('dirs', metavar='DIR', nargs='*',
- default=SOURCE_DIRS,
- help='a directory to check')
+ #common.add_argument('dirs', metavar='DIR', nargs='*',
+ # default=SOURCE_DIRS,
+ # help='a directory to check')
parser = argparse.ArgumentParser(
prog=prog,
diff --git a/Tools/c-analyzer/c_analyzer_common/_generate.py b/Tools/c-analyzer/cpython/_generate.py
index 9b2fc9e..4c340ac 100644
--- a/Tools/c-analyzer/c_analyzer_common/_generate.py
+++ b/Tools/c-analyzer/cpython/_generate.py
@@ -1,15 +1,16 @@
# The code here consists of hacks for pre-populating the known.tsv file.
-from c_parser.preprocessor import _iter_clean_lines
-from c_parser.naive import (
+from c_analyzer.parser.preprocessor import _iter_clean_lines
+from c_analyzer.parser.naive import (
iter_variables, parse_variable_declaration, find_variables,
)
-from c_parser.info import Variable
+from c_analyzer.common.known import HEADER as KNOWN_HEADER
+from c_analyzer.common.info import UNKNOWN, ID
+from c_analyzer.variables import Variable
+from c_analyzer.util import write_tsv
from . import SOURCE_DIRS, REPO_ROOT
-from .known import DATA_FILE as KNOWN_FILE, HEADER as KNOWN_HEADER
-from .info import UNKNOWN, ID
-from .util import write_tsv
+from .known import DATA_FILE as KNOWN_FILE
from .files import iter_cpython_files
diff --git a/Tools/c-analyzer/cpython/files.py b/Tools/c-analyzer/cpython/files.py
new file mode 100644
index 0000000..543097a
--- /dev/null
+++ b/Tools/c-analyzer/cpython/files.py
@@ -0,0 +1,29 @@
+from c_analyzer.common.files import (
+ C_SOURCE_SUFFIXES, walk_tree, iter_files_by_suffix,
+ )
+
+from . import SOURCE_DIRS, REPO_ROOT
+
+# XXX need tests:
+# * iter_files()
+
+
def iter_files(*,
               walk=walk_tree,
               _files=iter_files_by_suffix,
               ):
    """Yield each C source file found under the CPython SOURCE_DIRS.

    The filenames come from "_files", which is called with SOURCE_DIRS,
    C_SOURCE_SUFFIXES and REPO_ROOT; "walk" is passed through to it.
    Any filename that starts with an excluded tree prefix is skipped.
    """
    # Imported locally because this module does not import "os" itself.
    import os.path

    # The trailing '' makes join() end each prefix with the path separator.
    excludedtrees = (
        os.path.join('Include', 'cpython', ''),
    )
    for filename in _files(SOURCE_DIRS, C_SOURCE_SUFFIXES, REPO_ROOT,
                           walk=walk,
                           ):
        if filename.startswith(excludedtrees):
            continue
        yield filename
diff --git a/Tools/c-analyzer/cpython/find.py b/Tools/c-analyzer/cpython/find.py
new file mode 100644
index 0000000..a7bc0b4
--- /dev/null
+++ b/Tools/c-analyzer/cpython/find.py
@@ -0,0 +1,101 @@
+import os.path
+
+from c_analyzer.common import files
+from c_analyzer.common.info import UNKNOWN, ID
+from c_analyzer.variables import find as _common
+
+from . import SOURCE_DIRS, PYTHON, REPO_ROOT
+from .known import (
+ from_file as known_from_file,
+ DATA_FILE as KNOWN_FILE,
+ )
+from .supported import (
+ ignored_from_file, IGNORED_FILE, is_supported, _is_object,
+ )
+
+# XXX need tests:
+# * vars_from_binary()
+# * vars_from_source()
+# * supported_vars()
+
+
def _handle_id(filename, funcname, name, *,
               _relpath=os.path.relpath,
               ):
    """Return the ID for the parts, with the filename relative to REPO_ROOT."""
    return ID(_relpath(filename, REPO_ROOT), funcname, name)
+
+
def vars_from_binary(*,
                     known=KNOWN_FILE,
                     _known_from_file=known_from_file,
                     _iter_files=files.iter_files_by_suffix,
                     _iter_vars=_common.vars_from_binary,
                     ):
    """Yield a Variable for each found Symbol.

    "known" may be the already-loaded data or the name of a file to
    load it from.  The symbols are read from the PYTHON binary.

    Details are filled in from the given "known" variables and types.
    """
    if isinstance(known, str):
        known = _known_from_file(known)
    # XXX For now we only use known variables (no source lookup).
    # The '.c' file listing is still requested, but its result is not used.
    _iter_files(SOURCE_DIRS, ('.c',))
    yield from _iter_vars(
        PYTHON,
        known=known,
        filenames=None,
        handle_id=_handle_id,
        check_filename=(lambda n: True),
    )
+
+
def vars_from_source(*,
                     preprocessed=None,
                     known=KNOWN_FILE,
                     _known_from_file=known_from_file,
                     _iter_files=files.iter_files_by_suffix,
                     _iter_vars=_common.vars_from_source,
                     ):
    """Yield a Variable for each declaration in the raw source code.

    "known" may be the already-loaded data or the name of a file to
    load it from.  Only the '.c' files under SOURCE_DIRS are examined.

    Details are filled in from the given "known" variables and types.
    """
    if isinstance(known, str):
        known = _known_from_file(known)
    c_files = _iter_files(SOURCE_DIRS, ('.c',))
    yield from _iter_vars(
        c_files,
        preprocessed=preprocessed,
        known=known,
        handle_id=_handle_id,
    )
+
+
def supported_vars(*,
                   known=KNOWN_FILE,
                   ignored=IGNORED_FILE,
                   skip_objects=False,
                   _known_from_file=known_from_file,
                   _ignored_from_file=ignored_from_file,
                   _iter_vars=vars_from_binary,
                   _is_supported=is_supported,
                   ):
    """Yield (var, is supported) for each found global variable.

    "known" and "ignored" may each be the already-loaded data or the
    name of a file to load it from.

    Non-global variables are skipped.  A variable whose type is UNKNOWN
    is yielded with None in place of the supported flag.  If
    "skip_objects" is true then variables for which _is_object() returns
    true for the vartype are skipped as well.
    """
    if isinstance(known, str):
        known = _known_from_file(known)
    if isinstance(ignored, str):
        ignored = _ignored_from_file(ignored)

    for var in _iter_vars(known=known):
        if not var.isglobal:
            continue
        if var.vartype == UNKNOWN:
            # Support cannot be decided without a type; let the caller choose.
            yield var, None
            continue
        # XXX Support proper filters instead.
        if skip_objects and _is_object(var.vartype):
            continue
        yield var, _is_supported(var, ignored, known)
diff --git a/Tools/c-analyzer/cpython/known.py b/Tools/c-analyzer/cpython/known.py
new file mode 100644
index 0000000..c3cc2c0
--- /dev/null
+++ b/Tools/c-analyzer/cpython/known.py
@@ -0,0 +1,66 @@
+import csv
+import os.path
+
+from c_analyzer.parser.declarations import extract_storage
+from c_analyzer.variables import known as _common
+from c_analyzer.variables.info import Variable
+
+from . import DATA_DIR
+
+
+# XXX need tests:
+# * from_file()
+# * look_up_variable()
+
+
+DATA_FILE = os.path.join(DATA_DIR, 'known.tsv')
+
+
+def _get_storage(decl, infunc):
+ # statics
+ if decl.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
+ return 'static'
+ if decl.startswith(('_Py_IDENTIFIER(', '_Py_static_string(')):
+ return 'static'
+ if decl.startswith('PyDoc_VAR('):
+ return 'static'
+ if decl.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
+ return 'static'
+ if decl.startswith('WRAP_METHOD('):
+ return 'static'
+ # public extern
+ if decl.startswith('PyAPI_DATA('):
+ return 'extern'
+ # Fall back to the normal handler.
+ return extract_storage(decl, infunc=infunc)
+
+
def _handle_var(varid, decl):
    """Return the Variable for the given ID and declaration.

    The storage is worked out by _get_storage() from the declaration
    and the ID's funcname.
    """
    # Disabled special case, kept for reference:
    #if varid.name == 'id' and decl == UNKNOWN:
    #    # None of these are variables.
    #    decl = 'int id';
    return Variable(varid, _get_storage(decl, varid.funcname), decl)
+
+
def from_file(infile=DATA_FILE, *,
              _from_file=_common.from_file,
              _handle_var=_handle_var,
              ):
    """Return the info for known declarations in the given file.

    This defers to "_from_file", passing "_handle_var" as the handler
    used to build each variable.  "infile" defaults to DATA_FILE.
    """
    known = _from_file(infile, handle_var=_handle_var)
    return known
+
+
def look_up_variable(varid, knownvars, *,
                     _lookup=_common.look_up_variable,
                     ):
    """Return the known variable matching the given ID.

    "knownvars" is a mapping of ID to Variable.

    The lookup itself is done by "_lookup", which is called with just
    the ID and the mapping; its result is returned unchanged.
    """
    found = _lookup(varid, knownvars)
    return found
diff --git a/Tools/c-analyzer/c_globals/supported.py b/Tools/c-analyzer/cpython/supported.py
index d185daa..18786ee 100644
--- a/Tools/c-analyzer/c_globals/supported.py
+++ b/Tools/c-analyzer/cpython/supported.py
@@ -1,9 +1,13 @@
import os.path
import re
-from c_analyzer_common import DATA_DIR
-from c_analyzer_common.info import ID
-from c_analyzer_common.util import read_tsv, write_tsv
+from c_analyzer.common.info import ID
+from c_analyzer.common.util import read_tsv, write_tsv
+
+from . import DATA_DIR
+
+# XXX need tests:
+# * generate / script
IGNORED_FILE = os.path.join(DATA_DIR, 'ignored.tsv')
@@ -379,11 +383,12 @@ def _generate_ignored_file(variables, filename=None, *,
if __name__ == '__main__':
- from c_analyzer_common import SOURCE_DIRS
- from c_analyzer_common.known import (
+ from cpython import SOURCE_DIRS
+ from cpython.known import (
from_file as known_from_file,
DATA_FILE as KNOWN_FILE,
)
+ # XXX This is wrong!
from . import find
known = known_from_file(KNOWN_FILE)
knownvars = (known or {}).get('variables')