summaryrefslogtreecommitdiffstats
path: root/Tools/c-analyzer/c_analyzer
diff options
context:
space:
mode:
authorEric Snow <ericsnowcurrently@gmail.com>2020-10-23 00:42:51 (GMT)
committerGitHub <noreply@github.com>2020-10-23 00:42:51 (GMT)
commit345cd37abe324ad4f60f80e2c3133b8849e54e9b (patch)
tree5d965e662dca9dcac19e7eddd63a3d9d0b816fed /Tools/c-analyzer/c_analyzer
parentec388cfb4ede56dace2bb78851ff6f38fa2a6abe (diff)
downloadcpython-345cd37abe324ad4f60f80e2c3133b8849e54e9b.zip
cpython-345cd37abe324ad4f60f80e2c3133b8849e54e9b.tar.gz
cpython-345cd37abe324ad4f60f80e2c3133b8849e54e9b.tar.bz2
bpo-36876: Fix the C analyzer tool. (GH-22841)
The original tool wasn't working right and it was simpler to create a new one, partially re-using some of the old code. At this point the tool runs properly on the master branch. (Try: ./python Tools/c-analyzer/c-analyzer.py analyze.) It takes ~40 seconds on my machine to analyze the full CPython code base. Note that we'll need to iron out some OS-specific stuff (e.g. preprocessor). We're okay though since this tool isn't used yet in our workflow. We will also need to verify the analysis results in detail before activating the check in CI, though I'm pretty sure it's close. https://bugs.python.org/issue36876
Diffstat (limited to 'Tools/c-analyzer/c_analyzer')
-rw-r--r--Tools/c-analyzer/c_analyzer/__init__.py103
-rw-r--r--Tools/c-analyzer/c_analyzer/__main__.py501
-rw-r--r--Tools/c-analyzer/c_analyzer/analyze.py307
-rw-r--r--Tools/c-analyzer/c_analyzer/common/__init__.py0
-rw-r--r--Tools/c-analyzer/c_analyzer/common/files.py124
-rw-r--r--Tools/c-analyzer/c_analyzer/common/info.py138
-rw-r--r--Tools/c-analyzer/c_analyzer/common/show.py11
-rw-r--r--Tools/c-analyzer/c_analyzer/common/util.py243
-rw-r--r--Tools/c-analyzer/c_analyzer/datafiles.py109
-rw-r--r--Tools/c-analyzer/c_analyzer/info.py353
-rw-r--r--Tools/c-analyzer/c_analyzer/parser/__init__.py0
-rw-r--r--Tools/c-analyzer/c_analyzer/parser/declarations.py339
-rw-r--r--Tools/c-analyzer/c_analyzer/parser/find.py107
-rw-r--r--Tools/c-analyzer/c_analyzer/parser/naive.py179
-rw-r--r--Tools/c-analyzer/c_analyzer/parser/preprocessor.py511
-rw-r--r--Tools/c-analyzer/c_analyzer/parser/source.py34
-rw-r--r--Tools/c-analyzer/c_analyzer/symbols/__init__.py0
-rw-r--r--Tools/c-analyzer/c_analyzer/symbols/_nm.py117
-rw-r--r--Tools/c-analyzer/c_analyzer/symbols/find.py175
-rw-r--r--Tools/c-analyzer/c_analyzer/symbols/info.py51
-rw-r--r--Tools/c-analyzer/c_analyzer/variables/__init__.py0
-rw-r--r--Tools/c-analyzer/c_analyzer/variables/find.py75
-rw-r--r--Tools/c-analyzer/c_analyzer/variables/info.py93
-rw-r--r--Tools/c-analyzer/c_analyzer/variables/known.py91
24 files changed, 1373 insertions, 2288 deletions
diff --git a/Tools/c-analyzer/c_analyzer/__init__.py b/Tools/c-analyzer/c_analyzer/__init__.py
index e69de29..4a01cd3 100644
--- a/Tools/c-analyzer/c_analyzer/__init__.py
+++ b/Tools/c-analyzer/c_analyzer/__init__.py
@@ -0,0 +1,103 @@
+from c_parser import (
+ parse_files as _parse_files,
+)
+from c_parser.info import (
+ KIND,
+ TypeDeclaration,
+ filter_by_kind,
+ collate_by_kind_group,
+ resolve_parsed,
+)
+from . import (
+ analyze as _analyze,
+ datafiles as _datafiles,
+)
+from .info import Analysis
+
+
def analyze(filenames, **kwargs):
    """Analyze the decls in the given files and return an Analysis.

    Extra keyword arguments are passed through to
    iter_analysis_results().
    """
    # Fix: the parameter was misspelled ("filenmes") while the body used
    # "filenames", and the helper call was misspelled
    # ("iter_analyis_results"); both raised NameError at call time.
    results = iter_analysis_results(filenames, **kwargs)
    return Analysis.from_results(results)
+
+
def iter_analysis_results(filenames, *,
                          known=None,
                          **kwargs
                          ):
    """Yield (decl, analyzed) for each decl found in the given files.

    *known* is forwarded to analyze_decls(); the remaining keyword
    arguments go to iter_decls().
    """
    # Fix: the parameter was misspelled ("filenmes") while the body uses
    # "filenames", which raised NameError at call time.
    decls = iter_decls(filenames, **kwargs)
    yield from analyze_decls(decls, known)
+
+
def iter_decls(filenames, *,
               kinds=None,
               parse_files=_parse_files,
               **kwargs
               ):
    """Yield the resolved declarations of the requested kinds."""
    # Only decl kinds are supported; an explicit *kinds* narrows them.
    if kinds is None:
        kinds = KIND.DECLS
    else:
        kinds = KIND.DECLS & set(kinds)
    if not parse_files:
        parse_files = _parse_files

    filtered = filter_by_kind(parse_files(filenames, **kwargs), kinds)
    yield from map(resolve_parsed, filtered)
+
+
def analyze_decls(decls, known, *,
                  analyze_resolved=None,
                  handle_unresolved=True,
                  relroot=None,
                  ):
    """Yield (decl, resolved) for each of the given decls.

    *known* identifies the pre-loaded "known" data (see
    _datafiles.get_known()).  *analyze_resolved* is an optional hook run
    on each resolved decl.  If *handle_unresolved* is true then a decl
    left with missing type dependencies triggers NotImplementedError.
    """
    knowntypes, knowntypespecs = _datafiles.get_known(
        known,
        handle_unresolved=handle_unresolved,
        analyze_resolved=analyze_resolved,
        relroot=relroot,
    )

    decls = list(decls)
    collated = collate_by_kind_group(decls)

    # Type decls are resolved first (iteratively), since other decls
    # depend on them; None means "not resolved yet".
    types = {decl: None for decl in collated['type']}
    typespecs = _analyze.get_typespecs(types)

    def analyze_decl(decl):
        # Bind the shared lookup tables into the single-arg callable
        # expected by _analyze.analyze_type_decls().
        return _analyze.analyze_decl(
            decl,
            typespecs,
            knowntypespecs,
            types,
            knowntypes,
            analyze_resolved=analyze_resolved,
        )
    _analyze.analyze_type_decls(types, analyze_decl, handle_unresolved)
    for decl in decls:
        if decl in types:
            # Already handled in the type-resolution pass above.
            resolved = types[decl]
        else:
            resolved = analyze_decl(decl)
        if resolved and handle_unresolved:
            # Sanity check: every type dependency must be resolved.
            typedeps, _ = resolved
            if not isinstance(typedeps, TypeDeclaration):
                if not typedeps or None in typedeps:
                    raise NotImplementedError((decl, resolved))

        yield decl, resolved
+
+
+#######################################
+# checks
+
def check_all(analysis, checks, *, failfast=False):
    """Yield (data, failure) for every failed check.

    Each check is called with *analysis* and yields (data, failure)
    pairs; pairs whose failure is None are skipped.  With *failfast*,
    a (None, None) sentinel pair is yielded after the first failure
    and iteration stops.
    """
    for check in checks or ():
        for data, failure in check(analysis):
            if failure is None:
                continue

            yield data, failure
            if failfast:
                # Signal the caller that we stopped early.
                yield None, None
                break
        else:
            # This check produced no early break; try the next one.
            continue
        # We failed fast.
        break
diff --git a/Tools/c-analyzer/c_analyzer/__main__.py b/Tools/c-analyzer/c_analyzer/__main__.py
new file mode 100644
index 0000000..1fd45b9
--- /dev/null
+++ b/Tools/c-analyzer/c_analyzer/__main__.py
@@ -0,0 +1,501 @@
+import io
+import logging
+import os.path
+import re
+import sys
+
+from c_common.logging import VERBOSITY, Printer
+from c_common.scriptutil import (
+ add_verbosity_cli,
+ add_traceback_cli,
+ add_sepval_cli,
+ add_files_cli,
+ add_commands_cli,
+ process_args_by_key,
+ configure_logger,
+ get_prog,
+ filter_filenames,
+ iter_marks,
+)
+from c_parser.info import KIND, is_type_decl
+from . import (
+ analyze as _analyze,
+ check_all as _check_all,
+ datafiles as _datafiles,
+)
+
+
# The decl kinds the tool reports on, in display order.
KINDS = [
    KIND.TYPEDEF,
    KIND.STRUCT,
    KIND.UNION,
    KIND.ENUM,
    KIND.FUNCTION,
    KIND.VARIABLE,
    KIND.STATEMENT,
]

logger = logging.getLogger(__name__)
+
+
+#######################################
+# table helpers
+
# Per-section table config: each value is either a
# (columns, kind-predicate, sortkey) tuple or the name of another
# entry to use as an alias (KIND keys alias their section names).
TABLE_SECTIONS = {
    'types': (
        ['kind', 'name', 'data', 'file'],
        is_type_decl,
        (lambda v: (v.kind.value, v.filename or '', v.name)),
    ),
    'typedefs': 'types',
    'structs': 'types',
    'unions': 'types',
    'enums': 'types',
    'functions': (
        ['name', 'data', 'file'],
        (lambda kind: kind is KIND.FUNCTION),
        (lambda v: (v.filename or '', v.name)),
    ),
    'variables': (
        ['name', 'parent', 'data', 'file'],
        (lambda kind: kind is KIND.VARIABLE),
        (lambda v: (v.filename or '', str(v.parent) if v.parent else '', v.name)),
    ),
    'statements': (
        ['file', 'parent', 'data'],
        (lambda kind: kind is KIND.STATEMENT),
        (lambda v: (v.filename or '', str(v.parent) if v.parent else '', v.name)),
    ),
    KIND.TYPEDEF: 'typedefs',
    KIND.STRUCT: 'structs',
    KIND.UNION: 'unions',
    KIND.ENUM: 'enums',
    KIND.FUNCTION: 'functions',
    KIND.VARIABLE: 'variables',
    KIND.STATEMENT: 'statements',
}
+
+
+def _render_table(items, columns, relroot=None):
+ # XXX improve this
+ header = '\t'.join(columns)
+ div = '--------------------'
+ yield header
+ yield div
+ total = 0
+ for item in items:
+ rowdata = item.render_rowdata(columns)
+ row = [rowdata[c] for c in columns]
+ if relroot and 'file' in columns:
+ index = columns.index('file')
+ row[index] = os.path.relpath(row[index], relroot)
+ yield '\t'.join(row)
+ total += 1
+ yield div
+ yield f'total: {total}'
+
+
def build_section(name, groupitems, *, relroot=None):
    """Return (items, render) for the named table section.

    *name* may be a section name or a KIND member; aliases in
    TABLE_SECTIONS are followed until the (columns, match_kind,
    sortkey) tuple is found.  *render* is a no-arg generator function
    yielding the section's text lines.
    """
    info = TABLE_SECTIONS[name]
    while type(info) is not tuple:
        # Follow aliases; a KIND key maps to its section name.
        if name in KINDS:
            name = info
        info = TABLE_SECTIONS[info]

    columns, match_kind, sortkey = info
    items = (v for v in groupitems if match_kind(v.kind))
    items = sorted(items, key=sortkey)
    def render():
        yield ''
        yield f'{name}:'
        yield ''
        for line in _render_table(items, columns, relroot):
            yield line
    return items, render
+
+
+#######################################
+# the checks
+
# Registry of named checks; currently empty (no checks wired up yet).
CHECKS = {
    #'globals': _check_globals,
}
+
+
def add_checks_cli(parser, checks=None, *, add_flags=None):
    """Add the --check option (and optional per-check flags) to *parser*.

    Returns the list of args-processor callables to run after parsing.
    """
    default = False
    if not checks:
        # Fall back to every registered check.
        checks = list(CHECKS)
        default = True
    elif isinstance(checks, str):
        checks = [checks]
    if (add_flags is None and len(checks) > 1) or default:
        add_flags = True

    process_checks = add_sepval_cli(parser, '--check', 'checks', checks)
    if add_flags:
        # Also expose each check as its own --<check> flag.
        for check in checks:
            parser.add_argument(f'--{check}', dest='checks',
                                action='append_const', const=check)
    return [
        process_checks,
    ]
+
+
def _get_check_handlers(fmt, printer, verbosity=VERBOSITY):
    """Return (handle_failure, handle_after, div) for the given format.

    *handle_failure* reports one check failure, *handle_after* runs
    after each failure, and *div* is the divider text printed between
    failures (None for no divider).  *printer* is unused in the
    handlers built here but kept for interface consistency.
    """
    div = None
    def handle_after():
        pass
    if not fmt:
        div = ''
        def handle_failure(failure, data):
            data = repr(data)
            if verbosity >= 3:
                logger.info(f'failure: {failure}')
                logger.info(f'data: {data}')
            else:
                # Fix: logger.warn() is a deprecated alias; use warning().
                logger.warning(f'failure: {failure} (data: {data})')
    elif fmt == 'raw':
        def handle_failure(failure, data):
            print(f'{failure!r} {data!r}')
    elif fmt == 'brief':
        def handle_failure(failure, data):
            parent = data.parent or ''
            funcname = parent if isinstance(parent, str) else parent.name
            name = f'({funcname}).{data.name}' if funcname else data.name
            failure = failure.split('\t')[0]
            print(f'{data.filename}:{name} - {failure}')
    elif fmt == 'summary':
        def handle_failure(failure, data):
            parent = data.parent or ''
            funcname = parent if isinstance(parent, str) else parent.name
            print(f'{data.filename:35}\t{funcname or "-":35}\t{data.name:40}\t{failure}')
    elif fmt == 'full':
        div = ''
        def handle_failure(failure, data):
            name = data.shortkey if data.kind is KIND.VARIABLE else data.name
            parent = data.parent or ''
            funcname = parent if isinstance(parent, str) else parent.name
            known = 'yes' if data.is_known else '*** NO ***'
            print(f'{data.kind.value} {name!r} failed ({failure})')
            print(f' file: {data.filename}')
            print(f' func: {funcname or "-"}')
            print(f' name: {data.name}')
            print(f' data: ...')
            print(f' type unknown: {known}')
    else:
        if fmt in FORMATS:
            raise NotImplementedError(fmt)
        raise ValueError(f'unsupported fmt {fmt!r}')
    return handle_failure, handle_after, div
+
+
+#######################################
+# the formats
+
def fmt_raw(analysis):
    """Yield each analyzed item rendered in 'raw' format."""
    for entry in analysis:
        for line in entry.render('raw'):
            yield line
+
+
def fmt_brief(analysis):
    """Yield a brief rendering of the analysis, grouped by decl kind."""
    # XXX Support sorting.
    items = sorted(analysis)
    for kind in KINDS:
        # Statements are not reported in the brief format.
        if kind is KIND.STATEMENT:
            continue
        for item in items:
            if item.kind is kind:
                yield from item.render('brief')
    yield f' total: {len(items)}'
+
+
def fmt_summary(analysis):
    """Yield a summary report: one table per decl group, plus a total."""
    # XXX Support sorting and grouping.
    items = list(analysis)

    for groupname in ('types', 'functions', 'variables', 'statements'):
        _, render = build_section(groupname, items)
        for line in render():
            yield line

    yield ''
    yield f'grand total: {len(items)}'
+
+
def fmt_full(analysis):
    """Yield the full rendering of every analyzed item, sorted by key."""
    # XXX Support sorting.
    ordered = sorted(analysis, key=lambda item: item.key)
    yield ''
    for item in ordered:
        yield from item.render('full')
        yield ''
    yield f'total: {len(ordered)}'
+
+
# Maps each --format choice to its render function.
FORMATS = {
    'raw': fmt_raw,
    'brief': fmt_brief,
    'summary': fmt_summary,
    'full': fmt_full,
}
+
+
def add_output_cli(parser, *, default='summary'):
    """Add the --format option to *parser*; return the args processor."""
    parser.add_argument('--format', dest='fmt', default=default,
                        choices=tuple(FORMATS))

    def process_args(args):
        # Nothing to post-process for the output options.
        return None
    return process_args
+
+
+#######################################
+# the commands
+
def _cli_check(parser, checks=None, **kwargs):
    """Set up the CLI for the "check" command; return args processors."""
    if isinstance(checks, str):
        checks = [checks]
    if checks is False:
        process_checks = None
    elif checks is None:
        process_checks = add_checks_cli(parser)
    elif len(checks) == 1 and type(checks) is not dict and re.match(r'^<.*>$', checks[0]):
        # A single "<name>" spec means a fixed check with no CLI option.
        check = checks[0][1:-1]
        def process_checks(args):
            args.checks = [check]
    else:
        process_checks = add_checks_cli(parser, checks=checks)
    process_output = add_output_cli(parser, default=None)
    process_files = add_files_cli(parser, **kwargs)
    return [
        process_checks,
        process_output,
        process_files,
    ]
+
+
def cmd_check(filenames, *,
              checks=None,
              ignored=None,
              fmt=None,
              relroot=None,
              failfast=False,
              iter_filenames=None,
              verbosity=VERBOSITY,
              _analyze=_analyze,
              _CHECKS=CHECKS,
              **kwargs
              ):
    """Analyze the given files and report every check failure.

    Exits (via sys.exit) with the number of failures if any check
    failed.  Remaining keyword arguments go to the analyzer.
    """
    if not checks:
        checks = _CHECKS
    elif isinstance(checks, str):
        checks = [checks]
    # Map check names to the registered check functions.
    checks = [_CHECKS[c] if isinstance(c, str) else c
              for c in checks]
    printer = Printer(verbosity)
    (handle_failure, handle_after, div
     ) = _get_check_handlers(fmt, printer, verbosity)

    filenames = filter_filenames(filenames, iter_filenames)

    logger.info('analyzing...')
    analyzed = _analyze(filenames, **kwargs)
    if relroot:
        analyzed.fix_filenames(relroot)

    logger.info('checking...')
    numfailed = 0
    for data, failure in _check_all(analyzed, checks, failfast=failfast):
        if data is None:
            # check_all() yields a (None, None) sentinel on failfast.
            printer.info('stopping after one failure')
            break
        if div is not None and numfailed > 0:
            printer.info(div)
        numfailed += 1
        handle_failure(failure, data)
        handle_after()

    printer.info('-------------------------')
    logger.info(f'total failures: {numfailed}')
    logger.info('done checking')

    if numfailed > 0:
        sys.exit(numfailed)
+
+
def _cli_analyze(parser, **kwargs):
    """Set up the CLI for the "analyze" command; return args processors."""
    process_output = add_output_cli(parser)
    process_files = add_files_cli(parser, **kwargs)
    return [
        process_output,
        process_files,
    ]
+
+
# XXX Support filtering by kind.
def cmd_analyze(filenames, *,
                fmt=None,
                iter_filenames=None,
                verbosity=None,
                _analyze=_analyze,
                formats=FORMATS,
                **kwargs
                ):
    """Analyze the given files and print a report in format *fmt*."""
    verbosity = verbosity if verbosity is not None else 3

    try:
        do_fmt = formats[fmt]
    except KeyError:
        raise ValueError(f'unsupported fmt {fmt!r}')

    filenames = filter_filenames(filenames, iter_filenames)
    if verbosity == 2:
        # Show a progress mark as each file is processed.
        def iter_filenames(filenames=filenames):
            marks = iter_marks()
            for filename in filenames:
                print(next(marks), end='')
                yield filename
        filenames = iter_filenames()
    elif verbosity > 2:
        # Show each filename as it is processed.
        def iter_filenames(filenames=filenames):
            for filename in filenames:
                # Fix: the f-string had no placeholder, so every file
                # printed the same literal text instead of its name.
                print(f'<{filename}>')
                yield filename
        filenames = iter_filenames()

    logger.info('analyzing...')
    analyzed = _analyze(filenames, **kwargs)

    for line in do_fmt(analyzed):
        print(line)
+
+
def _cli_data(parser, filenames=None, known=None):
    """Set up the subcommands of the "data" command; no processors."""
    ArgumentParser = type(parser)
    # A hidden parent parser for arguments shared by the subcommands.
    common = ArgumentParser(add_help=False)
    if filenames is None:
        common.add_argument('filenames', metavar='FILE', nargs='+')

    subs = parser.add_subparsers(dest='datacmd')

    sub = subs.add_parser('show', parents=[common])
    if known is None:
        sub.add_argument('--known', required=True)

    sub = subs.add_parser('dump')
    if known is None:
        sub.add_argument('--known')
    sub.add_argument('--show', action='store_true')

    sub = subs.add_parser('check')
    if known is None:
        sub.add_argument('--known', required=True)

    return None
+
+
def cmd_data(datacmd, filenames, known=None, *,
             _analyze=_analyze,
             formats=FORMATS,
             extracolumns=None,
             relroot=None,
             **kwargs
             ):
    """Run the requested "data" subcommand (show / dump / check)."""
    # "verbosity" may be left over from the common CLI args; drop it.
    kwargs.pop('verbosity', None)
    usestdout = kwargs.pop('show', None)
    if datacmd == 'show':
        # Print a summary of the known decls.
        do_fmt = formats['summary']
        if isinstance(known, str):
            known, _ = _datafiles.get_known(known, extracolumns, relroot)
        for line in do_fmt(known):
            print(line)
    elif datacmd == 'dump':
        # Analyze the files and write the results to the known-data
        # file (or stdout when requested or no file is given).
        analyzed = _analyze(filenames, **kwargs)
        if known is None or usestdout:
            outfile = io.StringIO()
            _datafiles.write_known(analyzed, outfile, extracolumns,
                                   relroot=relroot)
            print(outfile.getvalue())
        else:
            _datafiles.write_known(analyzed, known, extracolumns,
                                   relroot=relroot)
    elif datacmd == 'check':
        raise NotImplementedError(datacmd)
    else:
        raise ValueError(f'unsupported data command {datacmd!r}')
+
+
# Each entry: (description, [CLI-setup funcs], command func).
COMMANDS = {
    'check': (
        'analyze and fail if the given C source/header files have any problems',
        [_cli_check],
        cmd_check,
    ),
    'analyze': (
        'report on the state of the given C source/header files',
        [_cli_analyze],
        cmd_analyze,
    ),
    'data': (
        # Fix: typo "knwon" in the user-facing help text.
        'check/manage local data (e.g. known types, ignored vars, caches)',
        [_cli_data],
        cmd_data,
    ),
}
+
+
+#######################################
+# the script
+
def parse_args(argv=sys.argv[1:], prog=sys.argv[0], *, subset=None):
    """Parse the CLI args; return (cmd, ns, verbosity, traceback_cm).

    *ns* is the dict of parsed args to pass to the command function.
    """
    import argparse
    parser = argparse.ArgumentParser(
        prog=prog or get_prog(),
    )

    processors = add_commands_cli(
        parser,
        # v[1] is the list of per-command CLI-setup functions.
        commands={k: v[1] for k, v in COMMANDS.items()},
        commonspecs=[
            add_verbosity_cli,
            add_traceback_cli,
        ],
        subset=subset,
    )

    args = parser.parse_args(argv)
    ns = vars(args)

    cmd = ns.pop('cmd')

    verbosity, traceback_cm = process_args_by_key(
        args,
        processors[cmd],
        ['verbosity', 'traceback_cm'],
    )
    # "verbosity" is sent to the commands, so we put it back.
    args.verbosity = verbosity

    return cmd, ns, verbosity, traceback_cm
+
+
def main(cmd, cmd_kwargs):
    """Look up and run the named command with the given kwargs."""
    try:
        # Fix: each COMMANDS entry is (description, cli-specs, command
        # func); index 0 is the description *string*, which is not
        # callable.  The command function is the last element.
        run_cmd = COMMANDS[cmd][-1]
    except KeyError:
        raise ValueError(f'unsupported cmd {cmd!r}')
    run_cmd(**cmd_kwargs)
+
+
if __name__ == '__main__':
    # Parse the CLI args, configure logging, then run the command
    # inside the traceback-handling context manager.
    cmd, cmd_kwargs, verbosity, traceback_cm = parse_args()
    configure_logger(verbosity)
    with traceback_cm:
        main(cmd, cmd_kwargs)
diff --git a/Tools/c-analyzer/c_analyzer/analyze.py b/Tools/c-analyzer/c_analyzer/analyze.py
new file mode 100644
index 0000000..d8ae915
--- /dev/null
+++ b/Tools/c-analyzer/c_analyzer/analyze.py
@@ -0,0 +1,307 @@
+from c_parser.info import (
+ KIND,
+ TypeDeclaration,
+ POTSType,
+ FuncPtr,
+ is_pots,
+ is_funcptr,
+)
+from .info import (
+ IGNORED,
+ UNKNOWN,
+ is_system_type,
+ SystemType,
+)
+
+
def get_typespecs(typedecls):
    """Group the given type decls by shortkey; return the mapping."""
    grouped = {}
    for decl in typedecls:
        grouped.setdefault(decl.shortkey, []).append(decl)
    return grouped
+
+
def analyze_decl(decl, typespecs, knowntypespecs, types, knowntypes, *,
                 analyze_resolved=None):
    """Resolve *decl* and return (typedeps, extra), or None to skip.

    If *analyze_resolved* is provided, its result is returned instead.
    """
    resolved = resolve_decl(decl, typespecs, knowntypespecs, types)
    if resolved is None:
        # The decl is supposed to be skipped or ignored.
        return None
    if analyze_resolved is None:
        return resolved, None
    return analyze_resolved(resolved, decl, types, knowntypes)

# This alias helps us avoid name collisions.
_analyze_decl = analyze_decl
+
+
def analyze_type_decls(types, analyze_decl, handle_unresolved=True):
    """Resolve every type decl in *types* (a dict of decl -> result).

    Iterates to a fixed point: each pass resolves what it can, and
    decls whose dependencies are still pending are retried on the next
    pass.  Anything left over is handed to *handle_unresolved* (the
    default handler when True; skipped entirely when falsy).
    """
    unresolved = set(types)
    while unresolved:
        updated = []
        for decl in unresolved:
            resolved = analyze_decl(decl)
            if resolved is None:
                # The decl should be skipped or ignored.
                types[decl] = IGNORED
                updated.append(decl)
                continue
            typedeps, _ = resolved
            if typedeps is None:
                raise NotImplementedError(decl)
            if UNKNOWN in typedeps:
                # At least one dependency is unknown, so this decl
                # is not resolvable.
                types[decl] = UNKNOWN
                updated.append(decl)
                continue
            if None in typedeps:
                # XXX
                # Handle direct recursive types first.
                nonrecursive = 1
                if decl.kind is KIND.STRUCT or decl.kind is KIND.UNION:
                    nonrecursive = 0
                    i = 0
                    for member, dep in zip(decl.members, typedeps):
                        if dep is None:
                            if member.vartype.typespec != decl.shortkey:
                                nonrecursive += 1
                            else:
                                # Self-referential member: point the
                                # dependency back at the decl itself.
                                typedeps[i] = decl
                        i += 1
                if nonrecursive:
                    # We don't have all dependencies resolved yet.
                    continue
            types[decl] = resolved
            updated.append(decl)
        if updated:
            for decl in updated:
                unresolved.remove(decl)
        else:
            # XXX
            # Handle indirect recursive types.
            ...
            # We couldn't resolve the rest.
            # Let the caller deal with it!
            break
    if unresolved and handle_unresolved:
        if handle_unresolved is True:
            handle_unresolved = _handle_unresolved
        handle_unresolved(unresolved, types, analyze_decl)
+
+
def resolve_decl(decl, typespecs, knowntypespecs, types):
    """Return the list of type decls that *decl* depends on.

    Entries may be UNKNOWN (couldn't be resolved) or None (dependency
    not resolved yet).  Returns None when the decl should be skipped.
    """
    if decl.kind is KIND.ENUM:
        # Enums have no type dependencies.
        typedeps = []
    else:
        # Collect the vartypes whose types this decl depends on.
        if decl.kind is KIND.VARIABLE:
            vartypes = [decl.vartype]
        elif decl.kind is KIND.FUNCTION:
            vartypes = [decl.signature.returntype]
        elif decl.kind is KIND.TYPEDEF:
            vartypes = [decl.vartype]
        elif decl.kind is KIND.STRUCT or decl.kind is KIND.UNION:
            vartypes = [m.vartype for m in decl.members]
        else:
            # Skip this one!
            return None

        typedeps = []
        for vartype in vartypes:
            typespec = vartype.typespec
            if is_pots(typespec):
                typedecl = POTSType(typespec)
            elif is_system_type(typespec):
                typedecl = SystemType(typespec)
            elif is_funcptr(vartype):
                typedecl = FuncPtr(vartype)
            else:
                typedecl = find_typedecl(decl, typespec, typespecs)
                if typedecl is None:
                    # Fall back to the pre-loaded "known" types.
                    typedecl = find_typedecl(decl, typespec, knowntypespecs)
                elif not isinstance(typedecl, TypeDeclaration):
                    raise NotImplementedError(repr(typedecl))
                if typedecl is None:
                    # We couldn't find it!
                    typedecl = UNKNOWN
                elif typedecl not in types:
                    # XXX How can this happen?
                    typedecl = UNKNOWN
                elif types[typedecl] is UNKNOWN:
                    typedecl = UNKNOWN
                elif types[typedecl] is IGNORED:
                    # We don't care if it didn't resolve.
                    pass
                elif types[typedecl] is None:
                    # The typedecl for the typespec hasn't been resolved yet.
                    typedecl = None
            typedeps.append(typedecl)
    return typedeps
+
+
def find_typedecl(decl, typespec, typespecs):
    """Return the type decl matching *typespec*, relative to *decl*.

    *typespecs* maps shortkeys to lists of candidate type decls
    (see get_typespecs()).  Returns None if there is no suitable match.
    """
    specdecls = typespecs.get(typespec)
    if not specdecls:
        return None

    filename = decl.filename

    if len(specdecls) == 1:
        typedecl, = specdecls
        if '-' in typespec and typedecl.filename != filename:
            # Inlined types are always in the same file.
            return None
        return typedecl

    # Decide which one to return.
    candidates = []
    samefile = None
    for typedecl in specdecls:
        type_filename = typedecl.filename
        if type_filename == filename:
            if samefile is not None:
                # We expect type names to be unique in a file.
                raise NotImplementedError((decl, samefile, typedecl))
            samefile = typedecl
        elif filename.endswith('.c') and not type_filename.endswith('.h'):
            # If the decl is in a source file then we expect the
            # type to be in the same file or in a header file.
            continue
        candidates.append(typedecl)
    if not candidates:
        return None
    elif len(candidates) == 1:
        winner, = candidates
        # XXX Check for inline?
    elif '-' in typespec:
        # Inlined types are always in the same file.
        winner = samefile
    elif samefile is not None:
        # Favor types in the same file.
        winner = samefile
    else:
        # We don't know which to return.
        raise NotImplementedError((decl, candidates))

    return winner
+
+
+#############################
+# handling unresolved decls
+
class Skipped(TypeDeclaration):
    # A placeholder type decl for decls that were never resolved.
    def __init__(self):
        _file = _name = _data = _parent = None
        super().__init__(_file, _name, _data, _parent, _shortkey='<skipped>')
# Only the singleton is needed, so the class itself is discarded.
_SKIPPED = Skipped()
del Skipped
+
+
def _handle_unresolved(unresolved, types, analyze_decl):
    """Mark every remaining unresolved decl as skipped.

    This is the default "handle_unresolved" hook used by
    analyze_type_decls().
    """
    #raise NotImplementedError(unresolved)

    # Debug toggle: delete the second assignment to enable dump output.
    dump = True
    dump = False
    if dump:
        print()
    for decl in types:  # Preserve the original order.
        if decl not in unresolved:
            assert types[decl] is not None, decl
            if types[decl] in (UNKNOWN, IGNORED):
                # Treat unknown/ignored results as unresolved too.
                unresolved.add(decl)
                if dump:
                    _dump_unresolved(decl, types, analyze_decl)
                    print()
            else:
                assert types[decl][0] is not None, (decl, types[decl])
                assert None not in types[decl][0], (decl, types[decl])
        else:
            assert types[decl] is None
            if dump:
                _dump_unresolved(decl, types, analyze_decl)
                print()
    #raise NotImplementedError

    for decl in unresolved:
        types[decl] = ([_SKIPPED], None)

    # Every decl must have a (truthy) result at this point.
    for decl in types:
        assert types[decl]
+
+
def _dump_unresolved(decl, types, analyze_decl):
    """Print a debugging report about one unresolved decl.

    *decl* may be a decl, a shortkey string, or a (filename, shortkey)
    pair; the latter two are looked up in *types*.
    """
    if isinstance(decl, str):
        typespec = decl
        decl, = (d for d in types if d.shortkey == typespec)
    elif type(decl) is tuple:
        filename, typespec = decl
        if '-' in typespec:
            # Inlined types only match within the same file.
            found = [d for d in types
                     if d.shortkey == typespec and d.filename == filename]
            #if not found:
            #    raise NotImplementedError(decl)
            decl, = found
        else:
            found = [d for d in types if d.shortkey == typespec]
            if not found:
                print(f'*** {typespec} ???')
                return
                #raise NotImplementedError(decl)
            else:
                decl, = found
    resolved = analyze_decl(decl)
    if resolved:
        typedeps, _ = resolved or (None, None)

    if decl.kind is KIND.STRUCT or decl.kind is KIND.UNION:
        print(f'*** {decl.shortkey} {decl.filename}')
        # Report the resolution status of each member.
        for member, mtype in zip(decl.members, typedeps):
            typespec = member.vartype.typespec
            if typespec == decl.shortkey:
                # A direct self-reference.
                print(f' ~~~~: {typespec:20} - {member!r}')
                continue
            status = None
            if is_pots(typespec):
                mtype = typespec
                status = 'okay'
            elif is_system_type(typespec):
                mtype = typespec
                status = 'okay'
            elif mtype is None:
                # Look up the member's type decl directly.
                if '-' in member.vartype.typespec:
                    mtype, = [d for d in types
                              if d.shortkey == member.vartype.typespec
                              and d.filename == decl.filename]
                else:
                    found = [d for d in types
                             if d.shortkey == typespec]
                    if not found:
                        print(f' ???: {typespec:20}')
                        continue
                    mtype, = found
            if status is None:
                status = 'okay' if types.get(mtype) else 'oops'
            if mtype is _SKIPPED:
                status = 'okay'
                mtype = '<skipped>'
            elif isinstance(mtype, FuncPtr):
                status = 'okay'
                mtype = str(mtype.vartype)
            elif not isinstance(mtype, str):
                if hasattr(mtype, 'vartype'):
                    if is_funcptr(mtype.vartype):
                        status = 'okay'
                mtype = str(mtype).rpartition('(')[0].rstrip()
            status = ' okay' if status == 'okay' else f'--> {status}'
            print(f' {status}: {typespec:20} - {member!r} ({mtype})')
    else:
        print(f'*** {decl} ({decl.vartype!r})')
        if decl.vartype.typespec.startswith('struct ') or is_funcptr(decl):
            # Recurse into the underlying struct / func-ptr type.
            _dump_unresolved(
                (decl.filename, decl.vartype.typespec),
                types,
                analyze_decl,
            )
diff --git a/Tools/c-analyzer/c_analyzer/common/__init__.py b/Tools/c-analyzer/c_analyzer/common/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/Tools/c-analyzer/c_analyzer/common/__init__.py
+++ /dev/null
diff --git a/Tools/c-analyzer/c_analyzer/common/files.py b/Tools/c-analyzer/c_analyzer/common/files.py
deleted file mode 100644
index a8a0447..0000000
--- a/Tools/c-analyzer/c_analyzer/common/files.py
+++ /dev/null
@@ -1,124 +0,0 @@
-import glob
-import os
-import os.path
-
-# XXX need tests:
-# * walk_tree()
-# * glob_tree()
-# * iter_files_by_suffix()
-
-
-C_SOURCE_SUFFIXES = ('.c', '.h')
-
-
-def _walk_tree(root, *,
- _walk=os.walk,
- ):
- # A wrapper around os.walk that resolves the filenames.
- for parent, _, names in _walk(root):
- for name in names:
- yield os.path.join(parent, name)
-
-
-def walk_tree(root, *,
- suffix=None,
- walk=_walk_tree,
- ):
- """Yield each file in the tree under the given directory name.
-
- If "suffix" is provided then only files with that suffix will
- be included.
- """
- if suffix and not isinstance(suffix, str):
- raise ValueError('suffix must be a string')
-
- for filename in walk(root):
- if suffix and not filename.endswith(suffix):
- continue
- yield filename
-
-
-def glob_tree(root, *,
- suffix=None,
- _glob=glob.iglob,
- _escape=glob.escape,
- _join=os.path.join,
- ):
- """Yield each file in the tree under the given directory name.
-
- If "suffix" is provided then only files with that suffix will
- be included.
- """
- suffix = suffix or ''
- if not isinstance(suffix, str):
- raise ValueError('suffix must be a string')
-
- for filename in _glob(_join(_escape(root), f'*{suffix}')):
- yield filename
- for filename in _glob(_join(_escape(root), f'**/*{suffix}')):
- yield filename
-
-
-def iter_files(root, suffix=None, relparent=None, *,
- get_files=None,
- _glob=glob_tree,
- _walk=walk_tree,
- ):
- """Yield each file in the tree under the given directory name.
-
- If "root" is a non-string iterable then do the same for each of
- those trees.
-
- If "suffix" is provided then only files with that suffix will
- be included.
-
- if "relparent" is provided then it is used to resolve each
- filename as a relative path.
- """
- if get_files is None:
- get_files = os.walk
- if not isinstance(root, str):
- roots = root
- for root in roots:
- yield from iter_files(root, suffix, relparent,
- get_files=get_files,
- _glob=_glob, _walk=_walk)
- return
-
- # Use the right "walk" function.
- if get_files in (glob.glob, glob.iglob, glob_tree):
- get_files = _glob
- else:
- _files = _walk_tree if get_files in (os.walk, walk_tree) else get_files
- get_files = (lambda *a, **k: _walk(*a, walk=_files, **k))
-
- # Handle a single suffix.
- if suffix and not isinstance(suffix, str):
- filenames = get_files(root)
- suffix = tuple(suffix)
- else:
- filenames = get_files(root, suffix=suffix)
- suffix = None
-
- for filename in filenames:
- if suffix and not isinstance(suffix, str): # multiple suffixes
- if not filename.endswith(suffix):
- continue
- if relparent:
- filename = os.path.relpath(filename, relparent)
- yield filename
-
-
-def iter_files_by_suffix(root, suffixes, relparent=None, *,
- walk=walk_tree,
- _iter_files=iter_files,
- ):
- """Yield each file in the tree that has the given suffixes.
-
- Unlike iter_files(), the results are in the original suffix order.
- """
- if isinstance(suffixes, str):
- suffixes = [suffixes]
- # XXX Ignore repeated suffixes?
- for suffix in suffixes:
- yield from _iter_files(root, suffix, relparent)
diff --git a/Tools/c-analyzer/c_analyzer/common/info.py b/Tools/c-analyzer/c_analyzer/common/info.py
deleted file mode 100644
index 1a853a4..0000000
--- a/Tools/c-analyzer/c_analyzer/common/info.py
+++ /dev/null
@@ -1,138 +0,0 @@
-from collections import namedtuple
-import re
-
-from .util import classonly, _NTBase
-
-# XXX need tests:
-# * ID.match()
-
-
-UNKNOWN = '???'
-
-# Does not start with digit and contains at least one letter.
-NAME_RE = re.compile(r'(?!\d)(?=.*?[A-Za-z])\w+', re.ASCII)
-
-
-class ID(_NTBase, namedtuple('ID', 'filename funcname name')):
- """A unique ID for a single symbol or declaration."""
-
- __slots__ = ()
- # XXX Add optional conditions (tuple of strings) field.
- #conditions = Slot()
-
- @classonly
- def from_raw(cls, raw):
- if not raw:
- return None
- if isinstance(raw, str):
- return cls(None, None, raw)
- try:
- name, = raw
- filename = None
- except ValueError:
- try:
- filename, name = raw
- except ValueError:
- return super().from_raw(raw)
- return cls(filename, None, name)
-
- def __new__(cls, filename, funcname, name):
- self = super().__new__(
- cls,
- filename=str(filename) if filename else None,
- funcname=str(funcname) if funcname else None,
- name=str(name) if name else None,
- )
- #cls.conditions.set(self, tuple(str(s) if s else None
- # for s in conditions or ()))
- return self
-
- def validate(self):
- """Fail if the object is invalid (i.e. init with bad data)."""
- if not self.name:
- raise TypeError('missing name')
- if not NAME_RE.fullmatch(self.name):
- raise ValueError(
- f'name must be an identifier, got {self.name!r}')
-
- # Symbols from a binary might not have filename/funcname info.
-
- if self.funcname:
- if not self.filename:
- raise TypeError('missing filename')
- if not NAME_RE.fullmatch(self.funcname) and self.funcname != UNKNOWN:
- raise ValueError(
- f'name must be an identifier, got {self.funcname!r}')
-
- # XXX Require the filename (at least UNKONWN)?
- # XXX Check the filename?
-
- @property
- def islocal(self):
- return self.funcname is not None
-
- def match(self, other, *,
- match_files=(lambda f1, f2: f1 == f2),
- ):
- """Return True if the two match.
-
- At least one of the two must be completely valid (no UNKNOWN
- anywhere). Otherwise False is returned. The remaining one
- *may* have UNKNOWN for both funcname and filename. It must
- have a valid name though.
-
- The caller is responsible for knowing which of the two is valid
- (and which to use if both are valid).
- """
- # First check the name.
- if self.name is None:
- return False
- if other.name != self.name:
- return False
-
- # Then check the filename.
- if self.filename is None:
- return False
- if other.filename is None:
- return False
- if self.filename == UNKNOWN:
- # "other" must be the valid one.
- if other.funcname == UNKNOWN:
- return False
- elif self.funcname != UNKNOWN:
- # XXX Try matching funcname even though we don't
- # know the filename?
- raise NotImplementedError
- else:
- return True
- elif other.filename == UNKNOWN:
- # "self" must be the valid one.
- if self.funcname == UNKNOWN:
- return False
- elif other.funcname != UNKNOWN:
- # XXX Try matching funcname even though we don't
- # know the filename?
- raise NotImplementedError
- else:
- return True
- elif not match_files(self.filename, other.filename):
- return False
-
- # Finally, check the funcname.
- if self.funcname == UNKNOWN:
- # "other" must be the valid one.
- if other.funcname == UNKNOWN:
- return False
- else:
- return other.funcname is not None
- elif other.funcname == UNKNOWN:
- # "self" must be the valid one.
- if self.funcname == UNKNOWN:
- return False
- else:
- return self.funcname is not None
- elif self.funcname == other.funcname:
- # Both are valid.
- return True
-
- return False
diff --git a/Tools/c-analyzer/c_analyzer/common/show.py b/Tools/c-analyzer/c_analyzer/common/show.py
deleted file mode 100644
index 5f3cb1c..0000000
--- a/Tools/c-analyzer/c_analyzer/common/show.py
+++ /dev/null
@@ -1,11 +0,0 @@
-
-def basic(variables, *,
- _print=print):
- """Print each row simply."""
- for var in variables:
- if var.funcname:
- line = f'{var.filename}:{var.funcname}():{var.name}'
- else:
- line = f'{var.filename}:{var.name}'
- line = f'{line:<64} {var.vartype}'
- _print(line)
diff --git a/Tools/c-analyzer/c_analyzer/common/util.py b/Tools/c-analyzer/c_analyzer/common/util.py
deleted file mode 100644
index 43d0bb6..0000000
--- a/Tools/c-analyzer/c_analyzer/common/util.py
+++ /dev/null
@@ -1,243 +0,0 @@
-import csv
-import subprocess
-
-
-_NOT_SET = object()
-
-
-def run_cmd(argv, **kwargs):
- proc = subprocess.run(
- argv,
- #capture_output=True,
- #stderr=subprocess.STDOUT,
- stdout=subprocess.PIPE,
- text=True,
- check=True,
- **kwargs
- )
- return proc.stdout
-
-
-def read_tsv(infile, header, *,
- _open=open,
- _get_reader=csv.reader,
- ):
- """Yield each row of the given TSV (tab-separated) file."""
- if isinstance(infile, str):
- with _open(infile, newline='') as infile:
- yield from read_tsv(infile, header,
- _open=_open,
- _get_reader=_get_reader,
- )
- return
- lines = iter(infile)
-
- # Validate the header.
- try:
- actualheader = next(lines).strip()
- except StopIteration:
- actualheader = ''
- if actualheader != header:
- raise ValueError(f'bad header {actualheader!r}')
-
- for row in _get_reader(lines, delimiter='\t'):
- yield tuple(v.strip() for v in row)
-
-
-def write_tsv(outfile, header, rows, *,
- _open=open,
- _get_writer=csv.writer,
- ):
- """Write each of the rows to the given TSV (tab-separated) file."""
- if isinstance(outfile, str):
- with _open(outfile, 'w', newline='') as outfile:
- return write_tsv(outfile, header, rows,
- _open=_open,
- _get_writer=_get_writer,
- )
-
- if isinstance(header, str):
- header = header.split('\t')
- writer = _get_writer(outfile, delimiter='\t')
- writer.writerow(header)
- for row in rows:
- writer.writerow('' if v is None else str(v)
- for v in row)
-
-
-class Slot:
- """A descriptor that provides a slot.
-
- This is useful for types that can't have slots via __slots__,
- e.g. tuple subclasses.
- """
-
- __slots__ = ('initial', 'default', 'readonly', 'instances', 'name')
-
- def __init__(self, initial=_NOT_SET, *,
- default=_NOT_SET,
- readonly=False,
- ):
- self.initial = initial
- self.default = default
- self.readonly = readonly
-
- # The instance cache is not inherently tied to the normal
- # lifetime of the instances. So must do something in order to
- # avoid keeping the instances alive by holding a reference here.
- # Ideally we would use weakref.WeakValueDictionary to do this.
- # However, most builtin types do not support weakrefs. So
- # instead we monkey-patch __del__ on the attached class to clear
- # the instance.
- self.instances = {}
- self.name = None
-
- def __set_name__(self, cls, name):
- if self.name is not None:
- raise TypeError('already used')
- self.name = name
- try:
- slotnames = cls.__slot_names__
- except AttributeError:
- slotnames = cls.__slot_names__ = []
- slotnames.append(name)
- self._ensure___del__(cls, slotnames)
-
- def __get__(self, obj, cls):
- if obj is None: # called on the class
- return self
- try:
- value = self.instances[id(obj)]
- except KeyError:
- if self.initial is _NOT_SET:
- value = self.default
- else:
- value = self.initial
- self.instances[id(obj)] = value
- if value is _NOT_SET:
- raise AttributeError(self.name)
- # XXX Optionally make a copy?
- return value
-
- def __set__(self, obj, value):
- if self.readonly:
- raise AttributeError(f'{self.name} is readonly')
- # XXX Optionally coerce?
- self.instances[id(obj)] = value
-
- def __delete__(self, obj):
- if self.readonly:
- raise AttributeError(f'{self.name} is readonly')
- self.instances[id(obj)] = self.default # XXX refleak?
-
- def _ensure___del__(self, cls, slotnames): # See the comment in __init__().
- try:
- old___del__ = cls.__del__
- except AttributeError:
- old___del__ = (lambda s: None)
- else:
- if getattr(old___del__, '_slotted', False):
- return
-
- def __del__(_self):
- for name in slotnames:
- delattr(_self, name)
- old___del__(_self)
- __del__._slotted = True
- cls.__del__ = __del__
-
- def set(self, obj, value):
- """Update the cached value for an object.
-
- This works even if the descriptor is read-only. This is
- particularly useful when initializing the object (e.g. in
- its __new__ or __init__).
- """
- self.instances[id(obj)] = value
-
-
-class classonly:
- """A non-data descriptor that makes a value only visible on the class.
-
- This is like the "classmethod" builtin, but does not show up on
- instances of the class. It may be used as a decorator.
- """
-
- def __init__(self, value):
- self.value = value
- self.getter = classmethod(value).__get__
- self.name = None
-
- def __set_name__(self, cls, name):
- if self.name is not None:
- raise TypeError('already used')
- self.name = name
-
- def __get__(self, obj, cls):
- if obj is not None:
- raise AttributeError(self.name)
- # called on the class
- return self.getter(None, cls)
-
-
-class _NTBase:
-
- __slots__ = ()
-
- @classonly
- def from_raw(cls, raw):
- if not raw:
- return None
- elif isinstance(raw, cls):
- return raw
- elif isinstance(raw, str):
- return cls.from_string(raw)
- else:
- if hasattr(raw, 'items'):
- return cls(**raw)
- try:
- args = tuple(raw)
- except TypeError:
- pass
- else:
- return cls(*args)
- raise NotImplementedError
-
- @classonly
- def from_string(cls, value):
- """Return a new instance based on the given string."""
- raise NotImplementedError
-
- @classmethod
- def _make(cls, iterable): # The default _make() is not subclass-friendly.
- return cls.__new__(cls, *iterable)
-
- # XXX Always validate?
- #def __init__(self, *args, **kwargs):
- # self.validate()
-
- # XXX The default __repr__() is not subclass-friendly (where the name changes).
- #def __repr__(self):
- # _, _, sig = super().__repr__().partition('(')
- # return f'{self.__class__.__name__}({sig}'
-
- # To make sorting work with None:
- def __lt__(self, other):
- try:
- return super().__lt__(other)
- except TypeError:
- if None in self:
- return True
- elif None in other:
- return False
- else:
- raise
-
- def validate(self):
- return
-
- # XXX Always validate?
- #def _replace(self, **kwargs):
- # obj = super()._replace(**kwargs)
- # obj.validate()
- # return obj
diff --git a/Tools/c-analyzer/c_analyzer/datafiles.py b/Tools/c-analyzer/c_analyzer/datafiles.py
new file mode 100644
index 0000000..0de438c
--- /dev/null
+++ b/Tools/c-analyzer/c_analyzer/datafiles.py
@@ -0,0 +1,109 @@
+import c_common.tables as _tables
+import c_parser.info as _info
+import c_parser.datafiles as _parser
+from . import analyze as _analyze
+
+
+#############################
+# "known" decls
+
+EXTRA_COLUMNS = [
+ #'typedecl',
+]
+
+
+def analyze_known(known, *,
+ analyze_resolved=None,
+ handle_unresolved=True,
+ ):
+ knowntypes = knowntypespecs = {}
+ collated = _info.collate_by_kind_group(known)
+ types = {decl: None for decl in collated['type']}
+ typespecs = _analyze.get_typespecs(types)
+ def analyze_decl(decl):
+ return _analyze.analyze_decl(
+ decl,
+ typespecs,
+ knowntypespecs,
+ types,
+ knowntypes,
+ analyze_resolved=analyze_resolved,
+ )
+ _analyze.analyze_type_decls(types, analyze_decl, handle_unresolved)
+ return types, typespecs
+
+
+def get_known(known, extracolumns=None, *,
+ analyze_resolved=None,
+ handle_unresolved=True,
+ relroot=None,
+ ):
+ if isinstance(known, str):
+ known = read_known(known, extracolumns, relroot)
+ return analyze_known(
+ known,
+ handle_unresolved=handle_unresolved,
+ analyze_resolved=analyze_resolved,
+ )
+
+
+def read_known(infile, extracolumns=None, relroot=None):
+ extracolumns = EXTRA_COLUMNS + (
+ list(extracolumns) if extracolumns else []
+ )
+ known = {}
+ for decl, extra in _parser.iter_decls_tsv(infile, extracolumns, relroot):
+ known[decl] = extra
+ return known
+
+
+def write_known(rows, outfile, extracolumns=None, *,
+ relroot=None,
+ backup=True,
+ ):
+ extracolumns = EXTRA_COLUMNS + (
+ list(extracolumns) if extracolumns else []
+ )
+ _parser.write_decls_tsv(
+ rows,
+ outfile,
+ extracolumns,
+ relroot=relroot,
+ backup=backup,
+ )
+
+
+#############################
+# ignored vars
+
+IGNORED_COLUMNS = [
+ 'filename',
+ 'funcname',
+ 'name',
+ 'reason',
+]
+IGNORED_HEADER = '\t'.join(IGNORED_COLUMNS)
+
+
+def read_ignored(infile):
+ return dict(_iter_ignored(infile))
+
+
+def _iter_ignored(infile):
+ for row in _tables.read_table(infile, IGNORED_HEADER, sep='\t'):
+ *varidinfo, reason = row
+ varid = _info.DeclID.from_row(varidinfo)
+ yield varid, reason
+
+
+def write_ignored(variables, outfile):
+ raise NotImplementedError
+ reason = '???'
+ #if not isinstance(varid, DeclID):
+ # varid = getattr(varid, 'parsed', varid).id
+ _tables.write_table(
+ outfile,
+ IGNORED_HEADER,
+ sep='\t',
+        rows=(r.render_rowdata() + (reason,) for r in variables),
+ )
diff --git a/Tools/c-analyzer/c_analyzer/info.py b/Tools/c-analyzer/c_analyzer/info.py
new file mode 100644
index 0000000..23d7761
--- /dev/null
+++ b/Tools/c-analyzer/c_analyzer/info.py
@@ -0,0 +1,353 @@
+from collections import namedtuple
+
+from c_common.clsutil import classonly
+import c_common.misc as _misc
+from c_parser.info import (
+ KIND,
+ HighlevelParsedItem,
+ Declaration,
+ TypeDeclaration,
+ is_type_decl,
+ is_process_global,
+)
+
+
+IGNORED = _misc.Labeled('IGNORED')
+UNKNOWN = _misc.Labeled('UNKNOWN')
+
+
+# XXX Use known.tsv for these?
+SYSTEM_TYPES = {
+ 'int8_t',
+ 'uint8_t',
+ 'int16_t',
+ 'uint16_t',
+ 'int32_t',
+ 'uint32_t',
+ 'int64_t',
+ 'uint64_t',
+ 'size_t',
+ 'ssize_t',
+ 'intptr_t',
+ 'uintptr_t',
+ 'wchar_t',
+ '',
+ # OS-specific
+ 'pthread_cond_t',
+ 'pthread_mutex_t',
+ 'pthread_key_t',
+ 'atomic_int',
+ 'atomic_uintptr_t',
+ '',
+ # lib-specific
+ 'WINDOW', # curses
+ 'XML_LChar',
+ 'XML_Size',
+ 'XML_Parser',
+ 'enum XML_Error',
+ 'enum XML_Status',
+ '',
+}
+
+
+def is_system_type(typespec):
+ return typespec in SYSTEM_TYPES
+
+
+class SystemType(TypeDeclaration):
+
+ def __init__(self, name):
+ super().__init__(None, name, None, None, _shortkey=name)
+
+
+class Analyzed:
+ _locked = False
+
+ @classonly
+ def is_target(cls, raw):
+ if isinstance(raw, HighlevelParsedItem):
+ return True
+ else:
+ return False
+
+ @classonly
+ def from_raw(cls, raw, **extra):
+ if isinstance(raw, cls):
+ if extra:
+ # XXX ?
+ raise NotImplementedError((raw, extra))
+ #return cls(raw.item, raw.typedecl, **raw._extra, **extra)
+ else:
+            return raw
+ elif cls.is_target(raw):
+ return cls(raw, **extra)
+ else:
+ raise NotImplementedError((raw, extra))
+
+ @classonly
+ def from_resolved(cls, item, resolved, **extra):
+ if isinstance(resolved, TypeDeclaration):
+ return cls(item, typedecl=resolved, **extra)
+ else:
+ typedeps, extra = cls._parse_raw_resolved(item, resolved, extra)
+ if item.kind is KIND.ENUM:
+ if typedeps:
+ raise NotImplementedError((item, resolved, extra))
+ elif not typedeps:
+ raise NotImplementedError((item, resolved, extra))
+ return cls(item, typedeps, **extra or {})
+
+ @classonly
+ def _parse_raw_resolved(cls, item, resolved, extra_extra):
+ if resolved in (UNKNOWN, IGNORED):
+ return resolved, None
+ try:
+ typedeps, extra = resolved
+ except (TypeError, ValueError):
+ typedeps = extra = None
+ if extra:
+ # The resolved data takes precedence.
+ extra = dict(extra_extra, **extra)
+ if isinstance(typedeps, TypeDeclaration):
+ return typedeps, extra
+ elif typedeps in (None, UNKNOWN):
+ # It is still effectively unresolved.
+ return UNKNOWN, extra
+ elif None in typedeps or UNKNOWN in typedeps:
+ # It is still effectively unresolved.
+ return typedeps, extra
+ elif any(not isinstance(td, TypeDeclaration) for td in typedeps):
+ raise NotImplementedError((item, typedeps, extra))
+ return typedeps, extra
+
+ def __init__(self, item, typedecl=None, **extra):
+ assert item is not None
+ self.item = item
+ if typedecl in (UNKNOWN, IGNORED):
+ pass
+ elif item.kind is KIND.STRUCT or item.kind is KIND.UNION:
+ if isinstance(typedecl, TypeDeclaration):
+ raise NotImplementedError(item, typedecl)
+ elif typedecl is None:
+ typedecl = UNKNOWN
+ else:
+ typedecl = [UNKNOWN if d is None else d for d in typedecl]
+ elif typedecl is None:
+ typedecl = UNKNOWN
+ elif typedecl and not isinstance(typedecl, TypeDeclaration):
+ # All the other decls have a single type decl.
+ typedecl, = typedecl
+ if typedecl is None:
+ typedecl = UNKNOWN
+ self.typedecl = typedecl
+ self._extra = extra
+ self._locked = True
+
+ self._validate()
+
+ def _validate(self):
+ item = self.item
+ extra = self._extra
+ # Check item.
+ if not isinstance(item, HighlevelParsedItem):
+ raise ValueError(f'"item" must be a high-level parsed item, got {item!r}')
+ # Check extra.
+ for key, value in extra.items():
+ if key.startswith('_'):
+ raise ValueError(f'extra items starting with {"_"!r} not allowed, got {extra!r}')
+ if hasattr(item, key) and not callable(getattr(item, key)):
+ raise ValueError(f'extra cannot override item, got {value!r} for key {key!r}')
+
+ def __repr__(self):
+ kwargs = [
+ f'item={self.item!r}',
+ f'typedecl={self.typedecl!r}',
+ *(f'{k}={v!r}' for k, v in self._extra.items())
+ ]
+ return f'{type(self).__name__}({", ".join(kwargs)})'
+
+ def __str__(self):
+ try:
+ return self._str
+ except AttributeError:
+ self._str, = self.render('line')
+ return self._str
+
+ def __hash__(self):
+ return hash(self.item)
+
+ def __eq__(self, other):
+ if isinstance(other, Analyzed):
+ return self.item == other.item
+ elif isinstance(other, HighlevelParsedItem):
+ return self.item == other
+ elif type(other) is tuple:
+ return self.item == other
+ else:
+ return NotImplemented
+
+ def __gt__(self, other):
+ if isinstance(other, Analyzed):
+ return self.item > other.item
+ elif isinstance(other, HighlevelParsedItem):
+ return self.item > other
+ elif type(other) is tuple:
+ return self.item > other
+ else:
+ return NotImplemented
+
+ def __dir__(self):
+ names = set(super().__dir__())
+ names.update(self._extra)
+ names.remove('_locked')
+ return sorted(names)
+
+ def __getattr__(self, name):
+ if name.startswith('_'):
+ raise AttributeError(name)
+ # The item takes precedence over the extra data (except if callable).
+ try:
+ value = getattr(self.item, name)
+ if callable(value):
+ raise AttributeError(name)
+ except AttributeError:
+ try:
+ value = self._extra[name]
+ except KeyError:
+ pass
+ else:
+ # Speed things up the next time.
+ self.__dict__[name] = value
+ return value
+ raise # re-raise
+ else:
+ return value
+
+ def __setattr__(self, name, value):
+ if self._locked and name != '_str':
+ raise AttributeError(f'readonly ({name})')
+ super().__setattr__(name, value)
+
+ def __delattr__(self, name):
+ if self._locked:
+ raise AttributeError(f'readonly ({name})')
+ super().__delattr__(name)
+
+ @property
+ def decl(self):
+ if not isinstance(self.item, Declaration):
+ raise AttributeError('decl')
+ return self.item
+
+ @property
+ def signature(self):
+ # XXX vartype...
+ ...
+
+ @property
+ def istype(self):
+ return is_type_decl(self.item.kind)
+
+ @property
+ def is_known(self):
+ if self.typedecl in (UNKNOWN, IGNORED):
+ return False
+ elif isinstance(self.typedecl, TypeDeclaration):
+ return True
+ else:
+ return UNKNOWN not in self.typedecl
+
+ def fix_filename(self, relroot):
+ self.item.fix_filename(relroot)
+
+ def as_rowdata(self, columns=None):
+        # XXX finish!
+ return self.item.as_rowdata(columns)
+
+ def render_rowdata(self, columns=None):
+        # XXX finish!
+ return self.item.render_rowdata(columns)
+
+ def render(self, fmt='line', *, itemonly=False):
+ if fmt == 'raw':
+ yield repr(self)
+ return
+ rendered = self.item.render(fmt)
+ if itemonly or not self._extra:
+ yield from rendered
+ return
+ extra = self._render_extra(fmt)
+ if not extra:
+ yield from rendered
+ elif fmt in ('brief', 'line'):
+ rendered, = rendered
+ extra, = extra
+ yield f'{rendered}\t{extra}'
+ elif fmt == 'summary':
+ raise NotImplementedError(fmt)
+ elif fmt == 'full':
+ yield from rendered
+ for line in extra:
+ yield f'\t{line}'
+ else:
+ raise NotImplementedError(fmt)
+
+ def _render_extra(self, fmt):
+ if fmt in ('brief', 'line'):
+ yield str(self._extra)
+ else:
+ raise NotImplementedError(fmt)
+
+
+class Analysis:
+
+ _item_class = Analyzed
+
+ @classonly
+ def build_item(cls, info, resolved=None, **extra):
+ if resolved is None:
+ return cls._item_class.from_raw(info, **extra)
+ else:
+ return cls._item_class.from_resolved(info, resolved, **extra)
+
+ @classmethod
+ def from_results(cls, results):
+ self = cls()
+ for info, resolved in results:
+ self._add_result(info, resolved)
+ return self
+
+ def __init__(self, items=None):
+ self._analyzed = {type(self).build_item(item): None
+ for item in items or ()}
+
+ def __repr__(self):
+ return f'{type(self).__name__}({list(self._analyzed.keys())})'
+
+ def __iter__(self):
+ #yield from self.types
+ #yield from self.functions
+ #yield from self.variables
+ yield from self._analyzed
+
+ def __len__(self):
+ return len(self._analyzed)
+
+ def __getitem__(self, key):
+ if type(key) is int:
+ for i, val in enumerate(self._analyzed):
+ if i == key:
+ return val
+ else:
+ raise IndexError(key)
+ else:
+ return self._analyzed[key]
+
+ def fix_filenames(self, relroot):
+ for item in self._analyzed:
+ item.fix_filename(relroot)
+
+ def _add_result(self, info, resolved):
+ analyzed = type(self).build_item(info, resolved)
+ self._analyzed[analyzed] = None
+ return analyzed
diff --git a/Tools/c-analyzer/c_analyzer/parser/__init__.py b/Tools/c-analyzer/c_analyzer/parser/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/Tools/c-analyzer/c_analyzer/parser/__init__.py
+++ /dev/null
diff --git a/Tools/c-analyzer/c_analyzer/parser/declarations.py b/Tools/c-analyzer/c_analyzer/parser/declarations.py
deleted file mode 100644
index f37072c..0000000
--- a/Tools/c-analyzer/c_analyzer/parser/declarations.py
+++ /dev/null
@@ -1,339 +0,0 @@
-import re
-import shlex
-import subprocess
-
-from ..common.info import UNKNOWN
-
-from . import source
-
-
-IDENTIFIER = r'(?:[a-zA-z]|_+[a-zA-Z0-9]\w*)'
-
-TYPE_QUAL = r'(?:const|volatile)'
-
-VAR_TYPE_SPEC = r'''(?:
- void |
- (?:
- (?:(?:un)?signed\s+)?
- (?:
- char |
- short |
- int |
- long |
- long\s+int |
- long\s+long
- ) |
- ) |
- float |
- double |
- {IDENTIFIER} |
- (?:struct|union)\s+{IDENTIFIER}
- )'''
-
-POINTER = rf'''(?:
- (?:\s+const)?\s*[*]
- )'''
-
-#STRUCT = r'''(?:
-# (?:struct|(struct\s+%s))\s*[{]
-# [^}]*
-# [}]
-# )''' % (IDENTIFIER)
-#UNION = r'''(?:
-# (?:union|(union\s+%s))\s*[{]
-# [^}]*
-# [}]
-# )''' % (IDENTIFIER)
-#DECL_SPEC = rf'''(?:
-# ({VAR_TYPE_SPEC}) |
-# ({STRUCT}) |
-# ({UNION})
-# )'''
-
-FUNC_START = rf'''(?:
- (?:
- (?:
- extern |
- static |
- static\s+inline
- )\s+
- )?
- #(?:const\s+)?
- {VAR_TYPE_SPEC}
- )'''
-#GLOBAL_VAR_START = rf'''(?:
-# (?:
-# (?:
-# extern |
-# static
-# )\s+
-# )?
-# (?:
-# {TYPE_QUAL}
-# (?:\s+{TYPE_QUAL})?
-# )?\s+
-# {VAR_TYPE_SPEC}
-# )'''
-GLOBAL_DECL_START_RE = re.compile(rf'''
- ^
- (?:
- ({FUNC_START})
- )
- ''', re.VERBOSE)
-
-LOCAL_VAR_START = rf'''(?:
- (?:
- (?:
- register |
- static
- )\s+
- )?
- (?:
- (?:
- {TYPE_QUAL}
- (?:\s+{TYPE_QUAL})?
- )\s+
- )?
- {VAR_TYPE_SPEC}
- {POINTER}?
- )'''
-LOCAL_STMT_START_RE = re.compile(rf'''
- ^
- (?:
- ({LOCAL_VAR_START})
- )
- ''', re.VERBOSE)
-
-
-def iter_global_declarations(lines):
- """Yield (decl, body) for each global declaration in the given lines.
-
- For function definitions the header is reduced to one line and
- the body is provided as-is. For other compound declarations (e.g.
- struct) the entire declaration is reduced to one line and "body"
- is None. Likewise for simple declarations (e.g. variables).
-
- Declarations inside function bodies are ignored, though their text
- is provided in the function body.
- """
- # XXX Bail out upon bogus syntax.
- lines = source.iter_clean_lines(lines)
- for line in lines:
- if not GLOBAL_DECL_START_RE.match(line):
- continue
- # We only need functions here, since we only need locals for now.
- if line.endswith(';'):
- continue
- if line.endswith('{') and '(' not in line:
- continue
-
- # Capture the function.
- # (assume no func is a one-liner)
- decl = line
- while '{' not in line: # assume no inline structs, etc.
- try:
- line = next(lines)
- except StopIteration:
- return
- decl += ' ' + line
-
- body, end = _extract_block(lines)
- if end is None:
- return
- assert end == '}'
- yield (f'{decl}\n{body}\n{end}', body)
-
-
-def iter_local_statements(lines):
- """Yield (lines, blocks) for each statement in the given lines.
-
- For simple statements, "blocks" is None and the statement is reduced
- to a single line. For compound statements, "blocks" is a pair of
- (header, body) for each block in the statement. The headers are
- reduced to a single line each, but the bpdies are provided as-is.
- """
- # XXX Bail out upon bogus syntax.
- lines = source.iter_clean_lines(lines)
- for line in lines:
- if not LOCAL_STMT_START_RE.match(line):
- continue
-
- stmt = line
- blocks = None
- if not line.endswith(';'):
- # XXX Support compound & multiline simple statements.
- #blocks = []
- continue
-
- yield (stmt, blocks)
-
-
-def _extract_block(lines):
- end = None
- depth = 1
- body = []
- for line in lines:
- depth += line.count('{') - line.count('}')
- if depth == 0:
- end = line
- break
- body.append(line)
- return '\n'.join(body), end
-
-
-def parse_func(stmt, body):
- """Return (name, signature) for the given function definition."""
- header, _, end = stmt.partition(body)
- assert end.strip() == '}'
- assert header.strip().endswith('{')
- header, _, _= header.rpartition('{')
-
- signature = ' '.join(header.strip().splitlines())
-
- _, _, name = signature.split('(')[0].strip().rpartition(' ')
- assert name
-
- return name, signature
-
-
-#TYPE_SPEC = rf'''(?:
-# )'''
-#VAR_DECLARATOR = rf'''(?:
-# )'''
-#VAR_DECL = rf'''(?:
-# {TYPE_SPEC}+
-# {VAR_DECLARATOR}
-# \s*
-# )'''
-#VAR_DECLARATION = rf'''(?:
-# {VAR_DECL}
-# (?: = [^=] [^;]* )?
-# ;
-# )'''
-#
-#
-#def parse_variable(decl, *, inFunc=False):
-# """Return [(name, storage, vartype)] for the given variable declaration."""
-# ...
-
-
-def _parse_var(stmt):
- """Return (name, vartype) for the given variable declaration."""
- stmt = stmt.rstrip(';')
- m = LOCAL_STMT_START_RE.match(stmt)
- assert m
- vartype = m.group(0)
- name = stmt[len(vartype):].partition('=')[0].strip()
-
- if name.startswith('('):
- name, _, after = name[1:].partition(')')
- assert after
- name = name.replace('*', '* ')
- inside, _, name = name.strip().rpartition(' ')
- vartype = f'{vartype} ({inside.strip()}){after}'
- else:
- name = name.replace('*', '* ')
- before, _, name = name.rpartition(' ')
- vartype = f'{vartype} {before}'
-
- vartype = vartype.strip()
- while ' ' in vartype:
- vartype = vartype.replace(' ', ' ')
-
- return name, vartype
-
-
-def extract_storage(decl, *, infunc=None):
- """Return (storage, vartype) based on the given declaration.
-
- The default storage is "implicit" (or "local" if infunc is True).
- """
- if decl == UNKNOWN:
- return decl
- if decl.startswith('static '):
- return 'static'
- #return 'static', decl.partition(' ')[2].strip()
- elif decl.startswith('extern '):
- return 'extern'
- #return 'extern', decl.partition(' ')[2].strip()
- elif re.match('.*\b(static|extern)\b', decl):
- raise NotImplementedError
- elif infunc:
- return 'local'
- else:
- return 'implicit'
-
-
-def parse_compound(stmt, blocks):
- """Return (headers, bodies) for the given compound statement."""
- # XXX Identify declarations inside compound statements
- # (if/switch/for/while).
- raise NotImplementedError
-
-
-def iter_variables(filename, *,
- preprocessed=False,
- _iter_source_lines=source.iter_lines,
- _iter_global=iter_global_declarations,
- _iter_local=iter_local_statements,
- _parse_func=parse_func,
- _parse_var=_parse_var,
- _parse_compound=parse_compound,
- ):
- """Yield (funcname, name, vartype) for every variable in the given file."""
- if preprocessed:
- raise NotImplementedError
- lines = _iter_source_lines(filename)
- for stmt, body in _iter_global(lines):
- # At the file top-level we only have to worry about vars & funcs.
- if not body:
- name, vartype = _parse_var(stmt)
- if name:
- yield (None, name, vartype)
- else:
- funcname, _ = _parse_func(stmt, body)
- localvars = _iter_locals(body,
- _iter_statements=_iter_local,
- _parse_var=_parse_var,
- _parse_compound=_parse_compound,
- )
- for name, vartype in localvars:
- yield (funcname, name, vartype)
-
-
-def _iter_locals(lines, *,
- _iter_statements=iter_local_statements,
- _parse_var=_parse_var,
- _parse_compound=parse_compound,
- ):
- compound = [lines]
- while compound:
- body = compound.pop(0)
- bodylines = body.splitlines()
- for stmt, blocks in _iter_statements(bodylines):
- if not blocks:
- name, vartype = _parse_var(stmt)
- if name:
- yield (name, vartype)
- else:
- headers, bodies = _parse_compound(stmt, blocks)
- for header in headers:
- for line in header:
- name, vartype = _parse_var(line)
- if name:
- yield (name, vartype)
- compound.extend(bodies)
-
-
-def iter_all(filename, *,
- preprocessed=False,
- ):
- """Yield a Declaration for each one found.
-
- If there are duplicates, due to preprocessor conditionals, then
- they are checked to make sure they are the same.
- """
- # XXX For the moment we cheat.
- for funcname, name, decl in iter_variables(filename,
- preprocessed=preprocessed):
- yield 'variable', funcname, name, decl
diff --git a/Tools/c-analyzer/c_analyzer/parser/find.py b/Tools/c-analyzer/c_analyzer/parser/find.py
deleted file mode 100644
index 3860d3d..0000000
--- a/Tools/c-analyzer/c_analyzer/parser/find.py
+++ /dev/null
@@ -1,107 +0,0 @@
-from ..common.info import UNKNOWN, ID
-
-from . import declarations
-
-# XXX need tests:
-# * variables
-# * variable
-# * variable_from_id
-
-
-def _iter_vars(filenames, preprocessed, *,
- handle_id=None,
- _iter_decls=declarations.iter_all,
- ):
- if handle_id is None:
- handle_id = ID
-
- for filename in filenames or ():
- for kind, funcname, name, decl in _iter_decls(filename,
- preprocessed=preprocessed,
- ):
- if kind != 'variable':
- continue
- varid = handle_id(filename, funcname, name)
- yield varid, decl
-
-
-# XXX Add a "handle_var" arg like we did for get_resolver()?
-
-def variables(*filenames,
- perfilecache=None,
- preprocessed=False,
- known=None, # for types
- handle_id=None,
- _iter_vars=_iter_vars,
- ):
- """Yield (varid, decl) for each variable found in the given files.
-
- If "preprocessed" is provided (and not False/None) then it is used
- to decide which tool to use to parse the source code after it runs
- through the C preprocessor. Otherwise the raw
- """
- if len(filenames) == 1 and not (filenames[0], str):
- filenames, = filenames
-
- if perfilecache is None:
- yield from _iter_vars(filenames, preprocessed)
- else:
- # XXX Cache per-file variables (e.g. `{filename: [(varid, decl)]}`).
- raise NotImplementedError
-
-
-def variable(name, filenames, *,
- local=False,
- perfilecache=None,
- preprocessed=False,
- handle_id=None,
- _iter_vars=variables,
- ):
- """Return (varid, decl) for the first found variable that matches.
-
- If "local" is True then the first matching local variable in the
- file will always be returned. To avoid that, pass perfilecache and
- pop each variable from the cache after using it.
- """
- for varid, decl in _iter_vars(filenames,
- perfilecache=perfilecache,
- preprocessed=preprocessed,
- ):
- if varid.name != name:
- continue
- if local:
- if varid.funcname:
- if varid.funcname == UNKNOWN:
- raise NotImplementedError
- return varid, decl
- elif not varid.funcname:
- return varid, decl
- else:
- return None, None # No matching variable was found.
-
-
-def variable_from_id(id, filenames, *,
- perfilecache=None,
- preprocessed=False,
- handle_id=None,
- _get_var=variable,
- ):
- """Return (varid, decl) for the first found variable that matches."""
- local = False
- if isinstance(id, str):
- name = id
- else:
- if id.funcname == UNKNOWN:
- local = True
- elif id.funcname:
- raise NotImplementedError
-
- name = id.name
- if id.filename and id.filename != UNKNOWN:
- filenames = [id.filename]
- return _get_var(name, filenames,
- local=local,
- perfilecache=perfilecache,
- preprocessed=preprocessed,
- handle_id=handle_id,
- )
diff --git a/Tools/c-analyzer/c_analyzer/parser/naive.py b/Tools/c-analyzer/c_analyzer/parser/naive.py
deleted file mode 100644
index 4a4822d..0000000
--- a/Tools/c-analyzer/c_analyzer/parser/naive.py
+++ /dev/null
@@ -1,179 +0,0 @@
-import re
-
-from ..common.info import UNKNOWN, ID
-
-from .preprocessor import _iter_clean_lines
-
-
-_NOT_SET = object()
-
-
-def get_srclines(filename, *,
- cache=None,
- _open=open,
- _iter_lines=_iter_clean_lines,
- ):
- """Return the file's lines as a list.
-
- Each line will have trailing whitespace removed (including newline).
-
- If a cache is given the it is used.
- """
- if cache is not None:
- try:
- return cache[filename]
- except KeyError:
- pass
-
- with _open(filename) as srcfile:
- srclines = [line
- for _, line in _iter_lines(srcfile)
- if not line.startswith('#')]
- for i, line in enumerate(srclines):
- srclines[i] = line.rstrip()
-
- if cache is not None:
- cache[filename] = srclines
- return srclines
-
-
-def parse_variable_declaration(srcline):
- """Return (name, decl) for the given declaration line."""
- # XXX possible false negatives...
- decl, sep, _ = srcline.partition('=')
- if not sep:
- if not srcline.endswith(';'):
- return None, None
- decl = decl.strip(';')
- decl = decl.strip()
- m = re.match(r'.*\b(\w+)\s*(?:\[[^\]]*\])?$', decl)
- if not m:
- return None, None
- name = m.group(1)
- return name, decl
-
-
-def parse_variable(srcline, funcname=None):
- """Return (varid, decl) for the variable declared on the line (or None)."""
- line = srcline.strip()
-
- # XXX Handle more than just static variables.
- if line.startswith('static '):
- if '(' in line and '[' not in line:
- # a function
- return None, None
- return parse_variable_declaration(line)
- else:
- return None, None
-
-
-def iter_variables(filename, *,
- srccache=None,
- parse_variable=None,
- _get_srclines=get_srclines,
- _default_parse_variable=parse_variable,
- ):
- """Yield (varid, decl) for each variable in the given source file."""
- if parse_variable is None:
- parse_variable = _default_parse_variable
-
- indent = ''
- prev = ''
- funcname = None
- for line in _get_srclines(filename, cache=srccache):
- # remember current funcname
- if funcname:
- if line == indent + '}':
- funcname = None
- continue
- else:
- if '(' in prev and line == indent + '{':
- if not prev.startswith('__attribute__'):
- funcname = prev.split('(')[0].split()[-1]
- prev = ''
- continue
- indent = line[:-len(line.lstrip())]
- prev = line
-
- info = parse_variable(line, funcname)
- if isinstance(info, list):
- for name, _funcname, decl in info:
- yield ID(filename, _funcname, name), decl
- continue
- name, decl = info
-
- if name is None:
- continue
- yield ID(filename, funcname, name), decl
-
-
-def _match_varid(variable, name, funcname, ignored=None):
- if ignored and variable in ignored:
- return False
-
- if variable.name != name:
- return False
-
- if funcname == UNKNOWN:
- if not variable.funcname:
- return False
- elif variable.funcname != funcname:
- return False
-
- return True
-
-
-def find_variable(filename, funcname, name, *,
- ignored=None,
- srccache=None, # {filename: lines}
- parse_variable=None,
- _iter_variables=iter_variables,
- ):
- """Return the matching variable.
-
- Return None if the variable is not found.
- """
- for varid, decl in _iter_variables(filename,
- srccache=srccache,
- parse_variable=parse_variable,
- ):
- if _match_varid(varid, name, funcname, ignored):
- return varid, decl
- else:
- return None
-
-
-def find_variables(varids, filenames=None, *,
- srccache=_NOT_SET,
- parse_variable=None,
- _find_symbol=find_variable,
- ):
- """Yield (varid, decl) for each ID.
-
- If the variable is not found then its decl will be UNKNOWN. That
- way there will be one resulting variable per given ID.
- """
- if srccache is _NOT_SET:
- srccache = {}
-
- used = set()
- for varid in varids:
- if varid.filename and varid.filename != UNKNOWN:
- srcfiles = [varid.filename]
- else:
- if not filenames:
- yield varid, UNKNOWN
- continue
- srcfiles = filenames
- for filename in srcfiles:
- varid, decl = _find_varid(filename, varid.funcname, varid.name,
- ignored=used,
- srccache=srccache,
- parse_variable=parse_variable,
- )
- if varid:
- yield varid, decl
- used.add(varid)
- break
- else:
- yield varid, UNKNOWN
diff --git a/Tools/c-analyzer/c_analyzer/parser/preprocessor.py b/Tools/c-analyzer/c_analyzer/parser/preprocessor.py
deleted file mode 100644
index 41f306e..0000000
--- a/Tools/c-analyzer/c_analyzer/parser/preprocessor.py
+++ /dev/null
@@ -1,511 +0,0 @@
-from collections import namedtuple
-import shlex
-import os
-import re
-
-from ..common import util, info
-
-
-CONTINUATION = '\\' + os.linesep
-
-IDENTIFIER = r'(?:\w*[a-zA-Z]\w*)'
-IDENTIFIER_RE = re.compile('^' + IDENTIFIER + '$')
-
-
-def _coerce_str(value):
- if not value:
- return ''
- return str(value).strip()
-
-
-#############################
-# directives
-
-DIRECTIVE_START = r'''
- (?:
- ^ \s*
- [#] \s*
- )'''
-DIRECTIVE_TEXT = r'''
- (?:
- (?: \s+ ( .*\S ) )?
- \s* $
- )'''
-DIRECTIVE = rf'''
- (?:
- {DIRECTIVE_START}
- (
- include |
- error | warning |
- pragma |
- define | undef |
- if | ifdef | ifndef | elseif | else | endif |
- __FILE__ | __LINE__ | __DATE __ | __TIME__ | __TIMESTAMP__
- )
- {DIRECTIVE_TEXT}
- )'''
-# (?:
-# [^\\\n] |
-# \\ [^\n] |
-# \\ \n
-# )+
-# ) \n
-# )'''
-DIRECTIVE_RE = re.compile(DIRECTIVE, re.VERBOSE)
-
-DEFINE = rf'''
- (?:
- {DIRECTIVE_START} define \s+
- (?:
- ( \w*[a-zA-Z]\w* )
- (?: \s* [(] ([^)]*) [)] )?
- )
- {DIRECTIVE_TEXT}
- )'''
-DEFINE_RE = re.compile(DEFINE, re.VERBOSE)
-
-
-def parse_directive(line):
- """Return the appropriate directive for the given line."""
- line = line.strip()
- if line.startswith('#'):
- line = line[1:].lstrip()
- line = '#' + line
- directive = line
- #directive = '#' + line
- while ' ' in directive:
- directive = directive.replace(' ', ' ')
- return _parse_directive(directive)
-
-
-def _parse_directive(line):
- m = DEFINE_RE.match(line)
- if m:
- name, args, text = m.groups()
- if args:
- args = [a.strip() for a in args.split(',')]
- return Macro(name, args, text)
- else:
- return Constant(name, text)
-
- m = DIRECTIVE_RE.match(line)
- if not m:
- raise ValueError(f'unsupported directive {line!r}')
- kind, text = m.groups()
- if not text:
- if kind not in ('else', 'endif'):
- raise ValueError(f'missing text in directive {line!r}')
- elif kind in ('else', 'endif', 'define'):
- raise ValueError(f'unexpected text in directive {line!r}')
- if kind == 'include':
- directive = Include(text)
- elif kind in IfDirective.KINDS:
- directive = IfDirective(kind, text)
- else:
- directive = OtherDirective(kind, text)
- directive.validate()
- return directive
-
-
-class PreprocessorDirective(util._NTBase):
- """The base class for directives."""
-
- __slots__ = ()
-
- KINDS = frozenset([
- 'include',
- 'pragma',
- 'error', 'warning',
- 'define', 'undef',
- 'if', 'ifdef', 'ifndef', 'elseif', 'else', 'endif',
- '__FILE__', '__DATE__', '__LINE__', '__TIME__', '__TIMESTAMP__',
- ])
-
- @property
- def text(self):
- return ' '.join(v for v in self[1:] if v and v.strip()) or None
-
- def validate(self):
- """Fail if the object is invalid (i.e. init with bad data)."""
- super().validate()
-
- if not self.kind:
- raise TypeError('missing kind')
- elif self.kind not in self.KINDS:
- raise ValueError
-
- # text can be anything, including None.
-
-
-class Constant(PreprocessorDirective,
- namedtuple('Constant', 'kind name value')):
- """A single "constant" directive ("define")."""
-
- __slots__ = ()
-
- def __new__(cls, name, value=None):
- self = super().__new__(
- cls,
- 'define',
- name=_coerce_str(name) or None,
- value=_coerce_str(value) or None,
- )
- return self
-
- def validate(self):
- """Fail if the object is invalid (i.e. init with bad data)."""
- super().validate()
-
- if not self.name:
- raise TypeError('missing name')
- elif not IDENTIFIER_RE.match(self.name):
- raise ValueError(f'name must be identifier, got {self.name!r}')
-
- # value can be anything, including None
-
-
-class Macro(PreprocessorDirective,
- namedtuple('Macro', 'kind name args body')):
- """A single "macro" directive ("define")."""
-
- __slots__ = ()
-
- def __new__(cls, name, args, body=None):
- # "args" must be a string or an iterable of strings (or "empty").
- if isinstance(args, str):
- args = [v.strip() for v in args.split(',')]
- if args:
- args = tuple(_coerce_str(a) or None for a in args)
- self = super().__new__(
- cls,
- kind='define',
- name=_coerce_str(name) or None,
- args=args if args else (),
- body=_coerce_str(body) or None,
- )
- return self
-
- @property
- def text(self):
- if self.body:
- return f'{self.name}({", ".join(self.args)}) {self.body}'
- else:
- return f'{self.name}({", ".join(self.args)})'
-
- def validate(self):
- """Fail if the object is invalid (i.e. init with bad data)."""
- super().validate()
-
- if not self.name:
- raise TypeError('missing name')
- elif not IDENTIFIER_RE.match(self.name):
- raise ValueError(f'name must be identifier, got {self.name!r}')
-
- for arg in self.args:
- if not arg:
- raise ValueError(f'missing arg in {self.args}')
- elif not IDENTIFIER_RE.match(arg):
- raise ValueError(f'arg must be identifier, got {arg!r}')
-
- # body can be anything, including None
-
-
-class IfDirective(PreprocessorDirective,
- namedtuple('IfDirective', 'kind condition')):
- """A single conditional directive (e.g. "if", "ifdef").
-
- This only includes directives that actually provide conditions. The
- related directives "else" and "endif" are covered by OtherDirective
- instead.
- """
-
- __slots__ = ()
-
- KINDS = frozenset([
- 'if',
- 'ifdef',
- 'ifndef',
- 'elseif',
- ])
-
- @classmethod
- def _condition_from_raw(cls, raw, kind):
- #return Condition.from_raw(raw, _kind=kind)
- condition = _coerce_str(raw)
- if not condition:
- return None
-
- if kind == 'ifdef':
- condition = f'defined({condition})'
- elif kind == 'ifndef':
- condition = f'! defined({condition})'
-
- return condition
-
- def __new__(cls, kind, condition):
- kind = _coerce_str(kind)
- self = super().__new__(
- cls,
- kind=kind or None,
- condition=cls._condition_from_raw(condition, kind),
- )
- return self
-
- @property
- def text(self):
- if self.kind == 'ifdef':
- return self.condition[8:-1] # strip "defined("
- elif self.kind == 'ifndef':
- return self.condition[10:-1] # strip "! defined("
- else:
- return self.condition
- #return str(self.condition)
-
- def validate(self):
- """Fail if the object is invalid (i.e. init with bad data)."""
- super().validate()
-
- if not self.condition:
- raise TypeError('missing condition')
- #else:
- # for cond in self.condition:
- # if not cond:
- # raise ValueError(f'missing condition in {self.condition}')
- # cond.validate()
- # if self.kind in ('ifdef', 'ifndef'):
- # if len(self.condition) != 1:
- # raise ValueError('too many condition')
- # if self.kind == 'ifdef':
- # if not self.condition[0].startswith('defined '):
- # raise ValueError('bad condition')
- # else:
- # if not self.condition[0].startswith('! defined '):
- # raise ValueError('bad condition')
-
-
-class Include(PreprocessorDirective,
- namedtuple('Include', 'kind file')):
- """A single "include" directive.
-
- Supported "file" values either follow the bracket style
- (<stdio>) or use double quotes ("spam.h").
- """
-
- __slots__ = ()
-
- def __new__(cls, file):
- self = super().__new__(
- cls,
- kind='include',
- file=_coerce_str(file) or None,
- )
- return self
-
- def validate(self):
- """Fail if the object is invalid (i.e. init with bad data)."""
- super().validate()
-
- if not self.file:
- raise TypeError('missing file')
-
-
-class OtherDirective(PreprocessorDirective,
- namedtuple('OtherDirective', 'kind text')):
- """A single directive not covered by another class.
-
- This includes the "else", "endif", and "undef" directives, which are
- otherwise inherently related to the directives covered by the
- Constant, Macro, and IfCondition classes.
-
- Note that all directives must have a text value, except for "else"
- and "endif" (which must have no text).
- """
-
- __slots__ = ()
-
- KINDS = PreprocessorDirective.KINDS - {'include', 'define'} - IfDirective.KINDS
-
- def __new__(cls, kind, text):
- self = super().__new__(
- cls,
- kind=_coerce_str(kind) or None,
- text=_coerce_str(text) or None,
- )
- return self
-
- def validate(self):
- """Fail if the object is invalid (i.e. init with bad data)."""
- super().validate()
-
- if self.text:
- if self.kind in ('else', 'endif'):
- raise ValueError('unexpected text in directive')
- elif self.kind not in ('else', 'endif'):
- raise TypeError('missing text')
-
-
-#############################
-# iterating lines
-
-def _recompute_conditions(directive, ifstack):
- if directive.kind in ('if', 'ifdef', 'ifndef'):
- ifstack.append(
- ([], directive.condition))
- elif directive.kind == 'elseif':
- if ifstack:
- negated, active = ifstack.pop()
- if active:
- negated.append(active)
- else:
- negated = []
- ifstack.append(
- (negated, directive.condition))
- elif directive.kind == 'else':
- if ifstack:
- negated, active = ifstack.pop()
- if active:
- negated.append(active)
- ifstack.append(
- (negated, None))
- elif directive.kind == 'endif':
- if ifstack:
- ifstack.pop()
-
- conditions = []
- for negated, active in ifstack:
- for condition in negated:
- conditions.append(f'! ({condition})')
- if active:
- conditions.append(active)
- return tuple(conditions)
-
-
-def _iter_clean_lines(lines):
- lines = iter(enumerate(lines, 1))
- for lno, line in lines:
- # Handle line continuations.
- while line.endswith(CONTINUATION):
- try:
- lno, _line = next(lines)
- except StopIteration:
- break
- line = line[:-len(CONTINUATION)] + ' ' + _line
-
- # Deal with comments.
- after = line
- line = ''
- while True:
- # Look for a comment.
- before, begin, remainder = after.partition('/*')
- if '//' in before:
- before, _, _ = before.partition('//')
- line += before + ' ' # per the C99 spec
- break
- line += before
- if not begin:
- break
- line += ' ' # per the C99 spec
-
- # Go until we find the end of the comment.
- _, end, after = remainder.partition('*/')
- while not end:
- try:
- lno, remainder = next(lines)
- except StopIteration:
- raise Exception('unterminated comment')
- _, end, after = remainder.partition('*/')
-
- yield lno, line
-
-
-def iter_lines(lines, *,
- _iter_clean_lines=_iter_clean_lines,
- _parse_directive=_parse_directive,
- _recompute_conditions=_recompute_conditions,
- ):
- """Yield (lno, line, directive, active conditions) for each given line.
-
- This is effectively a subset of the operations taking place in
- translation phases 2-4 from the C99 spec (ISO/IEC 9899:TC2); see
- section 5.1.1.2. Line continuations are removed and comments
- replaced with a single space. (In both cases "lno" will be the last
- line involved.) Otherwise each line is returned as-is.
-
- "lno" is the (1-indexed) line number for the line.
-
- "directive" will be a PreprocessorDirective or None, depending on
- whether or not there is a directive on the line.
-
- "active conditions" is the set of preprocessor conditions (e.g.
- "defined()") under which the current line of code will be included
- in compilation. That set is derived from every conditional
- directive block (e.g. "if defined()", "ifdef", "else") containing
- that line. That includes nested directives. Note that the
- current line does not affect the active conditions for itself.
- It only impacts subsequent lines. That applies to directives
- that close blocks (e.g. "endif") just as much as conditional
- directives. Also note that "else" and "elseif" directives
- update the active conditions (for later lines), rather than
- adding to them.
- """
- ifstack = []
- conditions = ()
- for lno, line in _iter_clean_lines(lines):
- stripped = line.strip()
- if not stripped.startswith('#'):
- yield lno, line, None, conditions
- continue
-
- directive = '#' + stripped[1:].lstrip()
- while ' ' in directive:
- directive = directive.replace(' ', ' ')
- directive = _parse_directive(directive)
- yield lno, line, directive, conditions
-
- if directive.kind in ('else', 'endif'):
- conditions = _recompute_conditions(directive, ifstack)
- elif isinstance(directive, IfDirective):
- conditions = _recompute_conditions(directive, ifstack)
-
-
-#############################
-# running (platform-specific?)
-
-def _gcc(filename, *,
- _get_argv=(lambda: _get_gcc_argv()),
- _run=util.run_cmd,
- ):
- argv = _get_argv()
- argv.extend([
- '-E', filename,
- ])
- output = _run(argv)
- return output
-
-
-def _get_gcc_argv(*,
- _open=open,
- _run=util.run_cmd,
- ):
- with _open('/tmp/print.mk', 'w') as tmpfile:
- tmpfile.write('print-%:\n')
- #tmpfile.write('\t@echo $* = $($*)\n')
- tmpfile.write('\t@echo $($*)\n')
- argv = ['/usr/bin/make',
- '-f', 'Makefile',
- '-f', '/tmp/print.mk',
- 'print-CC',
- 'print-PY_CORE_CFLAGS',
- ]
- output = _run(argv)
- gcc, cflags = output.strip().splitlines()
- argv = shlex.split(gcc.strip())
- cflags = shlex.split(cflags.strip())
- return argv + cflags
-
-
-def run(filename, *,
- _gcc=_gcc,
- ):
- """Return the text of the given file after running the preprocessor."""
- return _gcc(filename)
diff --git a/Tools/c-analyzer/c_analyzer/parser/source.py b/Tools/c-analyzer/c_analyzer/parser/source.py
deleted file mode 100644
index f8998c8..0000000
--- a/Tools/c-analyzer/c_analyzer/parser/source.py
+++ /dev/null
@@ -1,34 +0,0 @@
-from . import preprocessor
-
-
-def iter_clean_lines(lines):
- incomment = False
- for line in lines:
- # Deal with comments.
- if incomment:
- _, sep, line = line.partition('*/')
- if sep:
- incomment = False
- continue
- line, _, _ = line.partition('//')
- line, sep, remainder = line.partition('/*')
- if sep:
- _, sep, after = remainder.partition('*/')
- if not sep:
- incomment = True
- continue
- line += ' ' + after
-
- # Ignore blank lines and leading/trailing whitespace.
- line = line.strip()
- if not line:
- continue
-
- yield line
-
-
-def iter_lines(filename, *,
- preprocess=preprocessor.run,
- ):
- content = preprocess(filename)
- return iter(content.splitlines())
diff --git a/Tools/c-analyzer/c_analyzer/symbols/__init__.py b/Tools/c-analyzer/c_analyzer/symbols/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/Tools/c-analyzer/c_analyzer/symbols/__init__.py
+++ /dev/null
diff --git a/Tools/c-analyzer/c_analyzer/symbols/_nm.py b/Tools/c-analyzer/c_analyzer/symbols/_nm.py
deleted file mode 100644
index f3a75a6..0000000
--- a/Tools/c-analyzer/c_analyzer/symbols/_nm.py
+++ /dev/null
@@ -1,117 +0,0 @@
-import os.path
-import shutil
-
-from c_analyzer.common import util, info
-
-from .info import Symbol
-
-
-# XXX need tests:
-# * iter_symbols
-
-NM_KINDS = {
- 'b': Symbol.KIND.VARIABLE, # uninitialized
- 'd': Symbol.KIND.VARIABLE, # initialized
- #'g': Symbol.KIND.VARIABLE, # uninitialized
- #'s': Symbol.KIND.VARIABLE, # initialized
- 't': Symbol.KIND.FUNCTION,
- }
-
-SPECIAL_SYMBOLS = {
- # binary format (e.g. ELF)
- '__bss_start',
- '__data_start',
- '__dso_handle',
- '_DYNAMIC',
- '_edata',
- '_end',
- '__environ@@GLIBC_2.2.5',
- '_GLOBAL_OFFSET_TABLE_',
- '__JCR_END__',
- '__JCR_LIST__',
- '__TMC_END__',
- }
-
-
-def _is_special_symbol(name):
- if name in SPECIAL_SYMBOLS:
- return True
- if '@@GLIBC' in name:
- return True
- return False
-
-
-def iter_symbols(binfile, *,
- nm=None,
- handle_id=None,
- _which=shutil.which,
- _run=util.run_cmd,
- ):
- """Yield a Symbol for each relevant entry reported by the "nm" command."""
- if nm is None:
- nm = _which('nm')
- if not nm:
- raise NotImplementedError
- if handle_id is None:
- handle_id = info.ID
-
- argv = [nm,
- '--line-numbers',
- binfile,
- ]
- try:
- output = _run(argv)
- except Exception:
- if nm is None:
- # XXX Use dumpbin.exe /SYMBOLS on Windows.
- raise NotImplementedError
- raise
- for line in output.splitlines():
- (name, kind, external, filename, funcname,
- ) = _parse_nm_line(line)
- if kind != Symbol.KIND.VARIABLE:
- continue
- elif _is_special_symbol(name):
- continue
- yield Symbol(
- id=handle_id(filename, funcname, name),
- kind=kind,
- external=external,
- )
-
-
-def _parse_nm_line(line):
- _origline = line
- _, _, line = line.partition(' ') # strip off the address
- line = line.strip()
-
- kind, _, line = line.partition(' ')
- line = line.strip()
- external = kind.isupper()
- kind = NM_KINDS.get(kind.lower(), Symbol.KIND.OTHER)
-
- name, _, filename = line.partition('\t')
- name = name.strip()
- if filename:
- filename = os.path.relpath(filename.partition(':')[0])
- else:
- filename = info.UNKNOWN
-
- name, islocal = _parse_nm_name(name, kind)
- funcname = info.UNKNOWN if islocal else None
- return name, kind, external, filename, funcname
-
-
-def _parse_nm_name(name, kind):
- if kind != Symbol.KIND.VARIABLE:
- return name, None
- if _is_special_symbol(name):
- return name, None
-
- actual, sep, digits = name.partition('.')
- if not sep:
- return name, False
-
- if not digits.isdigit():
- raise Exception(f'got bogus name {name}')
- return actual, True
diff --git a/Tools/c-analyzer/c_analyzer/symbols/find.py b/Tools/c-analyzer/c_analyzer/symbols/find.py
deleted file mode 100644
index 8564652..0000000
--- a/Tools/c-analyzer/c_analyzer/symbols/find.py
+++ /dev/null
@@ -1,175 +0,0 @@
-import os
-import os.path
-import shutil
-
-from ..common import files
-from ..common.info import UNKNOWN, ID
-from ..parser import find as p_find
-
-from . import _nm
-from .info import Symbol
-
-# XXX need tests:
-# * get_resolver()
-# * get_resolver_from_dirs()
-# * symbol()
-# * symbols()
-# * variables()
-
-
-def _resolve_known(symbol, knownvars):
- for varid in knownvars:
- if symbol.match(varid):
- break
- else:
- return None
- return knownvars.pop(varid)
-
-
-def get_resolver(filenames=None, known=None, *,
- handle_var,
- check_filename=None,
- perfilecache=None,
- preprocessed=False,
- _from_source=p_find.variable_from_id,
- ):
- """Return a "resolver" func for the given known vars/types and filenames.
-
- "handle_var" is a callable that takes (ID, decl) and returns a
- Variable. Variable.from_id is a suitable callable.
-
- The returned func takes a single Symbol and returns a corresponding
- Variable. If the symbol was located then the variable will be
- valid, populated with the corresponding information. Otherwise None
- is returned.
- """
- knownvars = (known or {}).get('variables')
- if knownvars:
- knownvars = dict(knownvars) # a copy
- if filenames:
- if check_filename is None:
- filenames = list(filenames)
- def check_filename(filename):
- return filename in filenames
- def resolve(symbol):
- # XXX Check "found" instead?
- if not check_filename(symbol.filename):
- return None
- found = _resolve_known(symbol, knownvars)
- if found is None:
- #return None
- varid, decl = _from_source(symbol, filenames,
- perfilecache=perfilecache,
- preprocessed=preprocessed,
- )
- found = handle_var(varid, decl)
- return found
- else:
- def resolve(symbol):
- return _resolve_known(symbol, knownvars)
- elif filenames:
- def resolve(symbol):
- varid, decl = _from_source(symbol, filenames,
- perfilecache=perfilecache,
- preprocessed=preprocessed,
- )
- return handle_var(varid, decl)
- else:
- def resolve(symbol):
- return None
- return resolve
-
-
-def get_resolver_from_dirs(dirnames, known=None, *,
- handle_var,
- suffixes=('.c',),
- perfilecache=None,
- preprocessed=False,
- _iter_files=files.iter_files_by_suffix,
- _get_resolver=get_resolver,
- ):
- """Return a "resolver" func for the given known vars/types and filenames.
-
- "dirnames" should be absolute paths. If not then they will be
- resolved relative to CWD.
-
- See get_resolver().
- """
- dirnames = [d if d.endswith(os.path.sep) else d + os.path.sep
- for d in dirnames]
- filenames = _iter_files(dirnames, suffixes)
- def check_filename(filename):
- for dirname in dirnames:
- if filename.startswith(dirname):
- return True
- else:
- return False
- return _get_resolver(filenames, known,
- handle_var=handle_var,
- check_filename=check_filename,
- perfilecache=perfilecache,
- preprocessed=preprocessed,
- )
-
-
-def symbol(symbol, filenames, known=None, *,
- perfilecache=None,
- preprocessed=False,
- handle_id=None,
- _get_resolver=get_resolver,
- ):
- """Return a Variable for the one matching the given symbol.
-
- "symbol" can be one of several objects:
-
- * Symbol - use the contained info
- * name (str) - look for a global variable with that name
- * (filename, name) - look for named global in file
- * (filename, funcname, name) - look for named local in file
-
- A name is always required. If the filename is None, "", or
- "UNKNOWN" then all files will be searched. If the funcname is
- "" or "UNKNOWN" then only local variables will be searched for.
- """
- resolve = _get_resolver(known, filenames,
- handle_id=handle_id,
- perfilecache=perfilecache,
- preprocessed=preprocessed,
- )
- return resolve(symbol)
-
-
-def _get_platform_tool():
- if os.name == 'nt':
- # XXX Support this.
- raise NotImplementedError
- elif nm := shutil.which('nm'):
- return lambda b, hi: _nm.iter_symbols(b, nm=nm, handle_id=hi)
- else:
- raise NotImplementedError
-
-
-def symbols(binfile, *,
- handle_id=None,
- _file_exists=os.path.exists,
- _get_platform_tool=_get_platform_tool,
- ):
- """Yield a Symbol for each one found in the binary."""
- if not _file_exists(binfile):
- raise Exception('executable missing (need to build it first?)')
-
- _iter_symbols = _get_platform_tool()
- yield from _iter_symbols(binfile, handle_id)
-
-
-def variables(binfile, *,
- resolve,
- handle_id=None,
- _iter_symbols=symbols,
- ):
- """Yield (Variable, Symbol) for each found symbol."""
- for symbol in _iter_symbols(binfile, handle_id=handle_id):
- if symbol.kind != Symbol.KIND.VARIABLE:
- continue
- var = resolve(symbol) or None
- yield var, symbol
diff --git a/Tools/c-analyzer/c_analyzer/symbols/info.py b/Tools/c-analyzer/c_analyzer/symbols/info.py
deleted file mode 100644
index 96a251a..0000000
--- a/Tools/c-analyzer/c_analyzer/symbols/info.py
+++ /dev/null
@@ -1,51 +0,0 @@
-from collections import namedtuple
-
-from c_analyzer.common.info import ID
-from c_analyzer.common.util import classonly, _NTBase
-
-
-class Symbol(_NTBase, namedtuple('Symbol', 'id kind external')):
- """Info for a single compilation symbol."""
-
- __slots__ = ()
-
- class KIND:
- VARIABLE = 'variable'
- FUNCTION = 'function'
- OTHER = 'other'
-
- @classonly
- def from_name(cls, name, filename=None, kind=KIND.VARIABLE, external=None):
- """Return a new symbol based on the given name."""
- id = ID(filename, None, name)
- return cls(id, kind, external)
-
- def __new__(cls, id, kind=KIND.VARIABLE, external=None):
- self = super().__new__(
- cls,
- id=ID.from_raw(id),
- kind=str(kind) if kind else None,
- external=bool(external) if external is not None else None,
- )
- return self
-
- def __hash__(self):
- return hash(self.id)
-
- def __getattr__(self, name):
- return getattr(self.id, name)
-
- def validate(self):
- """Fail if the object is invalid (i.e. init with bad data)."""
- if not self.id:
- raise TypeError('missing id')
- else:
- self.id.validate()
-
- if not self.kind:
- raise TypeError('missing kind')
- elif self.kind not in vars(self.KIND).values():
- raise ValueError(f'unsupported kind {self.kind}')
-
- if self.external is None:
- raise TypeError('missing external')
diff --git a/Tools/c-analyzer/c_analyzer/variables/__init__.py b/Tools/c-analyzer/c_analyzer/variables/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/Tools/c-analyzer/c_analyzer/variables/__init__.py
+++ /dev/null
diff --git a/Tools/c-analyzer/c_analyzer/variables/find.py b/Tools/c-analyzer/c_analyzer/variables/find.py
deleted file mode 100644
index 3fe7284..0000000
--- a/Tools/c-analyzer/c_analyzer/variables/find.py
+++ /dev/null
@@ -1,75 +0,0 @@
-from ..common import files
-from ..common.info import UNKNOWN
-from ..parser import (
- find as p_find,
- )
-from ..symbols import (
- info as s_info,
- find as s_find,
- )
-from .info import Variable
-
-# XXX need tests:
-# * vars_from_source
-
-
-def _remove_cached(cache, var):
- if not cache:
- return
- try:
- cached = cache[var.filename]
- cached.remove(var)
- except (KeyError, IndexError):
- pass
-
-
-def vars_from_binary(binfile, *,
- known=None,
- filenames=None,
- handle_id=None,
- check_filename=None,
- handle_var=Variable.from_id,
- _iter_vars=s_find.variables,
- _get_symbol_resolver=s_find.get_resolver,
- ):
- """Yield a Variable for each found Symbol.
-
- Details are filled in from the given "known" variables and types.
- """
- cache = {}
- resolve = _get_symbol_resolver(filenames, known,
- handle_var=handle_var,
- check_filename=check_filename,
- perfilecache=cache,
- )
- for var, symbol in _iter_vars(binfile,
- resolve=resolve,
- handle_id=handle_id,
- ):
- if var is None:
- var = Variable(symbol.id, UNKNOWN, UNKNOWN)
- yield var
- _remove_cached(cache, var)
-
-
-def vars_from_source(filenames, *,
- preprocessed=None,
- known=None,
- handle_id=None,
- handle_var=Variable.from_id,
- iter_vars=p_find.variables,
- ):
- """Yield a Variable for each declaration in the raw source code.
-
- Details are filled in from the given "known" variables and types.
- """
- cache = {}
- for varid, decl in iter_vars(filenames or (),
- perfilecache=cache,
- preprocessed=preprocessed,
- known=known,
- handle_id=handle_id,
- ):
- var = handle_var(varid, decl)
- yield var
- _remove_cached(cache, var)
diff --git a/Tools/c-analyzer/c_analyzer/variables/info.py b/Tools/c-analyzer/c_analyzer/variables/info.py
deleted file mode 100644
index 336a523..0000000
--- a/Tools/c-analyzer/c_analyzer/variables/info.py
+++ /dev/null
@@ -1,93 +0,0 @@
-from collections import namedtuple
-
-from ..common.info import ID, UNKNOWN
-from ..common.util import classonly, _NTBase
-
-
-def normalize_vartype(vartype):
- """Return the canonical form for a variable type (or func signature)."""
- # We allow empty string through for semantic reasons.
- if vartype is None:
- return None
-
- # XXX finish!
- # XXX Return (modifiers, type, pointer)?
- return str(vartype)
-
-
-# XXX Variable.vartype -> decl (Declaration).
-
-class Variable(_NTBase,
- namedtuple('Variable', 'id storage vartype')):
- """Information about a single variable declaration."""
-
- __slots__ = ()
-
- STORAGE = (
- 'static',
- 'extern',
- 'implicit',
- 'local',
- )
-
- @classonly
- def from_parts(cls, filename, funcname, name, decl, storage=None):
- varid = ID(filename, funcname, name)
- if storage is None:
- self = cls.from_id(varid, decl)
- else:
- self = cls(varid, storage, decl)
- return self
-
- @classonly
- def from_id(cls, varid, decl):
- from ..parser.declarations import extract_storage
- storage = extract_storage(decl, infunc=varid.funcname)
- return cls(varid, storage, decl)
-
- def __new__(cls, id, storage, vartype):
- self = super().__new__(
- cls,
- id=ID.from_raw(id),
- storage=str(storage) if storage else None,
- vartype=normalize_vartype(vartype) if vartype else None,
- )
- return self
-
- def __hash__(self):
- return hash(self.id)
-
- def __getattr__(self, name):
- return getattr(self.id, name)
-
- def _validate_id(self):
- if not self.id:
- raise TypeError('missing id')
-
- if not self.filename or self.filename == UNKNOWN:
- raise TypeError(f'id missing filename ({self.id})')
-
- if self.funcname and self.funcname == UNKNOWN:
- raise TypeError(f'id missing funcname ({self.id})')
-
- self.id.validate()
-
- def validate(self):
- """Fail if the object is invalid (i.e. init with bad data)."""
- self._validate_id()
-
- if self.storage is None or self.storage == UNKNOWN:
- raise TypeError('missing storage')
- elif self.storage not in self.STORAGE:
- raise ValueError(f'unsupported storage {self.storage:r}')
-
- if self.vartype is None or self.vartype == UNKNOWN:
- raise TypeError('missing vartype')
-
- @property
- def isglobal(self):
- return self.storage != 'local'
-
- @property
- def isconst(self):
- return 'const' in self.vartype.split()
diff --git a/Tools/c-analyzer/c_analyzer/variables/known.py b/Tools/c-analyzer/c_analyzer/variables/known.py
deleted file mode 100644
index aa2934a..0000000
--- a/Tools/c-analyzer/c_analyzer/variables/known.py
+++ /dev/null
@@ -1,91 +0,0 @@
-import csv
-
-from ..common.info import ID, UNKNOWN
-from ..common.util import read_tsv
-from .info import Variable
-
-
-# XXX need tests:
-# * read_file()
-# * look_up_variable()
-
-
-COLUMNS = ('filename', 'funcname', 'name', 'kind', 'declaration')
-HEADER = '\t'.join(COLUMNS)
-
-
-def read_file(infile, *,
- _read_tsv=read_tsv,
- ):
- """Yield (kind, id, decl) for each row in the data file.
-
- The caller is responsible for validating each row.
- """
- for row in _read_tsv(infile, HEADER):
- filename, funcname, name, kind, declaration = row
- if not funcname or funcname == '-':
- funcname = None
- id = ID(filename, funcname, name)
- yield kind, id, declaration
-
-
-def from_file(infile, *,
- handle_var=Variable.from_id,
- _read_file=read_file,
- ):
- """Return the info for known declarations in the given file."""
- known = {
- 'variables': {},
- #'types': {},
- #'constants': {},
- #'macros': {},
- }
- for kind, id, decl in _read_file(infile):
- if kind == 'variable':
- values = known['variables']
- value = handle_var(id, decl)
- else:
- raise ValueError(f'unsupported kind in row {row}')
- value.validate()
- values[id] = value
- return known
-
-
-def look_up_variable(varid, knownvars, *,
- match_files=(lambda f1, f2: f1 == f2),
- ):
- """Return the known Variable matching the given ID.
-
- "knownvars" is a mapping of ID to Variable.
-
- "match_files" is used to verify if two filenames point to
- the same file.
-
- If no match is found then None is returned.
- """
- if not knownvars:
- return None
-
- if varid.funcname == UNKNOWN:
- if not varid.filename or varid.filename == UNKNOWN:
- for varid in knownvars:
- if not varid.funcname:
- continue
- if varid.name == varid.name:
- return knownvars[varid]
- else:
- return None
- else:
- for varid in knownvars:
- if not varid.funcname:
- continue
- if not match_files(varid.filename, varid.filename):
- continue
- if varid.name == varid.name:
- return knownvars[varid]
- else:
- return None
- elif not varid.filename or varid.filename == UNKNOWN:
- raise NotImplementedError
- else:
- return knownvars.get(varid.id)