author     Eric Snow <ericsnowcurrently@gmail.com>   2020-12-24 18:04:19 (GMT)
committer  GitHub <noreply@github.com>               2020-12-24 18:04:19 (GMT)
commit     7ec59d8861ef1104c3028678b2cacde4c5693e19 (patch)
tree       b499f0504f79e1a218229e93ca1847fb61d80831 /Tools
parent     b57ada98da0d5b0cf1ebc2c9c5502d04aa962042 (diff)
bpo-36876: [c-analyzer tool] Add a "capi" subcommand to the c-analyzer tool. (gh-23918)
This will help identify which C-API items will need to be updated for subinterpreter support.
https://bugs.python.org/issue36876
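For orientation, the sketch below shows roughly how the new machinery can be driven once this commit is applied. It is an illustration only, assuming a CPython checkout as the working directory and Tools/c-analyzer on sys.path; it mirrors what the new cmd_capi() handler (added to cpython/__main__.py below) does for the --summary format.

    import sys
    sys.path.insert(0, 'Tools/c-analyzer')  # assumption: cwd is a CPython checkout

    from cpython import _capi

    # Scan every C-API header under Include/ and print the per-kind,
    # per-level summary (the same output the --summary format produces).
    items = _capi.iter_capi()
    for line in _capi.render_summary(items):
        print(line)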
Diffstat (limited to 'Tools')
-rw-r--r--  Tools/c-analyzer/c_analyzer/__main__.py             |   9
-rw-r--r--  Tools/c-analyzer/c_common/scriptutil.py             |  24
-rw-r--r--  Tools/c-analyzer/c_common/tables.py                 | 176
-rw-r--r--  Tools/c-analyzer/c_parser/__main__.py               |   3
-rw-r--r--  Tools/c-analyzer/c_parser/preprocessor/__main__.py  |   5
-rw-r--r--  Tools/c-analyzer/check-c-globals.py                 |   1
-rw-r--r--  Tools/c-analyzer/cpython/__main__.py                | 104
-rw-r--r--  Tools/c-analyzer/cpython/_capi.py                   | 479
-rw-r--r--  Tools/c-analyzer/cpython/_files.py                  |  69
-rw-r--r--  Tools/c-analyzer/cpython/_parser.py                 |  36

10 files changed, 849 insertions, 57 deletions
diff --git a/Tools/c-analyzer/c_analyzer/__main__.py b/Tools/c-analyzer/c_analyzer/__main__.py
index 44325f2..24fc6cd 100644
--- a/Tools/c-analyzer/c_analyzer/__main__.py
+++ b/Tools/c-analyzer/c_analyzer/__main__.py
@@ -263,7 +263,7 @@ FORMATS = {
 
 def add_output_cli(parser, *, default='summary'):
     parser.add_argument('--format', dest='fmt', default=default, choices=tuple(FORMATS))
 
-    def process_args(args):
+    def process_args(args, *, argv=None):
         pass
     return process_args
@@ -280,7 +280,7 @@ def _cli_check(parser, checks=None, **kwargs):
         process_checks = add_checks_cli(parser)
     elif len(checks) == 1 and type(checks) is not dict and re.match(r'^<.*>$', checks[0]):
         check = checks[0][1:-1]
-        def process_checks(args):
+        def process_checks(args, *, argv=None):
             args.checks = [check]
     else:
         process_checks = add_checks_cli(parser, checks=checks)
@@ -428,9 +428,9 @@ def _cli_data(parser, filenames=None, known=None):
         if known is None:
             sub.add_argument('--known', required=True)
 
-    def process_args(args):
+    def process_args(args, *, argv):
         if args.datacmd == 'dump':
-            process_progress(args)
+            process_progress(args, argv=argv)
     return process_args
 
 
@@ -515,6 +515,7 @@ def parse_args(argv=sys.argv[1:], prog=sys.argv[0], *, subset=None):
 
     verbosity, traceback_cm = process_args_by_key(
         args,
+        argv,
         processors[cmd],
         ['verbosity', 'traceback_cm'],
     )
diff --git a/Tools/c-analyzer/c_common/scriptutil.py b/Tools/c-analyzer/c_common/scriptutil.py
index 50dd754..ce69af2 100644
--- a/Tools/c-analyzer/c_common/scriptutil.py
+++ b/Tools/c-analyzer/c_common/scriptutil.py
@@ -192,7 +192,7 @@ def add_verbosity_cli(parser):
     parser.add_argument('-q', '--quiet', action='count', default=0)
     parser.add_argument('-v', '--verbose', action='count', default=0)
 
-    def process_args(args):
+    def process_args(args, *, argv=None):
         ns = vars(args)
         key = 'verbosity'
         if key in ns:
@@ -208,7 +208,7 @@ def add_traceback_cli(parser):
     parser.add_argument('--no-traceback', '--no-tb', dest='traceback',
                         action='store_const', const=False)
 
-    def process_args(args):
+    def process_args(args, *, argv=None):
         ns = vars(args)
         key = 'traceback_cm'
         if key in ns:
@@ -262,7 +262,7 @@ def add_sepval_cli(parser, opt, dest, choices, *, sep=',', **kwargs):
     #kwargs.setdefault('metavar', opt.upper())
     parser.add_argument(opt, dest=dest, action='append', **kwargs)
 
-    def process_args(args):
+    def process_args(args, *, argv=None):
         ns = vars(args)
 
         # XXX Use normalize_selection()?
@@ -293,7 +293,7 @@ def add_file_filtering_cli(parser, *, excluded=None):
 
     excluded = tuple(excluded or ())
 
-    def process_args(args):
+    def process_args(args, *, argv=None):
         ns = vars(args)
         key = 'iter_filenames'
         if key in ns:
@@ -323,7 +323,7 @@ def add_progress_cli(parser, *, threshold=VERBOSITY, **kwargs):
     parser.add_argument('--no-progress', dest='track_progress', action='store_false')
     parser.set_defaults(track_progress=True)
 
-    def process_args(args):
+    def process_args(args, *, argv=None):
         if args.track_progress:
             ns = vars(args)
             verbosity = ns.get('verbosity', VERBOSITY)
@@ -339,7 +339,7 @@ def add_failure_filtering_cli(parser, pool, *, default=False):
                         metavar=f'"{{all|{"|".join(sorted(pool))}}},..."')
     parser.add_argument('--no-fail', dest='fail', action='store_const', const=())
 
-    def process_args(args):
+    def process_args(args, *, argv=None):
         ns = vars(args)
 
         fail = ns.pop('fail')
@@ -371,7 +371,7 @@ def add_failure_filtering_cli(parser, pool, *, default=False):
 def add_kind_filtering_cli(parser, *, default=None):
     parser.add_argument('--kinds', action='append')
 
-    def process_args(args):
+    def process_args(args, *, argv=None):
         ns = vars(args)
 
         kinds = []
@@ -486,18 +486,18 @@ def _flatten_processors(processors):
             yield from _flatten_processors(proc)
 
 
-def process_args(args, processors, *, keys=None):
+def process_args(args, argv, processors, *, keys=None):
     processors = _flatten_processors(processors)
     ns = vars(args)
     extracted = {}
     if keys is None:
         for process_args in processors:
-            for key in process_args(args):
+            for key in process_args(args, argv=argv):
                 extracted[key] = ns.pop(key)
     else:
         remainder = set(keys)
         for process_args in processors:
-            hanging = process_args(args)
+            hanging = process_args(args, argv=argv)
             if isinstance(hanging, str):
                 hanging = [hanging]
             for key in hanging or ():
@@ -510,8 +510,8 @@ def process_args(args, argv, processors, *, keys=None):
     return extracted
 
 
-def process_args_by_key(args, processors, keys):
-    extracted = process_args(args, processors, keys=keys)
+def process_args_by_key(args, argv, processors, keys):
+    extracted = process_args(args, argv, processors, keys=keys)
     return [extracted[key] for key in keys]
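The scriptutil change above threads the raw argv through every process_args hook, which now accepts a keyword-only argv. A minimal hook in the same style, purely for illustration (add_example_cli and its option are made up, not part of the tool):

    def add_example_cli(parser):
        # hypothetical option, for illustration only
        parser.add_argument('--example', action='store_true')

        def process_args(args, *, argv=None):
            # argv is now available to every hook, e.g. for error messages;
            # returning a key (or keys) tells process_args() above to pop
            # those entries out of the argparse namespace.
            if args.example and argv:
                print('raw command line:', argv)
        return process_args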
diff --git a/Tools/c-analyzer/c_common/tables.py b/Tools/c-analyzer/c_common/tables.py
index 411152e..85b5019 100644
--- a/Tools/c-analyzer/c_common/tables.py
+++ b/Tools/c-analyzer/c_common/tables.py
@@ -1,4 +1,6 @@
 import csv
+import re
+import textwrap
 
 from . import NOT_SET, strutil, fsutil
 
@@ -212,3 +214,177 @@ def _normalize_table_file_props(header, sep):
     else:
         sep = None
     return header, sep
+
+
+##################################
+# stdout tables
+
+WIDTH = 20
+
+
+def resolve_columns(specs):
+    if isinstance(specs, str):
+        specs = specs.replace(',', ' ').strip().split()
+    return _resolve_colspecs(specs)
+
+
+def build_table(specs, *, sep=' ', defaultwidth=None):
+    columns = resolve_columns(specs)
+    return _build_table(columns, sep=sep, defaultwidth=defaultwidth)
+
+
+_COLSPEC_RE = re.compile(textwrap.dedent(r'''
+    ^
+    (?:
+        [[]
+        (
+            (?: [^\s\]] [^\]]* )?
+            [^\s\]]
+        )  # <label>
+        []]
+    )?
+    ( \w+ )  # <field>
+    (?:
+        (?:
+            :
+            ( [<^>] )  # <align>
+            ( \d+ )  # <width1>
+        )
+        |
+        (?:
+            (?:
+                :
+                ( \d+ )  # <width2>
+            )?
+            (?:
+                :
+                ( .*? )  # <fmt>
+            )?
+        )
+    )?
+    $
+'''), re.VERBOSE)
+
+
+def _parse_fmt(fmt):
+    if fmt.startswith(tuple('<^>')):
+        align = fmt[0]
+        width = fmt[1:]
+        if width.isdigit():
+            return int(width), align
+    return None, None
+
+
+def _parse_colspec(raw):
+    m = _COLSPEC_RE.match(raw)
+    if not m:
+        return None
+    label, field, align, width1, width2, fmt = m.groups()
+    if not label:
+        label = field
+    if width1:
+        width = None
+        fmt = f'{align}{width1}'
+    elif width2:
+        width = int(width2)
+        if fmt:
+            _width, _ = _parse_fmt(fmt)
+            if _width == width:
+                width = None
+    else:
+        width = None
+    return field, label, width, fmt
+
+
+def _normalize_colspec(spec):
+    if len(spec) == 1:
+        raw, = spec
+        return _resolve_colspec(raw)
+
+    if len(spec) == 4:
+        label, field, width, fmt = spec
+        if width:
+            fmt = f'{width}:{fmt}' if fmt else width
+    elif len(spec) == 3:
+        label, field, fmt = spec
+        if not field:
+            label, field = None, label
+        elif not isinstance(field, str) or not field.isidentifier():
+            fmt = f'{field}:{fmt}' if fmt else field
+            label, field = None, label
+    elif len(spec) == 2:
+        label = None
+        field, fmt = spec
+        if not field:
+            field, fmt = fmt, None
+        elif not field.isidentifier() or fmt.isidentifier():
+            label, field = field, fmt
+    else:
+        raise NotImplementedError
+
+    fmt = f':{fmt}' if fmt else ''
+    if label:
+        return _parse_colspec(f'[{label}]{field}{fmt}')
+    else:
+        return _parse_colspec(f'{field}{fmt}')
+
+
+def _resolve_colspec(raw):
+    if isinstance(raw, str):
+        spec = _parse_colspec(raw)
+    else:
+        spec = _normalize_colspec(raw)
+    if spec is None:
+        raise ValueError(f'unsupported column spec {raw!r}')
+    return spec
+
+
+def _resolve_colspecs(columns):
+    parsed = []
+    for raw in columns:
+        column = _resolve_colspec(raw)
+        parsed.append(column)
+    return parsed
+
+
+def _resolve_width(spec, defaultwidth):
+    _, label, width, fmt = spec
+    if width:
+        if not isinstance(width, int):
+            raise NotImplementedError
+        return width
+    elif width and fmt:
+        width, _ = _parse_fmt(fmt)
+        if width:
+            return width
+
+    if not defaultwidth:
+        return WIDTH
+    elif not hasattr(defaultwidth, 'get'):
+        return defaultwidth or WIDTH
+
+    defaultwidths = defaultwidth
+    defaultwidth = defaultwidths.get(None) or WIDTH
+    return defaultwidths.get(label) or defaultwidth
+
+
+def _build_table(columns, *, sep=' ', defaultwidth=None):
+    header = []
+    div = []
+    rowfmt = []
+    for spec in columns:
+        field, label, _, colfmt = spec
+        width = _resolve_width(spec, defaultwidth)
+        if colfmt:
+            colfmt = f':{colfmt}'
+        else:
+            colfmt = f':{width}'
+
+        header.append(f' {{:^{width}}} '.format(label))
+        div.append('-' * (width + 2))
+        rowfmt.append(f' {{{field}{colfmt}}} ')
+    return (
+        sep.join(header),
+        sep.join(div),
+        sep.join(rowfmt),
+    )
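The new stdout-table helpers revolve around the small "[label]field:width" spec language matched by _COLSPEC_RE. A usage sketch with made-up values:

    from c_common.tables import build_table

    # One spec per column; a single string may hold several
    # comma/space-separated specs.
    header, div, rowfmt = build_table('filename:30 name:25 level:10')
    print(header)
    print(div)
    print(rowfmt.format(filename='Include/object.h',
                        name='PyObject_Repr', level='stable'))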
diff --git a/Tools/c-analyzer/c_parser/__main__.py b/Tools/c-analyzer/c_parser/__main__.py
index 539cec5..78f47a1 100644
--- a/Tools/c-analyzer/c_parser/__main__.py
+++ b/Tools/c-analyzer/c_parser/__main__.py
@@ -149,7 +149,7 @@ def add_output_cli(parser):
     parser.add_argument('--showfwd', action='store_true', default=None)
     parser.add_argument('--no-showfwd', dest='showfwd', action='store_false', default=None)
 
-    def process_args(args):
+    def process_args(args, *, argv=None):
         pass
     return process_args
 
@@ -243,6 +243,7 @@ def parse_args(argv=sys.argv[1:], prog=sys.argv[0], *, subset='parse'):
 
     verbosity, traceback_cm = process_args_by_key(
         args,
+        argv,
         processors[cmd],
         ['verbosity', 'traceback_cm'],
     )
diff --git a/Tools/c-analyzer/c_parser/preprocessor/__main__.py b/Tools/c-analyzer/c_parser/preprocessor/__main__.py
index a605430..bfc6194 100644
--- a/Tools/c-analyzer/c_parser/preprocessor/__main__.py
+++ b/Tools/c-analyzer/c_parser/preprocessor/__main__.py
@@ -40,10 +40,10 @@ def add_common_cli(parser, *, get_preprocessor=_get_preprocessor):
     parser.add_argument('--same', action='append')
     process_fail_arg = add_failure_filtering_cli(parser, FAIL)
 
-    def process_args(args):
+    def process_args(args, *, argv):
         ns = vars(args)
-        process_fail_arg(args)
+        process_fail_arg(args, argv=argv)
 
         ignore_exc = ns.pop('ignore_exc')
         # We later pass ignore_exc to _get_preprocessor().
 
@@ -174,6 +174,7 @@ def parse_args(argv=sys.argv[1:], prog=sys.argv[0], *,
 
     verbosity, traceback_cm = process_args_by_key(
         args,
+        argv,
         processors[cmd],
         ['verbosity', 'traceback_cm'],
     )
diff --git a/Tools/c-analyzer/check-c-globals.py b/Tools/c-analyzer/check-c-globals.py
index 3fe2bdc..b1364a6 100644
--- a/Tools/c-analyzer/check-c-globals.py
+++ b/Tools/c-analyzer/check-c-globals.py
@@ -22,6 +22,7 @@ def parse_args():
     cmd = 'check'
     verbosity, traceback_cm = process_args_by_key(
         args,
+        argv,
         processors,
         ['verbosity', 'traceback_cm'],
     )
diff --git a/Tools/c-analyzer/cpython/__main__.py b/Tools/c-analyzer/cpython/__main__.py
index 6d78af2..9d29b13 100644
--- a/Tools/c-analyzer/cpython/__main__.py
+++ b/Tools/c-analyzer/cpython/__main__.py
@@ -3,11 +3,14 @@ import sys
 
 from c_common.fsutil import expand_filenames, iter_files_by_suffix
 from c_common.scriptutil import (
+    VERBOSITY,
     add_verbosity_cli,
     add_traceback_cli,
     add_commands_cli,
     add_kind_filtering_cli,
     add_files_cli,
+    add_progress_cli,
+    main_for_filenames,
     process_args_by_key,
     configure_logger,
     get_prog,
@@ -17,7 +20,7 @@ import c_parser.__main__ as c_parser
 import c_analyzer.__main__ as c_analyzer
 import c_analyzer as _c_analyzer
 from c_analyzer.info import UNKNOWN
-from . import _analyzer, _parser, REPO_ROOT
+from . import _analyzer, _capi, _files, _parser, REPO_ROOT
 
 
 logger = logging.getLogger(__name__)
@@ -25,9 +28,9 @@ logger = logging.getLogger(__name__)
 
 def _resolve_filenames(filenames):
     if filenames:
-        resolved = (_parser.resolve_filename(f) for f in filenames)
+        resolved = (_files.resolve_filename(f) for f in filenames)
     else:
-        resolved = _parser.iter_filenames()
+        resolved = _files.iter_filenames()
     return resolved
 
 
@@ -204,6 +207,95 @@ def cmd_data(datacmd, **kwargs):
     )
 
 
+def _cli_capi(parser):
+    parser.add_argument('--levels', action='append', metavar='LEVEL[,...]')
+    parser.add_argument(f'--public', dest='levels',
+                        action='append_const', const='public')
+    parser.add_argument(f'--no-public', dest='levels',
+                        action='append_const', const='no-public')
+    for level in _capi.LEVELS:
+        parser.add_argument(f'--{level}', dest='levels',
+                            action='append_const', const=level)
+    def process_levels(args, *, argv=None):
+        levels = []
+        for raw in args.levels or ():
+            for level in raw.replace(',', ' ').strip().split():
+                if level == 'public':
+                    levels.append('stable')
+                    levels.append('cpython')
+                elif level == 'no-public':
+                    levels.append('private')
+                    levels.append('internal')
+                elif level in _capi.LEVELS:
+                    levels.append(level)
+                else:
+                    parser.error(f'expected LEVEL to be one of {sorted(_capi.LEVELS)}, got {level!r}')
+        args.levels = set(levels)
+
+    parser.add_argument('--kinds', action='append', metavar='KIND[,...]')
+    for kind in _capi.KINDS:
+        parser.add_argument(f'--{kind}', dest='kinds',
+                            action='append_const', const=kind)
+    def process_kinds(args, *, argv=None):
+        kinds = []
+        for raw in args.kinds or ():
+            for kind in raw.replace(',', ' ').strip().split():
+                if kind in _capi.KINDS:
+                    kinds.append(kind)
+                else:
+                    parser.error(f'expected KIND to be one of {sorted(_capi.KINDS)}, got {kind!r}')
+        args.kinds = set(kinds)
+
+    parser.add_argument('--group-by', dest='groupby',
+                        choices=['level', 'kind'])
+
+    parser.add_argument('--format', default='brief')
+    parser.add_argument('--summary', dest='format',
+                        action='store_const', const='summary')
+    def process_format(args, *, argv=None):
+        orig = args.format
+        args.format = _capi.resolve_format(args.format)
+        if isinstance(args.format, str):
+            if args.format not in _capi._FORMATS:
+                parser.error(f'unsupported format {orig!r}')
+
+    parser.add_argument('filenames', nargs='*', metavar='FILENAME')
+    process_progress = add_progress_cli(parser)
+
+    return [
+        process_levels,
+        process_format,
+        process_progress,
+    ]
+
+
+def cmd_capi(filenames=None, *,
+             levels=None,
+             kinds=None,
+             groupby='kind',
+             format='brief',
+             track_progress=None,
+             verbosity=VERBOSITY,
+             **kwargs
+             ):
+    render = _capi.get_renderer(format)
+
+    filenames = _files.iter_header_files(filenames, levels=levels)
+    #filenames = (file for file, _ in main_for_filenames(filenames))
+    if track_progress is not None:
+        filenames = track_progress(filenames)
+    items = _capi.iter_capi(filenames)
+    if levels:
+        items = (item for item in items if item.level in levels)
+    if kinds:
+        items = (item for item in items if item.kind in kinds)
+
+    lines = render(items, groupby=groupby, verbose=verbosity > VERBOSITY)
+    print()
+    for line in lines:
+        print(line)
+
+
 # We do not define any other cmd_*() handlers here,
 # favoring those defined elsewhere.
 
@@ -228,6 +320,11 @@ COMMANDS = {
         [_cli_data],
         cmd_data,
     ),
+    'capi': (
+        'inspect the C-API',
+        [_cli_capi],
+        cmd_capi,
+    ),
 }
 
 
@@ -263,6 +360,7 @@ def parse_args(argv=sys.argv[1:], prog=None, *, subset=None):
 
     verbosity, traceback_cm = process_args_by_key(
         args,
+        argv,
         processors[cmd],
         ['verbosity', 'traceback_cm'],
     )
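Programmatically, the filtering the new flags provide boils down to a couple of generator expressions, as cmd_capi() above shows. A sketch of the --public case (which process_levels() expands to the stable and cpython levels), assuming Tools/c-analyzer is importable:

    from cpython import _capi

    # Roughly `capi --public` with the default brief format:
    levels = {'stable', 'cpython'}
    items = (item for item in _capi.iter_capi() if item.level in levels)
    for line in _capi.get_renderer('brief')(items, groupby='kind'):
        print(line)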
diff --git a/Tools/c-analyzer/cpython/_capi.py b/Tools/c-analyzer/cpython/_capi.py
new file mode 100644
index 0000000..38d7cd3
--- /dev/null
+++ b/Tools/c-analyzer/cpython/_capi.py
@@ -0,0 +1,479 @@
+from collections import namedtuple
+import os
+import os.path
+import re
+import textwrap
+
+from c_common.tables import build_table, resolve_columns
+from c_parser.parser._regexes import _ind
+from ._files import iter_header_files, resolve_filename
+from . import REPO_ROOT
+
+
+INCLUDE_ROOT = os.path.join(REPO_ROOT, 'Include')
+INCLUDE_CPYTHON = os.path.join(INCLUDE_ROOT, 'cpython')
+INCLUDE_INTERNAL = os.path.join(INCLUDE_ROOT, 'internal')
+
+_MAYBE_NESTED_PARENS = textwrap.dedent(r'''
+    (?:
+        (?: [^(]* [(] [^()]* [)] )* [^(]*
+    )
+''')
+
+CAPI_FUNC = textwrap.dedent(rf'''
+    (?:
+        ^
+        \s*
+        PyAPI_FUNC \s*
+        [(]
+        {_ind(_MAYBE_NESTED_PARENS, 2)}
+        [)] \s*
+        (\w+)  # <func>
+        \s* [(]
+    )
+''')
+CAPI_DATA = textwrap.dedent(rf'''
+    (?:
+        ^
+        \s*
+        PyAPI_DATA \s*
+        [(]
+        {_ind(_MAYBE_NESTED_PARENS, 2)}
+        [)] \s*
+        (\w+)  # <data>
+        \b [^(]
+    )
+''')
+CAPI_INLINE = textwrap.dedent(r'''
+    (?:
+        ^
+        \s*
+        static \s+ inline \s+
+        .*?
+        \s+
+        ( \w+ )  # <inline>
+        \s* [(]
+    )
+''')
+CAPI_MACRO = textwrap.dedent(r'''
+    (?:
+        (\w+)  # <macro>
+        [(]
+    )
+''')
+CAPI_CONSTANT = textwrap.dedent(r'''
+    (?:
+        (\w+)  # <constant>
+        \s+ [^(]
+    )
+''')
+CAPI_DEFINE = textwrap.dedent(rf'''
+    (?:
+        ^
+        \s* [#] \s* define \s+
+        (?:
+            {_ind(CAPI_MACRO, 3)}
+            |
+            {_ind(CAPI_CONSTANT, 3)}
+            |
+            (?:
+                # ignored
+                \w+  # <defined_name>
+                \s*
+                $
+            )
+        )
+    )
+''')
+CAPI_RE = re.compile(textwrap.dedent(rf'''
+    (?:
+        {_ind(CAPI_FUNC, 2)}
+        |
+        {_ind(CAPI_DATA, 2)}
+        |
+        {_ind(CAPI_INLINE, 2)}
+        |
+        {_ind(CAPI_DEFINE, 2)}
+    )
+'''), re.VERBOSE)
+
+KINDS = [
+    'func',
+    'data',
+    'inline',
+    'macro',
+    'constant',
+]
+
+
+def _parse_line(line, prev=None):
+    last = line
+    if prev:
+        if not prev.endswith(os.linesep):
+            prev += os.linesep
+        line = prev + line
+    m = CAPI_RE.match(line)
+    if not m:
+        if not prev and line.startswith('static inline '):
+            return line  # the new "prev"
+        #if 'PyAPI_' in line or '#define ' in line or ' define ' in line:
+        #    print(line)
+        return None
+    results = zip(KINDS, m.groups())
+    for kind, name in results:
+        if name:
+            clean = last.split('//')[0].strip()
+            if clean.endswith('*/'):
+                clean = clean.split('/*')[0].rstrip()
+            if kind == 'macro' or kind == 'constant':
+                if clean.endswith('\\'):
+                    return line  # the new "prev"
+            elif kind == 'inline':
+                if not prev:
+                    if not clean.endswith('}'):
+                        return line  # the new "prev"
+                elif clean != '}':
+                    return line  # the new "prev"
+            elif not clean.endswith(';'):
+                return line  # the new "prev"
+            return name, kind
+    # It was a plain #define.
+    return None
+
+
+LEVELS = {
+    'stable',
+    'cpython',
+    'private',
+    'internal',
+}
+
+def _get_level(filename, name, *,
+               _cpython=INCLUDE_CPYTHON + os.path.sep,
+               _internal=INCLUDE_INTERNAL + os.path.sep,
+               ):
+    if filename.startswith(_internal):
+        return 'internal'
+    elif name.startswith('_'):
+        return 'private'
+    elif os.path.dirname(filename) == INCLUDE_ROOT:
+        return 'stable'
+    elif filename.startswith(_cpython):
+        return 'cpython'
+    else:
+        raise NotImplementedError
+        #return '???'
+
+
+class CAPIItem(namedtuple('CAPIItem', 'file lno name kind level')):
+
+    @classmethod
+    def from_line(cls, line, filename, lno, prev=None):
+        parsed = _parse_line(line, prev)
+        if not parsed:
+            return None, None
+        if isinstance(parsed, str):
+            # incomplete
+            return None, parsed
+        name, kind = parsed
+        level = _get_level(filename, name)
+        self = cls(filename, lno, name, kind, level)
+        if prev:
+            self._text = (prev + line).rstrip().splitlines()
+        else:
+            self._text = [line.rstrip()]
+        return self, None
+
+    @property
+    def relfile(self):
+        return self.file[len(REPO_ROOT) + 1:]
+
+    @property
+    def text(self):
+        try:
+            return self._text
+        except AttributeError:
+            # XXX Actually read the text from disk?
+            self._text = []
+            if self.kind == 'data':
+                self._text = [
+                    f'PyAPI_DATA(...) {self.name}',
+                ]
+            elif self.kind == 'func':
+                self._text = [
+                    f'PyAPI_FUNC(...) {self.name}(...);',
+                ]
+            elif self.kind == 'inline':
+                self._text = [
+                    f'static inline {self.name}(...);',
+                ]
+            elif self.kind == 'macro':
+                self._text = [
+                    f'#define {self.name}(...) \\',
+                    f'    ...',
+                ]
+            elif self.kind == 'constant':
+                self._text = [
+                    f'#define {self.name} ...',
+                ]
+            else:
+                raise NotImplementedError
+
+            return self._text
+
+
+def _parse_groupby(raw):
+    if not raw:
+        raw = 'kind'
+
+    if isinstance(raw, str):
+        groupby = raw.replace(',', ' ').strip().split()
+    else:
+        raise NotImplementedError
+
+    if not all(v in ('kind', 'level') for v in groupby):
+        raise ValueError(f'invalid groupby value {raw!r}')
+    return groupby
+
+
+def summarize(items, *, groupby='kind'):
+    summary = {}
+
+    groupby = _parse_groupby(groupby)[0]
+    if groupby == 'kind':
+        outers = KINDS
+        inners = LEVELS
+        def increment(item):
+            summary[item.kind][item.level] += 1
+    elif groupby == 'level':
+        outers = LEVELS
+        inners = KINDS
+        def increment(item):
+            summary[item.level][item.kind] += 1
+    else:
+        raise NotImplementedError
+
+    for outer in outers:
+        summary[outer] = _outer = {}
+        for inner in inners:
+            _outer[inner] = 0
+    for item in items:
+        increment(item)
+
+    return summary
+
+
+def _parse_capi(lines, filename):
+    if isinstance(lines, str):
+        lines = lines.splitlines()
+    prev = None
+    for lno, line in enumerate(lines, 1):
+        parsed, prev = CAPIItem.from_line(line, filename, lno, prev)
+        if parsed:
+            yield parsed
+    if prev:
+        parsed, prev = CAPIItem.from_line('', filename, lno, prev)
+        if parsed:
+            yield parsed
+        if prev:
+            print('incomplete match:')
+            print(filename)
+            print(prev)
+            raise Exception
+
+
+def iter_capi(filenames=None):
+    for filename in iter_header_files(filenames):
+        with open(filename) as infile:
+            for item in _parse_capi(infile, filename):
+                yield item
+
+
+def _collate(items, groupby):
+    groupby = _parse_groupby(groupby)[0]
+    maxfilename = maxname = maxkind = maxlevel = 0
+    collated = {}
+    for item in items:
+        key = getattr(item, groupby)
+        if key in collated:
+            collated[key].append(item)
+        else:
+            collated[key] = [item]
+        maxfilename = max(len(item.relfile), maxfilename)
+        maxname = max(len(item.name), maxname)
+        maxkind = max(len(item.kind), maxkind)
+        maxlevel = max(len(item.level), maxlevel)
+    maxextra = {
+        'kind': maxkind,
+        'level': maxlevel,
+    }
+    return collated, groupby, maxfilename, maxname, maxextra
+
+
+##################################
+# CLI rendering
+
+_LEVEL_MARKERS = {
+    'S': 'stable',
+    'C': 'cpython',
+    'P': 'private',
+    'I': 'internal',
+}
+_KIND_MARKERS = {
+    'F': 'func',
+    'D': 'data',
+    'I': 'inline',
+    'M': 'macro',
+    'C': 'constant',
+}
+
+
+def resolve_format(format):
+    if not format:
+        return 'brief'
+    elif isinstance(format, str) and format in _FORMATS:
+        return format
+    else:
+        return resolve_columns(format)
+
+
+def get_renderer(format):
+    format = resolve_format(format)
+    if isinstance(format, str):
+        try:
+            return _FORMATS[format]
+        except KeyError:
+            raise ValueError(f'unsupported format {format!r}')
+    else:
+        def render(items, **kwargs):
+            return render_table(items, columns=format, **kwargs)
+        return render
+
+
+def render_table(items, *, columns=None, groupby='kind', verbose=False):
+    if groupby:
+        collated, groupby, maxfilename, maxname, maxextra = _collate(items, groupby)
+        if groupby == 'kind':
+            groups = KINDS
+            extras = ['level']
+            markers = {'level': _LEVEL_MARKERS}
+        elif groupby == 'level':
+            groups = LEVELS
+            extras = ['kind']
+            markers = {'kind': _KIND_MARKERS}
+        else:
+            raise NotImplementedError
+    else:
+        # XXX Support no grouping?
+        raise NotImplementedError
+
+    if columns:
+        def get_extra(item):
+            return {extra: getattr(item, extra)
+                    for extra in ('kind', 'level')}
+    else:
+        if verbose:
+            maxextra['kind'] = max(len(kind) for kind in KINDS)
+            maxextra['level'] = max(len(level) for level in LEVELS)
+            extracols = [f'{extra}:{maxextra[extra]}'
+                         for extra in extras]
+            def get_extra(item):
+                return {extra: getattr(item, extra)
+                        for extra in extras}
+        elif len(extras) == 1:
+            extra, = extras
+            extracols = [f'{m}:1' for m in markers[extra]]
+            def get_extra(item):
+                return {m: m if getattr(item, extra) == markers[extra][m] else ''
+                        for m in markers[extra]}
+        else:
+            raise NotImplementedError
+            #extracols = [[f'{m}:1' for m in markers[extra]]
+            #             for extra in extras]
+            #def get_extra(item):
+            #    values = {}
+            #    for extra in extras:
+            #        cur = markers[extra]
+            #        for m in cur:
+            #            values[m] = m if getattr(item, m) == cur[m] else ''
+            #    return values
+        columns = [
+            f'filename:{maxfilename}',
+            f'name:{maxname}',
+            *extracols,
+        ]
+    header, div, fmt = build_table(columns)
+
+    total = 0
+    for group in groups:
+        if group not in collated:
+            continue
+        yield ''
+        yield f' === {group} ==='
+        yield ''
+        yield header
+        yield div
+        for item in collated[group]:
+            yield fmt.format(
+                filename=item.relfile,
+                name=item.name,
+                **get_extra(item),
+            )
+        yield div
+        subtotal = len(collated[group])
+        yield f'  sub-total: {subtotal}'
+        total += subtotal
+    yield ''
+    yield f'total: {total}'
+
+
+def render_full(items, *, groupby=None, verbose=False):
+    if groupby:
+        collated, groupby, _, _, _ = _collate(items, groupby)
+        for group, grouped in collated.items():
+            yield '#' * 25
+            yield f'# {group} ({len(grouped)})'
+            yield '#' * 25
+            yield ''
+            if not grouped:
+                continue
+            for item in grouped:
+                yield from _render_item_full(item, groupby, verbose)
+                yield ''
+    else:
+        for item in items:
+            yield from _render_item_full(item, None, verbose)
+            yield ''
+
+
+def _render_item_full(item, groupby, verbose):
+    yield item.name
+    yield f'  {"filename:":10} {item.relfile}'
+    for extra in ('kind', 'level'):
+        #if groupby != extra:
+        yield f'  {extra+":":10} {getattr(item, extra)}'
+    if verbose:
+        print('  ---------------------------------------')
+        for lno, line in enumerate(item.text, item.lno):
+            print(f'  | {lno:3} {line}')
+        print('  ---------------------------------------')
+
+
+def render_summary(items, *, groupby='kind', verbose=False):
+    total = 0
+    summary = summarize(items, groupby=groupby)
+    # XXX Stabilize the sorting to match KINDS/LEVELS.
+    for outer, counts in summary.items():
+        subtotal = sum(c for _, c in counts.items())
+        yield f'{outer + ":":20} ({subtotal})'
+        for inner, count in counts.items():
+            yield f'   {inner + ":":9} {count}'
+        total += subtotal
+    yield f'{"total:":20} ({total})'
+
+
+_FORMATS = {
+    'brief': render_table,
+    'full': render_full,
+    'summary': render_summary,
+}
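To make the parsing machinery above concrete, here is what it yields for a few made-up declarations (poking at the private helpers purely for illustration; none of these names exist in the real headers):

    from cpython import _capi

    # Declarations of the shapes CAPI_RE is meant to recognize:
    print(_capi._parse_line('PyAPI_FUNC(PyObject *) PyExample_New(void);'))
    # -> ('PyExample_New', 'func')
    print(_capi._parse_line('PyAPI_DATA(PyTypeObject) PyExample_Type;'))
    # -> ('PyExample_Type', 'data')
    print(_capi._parse_line('#define PyExample_Check(op) (1)'))
    # -> ('PyExample_Check', 'macro')

    # CAPIItem records then feed the summary table:
    items = [
        _capi.CAPIItem('Include/object.h', 1, 'PyExample_New', 'func', 'stable'),
        _capi.CAPIItem('Include/cpython/object.h', 2, '_PyExample_Dump', 'func', 'private'),
    ]
    print(_capi.summarize(items, groupby='level')['stable']['func'])  # -> 1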
diff --git a/Tools/c-analyzer/cpython/_files.py b/Tools/c-analyzer/cpython/_files.py
new file mode 100644
index 0000000..3e39788
--- /dev/null
+++ b/Tools/c-analyzer/cpython/_files.py
@@ -0,0 +1,69 @@
+import os.path
+
+from c_common.fsutil import expand_filenames, iter_files_by_suffix
+from . import REPO_ROOT, INCLUDE_DIRS, SOURCE_DIRS
+
+
+GLOBS = [
+    'Include/*.h',
+    'Include/internal/*.h',
+    'Modules/**/*.h',
+    'Modules/**/*.c',
+    'Objects/**/*.h',
+    'Objects/**/*.c',
+    'Python/**/*.h',
+    'Parser/**/*.c',
+    'Python/**/*.h',
+    'Parser/**/*.c',
+]
+LEVEL_GLOBS = {
+    'stable': 'Include/*.h',
+    'cpython': 'Include/cpython/*.h',
+    'internal': 'Include/internal/*.h',
+}
+
+
+def resolve_filename(filename):
+    orig = filename
+    filename = os.path.normcase(os.path.normpath(filename))
+    if os.path.isabs(filename):
+        if os.path.relpath(filename, REPO_ROOT).startswith('.'):
+            raise Exception(f'{orig!r} is outside the repo ({REPO_ROOT})')
+        return filename
+    else:
+        return os.path.join(REPO_ROOT, filename)
+
+
+def iter_filenames(*, search=False):
+    if search:
+        yield from iter_files_by_suffix(INCLUDE_DIRS, ('.h',))
+        yield from iter_files_by_suffix(SOURCE_DIRS, ('.c',))
+    else:
+        globs = (os.path.join(REPO_ROOT, file) for file in GLOBS)
+        yield from expand_filenames(globs)
+
+
+def iter_header_files(filenames=None, *, levels=None):
+    if not filenames:
+        if levels:
+            levels = set(levels)
+            if 'private' in levels:
+                levels.add('stable')
+                levels.add('cpython')
+            for level, glob in LEVEL_GLOBS.items():
+                if level in levels:
+                    yield from expand_filenames([glob])
+        else:
+            yield from iter_files_by_suffix(INCLUDE_DIRS, ('.h',))
+        return
+
+    for filename in filenames:
+        orig = filename
+        filename = resolve_filename(filename)
+        if filename.endswith(os.path.sep):
+            yield from iter_files_by_suffix(INCLUDE_DIRS, ('.h',))
+        elif filename.endswith('.h'):
+            yield filename
+        else:
+            # XXX Log it and continue instead?
+            raise ValueError(f'expected .h file, got {orig!r}')
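A sketch of the new helper in use; note that the per-level globs are expanded relative to the current directory, so this assumes it runs from the repository root:

    from cpython import _files

    # Just the public headers, i.e. what --public selects:
    for filename in _files.iter_header_files(levels={'stable', 'cpython'}):
        print(filename)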
diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py
index eef7584..ef06a9f 100644
--- a/Tools/c-analyzer/cpython/_parser.py
+++ b/Tools/c-analyzer/cpython/_parser.py
@@ -1,7 +1,6 @@
 import os.path
 import re
 
-from c_common.fsutil import expand_filenames, iter_files_by_suffix
 from c_parser.preprocessor import (
     get_preprocessor as _get_preprocessor,
 )
@@ -9,7 +8,7 @@ from c_parser import (
     parse_file as _parse_file,
     parse_files as _parse_files,
 )
-from . import REPO_ROOT, INCLUDE_DIRS, SOURCE_DIRS
+from . import REPO_ROOT
 
 
 GLOB_ALL = '**/*'
@@ -43,19 +42,6 @@ def clean_lines(text):
 @end=sh@
 '''
 
 
-GLOBS = [
-    'Include/*.h',
-    'Include/internal/*.h',
-    'Modules/**/*.h',
-    'Modules/**/*.c',
-    'Objects/**/*.h',
-    'Objects/**/*.c',
-    'Python/**/*.h',
-    'Parser/**/*.c',
-    'Python/**/*.h',
-    'Parser/**/*.c',
-]
-
 EXCLUDED = clean_lines('''
 # @begin=conf@
@@ -280,26 +266,6 @@ SAME = [
 ]
 
 
-def resolve_filename(filename):
-    orig = filename
-    filename = os.path.normcase(os.path.normpath(filename))
-    if os.path.isabs(filename):
-        if os.path.relpath(filename, REPO_ROOT).startswith('.'):
-            raise Exception(f'{orig!r} is outside the repo ({REPO_ROOT})')
-        return filename
-    else:
-        return os.path.join(REPO_ROOT, filename)
-
-
-def iter_filenames(*, search=False):
-    if search:
-        yield from iter_files_by_suffix(INCLUDE_DIRS, ('.h',))
-        yield from iter_files_by_suffix(SOURCE_DIRS, ('.c',))
-    else:
-        globs = (os.path.join(REPO_ROOT, file) for file in GLOBS)
-        yield from expand_filenames(globs)
-
-
 def get_preprocessor(*,
                      file_macros=None,
                      file_incldirs=None,