from collections import namedtuple import glob import os.path import re import shutil import sys import subprocess VERBOSITY = 2 C_GLOBALS_DIR = os.path.abspath(os.path.dirname(__file__)) TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR) ROOT_DIR = os.path.dirname(TOOLS_DIR) GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt') SOURCE_DIRS = ['Include', 'Objects', 'Modules', 'Parser', 'Python'] CAPI_REGEX = re.compile(r'^ *PyAPI_DATA\([^)]*\) \W*(_?Py\w+(?:, \w+)*\w).*;.*$') IGNORED_VARS = { '_DYNAMIC', '_GLOBAL_OFFSET_TABLE_', '__JCR_LIST__', '__JCR_END__', '__TMC_END__', '__bss_start', '__data_start', '__dso_handle', '_edata', '_end', } def find_capi_vars(root): capi_vars = {} for dirname in SOURCE_DIRS: for filename in glob.glob(os.path.join( glob.escape(os.path.join(ROOT_DIR, dirname)), '**/*.[hc]'), recursive=True): with open(filename) as file: for name in _find_capi_vars(file): if name in capi_vars: assert not filename.endswith('.c') assert capi_vars[name].endswith('.c') capi_vars[name] = filename return capi_vars def _find_capi_vars(lines): for line in lines: if not line.startswith('PyAPI_DATA'): continue assert '{' not in line match = CAPI_REGEX.match(line) assert match names, = match.groups() for name in names.split(', '): yield name def _read_global_names(filename): # These variables are shared between all interpreters in the process. with open(filename) as file: return {line.partition('#')[0].strip() for line in file if line.strip() and not line.startswith('#')} def _is_global_var(name, globalnames): if _is_autogen_var(name): return True if _is_type_var(name): return True if _is_module(name): return True if _is_exception(name): return True if _is_compiler(name): return True return name in globalnames def _is_autogen_var(name): return ( name.startswith('PyId_') or '.' in name or # Objects/typeobject.c name.startswith('op_id.') or name.startswith('rop_id.') or # Python/graminit.c name.startswith('arcs_') or name.startswith('states_') ) def _is_type_var(name): if name.endswith(('Type', '_Type', '_type')): # XXX Always a static type? return True if name.endswith('_desc'): # for structseq types return True return ( name.startswith('doc_') or name.endswith(('_doc', '__doc__', '_docstring')) or name.endswith('_methods') or name.endswith('_fields') or name.endswith(('_memberlist', '_members')) or name.endswith('_slots') or name.endswith(('_getset', '_getsets', '_getsetlist')) or name.endswith('_as_mapping') or name.endswith('_as_number') or name.endswith('_as_sequence') or name.endswith('_as_buffer') or name.endswith('_as_async') ) def _is_module(name): if name.endswith(('_functions', 'Methods', '_Methods')): return True if name == 'module_def': return True if name == 'initialized': return True return name.endswith(('module', '_Module')) def _is_exception(name): # Other vars are enumerated in globals-core.txt. if not name.startswith(('PyExc_', '_PyExc_')): return False return name.endswith(('Error', 'Warning')) def _is_compiler(name): return ( # Python/Python-ast.c name.endswith('_type') or name.endswith('_singleton') or name.endswith('_attributes') ) class Var(namedtuple('Var', 'name kind scope capi filename')): @classmethod def parse_nm(cls, line, expected, ignored, capi_vars, globalnames): _, _, line = line.partition(' ') # strip off the address line = line.strip() kind, _, line = line.partition(' ') if kind in ignored or (): return None elif kind not in expected or (): raise RuntimeError('unsupported NM type {!r}'.format(kind)) name, _, filename = line.partition('\t') name = name.strip() if _is_autogen_var(name): return None if _is_global_var(name, globalnames): scope = 'global' else: scope = None capi = (name in capi_vars or ()) if filename: filename = os.path.relpath(filename.partition(':')[0]) return cls(name, kind, scope, capi, filename or '~???~') @property def external(self): return self.kind.isupper() def find_vars(root, globals_filename=GLOBALS_FILE): python = os.path.join(root, 'python') if not os.path.exists(python): raise RuntimeError('python binary missing (need to build it first?)') capi_vars = find_capi_vars(root) globalnames = _read_global_names(globals_filename) nm = shutil.which('nm') if nm is None: # XXX Use dumpbin.exe /SYMBOLS on Windows. raise NotImplementedError else: yield from (var for var in _find_var_symbols(python, nm, capi_vars, globalnames) if var.name not in IGNORED_VARS) NM_FUNCS = set('Tt') NM_PUBLIC_VARS = set('BD') NM_PRIVATE_VARS = set('bd') NM_VARS = NM_PUBLIC_VARS | NM_PRIVATE_VARS NM_DATA = set('Rr') NM_OTHER = set('ACGgiINpSsuUVvWw-?') NM_IGNORED = NM_FUNCS | NM_DATA | NM_OTHER def _find_var_symbols(python, nm, capi_vars, globalnames): args = [nm, '--line-numbers', python] out = subprocess.check_output(args) for line in out.decode('utf-8').splitlines(): var = Var.parse_nm(line, NM_VARS, NM_IGNORED, capi_vars, globalnames) if var is None: continue yield var ####################################### class Filter(namedtuple('Filter', 'name op value action')): @classmethod def parse(cls, raw): action = '+' if raw.startswith(('+', '-')): action = raw[0] raw = raw[1:] # XXX Support < and >? name, op, value = raw.partition('=') return cls(name, op, value, action) def check(self, var): value = getattr(var, self.name, None) if not self.op: matched = bool(value) elif self.op == '=': matched = (value == self.value) else: raise NotImplementedError if self.action == '+': return matched elif self.action == '-': return not matched else: raise NotImplementedError def filter_var(var, filters): for filter in filters: if not filter.check(var): return False return True def make_sort_key(spec): columns = [(col.strip('_'), '_' if col.startswith('_') else '') for col in spec] def sort_key(var): return tuple(getattr(var, col).lstrip(prefix) for col, prefix in columns) return sort_key def make_groups(allvars, spec): group = spec groups = {} for var in allvars: value = getattr(var, group) key = '{}: {}'.format(group, value) try: groupvars = groups[key] except KeyError: groupvars = groups[key] = [] groupvars.append(var) return groups def format_groups(groups, columns, fmts, widths): for group in sorted(groups): groupvars = groups[group] yield '', 0 yield ' # {}'.format(group), 0 yield from format_vars(groupvars, columns, fmts, widths) def format_vars(allvars, columns, fmts, widths): fmt = ' '.join(fmts[col] for col in columns) fmt = ' ' + fmt.replace(' ', ' ') + ' ' # for div margin header = fmt.replace(':', ':^').format(*(col.upper() for col in columns)) yield header, 0 div = ' '.join('-'*(widths[col]+2) for col in columns) yield div, 0 for var in allvars: values = (getattr(var, col) for col in columns) row = fmt.format(*('X' if val is True else val or '' for val in values)) yield row, 1 yield div, 0 ####################################### COLUMNS = 'name,external,capi,scope,filename' COLUMN_NAMES = COLUMNS.split(',') COLUMN_WIDTHS = {col: len(col) for col in COLUMN_NAMES} COLUMN_WIDTHS.update({ 'name': 50, 'scope': 7, 'filename': 40, }) COLUMN_FORMATS = {col: '{:%s}' % width for col, width in COLUMN_WIDTHS.items()} for col in COLUMN_FORMATS: if COLUMN_WIDTHS[col] == len(col): COLUMN_FORMATS[col] = COLUMN_FORMATS[col].replace(':', ':^') def _parse_filters_arg(raw, error): filters = [] for value in raw.split(','): value=value.strip() if not value: continue try: filter = Filter.parse(value) if filter.name not in COLUMN_NAMES: raise Exception('unsupported column {!r}'.format(filter.name)) except Exception as e: error('bad filter {!r}: {}'.format(raw, e)) filters.append(filter) return filters def _parse_columns_arg(raw, error): columns = raw.split(',') for column in columns: if column not in COLUMN_NAMES: error('unsupported column {!r}'.format(column)) return columns def _parse_sort_arg(raw, error): sort = raw.split(',') for column in sort: if column.lstrip('_') not in COLUMN_NAMES: error('unsupported column {!r}'.format(column)) return sort def _parse_group_arg(raw, error): if not raw: return raw group = raw if group not in COLUMN_NAMES: error('unsupported column {!r}'.format(group)) if group != 'filename': error('unsupported group {!r}'.format(group)) return group def parse_args(argv=None): if argv is None: argv = sys.argv[1:] import argparse parser = argparse.ArgumentParser() parser.add_argument('-v', '--verbose', action='count', default=0) parser.add_argument('-q', '--quiet', action='count', default=0) parser.add_argument('--filters', default='-scope', help='[[-][=]] ...') parser.add_argument('--columns', default=COLUMNS, help='a comma-separated list of columns to show') parser.add_argument('--sort', default='filename,_name', help='a comma-separated list of columns to sort') parser.add_argument('--group', help='group by the given column name (- to not group)') parser.add_argument('--rc-on-match', dest='rc', type=int) parser.add_argument('filename', nargs='?', default=GLOBALS_FILE) args = parser.parse_args(argv) verbose = vars(args).pop('verbose', 0) quiet = vars(args).pop('quiet', 0) args.verbosity = max(0, VERBOSITY + verbose - quiet) if args.sort.startswith('filename') and not args.group: args.group = 'filename' if args.rc is None: if '-scope=core' in args.filters or 'core' not in args.filters: args.rc = 0 else: args.rc = 1 args.filters = _parse_filters_arg(args.filters, parser.error) args.columns = _parse_columns_arg(args.columns, parser.error) args.sort = _parse_sort_arg(args.sort, parser.error) args.group = _parse_group_arg(args.group, parser.error) return args def main(root=ROOT_DIR, filename=GLOBALS_FILE, filters=None, columns=COLUMN_NAMES, sort=None, group=None, verbosity=VERBOSITY, rc=1): log = lambda msg: ... if verbosity >= 2: log = lambda msg: print(msg) allvars = (var for var in find_vars(root, filename) if filter_var(var, filters)) if sort: allvars = sorted(allvars, key=make_sort_key(sort)) if group: try: columns.remove(group) except ValueError: pass grouped = make_groups(allvars, group) lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS) else: lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS) total = 0 for line, count in lines: total += count log(line) log('\ntotal: {}'.format(total)) if total and rc: print('ERROR: found unsafe globals', file=sys.stderr) return rc return 0 if __name__ == '__main__': args = parse_args() sys.exit( main(**vars(args)))