summaryrefslogtreecommitdiffstats
path: root/Tools/c-globals/check-c-globals.py
diff options
context:
space:
mode:
Diffstat (limited to 'Tools/c-globals/check-c-globals.py')
-rw-r--r--Tools/c-globals/check-c-globals.py446
1 files changed, 446 insertions, 0 deletions
diff --git a/Tools/c-globals/check-c-globals.py b/Tools/c-globals/check-c-globals.py
new file mode 100644
index 0000000..1de69a8
--- /dev/null
+++ b/Tools/c-globals/check-c-globals.py
@@ -0,0 +1,446 @@
+
+from collections import namedtuple
+import glob
+import os.path
+import re
+import shutil
+import sys
+import subprocess
+
+
# Baseline verbosity; each -v raises it and each -q lowers it.
VERBOSITY = 2

# Directory layout, derived from where this script lives:
#   <root>/Tools/c-globals/check-c-globals.py
C_GLOBALS_DIR = os.path.abspath(os.path.dirname(__file__))
TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR)
ROOT_DIR = os.path.dirname(TOOLS_DIR)
# File listing the global variable names that are deliberately ignored.
GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt')

# Source sub-directories (relative to the root) scanned for declarations.
SOURCE_DIRS = ['Include', 'Objects', 'Modules', 'Parser', 'Python']

# Matches a "PyAPI_DATA(<type>) <name>[, <name>...];" declaration and
# captures the comma-separated list of declared names.
CAPI_REGEX = re.compile(
    r'^ *PyAPI_DATA\([^)]*\) \W*(_?Py\w+(?:, \w+)*\w).*;.*$')


# Symbol names that are always dropped from the results.
IGNORED_VARS = {
    '_DYNAMIC',
    '_GLOBAL_OFFSET_TABLE_',
    '__JCR_LIST__',
    '__JCR_END__',
    '__TMC_END__',
    '__bss_start',
    '__data_start',
    '__dso_handle',
    '_edata',
    '_end',
}
+
+
def find_capi_vars(root):
    """Map each PyAPI_DATA variable name to a file that declares it.

    Every ``*.h`` and ``*.c`` file below each of SOURCE_DIRS (relative to
    *root*) is scanned.  When a name is seen more than once, the later
    file replaces the earlier one in the returned dict.

    The scan is rooted at *root*; previously the argument was ignored and
    the module-level ROOT_DIR was always used.
    """
    capi_vars = {}
    for dirname in SOURCE_DIRS:
        pattern = os.path.join(root, dirname, '**/*.[hc]')
        for filename in glob.glob(pattern, recursive=True):
            with open(filename) as file:
                for name in _find_capi_vars(file):
                    if name in capi_vars:
                        # Sanity check: a repeated name is only expected
                        # when a non-.c file follows a .c file.
                        assert not filename.endswith('.c')
                        assert capi_vars[name].endswith('.c')
                    capi_vars[name] = filename
    return capi_vars
+
+
def _find_capi_vars(lines):
    """Yield every variable name declared via PyAPI_DATA in *lines*.

    Only lines that begin with ``PyAPI_DATA`` are considered.  Each such
    line must match CAPI_REGEX and must not contain ``{``; both
    conditions are enforced with assertions.
    """
    for text in lines:
        if text.startswith('PyAPI_DATA'):
            assert '{' not in text
            found = CAPI_REGEX.match(text)
            assert found
            (declared,) = found.groups()
            # One declaration may list several names: "a, b, c".
            yield from declared.split(', ')
+
+
+def _read_global_names(filename):
+ # These variables are shared between all interpreters in the process.
+ with open(filename) as file:
+ return {line.partition('#')[0].strip()
+ for line in file
+ if line.strip() and not line.startswith('#')}
+
+
def _is_global_var(name, globalnames):
    """Return True if *name* counts as a known global variable.

    A name qualifies when any of the naming heuristics accepts it, or
    when it is listed explicitly in *globalnames*.
    """
    heuristics = (
        _is_autogen_var,
        _is_type_var,
        _is_module,
        _is_exception,
        _is_compiler,
    )
    if any(accepts(name) for accepts in heuristics):
        return True
    return name in globalnames
+
+
+def _is_autogen_var(name):
+ return (
+ name.startswith('PyId_') or
+ '.' in name or
+ # Objects/typeobject.c
+ name.startswith('op_id.') or
+ name.startswith('rop_id.') or
+ # Python/graminit.c
+ name.startswith('arcs_') or
+ name.startswith('states_')
+ )
+
+
+def _is_type_var(name):
+ if name.endswith(('Type', '_Type', '_type')): # XXX Always a static type?
+ return True
+ if name.endswith('_desc'): # for structseq types
+ return True
+ return (
+ name.startswith('doc_') or
+ name.endswith(('_doc', '__doc__', '_docstring')) or
+ name.endswith('_methods') or
+ name.endswith('_fields') or
+ name.endswith(('_memberlist', '_members')) or
+ name.endswith('_slots') or
+ name.endswith(('_getset', '_getsets', '_getsetlist')) or
+ name.endswith('_as_mapping') or
+ name.endswith('_as_number') or
+ name.endswith('_as_sequence') or
+ name.endswith('_as_buffer') or
+ name.endswith('_as_async')
+ )
+
+
+def _is_module(name):
+ if name.endswith(('_functions', 'Methods', '_Methods')):
+ return True
+ if name == 'module_def':
+ return True
+ if name == 'initialized':
+ return True
+ return name.endswith(('module', '_Module'))
+
+
+def _is_exception(name):
+ # Other vars are enumerated in globals-core.txt.
+ if not name.startswith(('PyExc_', '_PyExc_')):
+ return False
+ return name.endswith(('Error', 'Warning'))
+
+
+def _is_compiler(name):
+ return (
+ # Python/Pythyon-ast.c
+ name.endswith('_type') or
+ name.endswith('_singleton') or
+ name.endswith('_attributes')
+ )
+
+
class Var(namedtuple('Var', 'name kind scope capi filename')):
    """A variable symbol reported by ``nm``.

    Fields:
      name     -- the symbol name
      kind     -- the single-letter nm symbol type
      scope    -- 'global' if the name is a recognized global, else None
      capi     -- True if the name was found among the PyAPI_DATA names
      filename -- the defining file, or '~???~' when nm reported none
    """

    @classmethod
    def parse_nm(cls, line, expected, ignored, capi_vars, globalnames):
        """Parse one line of ``nm --line-numbers`` output.

        Returns None for symbols whose kind is in *ignored* and for
        auto-generated names.  Raises RuntimeError if the kind is in
        neither *ignored* nor *expected*.  *expected*, *ignored* and
        *capi_vars* may each be None, which is treated as empty.
        """
        _, _, line = line.partition(' ')  # strip off the address
        line = line.strip()
        kind, _, line = line.partition(' ')
        # The parentheses matter: "kind in ignored or ()" would parse as
        # "(kind in ignored) or ()" and fail on a None container.
        if kind in (ignored or ()):
            return None
        elif kind not in (expected or ()):
            raise RuntimeError('unsupported NM type {!r}'.format(kind))

        # With --line-numbers the location follows the name after a tab.
        name, _, filename = line.partition('\t')
        name = name.strip()
        if _is_autogen_var(name):
            return None
        scope = 'global' if _is_global_var(name, globalnames) else None
        capi = name in (capi_vars or ())
        if filename:
            # Drop the ":<lineno>" suffix and make the path relative.
            filename = os.path.relpath(filename.partition(':')[0])
        return cls(name, kind, scope, capi, filename or '~???~')

    @property
    def external(self):
        """True if the nm symbol type is upper case."""
        return self.kind.isupper()
+
+
def find_vars(root, globals_filename=GLOBALS_FILE):
    """Yield a Var for each variable symbol in the built python binary.

    The binary is expected at ``<root>/python``.  Symbols named in
    IGNORED_VARS are dropped.  Because this is a generator, nothing
    happens (and nothing is raised) until iteration starts.

    Raises RuntimeError if the binary does not exist and
    NotImplementedError if no ``nm`` executable is on the PATH.
    """
    python = os.path.join(root, 'python')
    if not os.path.exists(python):
        raise RuntimeError('python binary missing (need to build it first?)')
    capi_vars = find_capi_vars(root)
    globalnames = _read_global_names(globals_filename)

    nm = shutil.which('nm')
    if nm is None:
        # XXX Use dumpbin.exe /SYMBOLS on Windows.
        raise NotImplementedError

    for var in _find_var_symbols(python, nm, capi_vars, globalnames):
        if var.name in IGNORED_VARS:
            continue
        yield var
+
+
# Single-letter symbol types reported by nm, grouped by how they are
# handled: variable types are reported, everything else is skipped.
NM_FUNCS = {'T', 't'}
NM_PUBLIC_VARS = {'B', 'D'}
NM_PRIVATE_VARS = {'b', 'd'}
NM_VARS = NM_PUBLIC_VARS.union(NM_PRIVATE_VARS)
NM_DATA = {'R', 'r'}
NM_OTHER = set('ACGgiINpSsuUVvWw-?')
NM_IGNORED = NM_FUNCS.union(NM_DATA, NM_OTHER)
+
+
def _find_var_symbols(python, nm, capi_vars, globalnames):
    """Run *nm* on the *python* binary and yield a Var per variable symbol.

    Lines that Var.parse_nm() maps to None are skipped.
    """
    command = [nm, '--line-numbers', python]
    output = subprocess.check_output(command).decode('utf-8')
    for line in output.splitlines():
        var = Var.parse_nm(line, NM_VARS, NM_IGNORED, capi_vars, globalnames)
        if var is not None:
            yield var
+
+
+#######################################
+
class Filter(namedtuple('Filter', 'name op value action')):
    """A single column filter.

    Fields:
      name   -- the attribute to inspect
      op     -- '' (truthiness test) or '=' (equality test)
      value  -- the text compared against when op is '='
      action -- '+' to keep matches, '-' to keep non-matches
    """

    @classmethod
    def parse(cls, raw):
        """Build a Filter from text of the form ``[+|-]<name>[=<value>]``."""
        if raw.startswith(('+', '-')):
            action, raw = raw[0], raw[1:]
        else:
            action = '+'
        # XXX Support < and >?
        name, op, value = raw.partition('=')
        return cls(name, op, value, action)

    def check(self, var):
        """Return True if *var* passes this filter.

        An attribute missing from *var* is treated as None.
        """
        actual = getattr(var, self.name, None)
        if self.op == '=':
            matched = (actual == self.value)
        elif not self.op:
            matched = bool(actual)
        else:
            raise NotImplementedError

        if self.action == '+':
            return matched
        if self.action == '-':
            return not matched
        raise NotImplementedError
+
+
def filter_var(var, filters):
    """Return True only if *var* passes every filter in *filters*."""
    return all(criterion.check(var) for criterion in filters)
+
+
def make_sort_key(spec):
    """Return a key function that sorts by the columns named in *spec*.

    A column written with a leading underscore (e.g. ``_name``) is
    compared with leading underscores stripped from its values.

    Non-string values are normalized so that every column can be sorted:
    None becomes '' and anything else (e.g. a bool) becomes its str().
    Previously such values raised AttributeError because ``.lstrip()``
    was called on them unconditionally.
    """
    columns = [(col.strip('_'), '_' if col.startswith('_') else '')
               for col in spec]

    def normalize(value, prefix):
        if value is None:
            return ''
        if isinstance(value, str):
            # lstrip('') strips nothing, so plain columns are unchanged.
            return value.lstrip(prefix)
        return str(value)

    def sort_key(var):
        return tuple(normalize(getattr(var, col), prefix)
                     for col, prefix in columns)
    return sort_key
+
+
def make_groups(allvars, spec):
    """Bucket *allvars* by the attribute named *spec*.

    Returns a dict that maps "<spec>: <value>" labels to the list of vars
    sharing that attribute value, in their original order.
    """
    buckets = {}
    for var in allvars:
        label = '{}: {}'.format(spec, getattr(var, spec))
        buckets.setdefault(label, []).append(var)
    return buckets
+
+
def format_groups(groups, columns, fmts, widths):
    """Yield (text, count) pairs rendering each group as its own table.

    Groups are emitted in sorted label order.  Each one is preceded by a
    blank line and a "# <label>" heading, both with a count of 0.
    """
    for label, members in sorted(groups.items(), key=lambda item: item[0]):
        yield '', 0
        yield ' # {}'.format(label), 0
        yield from format_vars(members, columns, fmts, widths)
+
+
def format_vars(allvars, columns, fmts, widths):
    """Yield (text, count) pairs rendering *allvars* as a text table.

    The output is a header line, a divider, one row per var, and a
    closing divider.  Rows carry a count of 1 and every other line a
    count of 0, so that callers can total the rows.

    *fmts* maps column name to a ``str.format`` field such as ``'{:40}'``
    and *widths* maps column name to the field width.  True values are
    shown as 'X' and other false-y values as an empty cell.
    """
    fmt = ' '.join(fmts[col] for col in columns)
    # Each divider segment is widths[col] + 2 wide (one margin character
    # on either side) and segments are one space apart.  The gap between
    # fields therefore has to be three characters for rows to line up
    # with the divider; replacing a space with a single space was a no-op.
    fmt = ' ' + fmt.replace(' ', '   ') + ' '  # for div margin
    header = fmt.replace(':', ':^').format(*(col.upper() for col in columns))
    yield header, 0
    div = ' '.join('-'*(widths[col]+2) for col in columns)
    yield div, 0
    for var in allvars:
        values = (getattr(var, col) for col in columns)
        row = fmt.format(*('X' if val is True else val or ''
                           for val in values))
        yield row, 1
    yield div, 0
+
+
+#######################################
+
# The columns that can be shown, in their default order.
COLUMNS = 'name,external,capi,scope,filename'
COLUMN_NAMES = COLUMNS.split(',')

# Each column is as wide as its own name unless overridden here.
COLUMN_WIDTHS = dict(
    {col: len(col) for col in COLUMN_NAMES},
    name=50,
    scope=7,
    filename=40,
)

# str.format() fields per column.  A column that is exactly as wide as
# its name is centered; all other columns keep the default alignment.
COLUMN_FORMATS = {
    col: '{:%s%s}' % ('^' if width == len(col) else '', width)
    for col, width in COLUMN_WIDTHS.items()
}
+
+
def _parse_filters_arg(raw, error):
    """Parse a comma-separated filter spec into a list of Filter objects.

    Empty items are ignored.  Each problem is reported by calling
    *error* with a message.  If *error* returns instead of raising, the
    offending item is skipped; previously it was appended to the result
    anyway.
    """
    filters = []
    for value in raw.split(','):
        value = value.strip()
        if not value:
            continue
        try:
            parsed = Filter.parse(value)
            if parsed.name not in COLUMN_NAMES:
                raise Exception('unsupported column {!r}'.format(parsed.name))
        except Exception as e:
            error('bad filter {!r}: {}'.format(raw, e))
            continue
        filters.append(parsed)
    return filters
+
+
def _parse_columns_arg(raw, error):
    """Split a comma-separated column list, reporting unknown names.

    *error* is called once for each name that is not in COLUMN_NAMES.
    The full list of names is returned regardless.
    """
    requested = raw.split(',')
    for unknown in (col for col in requested if col not in COLUMN_NAMES):
        error('unsupported column {!r}'.format(unknown))
    return requested
+
+
def _parse_sort_arg(raw, error):
    """Split a comma-separated sort spec, reporting unknown columns.

    Leading underscores are disregarded when validating a name.  *error*
    is called once for each name that is not in COLUMN_NAMES.  The items
    are returned exactly as given.
    """
    keys = raw.split(',')
    for key in keys:
        bare = key.lstrip('_')
        if bare in COLUMN_NAMES:
            continue
        error('unsupported column {!r}'.format(key))
    return keys
+
+
+def _parse_group_arg(raw, error):
+ if not raw:
+ return raw
+ group = raw
+ if group not in COLUMN_NAMES:
+ error('unsupported column {!r}'.format(group))
+ if group != 'filename':
+ error('unsupported group {!r}'.format(group))
+ return group
+
+
def parse_args(argv=None):
    """Parse the command line and return the resulting namespace.

    *argv* defaults to ``sys.argv[1:]``.  The namespace is shaped so that
    its attributes can be passed straight to main() as keyword arguments:
    the -v/-q counts are folded into a single ``verbosity`` value, and
    the filters, columns, sort and group options are validated and
    converted.
    """
    import argparse

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()

    parser.add_argument('-v', '--verbose', action='count', default=0)
    parser.add_argument('-q', '--quiet', action='count', default=0)

    parser.add_argument('--filters', default='-scope',
                        help='[[-]<COLUMN>[=<GLOB>]] ...')

    parser.add_argument('--columns', default=COLUMNS,
                        help='a comma-separated list of columns to show')
    parser.add_argument('--sort', default='filename,_name',
                        help='a comma-separated list of columns to sort')
    parser.add_argument('--group',
                        help='group by the given column name (- to not group)')

    parser.add_argument('--rc-on-match', dest='rc', type=int)

    parser.add_argument('filename', nargs='?', default=GLOBALS_FILE)

    args = parser.parse_args(argv)

    # Replace the two counters with one verbosity level, never below 0.
    options = vars(args)
    louder = options.pop('verbose', 0)
    quieter = options.pop('quiet', 0)
    args.verbosity = max(0, VERBOSITY + louder - quieter)

    # Sorting by filename first implies grouping by filename.
    if not args.group and args.sort.startswith('filename'):
        args.group = 'filename'

    if args.rc is None:
        # Only fail on matches when the raw filter text mentions "core"
        # without excluding it via "-scope=core".
        selects_core = ('core' in args.filters and
                        '-scope=core' not in args.filters)
        args.rc = 1 if selects_core else 0

    report = parser.error
    args.filters = _parse_filters_arg(args.filters, report)
    args.columns = _parse_columns_arg(args.columns, report)
    args.sort = _parse_sort_arg(args.sort, report)
    args.group = _parse_group_arg(args.group, report)

    return args
+
+
def main(root=ROOT_DIR, filename=GLOBALS_FILE,
         filters=None, columns=COLUMN_NAMES, sort=None, group=None,
         verbosity=VERBOSITY, rc=1):
    """Report the variables found in the python binary under *root*.

    Variables passing all *filters* are optionally sorted, optionally
    grouped, and printed as a table when *verbosity* is at least 2.

    Returns *rc* if any variable was reported (after printing an error
    to stderr when *rc* is non-zero), otherwise 0.

    *filters* may be None, meaning no filtering.  *columns* is copied
    before the group column is removed from it, so neither the caller's
    list nor the shared COLUMN_NAMES default is modified.
    """
    if verbosity >= 2:
        log = print
    else:
        def log(msg):
            pass

    filters = filters or ()
    columns = list(columns)

    allvars = (var
               for var in find_vars(root, filename)
               if filter_var(var, filters))
    if sort:
        allvars = sorted(allvars, key=make_sort_key(sort))

    if group:
        # The group value appears in the heading, so drop its column.
        try:
            columns.remove(group)
        except ValueError:
            pass
        grouped = make_groups(allvars, group)
        lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
    else:
        lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS)

    total = 0
    for line, count in lines:
        total += count
        log(line)
    log('\ntotal: {}'.format(total))

    if total and rc:
        print('ERROR: found unsafe globals', file=sys.stderr)
        return rc
    return 0
+
+
if __name__ == '__main__':
    # The parsed namespace's attributes line up with main()'s keywords.
    args = parse_args()
    exit_code = main(**vars(args))
    sys.exit(exit_code)