diff options
Diffstat (limited to 'Tools')
-rw-r--r-- | Tools/c-globals/README | 41 | ||||
-rw-r--r-- | Tools/c-globals/check-c-globals.py | 446 | ||||
-rw-r--r-- | Tools/c-globals/ignored-globals.txt | 494 |
3 files changed, 981 insertions, 0 deletions
diff --git a/Tools/c-globals/README b/Tools/c-globals/README new file mode 100644 index 0000000..d0e6e8e --- /dev/null +++ b/Tools/c-globals/README @@ -0,0 +1,41 @@ +####################################### +# C Globals and CPython Runtime State. + +CPython's C code makes extensive use of global variables. Each global +falls into one of several categories: + +* (effectively) constants (incl. static types) +* globals used exclusively in main or in the REPL +* freelists, caches, and counters +* process-global state +* module state +* Python runtime state + +The ignored-globals.txt file is organized similarly. Of the different +categories, the last two are problematic and generally should not exist +in the codebase. + +Globals that hold module state (i.e. in Modules/*.c) cause problems +when multiple interpreters are in use. For more info, see PEP 3121, +which addresses the situation for extension modules in general. + +Globals in the last category should be avoided as well. The problem +isn't with the Python runtime having state. Rather, the problem is with +that state being spread throughout the codebase in dozens of individual +globals. Unlike the other globals, the runtime state represents a set +of values that are constantly shifting in a complex way. When they are +spread out it's harder to get a clear picture of what the runtime +involves. Furthermore, when they are spread out it complicates efforts +that change the runtime. + +Consequently, the globals for Python's runtime state have been +consolidated under a single top-level _PyRuntime global. No new globals +should be added for runtime state. Instead, they should be added to +_PyRuntimeState or one of its sub-structs. The check-c-globals script +should be run to ensure that no new globals have been added: + + python3 Tools/c-globals/check-c-globals.py + +If it reports any globals then they should be resolved. If the globals +are runtime state then they should be folded into _PyRuntimeState. 
+Otherwise they should be added to ignored-globals.txt. diff --git a/Tools/c-globals/check-c-globals.py b/Tools/c-globals/check-c-globals.py new file mode 100644 index 0000000..1de69a8 --- /dev/null +++ b/Tools/c-globals/check-c-globals.py @@ -0,0 +1,446 @@ + +from collections import namedtuple +import glob +import os.path +import re +import shutil +import sys +import subprocess + + +VERBOSITY = 2 + +C_GLOBALS_DIR = os.path.abspath(os.path.dirname(__file__)) +TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR) +ROOT_DIR = os.path.dirname(TOOLS_DIR) +GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt') + +SOURCE_DIRS = ['Include', 'Objects', 'Modules', 'Parser', 'Python'] + +CAPI_REGEX = re.compile(r'^ *PyAPI_DATA\([^)]*\) \W*(_?Py\w+(?:, \w+)*\w).*;.*$') + + +IGNORED_VARS = { + '_DYNAMIC', + '_GLOBAL_OFFSET_TABLE_', + '__JCR_LIST__', + '__JCR_END__', + '__TMC_END__', + '__bss_start', + '__data_start', + '__dso_handle', + '_edata', + '_end', + } + + +def find_capi_vars(root): + capi_vars = {} + for dirname in SOURCE_DIRS: + for filename in glob.glob(os.path.join(ROOT_DIR, dirname, '**/*.[hc]'), + recursive=True): + with open(filename) as file: + for name in _find_capi_vars(file): + if name in capi_vars: + assert not filename.endswith('.c') + assert capi_vars[name].endswith('.c') + capi_vars[name] = filename + return capi_vars + + +def _find_capi_vars(lines): + for line in lines: + if not line.startswith('PyAPI_DATA'): + continue + assert '{' not in line + match = CAPI_REGEX.match(line) + assert match + names, = match.groups() + for name in names.split(', '): + yield name + + +def _read_global_names(filename): + # These variables are shared between all interpreters in the process. 
+ with open(filename) as file: + return {line.partition('#')[0].strip() + for line in file + if line.strip() and not line.startswith('#')} + + +def _is_global_var(name, globalnames): + if _is_autogen_var(name): + return True + if _is_type_var(name): + return True + if _is_module(name): + return True + if _is_exception(name): + return True + if _is_compiler(name): + return True + return name in globalnames + + +def _is_autogen_var(name): + return ( + name.startswith('PyId_') or + '.' in name or + # Objects/typeobject.c + name.startswith('op_id.') or + name.startswith('rop_id.') or + # Python/graminit.c + name.startswith('arcs_') or + name.startswith('states_') + ) + + +def _is_type_var(name): + if name.endswith(('Type', '_Type', '_type')): # XXX Always a static type? + return True + if name.endswith('_desc'): # for structseq types + return True + return ( + name.startswith('doc_') or + name.endswith(('_doc', '__doc__', '_docstring')) or + name.endswith('_methods') or + name.endswith('_fields') or + name.endswith(('_memberlist', '_members')) or + name.endswith('_slots') or + name.endswith(('_getset', '_getsets', '_getsetlist')) or + name.endswith('_as_mapping') or + name.endswith('_as_number') or + name.endswith('_as_sequence') or + name.endswith('_as_buffer') or + name.endswith('_as_async') + ) + + +def _is_module(name): + if name.endswith(('_functions', 'Methods', '_Methods')): + return True + if name == 'module_def': + return True + if name == 'initialized': + return True + return name.endswith(('module', '_Module')) + + +def _is_exception(name): + # Other vars are enumerated in ignored-globals.txt. 
+ if not name.startswith(('PyExc_', '_PyExc_')): + return False + return name.endswith(('Error', 'Warning')) + + +def _is_compiler(name): + return ( + # Python/Python-ast.c + name.endswith('_type') or + name.endswith('_singleton') or + name.endswith('_attributes') + ) + + +class Var(namedtuple('Var', 'name kind scope capi filename')): + + @classmethod + def parse_nm(cls, line, expected, ignored, capi_vars, globalnames): + _, _, line = line.partition(' ') # strip off the address + line = line.strip() + kind, _, line = line.partition(' ') + if kind in ignored or (): + return None + elif kind not in expected or (): + raise RuntimeError('unsupported NM type {!r}'.format(kind)) + + name, _, filename = line.partition('\t') + name = name.strip() + if _is_autogen_var(name): + return None + if _is_global_var(name, globalnames): + scope = 'global' + else: + scope = None + capi = (name in capi_vars or ()) + if filename: + filename = os.path.relpath(filename.partition(':')[0]) + return cls(name, kind, scope, capi, filename or '~???~') + + @property + def external(self): + return self.kind.isupper() + + +def find_vars(root, globals_filename=GLOBALS_FILE): + python = os.path.join(root, 'python') + if not os.path.exists(python): + raise RuntimeError('python binary missing (need to build it first?)') + capi_vars = find_capi_vars(root) + globalnames = _read_global_names(globals_filename) + + nm = shutil.which('nm') + if nm is None: + # XXX Use dumpbin.exe /SYMBOLS on Windows. 
+ raise NotImplementedError + else: + yield from (var + for var in _find_var_symbols(python, nm, capi_vars, + globalnames) + if var.name not in IGNORED_VARS) + + +NM_FUNCS = set('Tt') +NM_PUBLIC_VARS = set('BD') +NM_PRIVATE_VARS = set('bd') +NM_VARS = NM_PUBLIC_VARS | NM_PRIVATE_VARS +NM_DATA = set('Rr') +NM_OTHER = set('ACGgiINpSsuUVvWw-?') +NM_IGNORED = NM_FUNCS | NM_DATA | NM_OTHER + + +def _find_var_symbols(python, nm, capi_vars, globalnames): + args = [nm, + '--line-numbers', + python] + out = subprocess.check_output(args) + for line in out.decode('utf-8').splitlines(): + var = Var.parse_nm(line, NM_VARS, NM_IGNORED, capi_vars, globalnames) + if var is None: + continue + yield var + + +####################################### + +class Filter(namedtuple('Filter', 'name op value action')): + + @classmethod + def parse(cls, raw): + action = '+' + if raw.startswith(('+', '-')): + action = raw[0] + raw = raw[1:] + # XXX Support < and >? + name, op, value = raw.partition('=') + return cls(name, op, value, action) + + def check(self, var): + value = getattr(var, self.name, None) + if not self.op: + matched = bool(value) + elif self.op == '=': + matched = (value == self.value) + else: + raise NotImplementedError + + if self.action == '+': + return matched + elif self.action == '-': + return not matched + else: + raise NotImplementedError + + +def filter_var(var, filters): + for filter in filters: + if not filter.check(var): + return False + return True + + +def make_sort_key(spec): + columns = [(col.strip('_'), '_' if col.startswith('_') else '') + for col in spec] + def sort_key(var): + return tuple(getattr(var, col).lstrip(prefix) + for col, prefix in columns) # NOTE(review): .lstrip() only works for str-valued columns; 'external' and 'capi' are not str -- confirm intended sort columns + return sort_key + + +def make_groups(allvars, spec): + group = spec + groups = {} + for var in allvars: + value = getattr(var, group) + key = '{}: {}'.format(group, value) + try: + 
groupvars = groups[key] + except KeyError: + groupvars = groups[key] = [] + groupvars.append(var) + return groups + + +def 
format_groups(groups, columns, fmts, widths): + for group in sorted(groups): + groupvars = groups[group] + yield '', 0 + yield ' # {}'.format(group), 0 + yield from format_vars(groupvars, columns, fmts, widths) + + +def format_vars(allvars, columns, fmts, widths): + fmt = ' '.join(fmts[col] for col in columns) + fmt = ' ' + fmt.replace(' ', ' ') + ' ' # for div margin; NOTE(review): as rendered here this replace() is a no-op (space -> space); the literal spacing may have been collapsed in extraction -- confirm against the original file + header = fmt.replace(':', ':^').format(*(col.upper() for col in columns)) + yield header, 0 + div = ' '.join('-'*(widths[col]+2) for col in columns) + yield div, 0 + for var in allvars: + values = (getattr(var, col) for col in columns) + row = fmt.format(*('X' if val is True else val or '' + for val in values)) + yield row, 1 + yield div, 0 + + +####################################### + +COLUMNS = 'name,external,capi,scope,filename' +COLUMN_NAMES = COLUMNS.split(',') + +COLUMN_WIDTHS = {col: len(col) + for col in COLUMN_NAMES} +COLUMN_WIDTHS.update({ + 'name': 50, + 'scope': 7, + 'filename': 40, + }) +COLUMN_FORMATS = {col: '{:%s}' % width + for col, width in COLUMN_WIDTHS.items()} +for col in COLUMN_FORMATS: + if COLUMN_WIDTHS[col] == len(col): + COLUMN_FORMATS[col] = COLUMN_FORMATS[col].replace(':', ':^') + + +def _parse_filters_arg(raw, error): + filters = [] + for value in raw.split(','): + value=value.strip() + if not value: + continue + try: + filter = Filter.parse(value) + if filter.name not in COLUMN_NAMES: + raise Exception('unsupported column {!r}'.format(filter.name)) + except Exception as e: + error('bad filter {!r}: {}'.format(raw, e)) + filters.append(filter) + return filters + + +def _parse_columns_arg(raw, error): + columns = raw.split(',') + for column in columns: + if column not in COLUMN_NAMES: + error('unsupported column {!r}'.format(column)) + return columns + + +def _parse_sort_arg(raw, error): + sort = raw.split(',') + for 
column in sort: + if column.lstrip('_') not in COLUMN_NAMES: + error('unsupported column {!r}'.format(column)) + return sort + + +def _parse_group_arg(raw, 
error): + if not raw: + return raw + group = raw + if group not in COLUMN_NAMES: + error('unsupported column {!r}'.format(group)) + if group != 'filename': + error('unsupported group {!r}'.format(group)) + return group + + +def parse_args(argv=None): + if argv is None: + argv = sys.argv[1:] + + import argparse + parser = argparse.ArgumentParser() + + parser.add_argument('-v', '--verbose', action='count', default=0) + parser.add_argument('-q', '--quiet', action='count', default=0) + + parser.add_argument('--filters', default='-scope', + help='[[-]<COLUMN>[=<GLOB>]] ...') + + parser.add_argument('--columns', default=COLUMNS, + help='a comma-separated list of columns to show') + parser.add_argument('--sort', default='filename,_name', + help='a comma-separated list of columns to sort') + parser.add_argument('--group', + help='group by the given column name (- to not group)') + + parser.add_argument('--rc-on-match', dest='rc', type=int) + + parser.add_argument('filename', nargs='?', default=GLOBALS_FILE) + + args = parser.parse_args(argv) + + verbose = vars(args).pop('verbose', 0) + quiet = vars(args).pop('quiet', 0) + args.verbosity = max(0, VERBOSITY + verbose - quiet) + + if args.sort.startswith('filename') and not args.group: + args.group = 'filename' + + if args.rc is None: + if '-scope=core' in args.filters or 'core' not in args.filters: + args.rc = 0 + else: + args.rc = 1 + + args.filters = _parse_filters_arg(args.filters, parser.error) + args.columns = _parse_columns_arg(args.columns, parser.error) + args.sort = _parse_sort_arg(args.sort, parser.error) + args.group = _parse_group_arg(args.group, parser.error) + + return args + + +def main(root=ROOT_DIR, filename=GLOBALS_FILE, + filters=None, columns=COLUMN_NAMES, sort=None, group=None, + verbosity=VERBOSITY, rc=1): + + log = lambda msg: ... 
+ if verbosity >= 2: + log = lambda msg: print(msg) + + allvars = (var + for var in find_vars(root, filename) + if filter_var(var, filters)) + if sort: + allvars = sorted(allvars, key=make_sort_key(sort)) + + if group: + try: + columns.remove(group) + except ValueError: + pass + grouped = make_groups(allvars, group) + lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS) + else: + lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS) + + total = 0 + for line, count in lines: + total += count + log(line) + log('\ntotal: {}'.format(total)) + + if total and rc: + print('ERROR: found unsafe globals', file=sys.stderr) + return rc + return 0 + + +if __name__ == '__main__': + args = parse_args() + sys.exit( + main(**vars(args))) diff --git a/Tools/c-globals/ignored-globals.txt b/Tools/c-globals/ignored-globals.txt new file mode 100644 index 0000000..4fafba6 --- /dev/null +++ b/Tools/c-globals/ignored-globals.txt @@ -0,0 +1,494 @@ +# All variables declared here are shared between all interpreters +# in a single process. That means that they must not be changed +# unless that change should apply to all interpreters. +# +# See check-c-globals.py. 
+# +# Many generic names are handled via the script: +# +# * most exceptions and all warnings handled via _is_exception() +# * for builtin modules, generic names are handled via _is_module() +# * generic names for static types handled via _is_type_var() +# * AST vars handled via _is_compiler() + + +####################################### +# main + +# Modules/getpath.c +exec_prefix +module_search_path +prefix +progpath + +# Modules/main.c +orig_argc +orig_argv + +# Python/getopt.c +opt_ptr +_PyOS_optarg +_PyOS_opterr +_PyOS_optind + + +####################################### +# REPL + +# Parser/myreadline.c +PyOS_InputHook +PyOS_ReadlineFunctionPointer +_PyOS_ReadlineLock +_PyOS_ReadlineTState + + +####################################### +# state + +# Python/dtoa.c +p5s +pmem_next # very slight race +private_mem # very slight race + +# Python/import.c +# For the moment the import lock stays global. Ultimately there should +# be a global lock for extension modules and a per-interpreter lock. 
+import_lock +import_lock_level +import_lock_thread + +# Python/pylifecycle.c +_PyRuntime + + +#--------------------------------- +# module globals (PyObject) + +# Modules/_functoolsmodule.c +kwd_mark + +# Modules/_localemodule.c +Error + +# Modules/_threadmodule.c +ThreadError + +# Modules/_tracemalloc.c +unknown_filename + +# Modules/gcmodule.c +gc_str + +# Modules/posixmodule.c +billion +posix_putenv_garbage + +# Modules/signalmodule.c +DefaultHandler +IgnoreHandler +IntHandler +ItimerError + +# Modules/zipimport.c +ZipImportError +zip_directory_cache + + +#--------------------------------- +# module globals (other) + +# Modules/_tracemalloc.c +allocators +tables_lock +tracemalloc_config +tracemalloc_empty_traceback +tracemalloc_filenames +tracemalloc_peak_traced_memory +tracemalloc_reentrant_key +tracemalloc_traceback +tracemalloc_tracebacks +tracemalloc_traced_memory +tracemalloc_traces + +# Modules/faulthandler.c +fatal_error +faulthandler_handlers +old_stack +stack +thread +user_signals + +# Modules/posixmodule.c +posix_constants_confstr +posix_constants_pathconf +posix_constants_sysconf +_stat_float_times # deprecated, __main__-only +structseq_new +ticks_per_second + +# Modules/signalmodule.c +Handlers # main thread only +is_tripped # main thread only +main_pid +main_thread +old_siginthandler +wakeup_fd # main thread only + +# Modules/zipimport.c +zip_searchorder + +# Python/bltinmodule.c +Py_FileSystemDefaultEncodeErrors +Py_FileSystemDefaultEncoding +Py_HasFileSystemDefaultEncoding + +# Python/sysmodule.c +_PySys_ImplCacheTag +_PySys_ImplName + + +#--------------------------------- +# freelists + +# Modules/_collectionsmodule.c +freeblocks +numfreeblocks + +# Objects/classobject.c +free_list +numfree + +# Objects/dictobject.c +free_list +keys_free_list +numfree +numfreekeys + +# Objects/exceptions.c +memerrors_freelist +memerrors_numfree + +# Objects/floatobject.c +free_list +numfree + +# Objects/frameobject.c +free_list +numfree + +# Objects/genobject.c 
+ag_asend_freelist +ag_asend_freelist_free +ag_value_freelist +ag_value_freelist_free + +# Objects/listobject.c +free_list +numfree + +# Objects/methodobject.c +free_list +numfree + +# Objects/sliceobject.c +slice_cache # slight race + +# Objects/tupleobject.c +free_list +numfree + +# Python/dtoa.c +freelist # very slight race + + +#--------------------------------- +# caches (PyObject) + +# Objects/typeobject.c +method_cache # only for static types +next_version_tag # only for static types + +# Python/dynload_shlib.c +handles # slight race during import +nhandles # slight race during import + +# Python/import.c +extensions # slight race on init during import + + +#--------------------------------- +# caches (other) + +# Python/bootstrap_hash.c +urandom_cache + +# Python/modsupport.c +_Py_PackageContext # Slight race during import! Move to PyThreadState? + + +#--------------------------------- +# counters + +# Objects/bytesobject.c +null_strings +one_strings + +# Objects/dictobject.c +pydict_global_version + +# Objects/moduleobject.c +max_module_number # slight race during import + + +####################################### +# constants + +#--------------------------------- +# singletons + +# Objects/boolobject.c +_Py_FalseStruct +_Py_TrueStruct + +# Objects/object.c +_Py_NoneStruct +_Py_NotImplementedStruct + +# Objects/sliceobject.c +_Py_EllipsisObject + + +#--------------------------------- +# constants (other) + +# Modules/config.c +_PyImport_Inittab + +# Objects/bytearrayobject.c +_PyByteArray_empty_string + +# Objects/dictobject.c +empty_keys_struct +empty_values + +# Objects/floatobject.c +detected_double_format +detected_float_format +double_format +float_format + +# Objects/longobject.c +_PyLong_DigitValue + +# Objects/object.c +_Py_SwappedOp + +# Objects/obmalloc.c +_PyMem_Debug + +# Objects/setobject.c +_dummy_struct + +# Objects/structseq.c +PyStructSequence_UnnamedField + +# Objects/typeobject.c +name_op +slotdefs # almost +slotdefs_initialized # 
almost +subtype_getsets_dict_only +subtype_getsets_full +subtype_getsets_weakref_only +tp_new_methoddef + +# Objects/unicodeobject.c +bloom_linebreak +static_strings # slight race + +# Parser/tokenizer.c +_PyParser_TokenNames + +# Python/Python-ast.c +alias_fields + +# Python/codecs.c +Py_hexdigits +ucnhash_CAPI # slight performance-only race + +# Python/dynload_shlib.c +_PyImport_DynLoadFiletab + +# Python/fileutils.c +_Py_open_cloexec_works +force_ascii + +# Python/frozen.c +M___hello__ +PyImport_FrozenModules + +# Python/graminit.c +_PyParser_Grammar +dfas +labels + +# Python/import.c +PyImport_Inittab + +# Python/pylifecycle.c +_TARGET_LOCALES + + +#--------------------------------- +# initialized (PyObject) + +# Objects/bytesobject.c +characters +nullstring + +# Objects/exceptions.c +PyExc_RecursionErrorInst +errnomap + +# Objects/longobject.c +_PyLong_One +_PyLong_Zero +small_ints + +# Objects/setobject.c +emptyfrozenset + +# Objects/unicodeobject.c +interned # slight race on init in PyUnicode_InternInPlace() +unicode_empty +unicode_latin1 + + +#--------------------------------- +# initialized (other) + +# Python/getargs.c +static_arg_parsers + +# Python/pyhash.c +PyHash_Func +_Py_HashSecret +_Py_HashSecret_Initialized + +# Python/pylifecycle.c +_Py_StandardStreamEncoding +_Py_StandardStreamErrors +default_home +env_home +progname +Py_BytesWarningFlag +Py_DebugFlag +Py_DontWriteBytecodeFlag +Py_FrozenFlag +Py_HashRandomizationFlag +Py_IgnoreEnvironmentFlag +Py_InspectFlag +Py_InteractiveFlag +Py_IsolatedFlag +Py_NoSiteFlag +Py_NoUserSiteDirectory +Py_OptimizeFlag +Py_QuietFlag +Py_UnbufferedStdioFlag +Py_UseClassExceptionsFlag +Py_VerboseFlag + + +#--------------------------------- +# types + +# Modules/_threadmodule.c +Locktype +RLocktype +localdummytype +localtype + +# Objects/exceptions.c +PyExc_BaseException +PyExc_Exception +PyExc_GeneratorExit +PyExc_KeyboardInterrupt +PyExc_StopAsyncIteration +PyExc_StopIteration +PyExc_SystemExit +_PyExc_BaseException 
+_PyExc_Exception +_PyExc_GeneratorExit +_PyExc_KeyboardInterrupt +_PyExc_StopAsyncIteration +_PyExc_StopIteration +_PyExc_SystemExit + +# Objects/structseq.c +_struct_sequence_template + + +#--------------------------------- +# interned strings/bytes + +# Modules/_io/_iomodule.c +_PyIO_empty_bytes +_PyIO_empty_str +_PyIO_str_close +_PyIO_str_closed +_PyIO_str_decode +_PyIO_str_encode +_PyIO_str_fileno +_PyIO_str_flush +_PyIO_str_getstate +_PyIO_str_isatty +_PyIO_str_newlines +_PyIO_str_nl +_PyIO_str_read +_PyIO_str_read1 +_PyIO_str_readable +_PyIO_str_readall +_PyIO_str_readinto +_PyIO_str_readline +_PyIO_str_reset +_PyIO_str_seek +_PyIO_str_seekable +_PyIO_str_setstate +_PyIO_str_tell +_PyIO_str_truncate +_PyIO_str_writable +_PyIO_str_write + +# Modules/_threadmodule.c +str_dict + +# Objects/boolobject.c +false_str +true_str + +# Objects/listobject.c +indexerr + +# Python/symtable.c +__class__ +dictcomp +genexpr +lambda +listcomp +setcomp +top + +# Python/sysmodule.c +whatstrings + + +####################################### +# hacks + +# Objects/object.c +_Py_abstract_hack + +# Objects/setobject.c +_PySet_Dummy + +# Python/pylifecycle.c +_PyOS_mystrnicmp_hack |