diff options
author | Eric Snow <ericsnowcurrently@gmail.com> | 2019-10-19 02:00:04 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-10-19 02:00:04 (GMT) |
commit | e4c431ecf50def40eb93c3969c1e4eeaf7bf32f1 (patch) | |
tree | 071224bbded262901b9742eb82c5d82d2f744fe1 /Tools/c-analyzer/cpython/supported.py | |
parent | ea55c51bd937f6019c35b39b87029644e469c059 (diff) | |
download | cpython-e4c431ecf50def40eb93c3969c1e4eeaf7bf32f1.zip cpython-e4c431ecf50def40eb93c3969c1e4eeaf7bf32f1.tar.gz cpython-e4c431ecf50def40eb93c3969c1e4eeaf7bf32f1.tar.bz2 |
bpo-36876: Re-organize the c-analyzer tool code. (gh-16841)
This is partly a cleanup of the code. It also is preparation for getting the variables from the source (cross-platform) rather than from the symbols.
The change only touches the tool (and its tests).
Diffstat (limited to 'Tools/c-analyzer/cpython/supported.py')
-rw-r--r-- | Tools/c-analyzer/cpython/supported.py | 398 |
1 files changed, 398 insertions, 0 deletions
# Tools/c-analyzer/cpython/supported.py (new file in this commit)
"""Decide which CPython global variables are "supported" (okay as globals).

A variable is supported when it is explicitly ignored (by the table in
this module or by the entries read from ``ignored.tsv``) or when its
declared C type is recognized as acceptable (const data, type-definition
components, function pointers, ...).
"""

import os.path
import re

from c_analyzer.common.info import ID
from c_analyzer.common.util import read_tsv, write_tsv

from . import DATA_DIR

# XXX need tests:
# * generate / script


IGNORED_FILE = os.path.join(DATA_DIR, 'ignored.tsv')

IGNORED_COLUMNS = ('filename', 'funcname', 'name', 'kind', 'reason')
IGNORED_HEADER = '\t'.join(IGNORED_COLUMNS)

# Maps a file-level variable name to the reason it is ignored.
# XXX Move these to ignored.tsv.
IGNORED = {
        # global
        'PyImport_FrozenModules': 'process-global',
        'M___hello__': 'process-global',
        'inittab_copy': 'process-global',
        'PyHash_Func': 'process-global',
        '_Py_HashSecret_Initialized': 'process-global',
        '_TARGET_LOCALES': 'process-global',

        # startup (only changed before/during)
        '_PyRuntime': 'runtime startup',
        'runtime_initialized': 'runtime startup',
        'static_arg_parsers': 'runtime startup',
        'orig_argv': 'runtime startup',
        'opt_ptr': 'runtime startup',
        '_preinit_warnoptions': 'runtime startup',
        '_Py_StandardStreamEncoding': 'runtime startup',
        'Py_FileSystemDefaultEncoding': 'runtime startup',
        '_Py_StandardStreamErrors': 'runtime startup',
        'Py_FileSystemDefaultEncodeErrors': 'runtime startup',
        'Py_BytesWarningFlag': 'runtime startup',
        'Py_DebugFlag': 'runtime startup',
        'Py_DontWriteBytecodeFlag': 'runtime startup',
        'Py_FrozenFlag': 'runtime startup',
        'Py_HashRandomizationFlag': 'runtime startup',
        'Py_IgnoreEnvironmentFlag': 'runtime startup',
        'Py_InspectFlag': 'runtime startup',
        'Py_InteractiveFlag': 'runtime startup',
        'Py_IsolatedFlag': 'runtime startup',
        'Py_NoSiteFlag': 'runtime startup',
        'Py_NoUserSiteDirectory': 'runtime startup',
        'Py_OptimizeFlag': 'runtime startup',
        'Py_QuietFlag': 'runtime startup',
        'Py_UTF8Mode': 'runtime startup',
        'Py_UnbufferedStdioFlag': 'runtime startup',
        'Py_VerboseFlag': 'runtime startup',
        '_Py_path_config': 'runtime startup',
        '_PyOS_optarg': 'runtime startup',
        '_PyOS_opterr': 'runtime startup',
        '_PyOS_optind': 'runtime startup',
        '_Py_HashSecret': 'runtime startup',

        # REPL
        '_PyOS_ReadlineLock': 'repl',
        '_PyOS_ReadlineTState': 'repl',

        # effectively const
        'tracemalloc_empty_traceback': 'const',
        '_empty_bitmap_node': 'const',
        'posix_constants_pathconf': 'const',
        'posix_constants_confstr': 'const',
        'posix_constants_sysconf': 'const',
        '_PySys_ImplCacheTag': 'const',
        '_PySys_ImplName': 'const',
        'PyImport_Inittab': 'const',
        '_PyImport_DynLoadFiletab': 'const',
        '_PyParser_Grammar': 'const',
        'Py_hexdigits': 'const',
        '_PyImport_Inittab': 'const',
        '_PyByteArray_empty_string': 'const',
        '_PyLong_DigitValue': 'const',
        '_Py_SwappedOp': 'const',
        'PyStructSequence_UnnamedField': 'const',

        # signals are main-thread only
        'faulthandler_handlers': 'signals are main-thread only',
        'user_signals': 'signals are main-thread only',
        'wakeup': 'signals are main-thread only',

        # hacks
        '_PySet_Dummy': 'only used as a placeholder',
        }

BENIGN = 'races here are benign and unlikely'


def is_supported(variable, ignored=None, known=None, *,
                 _ignored=(lambda *a, **k: _is_ignored(*a, **k)),
                 _vartype_okay=(lambda *a, **k: _is_vartype_okay(*a, **k)),
                 ):
    """Return True if the given global variable is okay in CPython.

    "variable" must provide the attributes used by the helpers (id,
    name, funcname, filename, vartype).  "ignored", if given, is a
    mapping whose 'variables' and 'types' entries are passed on to the
    helpers; it may be None.  "known" is accepted but not used here.

    The underscore-prefixed keyword arguments exist so the helpers can
    be replaced (e.g. in tests).  The defaults are lambdas so the
    module-level helpers are looked up at call time.
    """
    if _ignored(variable,
                ignored and ignored.get('variables')):
        return True
    # "ignored" defaults to None, so it must be guarded here too
    # (an unguarded ignored.get() would raise AttributeError).
    elif _vartype_okay(variable.vartype,
                       ignored and ignored.get('types')):
        return True
    else:
        return False


def _is_ignored(variable, ignoredvars=None, *,
                _IGNORED=IGNORED,
                ):
    """Return the reason if the variable is a supported global.

    Return None if the variable is not a supported global.

    "ignoredvars", if given, maps variable IDs to reasons and is
    consulted first.  The name-based "_IGNORED" table is consulted
    only for variables with no enclosing function (funcname is None).
    The remaining checks are keyed on the variable's filename.
    """
    if ignoredvars and (reason := ignoredvars.get(variable.id)):
        return reason

    if variable.funcname is None:
        if reason := _IGNORED.get(variable.name):
            return reason

    # compiler
    if variable.filename == 'Python/graminit.c':
        if variable.vartype.startswith('static state '):
            return 'compiler'
    if variable.filename == 'Python/symtable.c':
        if variable.vartype.startswith('static identifier '):
            return 'compiler'
    if variable.filename == 'Python/Python-ast.c':
        # These should be const.
        if variable.name.endswith(('_field', '_attribute')):
            return 'compiler'

    # other
    if variable.filename == 'Python/dtoa.c':
        # guarded by lock?
        if variable.name in ('p5s', 'freelist',
                             'private_mem', 'pmem_next'):
            return 'dtoa is thread-safe?'
    if variable.filename == 'Python/thread.c':
        # Threads do not become an issue until after these have been set
        # and these never get changed after that.
        if variable.name in ('initialized', 'thread_debug'):
            return 'thread-safe'
    if variable.filename == 'Python/getversion.c':
        if variable.name == 'version':
            # Races are benign here, as well as unlikely.
            return BENIGN
    if variable.filename == 'Python/fileutils.c':
        if variable.name in ('force_ascii', 'ioctl_works',
                             '_Py_open_cloexec_works'):
            return BENIGN
    if variable.filename == 'Python/codecs.c':
        if variable.name == 'ucnhash_CAPI':
            return BENIGN
    if variable.filename == 'Python/bootstrap_hash.c':
        if variable.name == 'getrandom_works':
            return BENIGN
    if variable.filename == 'Objects/unicodeobject.c':
        if variable.name == 'ucnhash_CAPI':
            return BENIGN
        if variable.name == 'bloom_linebreak':
            # *mostly* benign
            return BENIGN
    if variable.filename == 'Modules/getbuildinfo.c':
        if variable.name == 'buildinfo':
            # The static is used for pre-allocation.
            return BENIGN
    if variable.filename == 'Modules/posixmodule.c':
        if variable.name in ('ticks_per_second', 'dup3_works'):
            return BENIGN
    if variable.filename == 'Modules/timemodule.c':
        if variable.name == 'ticks_per_second':
            return BENIGN
    if variable.filename == 'Objects/longobject.c':
        if variable.name in ('log_base_BASE', 'convwidth_base',
                             'convmultmax_base'):
            return BENIGN

    return None


def _is_vartype_okay(vartype, ignoredtypes=None):
    """Return the reason if the C type (a string) is okay for a global.

    Return None if the type is not recognized as okay.  Anything that
    looks like an object type (see _is_object()) is never okay.

    "ignoredtypes" is accepted but not currently used.
    """
    if _is_object(vartype):
        return None

    if vartype.startswith('static const '):
        return 'const'
    if vartype.startswith('const '):
        return 'const'

    # components for TypeObject definitions
    for name in ('PyMethodDef', 'PyGetSetDef', 'PyMemberDef'):
        if name in vartype:
            return 'const'
    for name in ('PyNumberMethods', 'PySequenceMethods', 'PyMappingMethods',
                 'PyBufferProcs', 'PyAsyncMethods'):
        if name in vartype:
            return 'const'
    for name in ('slotdef', 'newfunc'):
        if name in vartype:
            return 'const'

    # structseq
    for name in ('PyStructSequence_Desc', 'PyStructSequence_Field'):
        if name in vartype:
            return 'const'

    # other definitions
    if 'PyModuleDef' in vartype:
        return 'const'

    # thread-safe
    if '_Py_atomic_int' in vartype:
        return 'thread-safe'
    if 'pthread_condattr_t' in vartype:
        return 'thread-safe'

    # startup
    if '_Py_PreInitEntry' in vartype:
        return 'startup'

    # global
#    if 'PyMemAllocatorEx' in vartype:
#        return True

    # others
#    if 'PyThread_type_lock' in vartype:
#        return True

    # XXX ???
    # _Py_tss_t
    # _Py_hashtable_t
    # stack_t
    # _PyUnicode_Name_CAPI

    # functions
    # (a parenthesis without any array brackets)
    if '(' in vartype and '[' not in vartype:
        return 'function pointer'

    # XXX finish!
    # * allow const values?
    #raise NotImplementedError
    return None


# Matches (from the start of the string) C types that hold objects.
PYOBJECT_RE = re.compile(r'''
        ^
        (
            # must start with "static "
            static \s+
            (
                identifier
            )
            \b
        ) |
        (
            # may start with "static "
            ( static \s+ )?
            (
                .*
                (
                    PyObject |
                    PyTypeObject |
                    _? Py \w+ Object |
                    _PyArg_Parser |
                    _Py_Identifier |
                    traceback_t |
                    PyAsyncGenASend |
                    _PyAsyncGenWrappedValue |
                    PyContext |
                    method_cache_entry
                )
                \b
            ) |
            (
                (
                    _Py_IDENTIFIER |
                    _Py_static_string
                )
                [(]
            )
        )
        ''', re.VERBOSE)


def _is_object(vartype):
    """Return True if the C type (a string) looks like an object type."""
    # PyDictKeysObject would otherwise match the "Py...Object" pattern.
    if 'PyDictKeysObject' in vartype:
        return False
    if PYOBJECT_RE.match(vartype):
        return True
    if vartype.endswith((' _Py_FalseStruct', ' _Py_TrueStruct')):
        return True

    # XXX Add more?

    #for part in vartype.split():
    #    # XXX const is automatic True?
    #    if part == 'PyObject' or part.startswith('PyObject['):
    #        return True
    return False


def ignored_from_file(infile, *,
                      _read_tsv=read_tsv,
                      ):
    """Return the ignored items found in the given TSV file.

    The result is a dict with a single 'variables' entry, which maps
    each variable's ID to the reason it is ignored.  Each row must have
    the columns named in IGNORED_COLUMNS.  An empty funcname or '-'
    means the variable is not local to a function.

    Raise ValueError for any row whose kind is not 'variable'.
    """
    ignored = {
        'variables': {},
        #'types': {},
        #'constants': {},
        #'macros': {},
        }
    for row in _read_tsv(infile, IGNORED_HEADER):
        filename, funcname, name, kind, reason = row
        if not funcname or funcname == '-':
            funcname = None
        varid = ID(filename, funcname, name)
        if kind == 'variable':
            values = ignored['variables']
        else:
            raise ValueError(f'unsupported kind in row {row}')
        values[varid] = reason
    return ignored


##################################
# generate

def _get_row(varid, reason):
    """Return the ignored.tsv row (a tuple of strings) for the variable."""
    return (
            varid.filename,
            # '-' is the placeholder for "no function".
            varid.funcname or '-',
            varid.name,
            'variable',
            str(reason),
            )


def _get_rows(variables, ignored=None, *,
              _as_row=_get_row,
              _is_ignored=_is_ignored,
              _vartype_okay=_is_vartype_okay,
              ):
    """Yield an ignored.tsv row for each supported variable.

    Unsupported variables (no reason found) are skipped.  Each yielded
    variable is also printed, followed by a final total.
    """
    count = 0
    for variable in variables:
        reason = _is_ignored(variable,
                             ignored and ignored.get('variables'),
                             )
        if not reason:
            reason = _vartype_okay(variable.vartype,
                                   ignored and ignored.get('types'))
        if not reason:
            continue

        print(' ', variable, repr(reason))
        yield _as_row(variable.id, reason)
        count += 1
    print(f'total: {count}')


def _generate_ignored_file(variables, filename=None, *,
                           _generate_rows=_get_rows,
                           _write_tsv=write_tsv,
                           ):
    """Write the rows for the supported variables to a TSV file.

    If no filename is given then IGNORED_FILE + '.new' is used.
    """
    if not filename:
        filename = IGNORED_FILE + '.new'
    rows = _generate_rows(variables)
    _write_tsv(filename, IGNORED_HEADER, rows)


if __name__ == '__main__':
    from cpython import SOURCE_DIRS
    from cpython.known import (
        from_file as known_from_file,
        DATA_FILE as KNOWN_FILE,
        )
    # XXX This is wrong!
    from . import find
    known = known_from_file(KNOWN_FILE)
    knownvars = (known or {}).get('variables')
    variables = find.globals_from_binary(knownvars=knownvars,
                                         dirnames=SOURCE_DIRS)

    _generate_ignored_file(variables)