diff options
Diffstat (limited to 'Tools/c-analyzer/cpython/_generate.py')
-rw-r--r-- | Tools/c-analyzer/cpython/_generate.py | 329 |
1 files changed, 0 insertions, 329 deletions
diff --git a/Tools/c-analyzer/cpython/_generate.py b/Tools/c-analyzer/cpython/_generate.py deleted file mode 100644 index 4c340ac..0000000 --- a/Tools/c-analyzer/cpython/_generate.py +++ /dev/null @@ -1,329 +0,0 @@ -# The code here consists of hacks for pre-populating the known.tsv file. - -from c_analyzer.parser.preprocessor import _iter_clean_lines -from c_analyzer.parser.naive import ( - iter_variables, parse_variable_declaration, find_variables, - ) -from c_analyzer.common.known import HEADER as KNOWN_HEADER -from c_analyzer.common.info import UNKNOWN, ID -from c_analyzer.variables import Variable -from c_analyzer.util import write_tsv - -from . import SOURCE_DIRS, REPO_ROOT -from .known import DATA_FILE as KNOWN_FILE -from .files import iter_cpython_files - - -POTS = ('char ', 'wchar_t ', 'int ', 'Py_ssize_t ') -POTS += tuple('const ' + v for v in POTS) -STRUCTS = ('PyTypeObject', 'PyObject', 'PyMethodDef', 'PyModuleDef', 'grammar') - - -def _parse_global(line, funcname=None): - line = line.strip() - if line.startswith('static '): - if '(' in line and '[' not in line and ' = ' not in line: - return None, None - name, decl = parse_variable_declaration(line) - elif line.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')): - name, decl = parse_variable_declaration(line) - elif line.startswith('_Py_static_string('): - decl = line.strip(';').strip() - name = line.split('(')[1].split(',')[0].strip() - elif line.startswith('_Py_IDENTIFIER('): - decl = line.strip(';').strip() - name = 'PyId_' + line.split('(')[1].split(')')[0].strip() - elif funcname: - return None, None - - # global-only - elif line.startswith('PyAPI_DATA('): # only in .h files - name, decl = parse_variable_declaration(line) - elif line.startswith('extern '): # only in .h files - name, decl = parse_variable_declaration(line) - elif line.startswith('PyDoc_VAR('): - decl = line.strip(';').strip() - name = line.split('(')[1].split(')')[0].strip() - elif line.startswith(POTS): # implied static - if '(' in line and '[' not in line and ' = ' not in line: - return None, None - name, decl = parse_variable_declaration(line) - elif line.startswith(STRUCTS) and line.endswith(' = {'): # implied static - name, decl = parse_variable_declaration(line) - elif line.startswith(STRUCTS) and line.endswith(' = NULL;'): # implied static - name, decl = parse_variable_declaration(line) - elif line.startswith('struct '): - if not line.endswith(' = {'): - return None, None - if not line.partition(' ')[2].startswith(STRUCTS): - return None, None - # implied static - name, decl = parse_variable_declaration(line) - - # file-specific - elif line.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')): - # Objects/typeobject.c - funcname = line.split('(')[1].split(',')[0] - return [ - ('op_id', funcname, '_Py_static_string(op_id, OPSTR)'), - ('rop_id', funcname, '_Py_static_string(op_id, OPSTR)'), - ] - elif line.startswith('WRAP_METHOD('): - # Objects/weakrefobject.c - funcname, name = (v.strip() for v in line.split('(')[1].split(')')[0].split(',')) - return [ - ('PyId_' + name, funcname, f'_Py_IDENTIFIER({name})'), - ] - - else: - return None, None - return name, decl - - -def _pop_cached(varcache, filename, funcname, name, *, - _iter_variables=iter_variables, - ): - # Look for the file. - try: - cached = varcache[filename] - except KeyError: - cached = varcache[filename] = {} - for variable in _iter_variables(filename, - parse_variable=_parse_global, - ): - variable._isglobal = True - cached[variable.id] = variable - for var in cached: - print(' ', var) - - # Look for the variable. - if funcname == UNKNOWN: - for varid in cached: - if varid.name == name: - break - else: - return None - return cached.pop(varid) - else: - return cached.pop((filename, funcname, name), None) - - -def find_matching_variable(varid, varcache, allfilenames, *, - _pop_cached=_pop_cached, - ): - if varid.filename and varid.filename != UNKNOWN: - filenames = [varid.filename] - else: - filenames = allfilenames - for filename in filenames: - variable = _pop_cached(varcache, filename, varid.funcname, varid.name) - if variable is not None: - return variable - else: - if varid.filename and varid.filename != UNKNOWN and varid.funcname is None: - for filename in allfilenames: - if not filename.endswith('.h'): - continue - variable = _pop_cached(varcache, filename, None, varid.name) - if variable is not None: - return variable - return None - - -MULTILINE = { - # Python/Python-ast.c - 'Load_singleton': 'PyObject *', - 'Store_singleton': 'PyObject *', - 'Del_singleton': 'PyObject *', - 'AugLoad_singleton': 'PyObject *', - 'AugStore_singleton': 'PyObject *', - 'Param_singleton': 'PyObject *', - 'And_singleton': 'PyObject *', - 'Or_singleton': 'PyObject *', - 'Add_singleton': 'static PyObject *', - 'Sub_singleton': 'static PyObject *', - 'Mult_singleton': 'static PyObject *', - 'MatMult_singleton': 'static PyObject *', - 'Div_singleton': 'static PyObject *', - 'Mod_singleton': 'static PyObject *', - 'Pow_singleton': 'static PyObject *', - 'LShift_singleton': 'static PyObject *', - 'RShift_singleton': 'static PyObject *', - 'BitOr_singleton': 'static PyObject *', - 'BitXor_singleton': 'static PyObject *', - 'BitAnd_singleton': 'static PyObject *', - 'FloorDiv_singleton': 'static PyObject *', - 'Invert_singleton': 'static PyObject *', - 'Not_singleton': 'static PyObject *', - 'UAdd_singleton': 'static PyObject *', - 'USub_singleton': 'static PyObject *', - 'Eq_singleton': 'static PyObject *', - 'NotEq_singleton': 'static PyObject *', - 'Lt_singleton': 'static PyObject *', - 'LtE_singleton': 'static PyObject *', - 'Gt_singleton': 'static PyObject *', - 'GtE_singleton': 'static PyObject *', - 'Is_singleton': 'static PyObject *', - 'IsNot_singleton': 'static PyObject *', - 'In_singleton': 'static PyObject *', - 'NotIn_singleton': 'static PyObject *', - # Python/symtable.c - 'top': 'static identifier ', - 'lambda': 'static identifier ', - 'genexpr': 'static identifier ', - 'listcomp': 'static identifier ', - 'setcomp': 'static identifier ', - 'dictcomp': 'static identifier ', - '__class__': 'static identifier ', - # Python/compile.c - '__doc__': 'static PyObject *', - '__annotations__': 'static PyObject *', - # Objects/floatobject.c - 'double_format': 'static float_format_type ', - 'float_format': 'static float_format_type ', - 'detected_double_format': 'static float_format_type ', - 'detected_float_format': 'static float_format_type ', - # Parser/listnode.c - 'level': 'static int ', - 'atbol': 'static int ', - # Python/dtoa.c - 'private_mem': 'static double private_mem[PRIVATE_mem]', - 'pmem_next': 'static double *', - # Modules/_weakref.c - 'weakref_functions': 'static PyMethodDef ', -} -INLINE = { - # Modules/_tracemalloc.c - 'allocators': 'static struct { PyMemAllocatorEx mem; PyMemAllocatorEx raw; PyMemAllocatorEx obj; } ', - # Modules/faulthandler.c - 'fatal_error': 'static struct { int enabled; PyObject *file; int fd; int all_threads; PyInterpreterState *interp; void *exc_handler; } ', - 'thread': 'static struct { PyObject *file; int fd; PY_TIMEOUT_T timeout_us; int repeat; PyInterpreterState *interp; int exit; char *header; size_t header_len; PyThread_type_lock cancel_event; PyThread_type_lock running; } ', - # Modules/signalmodule.c - 'Handlers': 'static volatile struct { _Py_atomic_int tripped; PyObject *func; } Handlers[NSIG]', - 'wakeup': 'static volatile struct { SOCKET_T fd; int warn_on_full_buffer; int use_send; } ', - # Python/dynload_shlib.c - 'handles': 'static struct { dev_t dev; ino_t ino; void *handle; } handles[128]', - # Objects/obmalloc.c - '_PyMem_Debug': 'static struct { debug_alloc_api_t raw; debug_alloc_api_t mem; debug_alloc_api_t obj; } ', - # Python/bootstrap_hash.c - 'urandom_cache': 'static struct { int fd; dev_t st_dev; ino_t st_ino; } ', - } -FUNC = { - # Objects/object.c - '_Py_abstract_hack': 'Py_ssize_t (*_Py_abstract_hack)(PyObject *)', - # Parser/myreadline.c - 'PyOS_InputHook': 'int (*PyOS_InputHook)(void)', - # Python/pylifecycle.c - '_PyOS_mystrnicmp_hack': 'int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t)', - # Parser/myreadline.c - 'PyOS_ReadlineFunctionPointer': 'char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *)', - } -IMPLIED = { - # Objects/boolobject.c - '_Py_FalseStruct': 'static struct _longobject ', - '_Py_TrueStruct': 'static struct _longobject ', - # Modules/config.c - '_PyImport_Inittab': 'struct _inittab _PyImport_Inittab[]', - } -GLOBALS = {} -GLOBALS.update(MULTILINE) -GLOBALS.update(INLINE) -GLOBALS.update(FUNC) -GLOBALS.update(IMPLIED) - -LOCALS = { - 'buildinfo': ('Modules/getbuildinfo.c', - 'Py_GetBuildInfo', - 'static char buildinfo[50 + sizeof(GITVERSION) + ((sizeof(GITTAG) > sizeof(GITBRANCH)) ? sizeof(GITTAG) : sizeof(GITBRANCH))]'), - 'methods': ('Python/codecs.c', - '_PyCodecRegistry_Init', - 'static struct { char *name; PyMethodDef def; } methods[]'), - } - - -def _known(symbol): - if symbol.funcname: - if symbol.funcname != UNKNOWN or symbol.filename != UNKNOWN: - raise KeyError(symbol.name) - filename, funcname, decl = LOCALS[symbol.name] - varid = ID(filename, funcname, symbol.name) - elif not symbol.filename or symbol.filename == UNKNOWN: - raise KeyError(symbol.name) - else: - varid = symbol.id - try: - decl = GLOBALS[symbol.name] - except KeyError: - - if symbol.name.endswith('_methods'): - decl = 'static PyMethodDef ' - elif symbol.filename == 'Objects/exceptions.c' and symbol.name.startswith(('PyExc_', '_PyExc_')): - decl = 'static PyTypeObject ' - else: - raise - if symbol.name not in decl: - decl = decl + symbol.name - return Variable(varid, 'static', decl) - - -def known_row(varid, decl): - return ( - varid.filename, - varid.funcname or '-', - varid.name, - 'variable', - decl, - ) - - -def known_rows(symbols, *, - cached=True, - _get_filenames=iter_cpython_files, - _find_match=find_matching_variable, - _find_symbols=find_variables, - _as_known=known_row, - ): - filenames = list(_get_filenames()) - cache = {} - if cached: - for symbol in symbols: - try: - found = _known(symbol) - except KeyError: - found = _find_match(symbol, cache, filenames) - if found is None: - found = Variable(symbol.id, UNKNOWN, UNKNOWN) - yield _as_known(found.id, found.vartype) - else: - raise NotImplementedError # XXX incorporate KNOWN - for variable in _find_symbols(symbols, filenames, - srccache=cache, - parse_variable=_parse_global, - ): - #variable = variable._replace( - # filename=os.path.relpath(variable.filename, REPO_ROOT)) - if variable.funcname == UNKNOWN: - print(variable) - if variable.vartype== UNKNOWN: - print(variable) - yield _as_known(variable.id, variable.vartype) - - -def generate(symbols, filename=None, *, - _generate_rows=known_rows, - _write_tsv=write_tsv, - ): - if not filename: - filename = KNOWN_FILE + '.new' - - rows = _generate_rows(symbols) - _write_tsv(filename, KNOWN_HEADER, rows) - - -if __name__ == '__main__': - from c_symbols import binary - symbols = binary.iter_symbols( - binary.PYTHON, - find_local_symbol=None, - ) - generate(symbols) |