summaryrefslogtreecommitdiffstats
path: root/Tools/c-analyzer/c_analyzer/parser/declarations.py
diff options
context:
space:
mode:
authorEric Snow <ericsnowcurrently@gmail.com>2019-10-19 02:00:04 (GMT)
committerGitHub <noreply@github.com>2019-10-19 02:00:04 (GMT)
commite4c431ecf50def40eb93c3969c1e4eeaf7bf32f1 (patch)
tree071224bbded262901b9742eb82c5d82d2f744fe1 /Tools/c-analyzer/c_analyzer/parser/declarations.py
parentea55c51bd937f6019c35b39b87029644e469c059 (diff)
downloadcpython-e4c431ecf50def40eb93c3969c1e4eeaf7bf32f1.zip
cpython-e4c431ecf50def40eb93c3969c1e4eeaf7bf32f1.tar.gz
cpython-e4c431ecf50def40eb93c3969c1e4eeaf7bf32f1.tar.bz2
bpo-36876: Re-organize the c-analyzer tool code. (gh-16841)
This is partly a cleanup of the code. It also is preparation for getting the variables from the source (cross-platform) rather than from the symbols. The change only touches the tool (and its tests).
Diffstat (limited to 'Tools/c-analyzer/c_analyzer/parser/declarations.py')
-rw-r--r--Tools/c-analyzer/c_analyzer/parser/declarations.py339
1 files changed, 339 insertions, 0 deletions
diff --git a/Tools/c-analyzer/c_analyzer/parser/declarations.py b/Tools/c-analyzer/c_analyzer/parser/declarations.py
new file mode 100644
index 0000000..f37072c
--- /dev/null
+++ b/Tools/c-analyzer/c_analyzer/parser/declarations.py
@@ -0,0 +1,339 @@
+import re
+import shlex
+import subprocess
+
+from ..common.info import UNKNOWN
+
+from . import source
+
+
+# A C identifier: either a letter followed by word characters, or one or
+# more underscores followed by an alphanumeric and then word characters.
+IDENTIFIER = r'(?:[a-zA-Z]\w*|_+[a-zA-Z0-9]\w*)'
+
+# A single C type qualifier.
+TYPE_QUAL = r'(?:const|volatile)'
+
+# The type-specifier part of a declaration.  This fragment contains
+# insignificant whitespace, so it must be compiled with re.VERBOSE.
+# It is an f-string so that {IDENTIFIER} is actually interpolated.
+#
+# NOTE(review): the integer group ends with an empty alternative ("|"
+# right before its closing paren), so the whole spec can match the empty
+# string and the alternatives listed after that group (float, double,
+# identifiers, struct/union) are only reached on backtracking.  The
+# empty alternative is kept as-is because _parse_var() relies on the
+# prefix match succeeding for every statement -- confirm before
+# tightening.
+VAR_TYPE_SPEC = rf'''(?:
+        void |
+        (?:
+         (?:(?:un)?signed\s+)?
+         (?:
+          char |
+          short |
+          int |
+          long |
+          long\s+int |
+          long\s+long
+          ) |
+         ) |
+        float |
+        double |
+        {IDENTIFIER} |
+        (?:struct|union)\s+{IDENTIFIER}
+        )'''
+
+# A pointer declarator: an optional "const" followed by "*".
+# (Also meant for re.VERBOSE.)
+POINTER = rf'''(?:
+        (?:\s+const)?\s*[*]
+        )'''
+
+#STRUCT = r'''(?:
+# (?:struct|(struct\s+%s))\s*[{]
+# [^}]*
+# [}]
+# )''' % (IDENTIFIER)
+#UNION = r'''(?:
+# (?:union|(union\s+%s))\s*[{]
+# [^}]*
+# [}]
+# )''' % (IDENTIFIER)
+#DECL_SPEC = rf'''(?:
+# ({VAR_TYPE_SPEC}) |
+# ({STRUCT}) |
+# ({UNION})
+# )'''
+
+# Regex fragment for the start of a function declaration/definition:
+# an optional storage class ("extern", "static" or "static inline")
+# followed by a type spec.  The fragment contains layout whitespace and
+# an inline "#" comment, so it is only usable under re.VERBOSE.
+# NOTE(review): "static" is listed before "static\s+inline", so the
+# longer alternative is only tried on backtracking -- confirm intended.
+FUNC_START = rf'''(?:
+ (?:
+ (?:
+ extern |
+ static |
+ static\s+inline
+ )\s+
+ )?
+ #(?:const\s+)?
+ {VAR_TYPE_SPEC}
+ )'''
+#GLOBAL_VAR_START = rf'''(?:
+# (?:
+# (?:
+# extern |
+# static
+# )\s+
+# )?
+# (?:
+# {TYPE_QUAL}
+# (?:\s+{TYPE_QUAL})?
+# )?\s+
+# {VAR_TYPE_SPEC}
+# )'''
+# Compiled pattern used to recognize a line that starts a global
+# declaration.  It is anchored at the start of the line and captures the
+# FUNC_START prefix as group 1.  Only the function form is included; the
+# global-variable form above is commented out.
+GLOBAL_DECL_START_RE = re.compile(rf'''
+ ^
+ (?:
+ ({FUNC_START})
+ )
+ ''', re.VERBOSE)
+
+# Regex fragment for the start of a local variable declaration, in order:
+# an optional storage class ("register" or "static"), an optional one or
+# two type qualifiers, a type spec, and an optional pointer declarator.
+# The fragment contains layout whitespace, so it is only usable under
+# re.VERBOSE.
+LOCAL_VAR_START = rf'''(?:
+ (?:
+ (?:
+ register |
+ static
+ )\s+
+ )?
+ (?:
+ (?:
+ {TYPE_QUAL}
+ (?:\s+{TYPE_QUAL})?
+ )\s+
+ )?
+ {VAR_TYPE_SPEC}
+ {POINTER}?
+ )'''
+# Compiled pattern used to recognize the start of a local statement.
+# It is anchored at the start of the line and captures the
+# LOCAL_VAR_START prefix as group 1.
+LOCAL_STMT_START_RE = re.compile(rf'''
+ ^
+ (?:
+ ({LOCAL_VAR_START})
+ )
+ ''', re.VERBOSE)
+
+
+def iter_global_declarations(lines):
+    """Yield (stmt, body) for each function definition in the given lines.
+
+    The lines are first passed through source.iter_clean_lines().  Only
+    function definitions are reported for now: a line that ends with
+    ";" or that opens a brace block without any "(" is skipped.
+
+    "body" is the text between the function's braces, one statement
+    line per line.  "stmt" is the header collapsed onto a single line
+    (up to and including the line holding the opening brace), followed
+    by a newline, the body, another newline and the closing line.
+
+    Iteration stops silently if the input ends in the middle of a
+    header or of a body.
+    """
+    # XXX Bail out upon bogus syntax.
+    cleaned = source.iter_clean_lines(lines)
+    for first in cleaned:
+        if GLOBAL_DECL_START_RE.match(first) is None:
+            continue
+        # We only need functions here, since we only need locals for now.
+        is_simple_decl = first.endswith(';')
+        is_nonfunc_block = first.endswith('{') and '(' not in first
+        if is_simple_decl or is_nonfunc_block:
+            continue
+
+        # Gather the header, up to the line with the opening brace.
+        # (assume no func is a one-liner and no inline structs, etc.)
+        header = [first]
+        current = first
+        while '{' not in current:
+            try:
+                current = next(cleaned)
+            except StopIteration:
+                return
+            header.append(current)
+        decl = ' '.join(header)
+
+        # Everything up to the matching close brace is the body.
+        body, closer = _extract_block(cleaned)
+        if closer is None:
+            return
+        assert closer == '}'
+        yield (f'{decl}\n{body}\n{closer}', body)
+
+
+def iter_local_statements(lines):
+    """Yield (stmt, blocks) for each statement found in the given lines.
+
+    The lines are first passed through source.iter_clean_lines().  For
+    now only simple one-line statements are reported: a line must match
+    LOCAL_STMT_START_RE and end with ";".  "blocks" is therefore always
+    None.  Compound and multi-line statements are not supported yet and
+    are skipped.
+    """
+    # XXX Bail out upon bogus syntax.
+    for stmt in source.iter_clean_lines(lines):
+        if LOCAL_STMT_START_RE.match(stmt) is None:
+            continue
+        if not stmt.endswith(';'):
+            # XXX Support compound & multiline simple statements.
+            continue
+        yield (stmt, None)
+
+
+def _extract_block(lines):
+    """Consume lines up to the one that closes the already-open block.
+
+    The caller has already seen the opening "{", so the brace depth
+    starts at 1.  Return (body, end), where "body" is the consumed
+    lines joined with newlines and "end" is the line on which the depth
+    dropped to zero.  "end" is None if the lines ran out first.
+    """
+    collected = []
+    open_braces = 1
+    for text in lines:
+        open_braces += text.count('{')
+        open_braces -= text.count('}')
+        if not open_braces:
+            return '\n'.join(collected), text
+        collected.append(text)
+    return '\n'.join(collected), None
+
+
+def parse_func(stmt, body):
+    """Return (name, signature) for the given function definition.
+
+    "stmt" is the full text of the definition: the header (ending with
+    the opening brace), then "body", then the closing brace.  "body" is
+    the text between the braces and may be empty.
+
+    "signature" is the header without the opening brace, collapsed onto
+    one line.  "name" is the last word before the parameter list, with
+    any leading "*" (from a pointer return type) removed.
+
+    An AssertionError is raised if "stmt" does not have that shape.
+    """
+    if body:
+        # Split on the *last* occurrence: a short body (e.g. "x") may
+        # also appear somewhere inside the header.
+        header, _, end = stmt.rpartition(body)
+    else:
+        # An empty separator is an error for str.(r)partition(), so for
+        # an empty body split on the closing brace instead.
+        header, closer, tail = stmt.rpartition('}')
+        end = closer + tail
+    assert end.strip() == '}'
+    assert header.strip().endswith('{')
+    header, _, _ = header.rpartition('{')
+
+    signature = ' '.join(header.strip().splitlines())
+
+    _, _, name = signature.split('(')[0].strip().rpartition(' ')
+    # "char *name(...)" leaves the "*" glued to the name.
+    name = name.lstrip('*')
+    assert name
+
+    return name, signature
+
+
+#TYPE_SPEC = rf'''(?:
+# )'''
+#VAR_DECLARATOR = rf'''(?:
+# )'''
+#VAR_DECL = rf'''(?:
+# {TYPE_SPEC}+
+# {VAR_DECLARATOR}
+# \s*
+# )'''
+#VAR_DECLARATION = rf'''(?:
+# {VAR_DECL}
+# (?: = [^=] [^;]* )?
+# ;
+# )'''
+#
+#
+#def parse_variable(decl, *, inFunc=False):
+# """Return [(name, storage, vartype)] for the given variable declaration."""
+# ...
+
+
+def _parse_var(stmt):
+    """Return (name, vartype) for the given variable declaration.
+
+    "stmt" is a single-line declaration; trailing ";" characters and
+    any initializer (everything from the first "=") are ignored.  The
+    prefix matched by LOCAL_STMT_START_RE starts the type, and the
+    last space-separated word of the remaining declarator is the name.
+    Whatever precedes that word (e.g. "*") is appended to the type.  A
+    parenthesized declarator such as "(*func)(int)" keeps its
+    parentheses and parameter list in the type.
+
+    Runs of whitespace in the returned type are collapsed to single
+    spaces.
+    """
+    stmt = stmt.rstrip(';')
+    m = LOCAL_STMT_START_RE.match(stmt)
+    assert m
+    vartype = m.group(0)
+    name = stmt[len(vartype):].partition('=')[0].strip()
+
+    if name.startswith('('):
+        name, _, after = name[1:].partition(')')
+        assert after
+        # Make sure a "*" is separated from the name that follows it.
+        name = name.replace('*', '* ')
+        inside, _, name = name.strip().rpartition(' ')
+        vartype = f'{vartype} ({inside.strip()}){after}'
+    else:
+        # Make sure a "*" is separated from the name that follows it.
+        name = name.replace('*', '* ')
+        before, _, name = name.rpartition(' ')
+        vartype = f'{vartype} {before}'
+
+    # Normalize whitespace in one pass (this cannot loop forever).
+    vartype = ' '.join(vartype.split())
+
+    return name, vartype
+
+
+def extract_storage(decl, *, infunc=None):
+    """Return the storage class for the given declaration.
+
+    The result is one of "static", "extern", "local" or "implicit".
+    The default is "implicit" (or "local" if infunc is true).  If
+    "decl" is UNKNOWN then it is returned unchanged.
+
+    NotImplementedError is raised when "static" or "extern" appears as
+    a word anywhere other than at the very start of the declaration.
+    """
+    if decl == UNKNOWN:
+        return decl
+    if decl.startswith('static '):
+        return 'static'
+    if decl.startswith('extern '):
+        return 'extern'
+    # The pattern must be a raw string: in a normal string literal "\b"
+    # is a backspace character rather than a word boundary.
+    if re.search(r'\b(?:static|extern)\b', decl):
+        raise NotImplementedError
+    return 'local' if infunc else 'implicit'
+
+
+def parse_compound(stmt, blocks):
+ """Return (headers, bodies) for the given compound statement.
+
+ This is not implemented yet: every call raises NotImplementedError.
+ """
+ # XXX Identify declarations inside compound statements
+ # (if/switch/for/while).
+ raise NotImplementedError
+
+
+def iter_variables(filename, *,
+                   preprocessed=False,
+                   _iter_source_lines=source.iter_lines,
+                   _iter_global=iter_global_declarations,
+                   _iter_local=iter_local_statements,
+                   _parse_func=parse_func,
+                   _parse_var=_parse_var,
+                   _parse_compound=parse_compound,
+                   ):
+    """Yield (funcname, name, vartype) for every variable in the given file.
+
+    "funcname" is None for a global variable and the name of the
+    enclosing function for a local one.  The underscore-prefixed
+    keyword arguments are the helpers used for each parsing step and
+    may be replaced by the caller.
+
+    NotImplementedError is raised if "preprocessed" is true.
+    """
+    if preprocessed:
+        raise NotImplementedError
+    lines = _iter_source_lines(filename)
+    for stmt, body in _iter_global(lines):
+        # At the file top-level we only have to worry about vars & funcs.
+        if body is None:
+            # Not a function: there is no body at all.
+            name, vartype = _parse_var(stmt)
+            if name:
+                yield (None, name, vartype)
+            continue
+        if not body:
+            # A function with an empty body has no locals to report.
+            # It must not be mistaken for a variable declaration.
+            continue
+
+        funcname, _ = _parse_func(stmt, body)
+        localvars = _iter_locals(body,
+                                 _iter_statements=_iter_local,
+                                 _parse_var=_parse_var,
+                                 _parse_compound=_parse_compound,
+                                 )
+        for name, vartype in localvars:
+            yield (funcname, name, vartype)
+
+
+def _iter_locals(lines, *,
+                 _iter_statements=iter_local_statements,
+                 _parse_var=_parse_var,
+                 _parse_compound=parse_compound,
+                 ):
+    """Yield (name, vartype) for each local variable in a function body.
+
+    "lines" is the body text as a single string.  It is split into
+    lines and fed to _iter_statements().  Simple statements are parsed
+    directly with _parse_var().  For a compound statement the variables
+    in its headers are reported first, and its bodies are then queued
+    and processed in turn, after the current text is finished.
+    """
+    pending = [lines]
+    while pending:
+        text = pending.pop(0)
+        for stmt, blocks in _iter_statements(text.splitlines()):
+            if blocks:
+                headers, bodies = _parse_compound(stmt, blocks)
+                for header in headers:
+                    for line in header:
+                        name, vartype = _parse_var(line)
+                        if name:
+                            yield (name, vartype)
+                pending.extend(bodies)
+                continue
+
+            name, vartype = _parse_var(stmt)
+            if name:
+                yield (name, vartype)
+
+
+def iter_all(filename, *,
+             preprocessed=False,
+             ):
+    """Yield (kind, funcname, name, decl) for each declaration found.
+
+    For now only variables are reported, so "kind" is always
+    "variable" and the remaining items come straight from
+    iter_variables().
+    """
+    # XXX For the moment we cheat.
+    variables = iter_variables(filename, preprocessed=preprocessed)
+    for funcname, name, decl in variables:
+        yield ('variable', funcname, name, decl)