summaryrefslogtreecommitdiffstats
path: root/Tools/c-analyzer/c_parser
diff options
context:
space:
mode:
Diffstat (limited to 'Tools/c-analyzer/c_parser')
-rw-r--r--Tools/c-analyzer/c_parser/__init__.py0
-rw-r--r--Tools/c-analyzer/c_parser/declarations.py295
-rw-r--r--Tools/c-analyzer/c_parser/info.py78
-rw-r--r--Tools/c-analyzer/c_parser/naive.py180
-rw-r--r--Tools/c-analyzer/c_parser/preprocessor.py512
-rw-r--r--Tools/c-analyzer/c_parser/source.py34
6 files changed, 1099 insertions, 0 deletions
diff --git a/Tools/c-analyzer/c_parser/__init__.py b/Tools/c-analyzer/c_parser/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Tools/c-analyzer/c_parser/__init__.py
diff --git a/Tools/c-analyzer/c_parser/declarations.py b/Tools/c-analyzer/c_parser/declarations.py
new file mode 100644
index 0000000..19fa3ff
--- /dev/null
+++ b/Tools/c-analyzer/c_parser/declarations.py
@@ -0,0 +1,295 @@
+import re
+import shlex
+import subprocess
+
+from . import source
+
+
# A C identifier: a normal letter-led word, or underscore(s) followed by
# at least one alphanumeric.  (Previously "[a-zA-z]" -- a typo whose
# range also matched "[", "\", "]", "^", "_", "`" -- and it matched only
# a single character.)
IDENTIFIER = r'(?:[a-zA-Z]\w*|_+[a-zA-Z0-9]\w*)'

TYPE_QUAL = r'(?:const|volatile)'

# The type part of a declaration.  This must be an rf-string: as a plain
# r-string the literal text "{IDENTIFIER}" ended up in the compiled
# pattern, so user-defined type names never matched.  A stray empty
# alternative (") |") has also been removed -- it let this pattern match
# the empty string, which made GLOBAL_DECL_START_RE match every line.
VAR_TYPE_SPEC = rf'''(?:
    void |
    (?:
     (?:(?:un)?signed\s+)?
     (?:
      char |
      short |
      int |
      long |
      long\s+int |
      long\s+long
      )
     ) |
    float |
    double |
    {IDENTIFIER} |
    (?:struct|union)\s+{IDENTIFIER}
    )'''

POINTER = rf'''(?:
    (?:\s+const)?\s*[*]
    )'''

# The start of a top-level function definition (or declaration):
# optional storage class followed by the return type.
FUNC_START = rf'''(?:
    (?:
      (?:
        extern |
        static |
        static\s+inline
       )\s+
     )?
    #(?:const\s+)?
    {VAR_TYPE_SPEC}
    )'''
GLOBAL_DECL_START_RE = re.compile(rf'''
    ^
    (?:
        ({FUNC_START})
     )
    ''', re.VERBOSE)

# The start of a local (in-function) variable declaration: optional
# storage class, optional qualifiers, the type, and an optional pointer.
LOCAL_VAR_START = rf'''(?:
    (?:
      (?:
        register |
        static
       )\s+
     )?
    (?:
      (?:
        {TYPE_QUAL}
        (?:\s+{TYPE_QUAL})?
       )\s+
     )?
    {VAR_TYPE_SPEC}
    {POINTER}?
    )'''
LOCAL_STMT_START_RE = re.compile(rf'''
    ^
    (?:
        ({LOCAL_VAR_START})
     )
    ''', re.VERBOSE)
+
+
def iter_global_declarations(lines):
    """Yield (decl, body) for each global declaration in the given lines.

    For function definitions the header is reduced to one line and
    the body is provided as-is.  For other compound declarations (e.g.
    struct) the entire declaration is reduced to one line and "body"
    is None.  Likewise for simple declarations (e.g. variables).

    Declarations inside function bodies are ignored, though their text
    is provided in the function body.

    Note: as the skips below show, currently only function definitions
    are actually yielded; simple declarations (ending in ";") and
    brace-opening lines without "(" are dropped.
    """
    # XXX Bail out upon bogus syntax.
    lines = source.iter_clean_lines(lines)
    for line in lines:
        if not GLOBAL_DECL_START_RE.match(line):
            continue
        # We only need functions here, since we only need locals for now.
        if line.endswith(';'):
            continue
        if line.endswith('{') and '(' not in line:
            continue

        # Capture the function.
        # (assume no func is a one-liner)
        decl = line
        while '{' not in line:  # assume no inline structs, etc.
            try:
                line = next(lines)
            except StopIteration:
                return
            decl += ' ' + line

        body, end = _extract_block(lines)
        if end is None:
            # Input ran out before the body's closing brace.
            return
        assert end == '}'
        yield (f'{decl}\n{body}\n{end}', body)
+
+
def iter_local_statements(lines):
    """Yield (lines, blocks) for each statement in the given lines.

    For simple statements, "blocks" is None and the statement is reduced
    to a single line.  For compound statements, "blocks" is a pair of
    (header, body) for each block in the statement.  The headers are
    reduced to a single line each, but the bodies are provided as-is.

    Note: compound and multi-line simple statements are currently
    skipped (see the XXX below), so only one-line statements ending in
    ";" are ever yielded, always with blocks=None.
    """
    # XXX Bail out upon bogus syntax.
    lines = source.iter_clean_lines(lines)
    for line in lines:
        if not LOCAL_STMT_START_RE.match(line):
            continue

        stmt = line
        blocks = None
        if not line.endswith(';'):
            # XXX Support compound & multiline simple statements.
            #blocks = []
            continue

        yield (stmt, blocks)
+
+
+def _extract_block(lines):
+ end = None
+ depth = 1
+ body = []
+ for line in lines:
+ depth += line.count('{') - line.count('}')
+ if depth == 0:
+ end = line
+ break
+ body.append(line)
+ return '\n'.join(body), end
+
+
def parse_func(stmt, body):
    """Return (name, signature) for the given function definition.

    "stmt" is the complete definition text and "body" is its
    brace-enclosed interior; the only text after the body must be the
    closing "}".  A multi-line header is collapsed to one line.
    """
    before_body, _, after_body = stmt.partition(body)
    assert after_body.strip() == '}'
    assert before_body.strip().endswith('{')
    header = before_body.rpartition('{')[0]

    signature = ' '.join(header.strip().splitlines())

    # The name is the last word before the argument list.
    name = signature.split('(')[0].strip().rpartition(' ')[2]
    assert name

    return name, signature
+
+
def parse_var(stmt):
    """Return (name, vartype) for the given variable declaration.

    Any "= ..." initializer and trailing semicolon are dropped.  For
    parenthesized declarators ("vartype (*name)(...)"), the declarator
    (minus the name) is folded back into the reported vartype.
    """
    stmt = stmt.rstrip(';')
    m = LOCAL_STMT_START_RE.match(stmt)
    assert m
    vartype = m.group(0)
    # Whatever follows the matched type prefix (minus any initializer)
    # is the declarator, which holds the name.
    name = stmt[len(vartype):].partition('=')[0].strip()

    if name.startswith('('):
        # e.g. "void (*handler)(int)": pull the name out of the
        # parenthesized declarator; keep the rest in the vartype.
        name, _, after = name[1:].partition(')')
        assert after
        name = name.replace('*', '* ')
        inside, _, name = name.strip().rpartition(' ')
        vartype = f'{vartype} ({inside.strip()}){after}'
    else:
        # Attach any pointer stars to the vartype rather than the name.
        name = name.replace('*', '* ')
        before, _, name = name.rpartition(' ')
        vartype = f'{vartype} {before}'

    vartype = vartype.strip()
    # Bug fix: collapse runs of spaces.  As written this replaced a
    # single space with itself inside "while ' ' in vartype", which
    # never terminates for any vartype containing a space.
    while '  ' in vartype:
        vartype = vartype.replace('  ', ' ')

    return name, vartype
+
+
def parse_compound(stmt, blocks):
    """Return (headers, bodies) extracted from a compound statement.

    Not implemented yet.
    """
    # TODO: Identify declarations inside compound statements
    # (if/switch/for/while).
    raise NotImplementedError
+
+
def iter_variables(filename, *,
                   _iter_source_lines=source.iter_lines,
                   _iter_global=iter_global_declarations,
                   _iter_local=iter_local_statements,
                   _parse_func=parse_func,
                   _parse_var=parse_var,
                   _parse_compound=parse_compound,
                   ):
    """Yield (funcname, name, vartype) for every variable in the given file.

    Globals are yielded with funcname=None; locals are yielded with the
    name of their enclosing function.  The underscore-prefixed keyword
    arguments exist for dependency injection in tests.
    """
    lines = _iter_source_lines(filename)
    for stmt, body in _iter_global(lines):
        # At the file top-level we only have to worry about vars & funcs.
        if not body:
            # A simple (variable) declaration.
            name, vartype = _parse_var(stmt)
            if name:
                yield (None, name, vartype)
        else:
            # A function definition: descend into its body for locals.
            funcname, _ = _parse_func(stmt, body)
            localvars = _iter_locals(body,
                                     _iter_statements=_iter_local,
                                     _parse_var=_parse_var,
                                     _parse_compound=_parse_compound,
                                     )
            for name, vartype in localvars:
                yield (funcname, name, vartype)
+
+
def _iter_locals(lines, *,
                 _iter_statements=iter_local_statements,
                 _parse_var=parse_var,
                 _parse_compound=parse_compound,
                 ):
    """Yield (name, vartype) for each local variable in a function body.

    "lines" is the body text.  Nested compound statements are processed
    breadth-first via the "compound" work queue.
    """
    compound = [lines]
    while compound:
        body = compound.pop(0)
        bodylines = body.splitlines()
        for stmt, blocks in _iter_statements(bodylines):
            if not blocks:
                # A simple declaration on a single line.
                name, vartype = _parse_var(stmt)
                if name:
                    yield (name, vartype)
            else:
                # Declarations may hide in compound-statement headers;
                # queue the bodies for later passes.
                headers, bodies = _parse_compound(stmt, blocks)
                for header in headers:
                    for line in header:
                        name, vartype = _parse_var(line)
                        if name:
                            yield (name, vartype)
                compound.extend(bodies)
+
+
def iter_all(dirnames):
    """Yield one Declaration per declaration found under the directories.

    Duplicates (due to preprocessor conditionals) are checked to make
    sure they are the same.  Not implemented yet.
    """
    raise NotImplementedError
+
+
def iter_preprocessed(dirnames):
    """Yield one Declaration per declaration found under the directories.

    Unlike iter_all(), every source file is first run through the
    preprocessor.  Not implemented yet.
    """
    raise NotImplementedError
diff --git a/Tools/c-analyzer/c_parser/info.py b/Tools/c-analyzer/c_parser/info.py
new file mode 100644
index 0000000..9ab6979
--- /dev/null
+++ b/Tools/c-analyzer/c_parser/info.py
@@ -0,0 +1,78 @@
+from collections import namedtuple
+
+from c_analyzer_common import info, util
+from c_analyzer_common.util import classonly, _NTBase
+
+
def normalize_vartype(vartype):
    """Return the canonical form for a variable type (or func signature).

    None passes through unchanged.  Any other value -- including the
    empty string, which is allowed for semantic reasons -- is coerced
    to str.
    """
    if vartype is None:
        return None
    # XXX finish!
    # XXX Return (modifiers, type, pointer)?
    return str(vartype)
+
+
class Variable(_NTBase,
               namedtuple('Variable', 'id vartype')):
    """Information about a single variable declaration.

    "id" is an info.ID (filename, funcname, name) and "vartype" is the
    normalized type string (or None).
    """

    __slots__ = ()
    # Per-instance storage that works around namedtuple's lack of a
    # __dict__ (set lazily by the "isglobal" property / from_parts()).
    _isglobal = util.Slot()

    @classonly
    def from_parts(cls, filename, funcname, name, vartype, isglobal=False):
        # Alternate constructor building the ID from its parts.
        id = info.ID(filename, funcname, name)
        self = cls(id, vartype)
        if isglobal:
            self._isglobal = True
        return self

    def __new__(cls, id, vartype):
        self = super().__new__(
                cls,
                id=info.ID.from_raw(id),
                vartype=normalize_vartype(vartype) if vartype else None,
                )
        return self

    def __hash__(self):
        return hash(self.id)

    def __getattr__(self, name):
        # Delegate attribute access (filename, funcname, name) to the ID.
        return getattr(self.id, name)

    def _validate_id(self):
        # The filename is required; the funcname, if set, must be known.
        if not self.id:
            raise TypeError('missing id')

        if not self.filename or self.filename == info.UNKNOWN:
            raise TypeError(f'id missing filename ({self.id})')

        if self.funcname and self.funcname == info.UNKNOWN:
            raise TypeError(f'id missing funcname ({self.id})')

        self.id.validate()

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        self._validate_id()

        if self.vartype is None or self.vartype == info.UNKNOWN:
            raise TypeError('missing vartype')

    @property
    def isglobal(self):
        # Computed lazily and cached in the _isglobal slot.
        try:
            return self._isglobal
        except AttributeError:
            # XXX Include extern variables.
            # XXX Ignore functions.
            self._isglobal = ('static' in self.vartype.split())
            return self._isglobal

    @property
    def isconst(self):
        return 'const' in self.vartype.split()
diff --git a/Tools/c-analyzer/c_parser/naive.py b/Tools/c-analyzer/c_parser/naive.py
new file mode 100644
index 0000000..e0370cc
--- /dev/null
+++ b/Tools/c-analyzer/c_parser/naive.py
@@ -0,0 +1,180 @@
+import re
+
+from c_analyzer_common.info import UNKNOWN
+
+from .info import Variable
+from .preprocessor import _iter_clean_lines
+
+
# Sentinel for "no value passed" -- distinct from None, which is a
# meaningful srccache value elsewhere in this module.
_NOT_SET = object()
+
+
def get_srclines(filename, *,
                 cache=None,
                 _open=open,
                 _iter_lines=_iter_clean_lines,
                 ):
    """Return the file's lines as a list.

    Each line will have trailing whitespace removed (including newline),
    and preprocessor directive lines (starting with "#") are dropped.

    If a cache (a {filename: lines} mapping) is given then it is used.
    """
    if cache is not None:
        try:
            return cache[filename]
        except KeyError:
            pass

    with _open(filename) as srcfile:
        # _iter_clean_lines yields (lno, line) pairs; only the text is kept.
        srclines = [line
                    for _, line in _iter_lines(srcfile)
                    if not line.startswith('#')]
        for i, line in enumerate(srclines):
            srclines[i] = line.rstrip()

    if cache is not None:
        cache[filename] = srclines
    return srclines
+
+
def parse_variable_declaration(srcline):
    """Return (name, decl) for the given declaration line.

    "name" is the final identifier in the declaration (ignoring any
    trailing "[...]" array suffix) and "decl" is the declaration text
    without any "= ..." initializer or trailing semicolon.  Returns
    (None, None) when the line does not look like a declaration.
    """
    # XXX possible false negatives...
    decl, sep, _ = srcline.partition('=')
    if not sep:
        # No initializer, so a declaration must end with a semicolon.
        if not srcline.endswith(';'):
            return None, None
        decl = decl.strip(';')
    decl = decl.strip()
    matched = re.match(r'.*\b(\w+)\s*(?:\[[^\]]*\])?$', decl)
    if matched is None:
        return None, None
    return matched.group(1), decl
+
+
def parse_variable(srcline, funcname=None):
    """Return (name, decl) for the variable declared on the line.

    Returns (None, None) for anything that is not a static variable
    declaration.  ("funcname" is accepted for interface compatibility
    but is not used here.)
    """
    stripped = srcline.strip()

    # XXX Handle more than just static variables.
    if not stripped.startswith('static '):
        return None, None
    if '(' in stripped and '[' not in stripped:
        # A function, not a variable.
        return None, None
    return parse_variable_declaration(stripped)
+
+
def iter_variables(filename, *,
                   srccache=None,
                   parse_variable=None,
                   _get_srclines=get_srclines,
                   _default_parse_variable=parse_variable,
                   ):
    """Yield a Variable for each in the given source file.

    "parse_variable" may be overridden; by default the module-level
    parse_variable() is used (captured as _default_parse_variable
    before the parameter shadows it).
    """
    if parse_variable is None:
        parse_variable = _default_parse_variable

    indent = ''
    prev = ''
    funcname = None
    for line in _get_srclines(filename, cache=srccache):
        # remember current funcname
        if funcname:
            # Inside a function: skip lines until the closing brace at
            # the function's own indent.
            if line == indent + '}':
                funcname = None
            continue
        else:
            if '(' in prev and line == indent + '{':
                # Previous line looked like a function header and this
                # line opens its body.
                if not prev.startswith('__attribute__'):
                    funcname = prev.split('(')[0].split()[-1]
                prev = ''
                continue
            indent = line[:-len(line.lstrip())]
            prev = line

        # Note: lines inside function bodies are skipped above, so
        # funcname is always None when we get here.
        info = parse_variable(line, funcname)
        if isinstance(info, list):
            # A custom parse_variable may return multiple results.
            for name, _funcname, decl in info:
                yield Variable.from_parts(filename, _funcname, name, decl)
            continue
        name, decl = info

        if name is None:
            continue
        yield Variable.from_parts(filename, funcname, name, decl)
+
+
def _match_varid(variable, name, funcname, ignored=None):
    """Return True if the variable matches the given name/funcname.

    Variables in "ignored" never match.
    """
    if ignored and variable in ignored:
        return False

    if variable.name != name:
        return False

    if funcname == UNKNOWN:
        # NOTE(review): an UNKNOWN funcname matches any *local* variable
        # (one that has a funcname) but never a global -- confirm this
        # asymmetry is intended.
        if not variable.funcname:
            return False
    elif variable.funcname != funcname:
        return False

    return True
+
+
def find_variable(filename, funcname, name, *,
                  ignored=None,
                  srccache=None,  # {filename: lines}
                  parse_variable=None,
                  _iter_variables=iter_variables,
                  ):
    """Return the first variable in the file matching funcname/name.

    Return None if the variable is not found.
    """
    for variable in _iter_variables(filename,
                                    srccache=srccache,
                                    parse_variable=parse_variable,
                                    ):
        if _match_varid(variable, name, funcname, ignored):
            return variable
    else:
        # The for-else runs when no match was returned above.
        return None
+
+
def find_variables(varids, filenames=None, *,
                   srccache=_NOT_SET,
                   parse_variable=None,
                   _find_symbol=find_variable,
                   ):
    """Yield a Variable for each ID.

    If the variable is not found then its decl will be UNKNOWN.  That
    way there will be one resulting Variable per given ID.  IDs without
    a known filename are searched for in "filenames".
    """
    if srccache is _NOT_SET:
        # Share one cache across all lookups by default.
        srccache = {}

    used = set()
    for varid in varids:
        if varid.filename and varid.filename != UNKNOWN:
            srcfiles = [varid.filename]
        else:
            if not filenames:
                yield Variable(varid, UNKNOWN)
                continue
            srcfiles = filenames
        for filename in srcfiles:
            # Bug fix: this called the undefined name "_find_varid"
            # (NameError at runtime); the injected helper parameter is
            # "_find_symbol".
            found = _find_symbol(filename, varid.funcname, varid.name,
                                 ignored=used,
                                 srccache=srccache,
                                 parse_variable=parse_variable,
                                 )
            if found:
                yield found
                used.add(found)
                break
        else:
            yield Variable(varid, UNKNOWN)
diff --git a/Tools/c-analyzer/c_parser/preprocessor.py b/Tools/c-analyzer/c_parser/preprocessor.py
new file mode 100644
index 0000000..0e2866e
--- /dev/null
+++ b/Tools/c-analyzer/c_parser/preprocessor.py
@@ -0,0 +1,512 @@
+from collections import namedtuple
+import shlex
+import os
+import re
+
+from c_analyzer_common import util
+from . import info
+
+
# Line-continuation marker.  NOTE(review): files read in text mode yield
# '\n' endings on every platform, while os.linesep is '\r\n' on Windows,
# so continuations may never match there -- confirm how lines are produced.
CONTINUATION = '\\' + os.linesep

# An identifier: a word containing at least one ASCII letter.
IDENTIFIER = r'(?:\w*[a-zA-Z]\w*)'
IDENTIFIER_RE = re.compile('^' + IDENTIFIER + '$')
+
+
+def _coerce_str(value):
+ if not value:
+ return ''
+ return str(value).strip()
+
+
+#############################
+# directives
+
# The leading "#" of a directive, with optional surrounding whitespace.
DIRECTIVE_START = r'''
    (?:
      ^ \s*
      [#] \s*
      )'''
# The (optional) text following a directive's keyword.
DIRECTIVE_TEXT = r'''
    (?:
      (?: \s+ ( .*\S ) )?
      \s* $
      )'''
# Any recognized directive: the keyword is captured in group 1 and the
# trailing text in group 2.  NOTE(review): the C preprocessor spells the
# conditional "elif", not "elseif" -- confirm which spelling the
# analyzed sources use.
DIRECTIVE = rf'''
    (?:
      {DIRECTIVE_START}
      (
        include |
        error | warning |
        pragma |
        define | undef |
        if | ifdef | ifndef | elseif | else | endif |
        __FILE__ | __LINE__ | __DATE__ | __TIME__ | __TIMESTAMP__
        )
      {DIRECTIVE_TEXT}
      )'''
DIRECTIVE_RE = re.compile(DIRECTIVE, re.VERBOSE)

# "#define NAME" or "#define NAME(ARGS)", each with an optional body.
DEFINE = rf'''
    (?:
      {DIRECTIVE_START} define \s+
      (?:
        ( \w*[a-zA-Z]\w* )
        (?: \s* [(] ([^)]*) [)] )?
        )
      {DIRECTIVE_TEXT}
      )'''
DEFINE_RE = re.compile(DEFINE, re.VERBOSE)
+
+
def parse_directive(line):
    """Return the appropriate directive for the given line.

    The line is normalized first: a single "#" prefix with no space
    after it and internal runs of whitespace collapsed.
    """
    line = line.strip()
    if line.startswith('#'):
        line = line[1:].lstrip()
        line = '#' + line
    directive = line
    #directive = '#' + line
    # Bug fix: collapse *double* spaces.  As written this replaced a
    # single space with itself inside "while ' ' in directive", which
    # never terminates for any directive containing a space.
    while '  ' in directive:
        directive = directive.replace('  ', ' ')
    return _parse_directive(directive)
+
+
def _parse_directive(line):
    """Return a PreprocessorDirective for the given normalized line.

    Raises ValueError for unsupported or malformed directives.
    """
    m = DEFINE_RE.match(line)
    if m:
        name, args, text = m.groups()
        if args:
            args = [a.strip() for a in args.split(',')]
            return Macro(name, args, text)
        else:
            # NOTE(review): a macro with an empty parameter list
            # ("#define F() ...") has args == '' and falls through to
            # Constant -- confirm this is intended.
            return Constant(name, text)

    m = DIRECTIVE_RE.match(line)
    if not m:
        raise ValueError(f'unsupported directive {line!r}')
    kind, text = m.groups()
    if not text:
        # Only "else" and "endif" are allowed to have no text.
        if kind not in ('else', 'endif'):
            raise ValueError(f'missing text in directive {line!r}')
    elif kind in ('else', 'endif', 'define'):
        raise ValueError(f'unexpected text in directive {line!r}')
    if kind == 'include':
        directive = Include(text)
    elif kind in IfDirective.KINDS:
        directive = IfDirective(kind, text)
    else:
        directive = OtherDirective(kind, text)
    directive.validate()
    return directive
+
+
class PreprocessorDirective(util._NTBase):
    """The base class for directives.

    Subclasses are namedtuples whose first field is always "kind".
    """

    __slots__ = ()

    # All directive keywords recognized by this module.
    KINDS = frozenset([
        'include',
        'pragma',
        'error', 'warning',
        'define', 'undef',
        'if', 'ifdef', 'ifndef', 'elseif', 'else', 'endif',
        '__FILE__', '__DATE__', '__LINE__', '__TIME__', '__TIMESTAMP__',
        ])

    @property
    def text(self):
        # Join the non-"kind" fields (subclass-specific) into one string.
        return ' '.join(v for v in self[1:] if v and v.strip()) or None

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.kind:
            raise TypeError('missing kind')
        elif self.kind not in self.KINDS:
            raise ValueError

        # text can be anything, including None.
+
+
class Constant(PreprocessorDirective,
               namedtuple('Constant', 'kind name value')):
    """A single "constant" directive ("define").

    This is an object-like macro: "#define NAME [VALUE]".
    """

    __slots__ = ()

    def __new__(cls, name, value=None):
        self = super().__new__(
                cls,
                'define',
                name=_coerce_str(name) or None,
                value=_coerce_str(value) or None,
                )
        return self

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.name:
            raise TypeError('missing name')
        elif not IDENTIFIER_RE.match(self.name):
            raise ValueError(f'name must be identifier, got {self.name!r}')

        # value can be anything, including None
+
+
class Macro(PreprocessorDirective,
            namedtuple('Macro', 'kind name args body')):
    """A single "macro" directive ("define").

    This is a function-like macro: "#define NAME(ARGS) [BODY]".
    """

    __slots__ = ()

    def __new__(cls, name, args, body=None):
        # "args" must be a string or an iterable of strings (or "empty").
        if isinstance(args, str):
            args = [v.strip() for v in args.split(',')]
        if args:
            args = tuple(_coerce_str(a) or None for a in args)
        self = super().__new__(
                cls,
                kind='define',
                name=_coerce_str(name) or None,
                args=args if args else (),
                body=_coerce_str(body) or None,
                )
        return self

    @property
    def text(self):
        # Reconstruct the "NAME(ARGS) BODY" form of the directive.
        if self.body:
            return f'{self.name}({", ".join(self.args)}) {self.body}'
        else:
            return f'{self.name}({", ".join(self.args)})'

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.name:
            raise TypeError('missing name')
        elif not IDENTIFIER_RE.match(self.name):
            raise ValueError(f'name must be identifier, got {self.name!r}')

        for arg in self.args:
            if not arg:
                raise ValueError(f'missing arg in {self.args}')
            elif not IDENTIFIER_RE.match(arg):
                raise ValueError(f'arg must be identifier, got {arg!r}')

        # body can be anything, including None
+
+
class IfDirective(PreprocessorDirective,
                  namedtuple('IfDirective', 'kind condition')):
    """A single conditional directive (e.g. "if", "ifdef").

    This only includes directives that actually provide conditions.  The
    related directives "else" and "endif" are covered by OtherDirective
    instead.
    """

    __slots__ = ()

    KINDS = frozenset([
        'if',
        'ifdef',
        'ifndef',
        'elseif',
        ])

    @classmethod
    def _condition_from_raw(cls, raw, kind):
        # Normalize the raw condition text: "ifdef"/"ifndef" conditions
        # are rewritten into the equivalent defined() expressions.
        #return Condition.from_raw(raw, _kind=kind)
        condition = _coerce_str(raw)
        if not condition:
            return None

        if kind == 'ifdef':
            condition = f'defined({condition})'
        elif kind == 'ifndef':
            condition = f'! defined({condition})'

        return condition

    def __new__(cls, kind, condition):
        kind = _coerce_str(kind)
        self = super().__new__(
                cls,
                kind=kind or None,
                condition=cls._condition_from_raw(condition, kind),
                )
        return self

    @property
    def text(self):
        # Undo the normalization done in _condition_from_raw().
        if self.kind == 'ifdef':
            return self.condition[8:-1]  # strip "defined("
        elif self.kind == 'ifndef':
            return self.condition[10:-1]  # strip "! defined("
        else:
            return self.condition
        #return str(self.condition)

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.condition:
            raise TypeError('missing condition')
        #else:
        #    for cond in self.condition:
        #        if not cond:
        #            raise ValueError(f'missing condition in {self.condition}')
        #        cond.validate()
        #    if self.kind in ('ifdef', 'ifndef'):
        #        if len(self.condition) != 1:
        #            raise ValueError('too many condition')
        #        if self.kind == 'ifdef':
        #            if not self.condition[0].startswith('defined '):
        #                raise ValueError('bad condition')
        #        else:
        #            if not self.condition[0].startswith('! defined '):
        #                raise ValueError('bad condition')
+
+
class Include(PreprocessorDirective,
              namedtuple('Include', 'kind file')):
    """A single "include" directive.

    Supported "file" values either follow the bracket style (<stdio>)
    or double quotes ("spam.h").
    """

    __slots__ = ()

    def __new__(cls, file):
        self = super().__new__(
                cls,
                kind='include',
                file=_coerce_str(file) or None,
                )
        return self

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.file:
            raise TypeError('missing file')
+
+
class OtherDirective(PreprocessorDirective,
                     namedtuple('OtherDirective', 'kind text')):
    """A single directive not covered by another class.

    This includes the "else", "endif", and "undef" directives, which are
    otherwise inherently related to the directives covered by the
    Constant, Macro, and IfCondition classes.

    Note that all directives must have a text value, except for "else"
    and "endif" (which must have no text).
    """

    __slots__ = ()

    # Everything except the kinds handled by Include/Constant/Macro and
    # IfDirective.
    KINDS = PreprocessorDirective.KINDS - {'include', 'define'} - IfDirective.KINDS

    def __new__(cls, kind, text):
        self = super().__new__(
                cls,
                kind=_coerce_str(kind) or None,
                text=_coerce_str(text) or None,
                )
        return self

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if self.text:
            if self.kind in ('else', 'endif'):
                raise ValueError('unexpected text in directive')
        elif self.kind not in ('else', 'endif'):
            raise TypeError('missing text')
+
+
+#############################
+# iterating lines
+
+def _recompute_conditions(directive, ifstack):
+ if directive.kind in ('if', 'ifdef', 'ifndef'):
+ ifstack.append(
+ ([], directive.condition))
+ elif directive.kind == 'elseif':
+ if ifstack:
+ negated, active = ifstack.pop()
+ if active:
+ negated.append(active)
+ else:
+ negated = []
+ ifstack.append(
+ (negated, directive.condition))
+ elif directive.kind == 'else':
+ if ifstack:
+ negated, active = ifstack.pop()
+ if active:
+ negated.append(active)
+ ifstack.append(
+ (negated, None))
+ elif directive.kind == 'endif':
+ if ifstack:
+ ifstack.pop()
+
+ conditions = []
+ for negated, active in ifstack:
+ for condition in negated:
+ conditions.append(f'! ({condition})')
+ if active:
+ conditions.append(active)
+ return tuple(conditions)
+
+
def _iter_clean_lines(lines):
    """Yield (lno, line) for the given lines, with comments removed.

    Line continuations are folded into a single logical line, and both
    "//" and "/* ... */" comments (including multi-line ones) are
    replaced by a single space, per the C99 spec.  "lno" is the
    1-indexed number of the *last* physical line involved.  Note that
    comment markers inside string literals are not recognized.
    """
    lines = iter(enumerate(lines, 1))
    for lno, line in lines:
        # Handle line continuations.
        while line.endswith(CONTINUATION):
            try:
                lno, _line = next(lines)
            except StopIteration:
                break
            line = line[:-len(CONTINUATION)] + ' ' + _line

        # Deal with comments.
        after = line
        line = ''
        while True:
            # Look for a comment.
            before, begin, remainder = after.partition('/*')
            if '//' in before:
                # A "//" comment: everything after it is discarded.
                before, _, _ = before.partition('//')
                line += before + ' '  # per the C99 spec
                break
            line += before
            if not begin:
                break
            line += ' '  # per the C99 spec

            # Go until we find the end of the comment.
            _, end, after = remainder.partition('*/')
            while not end:
                try:
                    lno, remainder = next(lines)
                except StopIteration:
                    raise Exception('unterminated comment')
                _, end, after = remainder.partition('*/')

        yield lno, line
+
+
def iter_lines(lines, *,
               _iter_clean_lines=_iter_clean_lines,
               _parse_directive=_parse_directive,
               _recompute_conditions=_recompute_conditions,
               ):
    """Yield (lno, line, directive, active conditions) for each given line.

    This is effectively a subset of the operations taking place in
    translation phases 2-4 from the C99 spec (ISO/IEC 9899:TC2); see
    section 5.1.1.2.  Line continuations are removed and comments
    replaced with a single space.  (In both cases "lno" will be the last
    line involved.)  Otherwise each line is returned as-is.

    "lno" is the (1-indexed) line number for the line.

    "directive" will be a PreprocessorDirective or None, depending on
    whether or not there is a directive on the line.

    "active conditions" is the set of preprocessor conditions (e.g.
    "defined()") under which the current line of code will be included
    in compilation.  That set is derived from every conditional
    directive block (e.g. "if defined()", "ifdef", "else") containing
    that line.  That includes nested directives.  Note that the
    current line does not affect the active conditions for itself.
    It only impacts subsequent lines.  That applies to directives
    that close blocks (e.g. "endif") just as much as conditional
    directives.  Also note that "else" and "elseif" directives
    update the active conditions (for later lines), rather than
    adding to them.
    """
    ifstack = []
    conditions = ()
    for lno, line in _iter_clean_lines(lines):
        stripped = line.strip()
        if not stripped.startswith('#'):
            yield lno, line, None, conditions
            continue

        # Normalize: single "#" prefix and collapsed internal whitespace.
        directive = '#' + stripped[1:].lstrip()
        # Bug fix: collapse *double* spaces.  As written this replaced a
        # single space with itself inside "while ' ' in directive",
        # which never terminates for any directive containing a space.
        while '  ' in directive:
            directive = directive.replace('  ', ' ')
        directive = _parse_directive(directive)
        yield lno, line, directive, conditions

        if directive.kind in ('else', 'endif'):
            conditions = _recompute_conditions(directive, ifstack)
        elif isinstance(directive, IfDirective):
            conditions = _recompute_conditions(directive, ifstack)
+
+
+#############################
+# running (platform-specific?)
+
def _gcc(filename, *,
         _get_argv=(lambda: _get_gcc_argv()),
         _run=util.run_cmd,
         ):
    """Return the preprocessor output ("-E") for the given file.

    The compiler command line comes from _get_gcc_argv() (i.e. the
    project Makefile) by default.
    """
    argv = _get_argv()
    argv.extend([
        '-E', filename,
    ])
    output = _run(argv)
    return output
+
+
def _get_gcc_argv(*,
                  _open=open,
                  _run=util.run_cmd,
                  ):
    """Return the compiler argv ($CC + $PY_CORE_CFLAGS) from the Makefile.

    A throwaway "print-%" rule is written to a helper makefile so make
    can echo the Makefile's variables back to us.
    NOTE(review): the hard-coded /tmp path and /usr/bin/make are not
    portable and the fixed filename is race-prone; consider tempfile.
    """
    with _open('/tmp/print.mk', 'w') as tmpfile:
        tmpfile.write('print-%:\n')
        #tmpfile.write('\t@echo $* = $($*)\n')
        tmpfile.write('\t@echo $($*)\n')
    argv = ['/usr/bin/make',
            '-f', 'Makefile',
            '-f', '/tmp/print.mk',
            'print-CC',
            'print-PY_CORE_CFLAGS',
            ]
    output = _run(argv)
    # One line of output per "print-" target, in order.
    gcc, cflags = output.strip().splitlines()
    argv = shlex.split(gcc.strip())
    cflags = shlex.split(cflags.strip())
    return argv + cflags
+
+
def run(filename, *,
        _gcc=_gcc,
        ):
    """Return the text of the given file after running the preprocessor."""
    # Currently gcc-specific; see _gcc()/_get_gcc_argv() above.
    return _gcc(filename)
diff --git a/Tools/c-analyzer/c_parser/source.py b/Tools/c-analyzer/c_parser/source.py
new file mode 100644
index 0000000..f8998c8
--- /dev/null
+++ b/Tools/c-analyzer/c_parser/source.py
@@ -0,0 +1,34 @@
+from . import preprocessor
+
+
def iter_clean_lines(lines):
    """Yield the given lines with comments and blank lines removed.

    Both "//" comments and "/* ... */" comments (including multi-line
    ones) are stripped, and each surviving line is yielded with
    leading/trailing whitespace removed.  Note that comment markers
    inside string literals are not recognized, and only one "/* ... */"
    pair per line is handled.
    """
    incomment = False
    for line in lines:
        # Deal with comments.
        if incomment:
            _, sep, line = line.partition('*/')
            if not sep:
                continue
            incomment = False
            # Bug fix: code following the "*/" that closes a multi-line
            # comment used to be dropped (unconditional "continue");
            # now it falls through and is processed like any other line.
        line, _, _ = line.partition('//')
        line, sep, remainder = line.partition('/*')
        if sep:
            _, sep, after = remainder.partition('*/')
            if not sep:
                incomment = True
                continue
            line += ' ' + after

        # Ignore blank lines and leading/trailing whitespace.
        line = line.strip()
        if not line:
            continue

        yield line
+
+
def iter_lines(filename, *,
               preprocess=preprocessor.run,
               ):
    """Return an iterator over the preprocessed lines of the given file."""
    content = preprocess(filename)
    return iter(content.splitlines())