author | Eric Snow <ericsnowcurrently@gmail.com> | 2019-09-11 18:49:45 (GMT)
committer | GitHub <noreply@github.com> | 2019-09-11 18:49:45 (GMT)
commit | ee536b2020b1f0baad1286dbd4345e13870324af (patch)
tree | 2486233603db05a76aaef863bd6639455e3dfef7 /Tools/c-analyzer/c_parser
parent | 9936371af298d465095ae70bc9c2943b4b16eac4 (diff)
bpo-36876: Add a tool that identifies unsupported global C variables. (#15877)
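The variables the tool collects are modelled by the Variable class added in c_parser/info.py; its vartype string is what the isglobal/isconst checks inspect. The following is an illustrative sketch only (not part of the patch): it assumes Tools/c-analyzer is on sys.path so the sibling c_analyzer_common package resolves, and the file and variable names are invented.

    from c_parser.info import Variable

    # Hypothetical declaration: a mutable file-scope static in some C source file.
    v = Variable.from_parts('Python/spam.c', None, 'counter', 'static int')
    print(v.isglobal)   # True  -- "static" appears in the vartype
    print(v.isconst)    # False -- not const, i.e. mutable global state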
Diffstat (limited to 'Tools/c-analyzer/c_parser')
-rw-r--r-- | Tools/c-analyzer/c_parser/__init__.py | 0
-rw-r--r-- | Tools/c-analyzer/c_parser/declarations.py | 295
-rw-r--r-- | Tools/c-analyzer/c_parser/info.py | 78
-rw-r--r-- | Tools/c-analyzer/c_parser/naive.py | 180
-rw-r--r-- | Tools/c-analyzer/c_parser/preprocessor.py | 512
-rw-r--r-- | Tools/c-analyzer/c_parser/source.py | 34
6 files changed, 1099 insertions, 0 deletions
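A hedged sketch of how the modules above fit together (not part of the patch): naive.iter_variables() reads a C file through the preprocessor's comment and continuation stripping, applies its static-declaration heuristic line by line, and yields info.Variable objects. The file path below is only an example, and Tools/c-analyzer is assumed to be on sys.path.

    from c_parser.naive import iter_variables

    # Report every static variable the naive parser finds, together with the
    # enclosing function name (None means file scope).
    for var in iter_variables('Modules/_abc.c'):
        print(var.funcname, var.name, var.vartype)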
diff --git a/Tools/c-analyzer/c_parser/__init__.py b/Tools/c-analyzer/c_parser/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/Tools/c-analyzer/c_parser/__init__.py diff --git a/Tools/c-analyzer/c_parser/declarations.py b/Tools/c-analyzer/c_parser/declarations.py new file mode 100644 index 0000000..19fa3ff --- /dev/null +++ b/Tools/c-analyzer/c_parser/declarations.py @@ -0,0 +1,295 @@ +import re +import shlex +import subprocess + +from . import source + + +IDENTIFIER = r'(?:[a-zA-z]|_+[a-zA-Z0-9]\w*)' + +TYPE_QUAL = r'(?:const|volatile)' + +VAR_TYPE_SPEC = r'''(?: + void | + (?: + (?:(?:un)?signed\s+)? + (?: + char | + short | + int | + long | + long\s+int | + long\s+long + ) | + ) | + float | + double | + {IDENTIFIER} | + (?:struct|union)\s+{IDENTIFIER} + )''' + +POINTER = rf'''(?: + (?:\s+const)?\s*[*] + )''' + +#STRUCT = r'''(?: +# (?:struct|(struct\s+%s))\s*[{] +# [^}]* +# [}] +# )''' % (IDENTIFIER) +#UNION = r'''(?: +# (?:union|(union\s+%s))\s*[{] +# [^}]* +# [}] +# )''' % (IDENTIFIER) +#DECL_SPEC = rf'''(?: +# ({VAR_TYPE_SPEC}) | +# ({STRUCT}) | +# ({UNION}) +# )''' + +FUNC_START = rf'''(?: + (?: + (?: + extern | + static | + static\s+inline + )\s+ + )? + #(?:const\s+)? + {VAR_TYPE_SPEC} + )''' +#GLOBAL_VAR_START = rf'''(?: +# (?: +# (?: +# extern | +# static +# )\s+ +# )? +# (?: +# {TYPE_QUAL} +# (?:\s+{TYPE_QUAL})? +# )?\s+ +# {VAR_TYPE_SPEC} +# )''' +GLOBAL_DECL_START_RE = re.compile(rf''' + ^ + (?: + ({FUNC_START}) + ) + ''', re.VERBOSE) + +LOCAL_VAR_START = rf'''(?: + (?: + (?: + register | + static + )\s+ + )? + (?: + (?: + {TYPE_QUAL} + (?:\s+{TYPE_QUAL})? + )\s+ + )? + {VAR_TYPE_SPEC} + {POINTER}? + )''' +LOCAL_STMT_START_RE = re.compile(rf''' + ^ + (?: + ({LOCAL_VAR_START}) + ) + ''', re.VERBOSE) + + +def iter_global_declarations(lines): + """Yield (decl, body) for each global declaration in the given lines. + + For function definitions the header is reduced to one line and + the body is provided as-is. For other compound declarations (e.g. + struct) the entire declaration is reduced to one line and "body" + is None. Likewise for simple declarations (e.g. variables). + + Declarations inside function bodies are ignored, though their text + is provided in the function body. + """ + # XXX Bail out upon bogus syntax. + lines = source.iter_clean_lines(lines) + for line in lines: + if not GLOBAL_DECL_START_RE.match(line): + continue + # We only need functions here, since we only need locals for now. + if line.endswith(';'): + continue + if line.endswith('{') and '(' not in line: + continue + + # Capture the function. + # (assume no func is a one-liner) + decl = line + while '{' not in line: # assume no inline structs, etc. + try: + line = next(lines) + except StopIteration: + return + decl += ' ' + line + + body, end = _extract_block(lines) + if end is None: + return + assert end == '}' + yield (f'{decl}\n{body}\n{end}', body) + + +def iter_local_statements(lines): + """Yield (lines, blocks) for each statement in the given lines. + + For simple statements, "blocks" is None and the statement is reduced + to a single line. For compound statements, "blocks" is a pair of + (header, body) for each block in the statement. The headers are + reduced to a single line each, but the bpdies are provided as-is. + """ + # XXX Bail out upon bogus syntax. 
+ lines = source.iter_clean_lines(lines) + for line in lines: + if not LOCAL_STMT_START_RE.match(line): + continue + + stmt = line + blocks = None + if not line.endswith(';'): + # XXX Support compound & multiline simple statements. + #blocks = [] + continue + + yield (stmt, blocks) + + +def _extract_block(lines): + end = None + depth = 1 + body = [] + for line in lines: + depth += line.count('{') - line.count('}') + if depth == 0: + end = line + break + body.append(line) + return '\n'.join(body), end + + +def parse_func(stmt, body): + """Return (name, signature) for the given function definition.""" + header, _, end = stmt.partition(body) + assert end.strip() == '}' + assert header.strip().endswith('{') + header, _, _= header.rpartition('{') + + signature = ' '.join(header.strip().splitlines()) + + _, _, name = signature.split('(')[0].strip().rpartition(' ') + assert name + + return name, signature + + +def parse_var(stmt): + """Return (name, vartype) for the given variable declaration.""" + stmt = stmt.rstrip(';') + m = LOCAL_STMT_START_RE.match(stmt) + assert m + vartype = m.group(0) + name = stmt[len(vartype):].partition('=')[0].strip() + + if name.startswith('('): + name, _, after = name[1:].partition(')') + assert after + name = name.replace('*', '* ') + inside, _, name = name.strip().rpartition(' ') + vartype = f'{vartype} ({inside.strip()}){after}' + else: + name = name.replace('*', '* ') + before, _, name = name.rpartition(' ') + vartype = f'{vartype} {before}' + + vartype = vartype.strip() + while ' ' in vartype: + vartype = vartype.replace(' ', ' ') + + return name, vartype + + +def parse_compound(stmt, blocks): + """Return (headers, bodies) for the given compound statement.""" + # XXX Identify declarations inside compound statements + # (if/switch/for/while). + raise NotImplementedError + + +def iter_variables(filename, *, + _iter_source_lines=source.iter_lines, + _iter_global=iter_global_declarations, + _iter_local=iter_local_statements, + _parse_func=parse_func, + _parse_var=parse_var, + _parse_compound=parse_compound, + ): + """Yield (funcname, name, vartype) for every variable in the given file.""" + lines = _iter_source_lines(filename) + for stmt, body in _iter_global(lines): + # At the file top-level we only have to worry about vars & funcs. + if not body: + name, vartype = _parse_var(stmt) + if name: + yield (None, name, vartype) + else: + funcname, _ = _parse_func(stmt, body) + localvars = _iter_locals(body, + _iter_statements=_iter_local, + _parse_var=_parse_var, + _parse_compound=_parse_compound, + ) + for name, vartype in localvars: + yield (funcname, name, vartype) + + +def _iter_locals(lines, *, + _iter_statements=iter_local_statements, + _parse_var=parse_var, + _parse_compound=parse_compound, + ): + compound = [lines] + while compound: + body = compound.pop(0) + bodylines = body.splitlines() + for stmt, blocks in _iter_statements(bodylines): + if not blocks: + name, vartype = _parse_var(stmt) + if name: + yield (name, vartype) + else: + headers, bodies = _parse_compound(stmt, blocks) + for header in headers: + for line in header: + name, vartype = _parse_var(line) + if name: + yield (name, vartype) + compound.extend(bodies) + + +def iter_all(dirnames): + """Yield a Declaration for each one found. + + If there are duplicates, due to preprocessor conditionals, then + they are checked to make sure they are the same. + """ + raise NotImplementedError + + +def iter_preprocessed(dirnames): + """Yield a Declaration for each one found. 
+ + All source files are run through the preprocessor first. + """ + raise NotImplementedError diff --git a/Tools/c-analyzer/c_parser/info.py b/Tools/c-analyzer/c_parser/info.py new file mode 100644 index 0000000..9ab6979 --- /dev/null +++ b/Tools/c-analyzer/c_parser/info.py @@ -0,0 +1,78 @@ +from collections import namedtuple + +from c_analyzer_common import info, util +from c_analyzer_common.util import classonly, _NTBase + + +def normalize_vartype(vartype): + """Return the canonical form for a variable type (or func signature).""" + # We allow empty strring through for semantic reasons. + if vartype is None: + return None + + # XXX finish! + # XXX Return (modifiers, type, pointer)? + return str(vartype) + + +class Variable(_NTBase, + namedtuple('Variable', 'id vartype')): + """Information about a single variable declaration.""" + + __slots__ = () + _isglobal = util.Slot() + + @classonly + def from_parts(cls, filename, funcname, name, vartype, isglobal=False): + id = info.ID(filename, funcname, name) + self = cls(id, vartype) + if isglobal: + self._isglobal = True + return self + + def __new__(cls, id, vartype): + self = super().__new__( + cls, + id=info.ID.from_raw(id), + vartype=normalize_vartype(vartype) if vartype else None, + ) + return self + + def __hash__(self): + return hash(self.id) + + def __getattr__(self, name): + return getattr(self.id, name) + + def _validate_id(self): + if not self.id: + raise TypeError('missing id') + + if not self.filename or self.filename == info.UNKNOWN: + raise TypeError(f'id missing filename ({self.id})') + + if self.funcname and self.funcname == info.UNKNOWN: + raise TypeError(f'id missing funcname ({self.id})') + + self.id.validate() + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + self._validate_id() + + if self.vartype is None or self.vartype == info.UNKNOWN: + raise TypeError('missing vartype') + + @property + def isglobal(self): + try: + return self._isglobal + except AttributeError: + # XXX Include extern variables. + # XXX Ignore functions. + self._isglobal = ('static' in self.vartype.split()) + return self._isglobal + + @property + def isconst(self): + return 'const' in self.vartype.split() diff --git a/Tools/c-analyzer/c_parser/naive.py b/Tools/c-analyzer/c_parser/naive.py new file mode 100644 index 0000000..e0370cc --- /dev/null +++ b/Tools/c-analyzer/c_parser/naive.py @@ -0,0 +1,180 @@ +import re + +from c_analyzer_common.info import UNKNOWN + +from .info import Variable +from .preprocessor import _iter_clean_lines + + +_NOT_SET = object() + + +def get_srclines(filename, *, + cache=None, + _open=open, + _iter_lines=_iter_clean_lines, + ): + """Return the file's lines as a list. + + Each line will have trailing whitespace removed (including newline). + + If a cache is given the it is used. + """ + if cache is not None: + try: + return cache[filename] + except KeyError: + pass + + with _open(filename) as srcfile: + srclines = [line + for _, line in _iter_lines(srcfile) + if not line.startswith('#')] + for i, line in enumerate(srclines): + srclines[i] = line.rstrip() + + if cache is not None: + cache[filename] = srclines + return srclines + + +def parse_variable_declaration(srcline): + """Return (name, decl) for the given declaration line.""" + # XXX possible false negatives... 
+ decl, sep, _ = srcline.partition('=') + if not sep: + if not srcline.endswith(';'): + return None, None + decl = decl.strip(';') + decl = decl.strip() + m = re.match(r'.*\b(\w+)\s*(?:\[[^\]]*\])?$', decl) + if not m: + return None, None + name = m.group(1) + return name, decl + + +def parse_variable(srcline, funcname=None): + """Return a Variable for the variable declared on the line (or None).""" + line = srcline.strip() + + # XXX Handle more than just static variables. + if line.startswith('static '): + if '(' in line and '[' not in line: + # a function + return None, None + return parse_variable_declaration(line) + else: + return None, None + + +def iter_variables(filename, *, + srccache=None, + parse_variable=None, + _get_srclines=get_srclines, + _default_parse_variable=parse_variable, + ): + """Yield a Variable for each in the given source file.""" + if parse_variable is None: + parse_variable = _default_parse_variable + + indent = '' + prev = '' + funcname = None + for line in _get_srclines(filename, cache=srccache): + # remember current funcname + if funcname: + if line == indent + '}': + funcname = None + continue + else: + if '(' in prev and line == indent + '{': + if not prev.startswith('__attribute__'): + funcname = prev.split('(')[0].split()[-1] + prev = '' + continue + indent = line[:-len(line.lstrip())] + prev = line + + info = parse_variable(line, funcname) + if isinstance(info, list): + for name, _funcname, decl in info: + yield Variable.from_parts(filename, _funcname, name, decl) + continue + name, decl = info + + if name is None: + continue + yield Variable.from_parts(filename, funcname, name, decl) + + +def _match_varid(variable, name, funcname, ignored=None): + if ignored and variable in ignored: + return False + + if variable.name != name: + return False + + if funcname == UNKNOWN: + if not variable.funcname: + return False + elif variable.funcname != funcname: + return False + + return True + + +def find_variable(filename, funcname, name, *, + ignored=None, + srccache=None, # {filename: lines} + parse_variable=None, + _iter_variables=iter_variables, + ): + """Return the matching variable. + + Return None if the variable is not found. + """ + for variable in _iter_variables(filename, + srccache=srccache, + parse_variable=parse_variable, + ): + if _match_varid(variable, name, funcname, ignored): + return variable + else: + return None + + +def find_variables(varids, filenames=None, *, + srccache=_NOT_SET, + parse_variable=None, + _find_symbol=find_variable, + ): + """Yield a Variable for each ID. + + If the variable is not found then its decl will be UNKNOWN. That + way there will be one resulting Variable per given ID. + """ + if srccache is _NOT_SET: + srccache = {} + + used = set() + for varid in varids: + if varid.filename and varid.filename != UNKNOWN: + srcfiles = [varid.filename] + else: + if not filenames: + yield Variable(varid, UNKNOWN) + continue + srcfiles = filenames + for filename in srcfiles: + found = _find_varid(filename, varid.funcname, varid.name, + ignored=used, + srccache=srccache, + parse_variable=parse_variable, + ) + if found: + yield found + used.add(found) + break + else: + yield Variable(varid, UNKNOWN) diff --git a/Tools/c-analyzer/c_parser/preprocessor.py b/Tools/c-analyzer/c_parser/preprocessor.py new file mode 100644 index 0000000..0e2866e --- /dev/null +++ b/Tools/c-analyzer/c_parser/preprocessor.py @@ -0,0 +1,512 @@ +from collections import namedtuple +import shlex +import os +import re + +from c_analyzer_common import util +from . 
import info + + +CONTINUATION = '\\' + os.linesep + +IDENTIFIER = r'(?:\w*[a-zA-Z]\w*)' +IDENTIFIER_RE = re.compile('^' + IDENTIFIER + '$') + + +def _coerce_str(value): + if not value: + return '' + return str(value).strip() + + +############################# +# directives + +DIRECTIVE_START = r''' + (?: + ^ \s* + [#] \s* + )''' +DIRECTIVE_TEXT = r''' + (?: + (?: \s+ ( .*\S ) )? + \s* $ + )''' +DIRECTIVE = rf''' + (?: + {DIRECTIVE_START} + ( + include | + error | warning | + pragma | + define | undef | + if | ifdef | ifndef | elseif | else | endif | + __FILE__ | __LINE__ | __DATE __ | __TIME__ | __TIMESTAMP__ + ) + {DIRECTIVE_TEXT} + )''' +# (?: +# [^\\\n] | +# \\ [^\n] | +# \\ \n +# )+ +# ) \n +# )''' +DIRECTIVE_RE = re.compile(DIRECTIVE, re.VERBOSE) + +DEFINE = rf''' + (?: + {DIRECTIVE_START} define \s+ + (?: + ( \w*[a-zA-Z]\w* ) + (?: \s* [(] ([^)]*) [)] )? + ) + {DIRECTIVE_TEXT} + )''' +DEFINE_RE = re.compile(DEFINE, re.VERBOSE) + + +def parse_directive(line): + """Return the appropriate directive for the given line.""" + line = line.strip() + if line.startswith('#'): + line = line[1:].lstrip() + line = '#' + line + directive = line + #directive = '#' + line + while ' ' in directive: + directive = directive.replace(' ', ' ') + return _parse_directive(directive) + + +def _parse_directive(line): + m = DEFINE_RE.match(line) + if m: + name, args, text = m.groups() + if args: + args = [a.strip() for a in args.split(',')] + return Macro(name, args, text) + else: + return Constant(name, text) + + m = DIRECTIVE_RE.match(line) + if not m: + raise ValueError(f'unsupported directive {line!r}') + kind, text = m.groups() + if not text: + if kind not in ('else', 'endif'): + raise ValueError(f'missing text in directive {line!r}') + elif kind in ('else', 'endif', 'define'): + raise ValueError(f'unexpected text in directive {line!r}') + if kind == 'include': + directive = Include(text) + elif kind in IfDirective.KINDS: + directive = IfDirective(kind, text) + else: + directive = OtherDirective(kind, text) + directive.validate() + return directive + + +class PreprocessorDirective(util._NTBase): + """The base class for directives.""" + + __slots__ = () + + KINDS = frozenset([ + 'include', + 'pragma', + 'error', 'warning', + 'define', 'undef', + 'if', 'ifdef', 'ifndef', 'elseif', 'else', 'endif', + '__FILE__', '__DATE__', '__LINE__', '__TIME__', '__TIMESTAMP__', + ]) + + @property + def text(self): + return ' '.join(v for v in self[1:] if v and v.strip()) or None + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + super().validate() + + if not self.kind: + raise TypeError('missing kind') + elif self.kind not in self.KINDS: + raise ValueError + + # text can be anything, including None. + + +class Constant(PreprocessorDirective, + namedtuple('Constant', 'kind name value')): + """A single "constant" directive ("define").""" + + __slots__ = () + + def __new__(cls, name, value=None): + self = super().__new__( + cls, + 'define', + name=_coerce_str(name) or None, + value=_coerce_str(value) or None, + ) + return self + + def validate(self): + """Fail if the object is invalid (i.e. 
init with bad data).""" + super().validate() + + if not self.name: + raise TypeError('missing name') + elif not IDENTIFIER_RE.match(self.name): + raise ValueError(f'name must be identifier, got {self.name!r}') + + # value can be anything, including None + + +class Macro(PreprocessorDirective, + namedtuple('Macro', 'kind name args body')): + """A single "macro" directive ("define").""" + + __slots__ = () + + def __new__(cls, name, args, body=None): + # "args" must be a string or an iterable of strings (or "empty"). + if isinstance(args, str): + args = [v.strip() for v in args.split(',')] + if args: + args = tuple(_coerce_str(a) or None for a in args) + self = super().__new__( + cls, + kind='define', + name=_coerce_str(name) or None, + args=args if args else (), + body=_coerce_str(body) or None, + ) + return self + + @property + def text(self): + if self.body: + return f'{self.name}({", ".join(self.args)}) {self.body}' + else: + return f'{self.name}({", ".join(self.args)})' + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + super().validate() + + if not self.name: + raise TypeError('missing name') + elif not IDENTIFIER_RE.match(self.name): + raise ValueError(f'name must be identifier, got {self.name!r}') + + for arg in self.args: + if not arg: + raise ValueError(f'missing arg in {self.args}') + elif not IDENTIFIER_RE.match(arg): + raise ValueError(f'arg must be identifier, got {arg!r}') + + # body can be anything, including None + + +class IfDirective(PreprocessorDirective, + namedtuple('IfDirective', 'kind condition')): + """A single conditional directive (e.g. "if", "ifdef"). + + This only includes directives that actually provide conditions. The + related directives "else" and "endif" are covered by OtherDirective + instead. + """ + + __slots__ = () + + KINDS = frozenset([ + 'if', + 'ifdef', + 'ifndef', + 'elseif', + ]) + + @classmethod + def _condition_from_raw(cls, raw, kind): + #return Condition.from_raw(raw, _kind=kind) + condition = _coerce_str(raw) + if not condition: + return None + + if kind == 'ifdef': + condition = f'defined({condition})' + elif kind == 'ifndef': + condition = f'! defined({condition})' + + return condition + + def __new__(cls, kind, condition): + kind = _coerce_str(kind) + self = super().__new__( + cls, + kind=kind or None, + condition=cls._condition_from_raw(condition, kind), + ) + return self + + @property + def text(self): + if self.kind == 'ifdef': + return self.condition[8:-1] # strip "defined(" + elif self.kind == 'ifndef': + return self.condition[10:-1] # strip "! defined(" + else: + return self.condition + #return str(self.condition) + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + super().validate() + + if not self.condition: + raise TypeError('missing condition') + #else: + # for cond in self.condition: + # if not cond: + # raise ValueError(f'missing condition in {self.condition}') + # cond.validate() + # if self.kind in ('ifdef', 'ifndef'): + # if len(self.condition) != 1: + # raise ValueError('too many condition') + # if self.kind == 'ifdef': + # if not self.condition[0].startswith('defined '): + # raise ValueError('bad condition') + # else: + # if not self.condition[0].startswith('! defined '): + # raise ValueError('bad condition') + + +class Include(PreprocessorDirective, + namedtuple('Include', 'kind file')): + """A single "include" directive. + + Supported "file" values are either follow the bracket style + (<stdio>) or double quotes ("spam.h"). 
+ """ + + __slots__ = () + + def __new__(cls, file): + self = super().__new__( + cls, + kind='include', + file=_coerce_str(file) or None, + ) + return self + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + super().validate() + + if not self.file: + raise TypeError('missing file') + + +class OtherDirective(PreprocessorDirective, + namedtuple('OtherDirective', 'kind text')): + """A single directive not covered by another class. + + This includes the "else", "endif", and "undef" directives, which are + otherwise inherently related to the directives covered by the + Constant, Macro, and IfCondition classes. + + Note that all directives must have a text value, except for "else" + and "endif" (which must have no text). + """ + + __slots__ = () + + KINDS = PreprocessorDirective.KINDS - {'include', 'define'} - IfDirective.KINDS + + def __new__(cls, kind, text): + self = super().__new__( + cls, + kind=_coerce_str(kind) or None, + text=_coerce_str(text) or None, + ) + return self + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + super().validate() + + if self.text: + if self.kind in ('else', 'endif'): + raise ValueError('unexpected text in directive') + elif self.kind not in ('else', 'endif'): + raise TypeError('missing text') + + +############################# +# iterating lines + +def _recompute_conditions(directive, ifstack): + if directive.kind in ('if', 'ifdef', 'ifndef'): + ifstack.append( + ([], directive.condition)) + elif directive.kind == 'elseif': + if ifstack: + negated, active = ifstack.pop() + if active: + negated.append(active) + else: + negated = [] + ifstack.append( + (negated, directive.condition)) + elif directive.kind == 'else': + if ifstack: + negated, active = ifstack.pop() + if active: + negated.append(active) + ifstack.append( + (negated, None)) + elif directive.kind == 'endif': + if ifstack: + ifstack.pop() + + conditions = [] + for negated, active in ifstack: + for condition in negated: + conditions.append(f'! ({condition})') + if active: + conditions.append(active) + return tuple(conditions) + + +def _iter_clean_lines(lines): + lines = iter(enumerate(lines, 1)) + for lno, line in lines: + # Handle line continuations. + while line.endswith(CONTINUATION): + try: + lno, _line = next(lines) + except StopIteration: + break + line = line[:-len(CONTINUATION)] + ' ' + _line + + # Deal with comments. + after = line + line = '' + while True: + # Look for a comment. + before, begin, remainder = after.partition('/*') + if '//' in before: + before, _, _ = before.partition('//') + line += before + ' ' # per the C99 spec + break + line += before + if not begin: + break + line += ' ' # per the C99 spec + + # Go until we find the end of the comment. + _, end, after = remainder.partition('*/') + while not end: + try: + lno, remainder = next(lines) + except StopIteration: + raise Exception('unterminated comment') + _, end, after = remainder.partition('*/') + + yield lno, line + + +def iter_lines(lines, *, + _iter_clean_lines=_iter_clean_lines, + _parse_directive=_parse_directive, + _recompute_conditions=_recompute_conditions, + ): + """Yield (lno, line, directive, active conditions) for each given line. + + This is effectively a subset of the operations taking place in + translation phases 2-4 from the C99 spec (ISO/IEC 9899:TC2); see + section 5.1.1.2. Line continuations are removed and comments + replaced with a single space. (In both cases "lno" will be the last + line involved.) 
Otherwise each line is returned as-is. + + "lno" is the (1-indexed) line number for the line. + + "directive" will be a PreprocessorDirective or None, depending on + whether or not there is a directive on the line. + + "active conditions" is the set of preprocessor conditions (e.g. + "defined()") under which the current line of code will be included + in compilation. That set is derived from every conditional + directive block (e.g. "if defined()", "ifdef", "else") containing + that line. That includes nested directives. Note that the + current line does not affect the active conditions for iteself. + It only impacts subsequent lines. That applies to directives + that close blocks (e.g. "endif") just as much as conditional + directvies. Also note that "else" and "elseif" directives + update the active conditions (for later lines), rather than + adding to them. + """ + ifstack = [] + conditions = () + for lno, line in _iter_clean_lines(lines): + stripped = line.strip() + if not stripped.startswith('#'): + yield lno, line, None, conditions + continue + + directive = '#' + stripped[1:].lstrip() + while ' ' in directive: + directive = directive.replace(' ', ' ') + directive = _parse_directive(directive) + yield lno, line, directive, conditions + + if directive.kind in ('else', 'endif'): + conditions = _recompute_conditions(directive, ifstack) + elif isinstance(directive, IfDirective): + conditions = _recompute_conditions(directive, ifstack) + + +############################# +# running (platform-specific?) + +def _gcc(filename, *, + _get_argv=(lambda: _get_gcc_argv()), + _run=util.run_cmd, + ): + argv = _get_argv() + argv.extend([ + '-E', filename, + ]) + output = _run(argv) + return output + + +def _get_gcc_argv(*, + _open=open, + _run=util.run_cmd, + ): + with _open('/tmp/print.mk', 'w') as tmpfile: + tmpfile.write('print-%:\n') + #tmpfile.write('\t@echo $* = $($*)\n') + tmpfile.write('\t@echo $($*)\n') + argv = ['/usr/bin/make', + '-f', 'Makefile', + '-f', '/tmp/print.mk', + 'print-CC', + 'print-PY_CORE_CFLAGS', + ] + output = _run(argv) + gcc, cflags = output.strip().splitlines() + argv = shlex.split(gcc.strip()) + cflags = shlex.split(cflags.strip()) + return argv + cflags + + +def run(filename, *, + _gcc=_gcc, + ): + """Return the text of the given file after running the preprocessor.""" + return _gcc(filename) diff --git a/Tools/c-analyzer/c_parser/source.py b/Tools/c-analyzer/c_parser/source.py new file mode 100644 index 0000000..f8998c8 --- /dev/null +++ b/Tools/c-analyzer/c_parser/source.py @@ -0,0 +1,34 @@ +from . import preprocessor + + +def iter_clean_lines(lines): + incomment = False + for line in lines: + # Deal with comments. + if incomment: + _, sep, line = line.partition('*/') + if sep: + incomment = False + continue + line, _, _ = line.partition('//') + line, sep, remainder = line.partition('/*') + if sep: + _, sep, after = remainder.partition('*/') + if not sep: + incomment = True + continue + line += ' ' + after + + # Ignore blank lines and leading/trailing whitespace. + line = line.strip() + if not line: + continue + + yield line + + +def iter_lines(filename, *, + preprocess=preprocessor.run, + ): + content = preprocess(filename) + return iter(content.splitlines()) |
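For reference, here is what the regex-driven splitting in c_parser/declarations.py produces for two simple declarations. This is a sketch under the assumption that the (invented) inputs fit the plain forms covered by LOCAL_STMT_START_RE.

    from c_parser.declarations import parse_var

    # Split a simple declaration into (name, vartype).
    print(parse_var('static int initialized = 0;'))   # ('initialized', 'static int')
    print(parse_var('const char *msg;'))              # ('msg', 'const char *')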
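Finally, a sketch of the directive tracking in c_parser/preprocessor.py: iter_lines() strips comments and line continuations and reports, for each line, the preprocessor conditions that guard it. The input is invented and the sibling c_analyzer_common package is assumed to be importable.

    from c_parser.preprocessor import iter_lines

    SRC = [
        '#ifdef Py_DEBUG',
        'static int debug_hooks;  /* only built under Py_DEBUG */',
        '#endif',
        'static const char *name = "spam";',
    ]

    for lno, line, directive, conditions in iter_lines(SRC):
        print(lno, repr(line), directive and directive.kind, conditions)
    # The second line is reported with conditions ('defined(Py_DEBUG)',);
    # the last line is reported with no active conditions.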