author | Eric Snow <ericsnowcurrently@gmail.com> | 2019-09-11 18:49:45 (GMT)
committer | GitHub <noreply@github.com> | 2019-09-11 18:49:45 (GMT)
commit | ee536b2020b1f0baad1286dbd4345e13870324af (patch)
tree | 2486233603db05a76aaef863bd6639455e3dfef7 /Tools/c-analyzer/c_parser
parent | 9936371af298d465095ae70bc9c2943b4b16eac4 (diff)
bpo-36876: Add a tool that identifies unsupported global C variables. (#15877)
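The variables the tool collects are modelled by the Variable class added in c_parser/info.py; its vartype string is what the isglobal/isconst checks inspect. The following is an illustrative sketch only (not part of the patch): it assumes Tools/c-analyzer is on sys.path so the sibling c_analyzer_common package resolves, and the file and variable names are invented.

    from c_parser.info import Variable

    # Hypothetical declaration: a mutable file-scope static in some C source file.
    v = Variable.from_parts('Python/spam.c', None, 'counter', 'static int')
    print(v.isglobal)   # True  -- "static" appears in the vartype
    print(v.isconst)    # False -- not const, i.e. mutable global state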
Diffstat (limited to 'Tools/c-analyzer/c_parser')
-rw-r--r-- | Tools/c-analyzer/c_parser/__init__.py | 0
-rw-r--r-- | Tools/c-analyzer/c_parser/declarations.py | 295
-rw-r--r-- | Tools/c-analyzer/c_parser/info.py | 78
-rw-r--r-- | Tools/c-analyzer/c_parser/naive.py | 180
-rw-r--r-- | Tools/c-analyzer/c_parser/preprocessor.py | 512
-rw-r--r-- | Tools/c-analyzer/c_parser/source.py | 34
6 files changed, 1099 insertions, 0 deletions
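A hedged sketch of how the modules above fit together (not part of the patch): naive.iter_variables() reads a C file through the preprocessor's comment and continuation stripping, applies its static-declaration heuristic line by line, and yields info.Variable objects. The file path below is only an example, and Tools/c-analyzer is assumed to be on sys.path.

    from c_parser.naive import iter_variables

    # Report every static variable the naive parser finds, together with the
    # enclosing function name (None means file scope).
    for var in iter_variables('Modules/_abc.c'):
        print(var.funcname, var.name, var.vartype)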
diff --git a/Tools/c-analyzer/c_parser/__init__.py b/Tools/c-analyzer/c_parser/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/Tools/c-analyzer/c_parser/__init__.py diff --git a/Tools/c-analyzer/c_parser/declarations.py b/Tools/c-analyzer/c_parser/declarations.py new file mode 100644 index 0000000..19fa3ff --- /dev/null +++ b/Tools/c-analyzer/c_parser/declarations.py @@ -0,0 +1,295 @@ +import re +import shlex +import subprocess + +from . import source + + +IDENTIFIER = r'(?:[a-zA-z]|_+[a-zA-Z0-9]\w*)' + +TYPE_QUAL = r'(?:const|volatile)' + +VAR_TYPE_SPEC = r'''(?: + void | + (?: + (?:(?:un)?signed\s+)? + (?: + char | + short | + int | + long | + long\s+int | + long\s+long + ) | + ) | + float | + double | + {IDENTIFIER} | + (?:struct|union)\s+{IDENTIFIER} + )''' + +POINTER = rf'''(?: + (?:\s+const)?\s*[*] + )''' + +#STRUCT = r'''(?: +# (?:struct|(struct\s+%s))\s*[{] +# [^}]* +# [}] +# )''' % (IDENTIFIER) +#UNION = r'''(?: +# (?:union|(union\s+%s))\s*[{] +# [^}]* +# [}] +# )''' % (IDENTIFIER) +#DECL_SPEC = rf'''(?: +# ({VAR_TYPE_SPEC}) | +# ({STRUCT}) | +# ({UNION}) +# )''' + +FUNC_START = rf'''(?: + (?: + (?: + extern | + static | + static\s+inline + )\s+ + )? + #(?:const\s+)? + {VAR_TYPE_SPEC} + )''' +#GLOBAL_VAR_START = rf'''(?: +# (?: +# (?: +# extern | +# static +# )\s+ +# )? +# (?: +# {TYPE_QUAL} +# (?:\s+{TYPE_QUAL})? +# )?\s+ +# {VAR_TYPE_SPEC} +# )''' +GLOBAL_DECL_START_RE = re.compile(rf''' + ^ + (?: + ({FUNC_START}) + ) + ''', re.VERBOSE) + +LOCAL_VAR_START = rf'''(?: + (?: + (?: + register | + static + )\s+ + )? + (?: + (?: + {TYPE_QUAL} + (?:\s+{TYPE_QUAL})? + )\s+ + )? + {VAR_TYPE_SPEC} + {POINTER}? + )''' +LOCAL_STMT_START_RE = re.compile(rf''' + ^ + (?: + ({LOCAL_VAR_START}) + ) + ''', re.VERBOSE) + + +def iter_global_declarations(lines): + """Yield (decl, body) for each global declaration in the given lines. + + For function definitions the header is reduced to one line and + the body is provided as-is. For other compound declarations (e.g. + struct) the entire declaration is reduced to one line and "body" + is None. Likewise for simple declarations (e.g. variables). + + Declarations inside function bodies are ignored, though their text + is provided in the function body. + """ + # XXX Bail out upon bogus syntax. + lines = source.iter_clean_lines(lines) + for line in lines: + if not GLOBAL_DECL_START_RE.match(line): + continue + # We only need functions here, since we only need locals for now. + if line.endswith(';'): + continue + if line.endswith('{') and '(' not in line: + continue + + # Capture the function. + # (assume no func is a one-liner) + decl = line + while '{' not in line: # assume no inline structs, etc. + try: + line = next(lines) + except StopIteration: + return + decl += ' ' + line + + body, end = _extract_block(lines) + if end is None: + return + assert end == '}' + yield (f'{decl}\n{body}\n{end}', body) + + +def iter_local_statements(lines): + """Yield (lines, blocks) for each statement in the given lines. + + For simple statements, "blocks" is None and the statement is reduced + to a single line. For compound statements, "blocks" is a pair of + (header, body) for each block in the statement. The headers are + reduced to a single line each, but the bpdies are provided as-is. + """ + # XXX Bail out upon bogus syntax. 
+ lines = source.iter_clean_lines(lines) + for line in lines: + if not LOCAL_STMT_START_RE.match(line): + continue + + stmt = line + blocks = None + if not line.endswith(';'): + # XXX Support compound & multiline simple statements. + #blocks = [] + continue + + yield (stmt, blocks) + + +def _extract_block(lines): + end = None + depth = 1 + body = [] + for line in lines: + depth += line.count('{') - line.count('}') + if depth == 0: + end = line + break + body.append(line) + return '\n'.join(body), end + + +def parse_func(stmt, body): + """Return (name, signature) for the given function definition.""" + header, _, end = stmt.partition(body) + assert end.strip() == '}' + assert header.strip().endswith('{') + header, _, _= header.rpartition('{') + + signature = ' '.join(header.strip().splitlines()) + + _, _, name = signature.split('(')[0].strip().rpartition(' ') + assert name + + return name, signature + + +def parse_var(stmt): + """Return (name, vartype) for the given variable declaration.""" + stmt = stmt.rstrip(';') + m = LOCAL_STMT_START_RE.match(stmt) + assert m + vartype = m.group(0) + name = stmt[len(vartype):].partition('=')[0].strip() + + if name.startswith('('): + name, _, after = name[1:].partition(')') + assert after + name = name.replace('*', '* ') + inside, _, name = name.strip().rpartition(' ') + vartype = f'{vartype} ({inside.strip()}){after}' + else: + name = name.replace('*', '* ') + before, _, name = name.rpartition(' ') + vartype = f'{vartype} {before}' + + vartype = vartype.strip() + while ' ' in vartype: + vartype = vartype.replace(' ', ' ') + + return name, vartype + + +def parse_compound(stmt, blocks): + """Return (headers, bodies) for the given compound statement.""" + # XXX Identify declarations inside compound statements + # (if/switch/for/while). + raise NotImplementedError + + +def iter_variables(filename, *, + _iter_source_lines=source.iter_lines, + _iter_global=iter_global_declarations, + _iter_local=iter_local_statements, + _parse_func=parse_func, + _parse_var=parse_var, + _parse_compound=parse_compound, + ): + """Yield (funcname, name, vartype) for every variable in the given file.""" + lines = _iter_source_lines(filename) + for stmt, body in _iter_global(lines): + # At the file top-level we only have to worry about vars & funcs. + if not body: + name, vartype = _parse_var(stmt) + if name: + yield (None, name, vartype) + else: + funcname, _ = _parse_func(stmt, body) + localvars = _iter_locals(body, + _iter_statements=_iter_local, + _parse_var=_parse_var, + _parse_compound=_parse_compound, + ) + for name, vartype in localvars: + yield (funcname, name, vartype) + + +def _iter_locals(lines, *, + _iter_statements=iter_local_statements, + _parse_var=parse_var, + _parse_compound=parse_compound, + ): + compound = [lines] + while compound: + body = compound.pop(0) + bodylines = body.splitlines() + for stmt, blocks in _iter_statements(bodylines): + if not blocks: + name, vartype = _parse_var(stmt) + if name: + yield (name, vartype) + else: + headers, bodies = _parse_compound(stmt, blocks) + for header in headers: + for line in header: + name, vartype = _parse_var(line) + if name: + yield (name, vartype) + compound.extend(bodies) + + +def iter_all(dirnames): + """Yield a Declaration for each one found. + + If there are duplicates, due to preprocessor conditionals, then + they are checked to make sure they are the same. + """ + raise NotImplementedError + + +def iter_preprocessed(dirnames): + """Yield a Declaration for each one found. 
+ + All source files are run through the preprocessor first. + """ + raise NotImplementedError diff --git a/Tools/c-analyzer/c_parser/info.py b/Tools/c-analyzer/c_parser/info.py new file mode 100644 index 0000000..9ab6979 --- /dev/null +++ b/Tools/c-analyzer/c_parser/info.py @@ -0,0 +1,78 @@ +from collections import namedtuple + +from c_analyzer_common import info, util +from c_analyzer_common.util import classonly, _NTBase + + +def normalize_vartype(vartype): + """Return the canonical form for a variable type (or func signature).""" + # We allow empty strring through for semantic reasons. + if vartype is None: + return None + + # XXX finish! + # XXX Return (modifiers, type, pointer)? + return str(vartype) + + +class Variable(_NTBase, + namedtuple('Variable', 'id vartype')): + """Information about a single variable declaration.""" + + __slots__ = () + _isglobal = util.Slot() + + @classonly + def from_parts(cls, filename, funcname, name, vartype, isglobal=False): + id = info.ID(filename, funcname, name) + self = cls(id, vartype) + if isglobal: + self._isglobal = True + return self + + def __new__(cls, id, vartype): + self = super().__new__( + cls, + id=info.ID.from_raw(id), + vartype=normalize_vartype(vartype) if vartype else None, + ) + return self + + def __hash__(self): + return hash(self.id) + + def __getattr__(self, name): + return getattr(self.id, name) + + def _validate_id(self): + if not self.id: + raise TypeError('missing id') + + if not self.filename or self.filename == info.UNKNOWN: + raise TypeError(f'id missing filename ({self.id})') + + if self.funcname and self.funcname == info.UNKNOWN: + raise TypeError(f'id missing funcname ({self.id})') + + self.id.validate() + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + self._validate_id() + + if self.vartype is None or self.vartype == info.UNKNOWN: + raise TypeError('missing vartype') + + @property + def isglobal(self): + try: + return self._isglobal + except AttributeError: + # XXX Include extern variables. + # XXX Ignore functions. + self._isglobal = ('static' in self.vartype.split()) + return self._isglobal + + @property + def isconst(self): + return 'const' in self.vartype.split() diff --git a/Tools/c-analyzer/c_parser/naive.py b/Tools/c-analyzer/c_parser/naive.py new file mode 100644 index 0000000..e0370cc --- /dev/null +++ b/Tools/c-analyzer/c_parser/naive.py @@ -0,0 +1,180 @@ +import re + +from c_analyzer_common.info import UNKNOWN + +from .info import Variable +from .preprocessor import _iter_clean_lines + + +_NOT_SET = object() + + +def get_srclines(filename, *, + cache=None, + _open=open, + _iter_lines=_iter_clean_lines, + ): + """Return the file's lines as a list. + + Each line will have trailing whitespace removed (including newline). + + If a cache is given the it is used. + """ + if cache is not None: + try: + return cache[filename] + except KeyError: + pass + + with _open(filename) as srcfile: + srclines = [line + for _, line in _iter_lines(srcfile) + if not line.startswith('#')] + for i, line in enumerate(srclines): + srclines[i] = line.rstrip() + + if cache is not None: + cache[filename] = srclines + return srclines + + +def parse_variable_declaration(srcline): + """Return (name, decl) for the given declaration line.""" + # XXX possible false negatives... 
+ decl, sep, _ = srcline.partition('=') + if not sep: + if not srcline.endswith(';'): + return None, None + decl = decl.strip(';') + decl = decl.strip() + m = re.match(r'.*\b(\w+)\s*(?:\[[^\]]*\])?$', decl) + if not m: + return None, None + name = m.group(1) + return name, decl + + +def parse_variable(srcline, funcname=None): + """Return a Variable for the variable declared on the line (or None).""" + line = srcline.strip() + + # XXX Handle more than just static variables. + if line.startswith('static '): + if '(' in line and '[' not in line: + # a function + return None, None + return parse_variable_declaration(line) + else: + return None, None + + +def iter_variables(filename, *, + srccache=None, + parse_variable=None, + _get_srclines=get_srclines, + _default_parse_variable=parse_variable, + ): + """Yield a Variable for each in the given source file.""" + if parse_variable is None: + parse_variable = _default_parse_variable + + indent = '' + prev = '' + funcname = None + for line in _get_srclines(filename, cache=srccache): + # remember current funcname + if funcname: + if line == indent + '}': + funcname = None + continue + else: + if '(' in prev and line == indent + '{': + if not prev.startswith('__attribute__'): + funcname = prev.split('(')[0].split()[-1] + prev = '' + continue + indent = line[:-len(line.lstrip())] + prev = line + + info = parse_variable(line, funcname) + if isinstance(info, list): + for name, _funcname, decl in info: + yield Variable.from_parts(filename, _funcname, name, decl) + continue + name, decl = info + + if name is None: + continue + yield Variable.from_parts(filename, funcname, name, decl) + + +def _match_varid(variable, name, funcname, ignored=None): + if ignored and variable in ignored: + return False + + if variable.name != name: + return False + + if funcname == UNKNOWN: + if not variable.funcname: + return False + elif variable.funcname != funcname: + return False + + return True + + +def find_variable(filename, funcname, name, *, + ignored=None, + srccache=None, # {filename: lines} + parse_variable=None, + _iter_variables=iter_variables, + ): + """Return the matching variable. + + Return None if the variable is not found. + """ + for variable in _iter_variables(filename, + srccache=srccache, + parse_variable=parse_variable, + ): + if _match_varid(variable, name, funcname, ignored): + return variable + else: + return None + + +def find_variables(varids, filenames=None, *, + srccache=_NOT_SET, + parse_variable=None, + _find_symbol=find_variable, + ): + """Yield a Variable for each ID. + + If the variable is not found then its decl will be UNKNOWN. That + way there will be one resulting Variable per given ID. + """ + if srccache is _NOT_SET: + srccache = {} + + used = set() + for varid in varids: + if varid.filename and varid.filename != UNKNOWN: + srcfiles = [varid.filename] + else: + if not filenames: + yield Variable(varid, UNKNOWN) + continue + srcfiles = filenames + for filename in srcfiles: + found = _find_varid(filename, varid.funcname, varid.name, + ignored=used, + srccache=srccache, + parse_variable=parse_variable, + ) + if found: + yield found + used.add(found) + break + else: + yield Variable(varid, UNKNOWN) diff --git a/Tools/c-analyzer/c_parser/preprocessor.py b/Tools/c-analyzer/c_parser/preprocessor.py new file mode 100644 index 0000000..0e2866e --- /dev/null +++ b/Tools/c-analyzer/c_parser/preprocessor.py @@ -0,0 +1,512 @@ +from collections import namedtuple +import shlex +import os +import re + +from c_analyzer_common import util +from . 
import info + + +CONTINUATION = '\\' + os.linesep + +IDENTIFIER = r'(?:\w*[a-zA-Z]\w*)' +IDENTIFIER_RE = re.compile('^' + IDENTIFIER + '$') + + +def _coerce_str(value): + if not value: + return '' + return str(value).strip() + + +############################# +# directives + +DIRECTIVE_START = r''' + (?: + ^ \s* + [#] \s* + )''' +DIRECTIVE_TEXT = r''' + (?: + (?: \s+ ( .*\S ) )? + \s* $ + )''' +DIRECTIVE = rf''' + (?: + {DIRECTIVE_START} + ( + include | + error | warning | + pragma | + define | undef | + if | ifdef | ifndef | elseif | else | endif | + __FILE__ | __LINE__ | __DATE __ | __TIME__ | __TIMESTAMP__ + ) + {DIRECTIVE_TEXT} + )''' +# (?: +# [^\\\n] | +# \\ [^\n] | +# \\ \n +# )+ +# ) \n +# )''' +DIRECTIVE_RE = re.compile(DIRECTIVE, re.VERBOSE) + +DEFINE = rf''' + (?: + {DIRECTIVE_START} define \s+ + (?: + ( \w*[a-zA-Z]\w* ) + (?: \s* [(] ([^)]*) [)] )? + ) + {DIRECTIVE_TEXT} + )''' +DEFINE_RE = re.compile(DEFINE, re.VERBOSE) + + +def parse_directive(line): + """Return the appropriate directive for the given line.""" + line = line.strip() + if line.startswith('#'): + line = line[1:].lstrip() + line = '#' + line + directive = line + #directive = '#' + line + while ' ' in directive: + directive = directive.replace(' ', ' ') + return _parse_directive(directive) + + +def _parse_directive(line): + m = DEFINE_RE.match(line) + if m: + name, args, text = m.groups() + if args: + args = [a.strip() for a in args.split(',')] + return Macro(name, args, text) + else: + return Constant(name, text) + + m = DIRECTIVE_RE.match(line) + if not m: + raise ValueError(f'unsupported directive {line!r}') + kind, text = m.groups() + if not text: + if kind not in ('else', 'endif'): + raise ValueError(f'missing text in directive {line!r}') + elif kind in ('else', 'endif', 'define'): + raise ValueError(f'unexpected text in directive {line!r}') + if kind == 'include': + directive = Include(text) + elif kind in IfDirective.KINDS: + directive = IfDirective(kind, text) + else: + directive = OtherDirective(kind, text) + directive.validate() + return directive + + +class PreprocessorDirective(util._NTBase): + """The base class for directives.""" + + __slots__ = () + + KINDS = frozenset([ + 'include', + 'pragma', + 'error', 'warning', + 'define', 'undef', + 'if', 'ifdef', 'ifndef', 'elseif', 'else', 'endif', + '__FILE__', '__DATE__', '__LINE__', '__TIME__', '__TIMESTAMP__', + ]) + + @property + def text(self): + return ' '.join(v for v in self[1:] if v and v.strip()) or None + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + super().validate() + + if not self.kind: + raise TypeError('missing kind') + elif self.kind not in self.KINDS: + raise ValueError + + # text can be anything, including None. + + +class Constant(PreprocessorDirective, + namedtuple('Constant', 'kind name value')): + """A single "constant" directive ("define").""" + + __slots__ = () + + def __new__(cls, name, value=None): + self = super().__new__( + cls, + 'define', + name=_coerce_str(name) or None, + value=_coerce_str(value) or None, + ) + return self + + def validate(self): + """Fail if the object is invalid (i.e. 
init with bad data).""" + super().validate() + + if not self.name: + raise TypeError('missing name') + elif not IDENTIFIER_RE.match(self.name): + raise ValueError(f'name must be identifier, got {self.name!r}') + + # value can be anything, including None + + +class Macro(PreprocessorDirective, + namedtuple('Macro', 'kind name args body')): + """A single "macro" directive ("define").""" + + __slots__ = () + + def __new__(cls, name, args, body=None): + # "args" must be a string or an iterable of strings (or "empty"). + if isinstance(args, str): + args = [v.strip() for v in args.split(',')] + if args: + args = tuple(_coerce_str(a) or None for a in args) + self = super().__new__( + cls, + kind='define', + name=_coerce_str(name) or None, + args=args if args else (), + body=_coerce_str(body) or None, + ) + return self + + @property + def text(self): + if self.body: + return f'{self.name}({", ".join(self.args)}) {self.body}' + else: + return f'{self.name}({", ".join(self.args)})' + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + super().validate() + + if not self.name: + raise TypeError('missing name') + elif not IDENTIFIER_RE.match(self.name): + raise ValueError(f'name must be identifier, got {self.name!r}') + + for arg in self.args: + if not arg: + raise ValueError(f'missing arg in {self.args}') + elif not IDENTIFIER_RE.match(arg): + raise ValueError(f'arg must be identifier, got {arg!r}') + + # body can be anything, including None + + +class IfDirective(PreprocessorDirective, + namedtuple('IfDirective', 'kind condition')): + """A single conditional directive (e.g. "if", "ifdef"). + + This only includes directives that actually provide conditions. The + related directives "else" and "endif" are covered by OtherDirective + instead. + """ + + __slots__ = () + + KINDS = frozenset([ + 'if', + 'ifdef', + 'ifndef', + 'elseif', + ]) + + @classmethod + def _condition_from_raw(cls, raw, kind): + #return Condition.from_raw(raw, _kind=kind) + condition = _coerce_str(raw) + if not condition: + return None + + if kind == 'ifdef': + condition = f'defined({condition})' + elif kind == 'ifndef': + condition = f'! defined({condition})' + + return condition + + def __new__(cls, kind, condition): + kind = _coerce_str(kind) + self = super().__new__( + cls, + kind=kind or None, + condition=cls._condition_from_raw(condition, kind), + ) + return self + + @property + def text(self): + if self.kind == 'ifdef': + return self.condition[8:-1] # strip "defined(" + elif self.kind == 'ifndef': + return self.condition[10:-1] # strip "! defined(" + else: + return self.condition + #return str(self.condition) + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + super().validate() + + if not self.condition: + raise TypeError('missing condition') + #else: + # for cond in self.condition: + # if not cond: + # raise ValueError(f'missing condition in {self.condition}') + # cond.validate() + # if self.kind in ('ifdef', 'ifndef'): + # if len(self.condition) != 1: + # raise ValueError('too many condition') + # if self.kind == 'ifdef': + # if not self.condition[0].startswith('defined '): + # raise ValueError('bad condition') + # else: + # if not self.condition[0].startswith('! defined '): + # raise ValueError('bad condition') + + +class Include(PreprocessorDirective, + namedtuple('Include', 'kind file')): + """A single "include" directive. + + Supported "file" values are either follow the bracket style + (<stdio>) or double quotes ("spam.h"). 
+ """ + + __slots__ = () + + def __new__(cls, file): + self = super().__new__( + cls, + kind='include', + file=_coerce_str(file) or None, + ) + return self + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + super().validate() + + if not self.file: + raise TypeError('missing file') + + +class OtherDirective(PreprocessorDirective, + namedtuple('OtherDirective', 'kind text')): + """A single directive not covered by another class. + + This includes the "else", "endif", and "undef" directives, which are + otherwise inherently related to the directives covered by the + Constant, Macro, and IfCondition classes. + + Note that all directives must have a text value, except for "else" + and "endif" (which must have no text). + """ + + __slots__ = () + + KINDS = PreprocessorDirective.KINDS - {'include', 'define'} - IfDirective.KINDS + + def __new__(cls, kind, text): + self = super().__new__( + cls, + kind=_coerce_str(kind) or None, + text=_coerce_str(text) or None, + ) + return self + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + super().validate() + + if self.text: + if self.kind in ('else', 'endif'): + raise ValueError('unexpected text in directive') + elif self.kind not in ('else', 'endif'): + raise TypeError('missing text') + + +############################# +# iterating lines + +def _recompute_conditions(directive, ifstack): + if directive.kind in ('if', 'ifdef', 'ifndef'): + ifstack.append( + ([], directive.condition)) + elif directive.kind == 'elseif': + if ifstack: + negated, active = ifstack.pop() + if active: + negated.append(active) + else: + negated = [] + ifstack.append( + (negated, directive.condition)) + elif directive.kind == 'else': + if ifstack: + negated, active = ifstack.pop() + if active: + negated.append(active) + ifstack.append( + (negated, None)) + elif directive.kind == 'endif': + if ifstack: + ifstack.pop() + + conditions = [] + for negated, active in ifstack: + for condition in negated: + conditions.append(f'! ({condition})') + if active: + conditions.append(active) + return tuple(conditions) + + +def _iter_clean_lines(lines): + lines = iter(enumerate(lines, 1)) + for lno, line in lines: + # Handle line continuations. + while line.endswith(CONTINUATION): + try: + lno, _line = next(lines) + except StopIteration: + break + line = line[:-len(CONTINUATION)] + ' ' + _line + + # Deal with comments. + after = line + line = '' + while True: + # Look for a comment. + before, begin, remainder = after.partition('/*') + if '//' in before: + before, _, _ = before.partition('//') + line += before + ' ' # per the C99 spec + break + line += before + if not begin: + break + line += ' ' # per the C99 spec + + # Go until we find the end of the comment. + _, end, after = remainder.partition('*/') + while not end: + try: + lno, remainder = next(lines) + except StopIteration: + raise Exception('unterminated comment') + _, end, after = remainder.partition('*/') + + yield lno, line + + +def iter_lines(lines, *, + _iter_clean_lines=_iter_clean_lines, + _parse_directive=_parse_directive, + _recompute_conditions=_recompute_conditions, + ): + """Yield (lno, line, directive, active conditions) for each given line. + + This is effectively a subset of the operations taking place in + translation phases 2-4 from the C99 spec (ISO/IEC 9899:TC2); see + section 5.1.1.2. Line continuations are removed and comments + replaced with a single space. (In both cases "lno" will be the last + line involved.) 
Otherwise each line is returned as-is. + + "lno" is the (1-indexed) line number for the line. + + "directive" will be a PreprocessorDirective or None, depending on + whether or not there is a directive on the line. + + "active conditions" is the set of preprocessor conditions (e.g. + "defined()") under which the current line of code will be included + in compilation. That set is derived from every conditional + directive block (e.g. "if defined()", "ifdef", "else") containing + that line. That includes nested directives. Note that the + current line does not affect the active conditions for iteself. + It only impacts subsequent lines. That applies to directives + that close blocks (e.g. "endif") just as much as conditional + directvies. Also note that "else" and "elseif" directives + update the active conditions (for later lines), rather than + adding to them. + """ + ifstack = [] + conditions = () + for lno, line in _iter_clean_lines(lines): + stripped = line.strip() + if not stripped.startswith('#'): + yield lno, line, None, conditions + continue + + directive = '#' + stripped[1:].lstrip() + while ' ' in directive: + directive = directive.replace(' ', ' ') + directive = _parse_directive(directive) + yield lno, line, directive, conditions + + if directive.kind in ('else', 'endif'): + conditions = _recompute_conditions(directive, ifstack) + elif isinstance(directive, IfDirective): + conditions = _recompute_conditions(directive, ifstack) + + +############################# +# running (platform-specific?) + +def _gcc(filename, *, + _get_argv=(lambda: _get_gcc_argv()), + _run=util.run_cmd, + ): + argv = _get_argv() + argv.extend([ + '-E', filename, + ]) + output = _run(argv) + return output + + +def _get_gcc_argv(*, + _open=open, + _run=util.run_cmd, + ): + with _open('/tmp/print.mk', 'w') as tmpfile: + tmpfile.write('print-%:\n') + #tmpfile.write('\t@echo $* = $($*)\n') + tmpfile.write('\t@echo $($*)\n') + argv = ['/usr/bin/make', + '-f', 'Makefile', + '-f', '/tmp/print.mk', + 'print-CC', + 'print-PY_CORE_CFLAGS', + ] + output = _run(argv) + gcc, cflags = output.strip().splitlines() + argv = shlex.split(gcc.strip()) + cflags = shlex.split(cflags.strip()) + return argv + cflags + + +def run(filename, *, + _gcc=_gcc, + ): + """Return the text of the given file after running the preprocessor.""" + return _gcc(filename) diff --git a/Tools/c-analyzer/c_parser/source.py b/Tools/c-analyzer/c_parser/source.py new file mode 100644 index 0000000..f8998c8 --- /dev/null +++ b/Tools/c-analyzer/c_parser/source.py @@ -0,0 +1,34 @@ +from . import preprocessor + + +def iter_clean_lines(lines): + incomment = False + for line in lines: + # Deal with comments. + if incomment: + _, sep, line = line.partition('*/') + if sep: + incomment = False + continue + line, _, _ = line.partition('//') + line, sep, remainder = line.partition('/*') + if sep: + _, sep, after = remainder.partition('*/') + if not sep: + incomment = True + continue + line += ' ' + after + + # Ignore blank lines and leading/trailing whitespace. + line = line.strip() + if not line: + continue + + yield line + + +def iter_lines(filename, *, + preprocess=preprocessor.run, + ): + content = preprocess(filename) + return iter(content.splitlines()) |
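For reference, here is what the regex-driven splitting in c_parser/declarations.py produces for two simple declarations. This is a sketch under the assumption that the (invented) inputs fit the plain forms covered by LOCAL_STMT_START_RE.

    from c_parser.declarations import parse_var

    # Split a simple declaration into (name, vartype).
    print(parse_var('static int initialized = 0;'))   # ('initialized', 'static int')
    print(parse_var('const char *msg;'))              # ('msg', 'const char *')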
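Finally, a sketch of the directive tracking in c_parser/preprocessor.py: iter_lines() strips comments and line continuations and reports, for each line, the preprocessor conditions that guard it. The input is invented and the sibling c_analyzer_common package is assumed to be importable.

    from c_parser.preprocessor import iter_lines

    SRC = [
        '#ifdef Py_DEBUG',
        'static int debug_hooks;  /* only built under Py_DEBUG */',
        '#endif',
        'static const char *name = "spam";',
    ]

    for lno, line, directive, conditions in iter_lines(SRC):
        print(lno, repr(line), directive and directive.kind, conditions)
    # The second line is reported with conditions ('defined(Py_DEBUG)',);
    # the last line is reported with no active conditions.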