diff options
author | Eric Snow <ericsnowcurrently@gmail.com> | 2019-10-19 02:00:04 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-10-19 02:00:04 (GMT) |
commit | e4c431ecf50def40eb93c3969c1e4eeaf7bf32f1 (patch) | |
tree | 071224bbded262901b9742eb82c5d82d2f744fe1 /Tools/c-analyzer/c_analyzer/parser/preprocessor.py | |
parent | ea55c51bd937f6019c35b39b87029644e469c059 (diff) | |
download | cpython-e4c431ecf50def40eb93c3969c1e4eeaf7bf32f1.zip cpython-e4c431ecf50def40eb93c3969c1e4eeaf7bf32f1.tar.gz cpython-e4c431ecf50def40eb93c3969c1e4eeaf7bf32f1.tar.bz2 |
bpo-36876: Re-organize the c-analyzer tool code. (gh-16841)
This is partly a cleanup of the code. It also is preparation for getting the variables from the source (cross-platform) rather than from the symbols.
The change only touches the tool (and its tests).
Diffstat (limited to 'Tools/c-analyzer/c_analyzer/parser/preprocessor.py')
-rw-r--r-- | Tools/c-analyzer/c_analyzer/parser/preprocessor.py | 511 |
1 files changed, 511 insertions, 0 deletions
diff --git a/Tools/c-analyzer/c_analyzer/parser/preprocessor.py b/Tools/c-analyzer/c_analyzer/parser/preprocessor.py new file mode 100644 index 0000000..41f306e --- /dev/null +++ b/Tools/c-analyzer/c_analyzer/parser/preprocessor.py @@ -0,0 +1,511 @@ +from collections import namedtuple +import shlex +import os +import re + +from ..common import util, info + + +CONTINUATION = '\\' + os.linesep + +IDENTIFIER = r'(?:\w*[a-zA-Z]\w*)' +IDENTIFIER_RE = re.compile('^' + IDENTIFIER + '$') + + +def _coerce_str(value): + if not value: + return '' + return str(value).strip() + + +############################# +# directives + +DIRECTIVE_START = r''' + (?: + ^ \s* + [#] \s* + )''' +DIRECTIVE_TEXT = r''' + (?: + (?: \s+ ( .*\S ) )? + \s* $ + )''' +DIRECTIVE = rf''' + (?: + {DIRECTIVE_START} + ( + include | + error | warning | + pragma | + define | undef | + if | ifdef | ifndef | elseif | else | endif | + __FILE__ | __LINE__ | __DATE __ | __TIME__ | __TIMESTAMP__ + ) + {DIRECTIVE_TEXT} + )''' +# (?: +# [^\\\n] | +# \\ [^\n] | +# \\ \n +# )+ +# ) \n +# )''' +DIRECTIVE_RE = re.compile(DIRECTIVE, re.VERBOSE) + +DEFINE = rf''' + (?: + {DIRECTIVE_START} define \s+ + (?: + ( \w*[a-zA-Z]\w* ) + (?: \s* [(] ([^)]*) [)] )? + ) + {DIRECTIVE_TEXT} + )''' +DEFINE_RE = re.compile(DEFINE, re.VERBOSE) + + +def parse_directive(line): + """Return the appropriate directive for the given line.""" + line = line.strip() + if line.startswith('#'): + line = line[1:].lstrip() + line = '#' + line + directive = line + #directive = '#' + line + while ' ' in directive: + directive = directive.replace(' ', ' ') + return _parse_directive(directive) + + +def _parse_directive(line): + m = DEFINE_RE.match(line) + if m: + name, args, text = m.groups() + if args: + args = [a.strip() for a in args.split(',')] + return Macro(name, args, text) + else: + return Constant(name, text) + + m = DIRECTIVE_RE.match(line) + if not m: + raise ValueError(f'unsupported directive {line!r}') + kind, text = m.groups() + if not text: + if kind not in ('else', 'endif'): + raise ValueError(f'missing text in directive {line!r}') + elif kind in ('else', 'endif', 'define'): + raise ValueError(f'unexpected text in directive {line!r}') + if kind == 'include': + directive = Include(text) + elif kind in IfDirective.KINDS: + directive = IfDirective(kind, text) + else: + directive = OtherDirective(kind, text) + directive.validate() + return directive + + +class PreprocessorDirective(util._NTBase): + """The base class for directives.""" + + __slots__ = () + + KINDS = frozenset([ + 'include', + 'pragma', + 'error', 'warning', + 'define', 'undef', + 'if', 'ifdef', 'ifndef', 'elseif', 'else', 'endif', + '__FILE__', '__DATE__', '__LINE__', '__TIME__', '__TIMESTAMP__', + ]) + + @property + def text(self): + return ' '.join(v for v in self[1:] if v and v.strip()) or None + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + super().validate() + + if not self.kind: + raise TypeError('missing kind') + elif self.kind not in self.KINDS: + raise ValueError + + # text can be anything, including None. + + +class Constant(PreprocessorDirective, + namedtuple('Constant', 'kind name value')): + """A single "constant" directive ("define").""" + + __slots__ = () + + def __new__(cls, name, value=None): + self = super().__new__( + cls, + 'define', + name=_coerce_str(name) or None, + value=_coerce_str(value) or None, + ) + return self + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + super().validate() + + if not self.name: + raise TypeError('missing name') + elif not IDENTIFIER_RE.match(self.name): + raise ValueError(f'name must be identifier, got {self.name!r}') + + # value can be anything, including None + + +class Macro(PreprocessorDirective, + namedtuple('Macro', 'kind name args body')): + """A single "macro" directive ("define").""" + + __slots__ = () + + def __new__(cls, name, args, body=None): + # "args" must be a string or an iterable of strings (or "empty"). + if isinstance(args, str): + args = [v.strip() for v in args.split(',')] + if args: + args = tuple(_coerce_str(a) or None for a in args) + self = super().__new__( + cls, + kind='define', + name=_coerce_str(name) or None, + args=args if args else (), + body=_coerce_str(body) or None, + ) + return self + + @property + def text(self): + if self.body: + return f'{self.name}({", ".join(self.args)}) {self.body}' + else: + return f'{self.name}({", ".join(self.args)})' + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + super().validate() + + if not self.name: + raise TypeError('missing name') + elif not IDENTIFIER_RE.match(self.name): + raise ValueError(f'name must be identifier, got {self.name!r}') + + for arg in self.args: + if not arg: + raise ValueError(f'missing arg in {self.args}') + elif not IDENTIFIER_RE.match(arg): + raise ValueError(f'arg must be identifier, got {arg!r}') + + # body can be anything, including None + + +class IfDirective(PreprocessorDirective, + namedtuple('IfDirective', 'kind condition')): + """A single conditional directive (e.g. "if", "ifdef"). + + This only includes directives that actually provide conditions. The + related directives "else" and "endif" are covered by OtherDirective + instead. + """ + + __slots__ = () + + KINDS = frozenset([ + 'if', + 'ifdef', + 'ifndef', + 'elseif', + ]) + + @classmethod + def _condition_from_raw(cls, raw, kind): + #return Condition.from_raw(raw, _kind=kind) + condition = _coerce_str(raw) + if not condition: + return None + + if kind == 'ifdef': + condition = f'defined({condition})' + elif kind == 'ifndef': + condition = f'! defined({condition})' + + return condition + + def __new__(cls, kind, condition): + kind = _coerce_str(kind) + self = super().__new__( + cls, + kind=kind or None, + condition=cls._condition_from_raw(condition, kind), + ) + return self + + @property + def text(self): + if self.kind == 'ifdef': + return self.condition[8:-1] # strip "defined(" + elif self.kind == 'ifndef': + return self.condition[10:-1] # strip "! defined(" + else: + return self.condition + #return str(self.condition) + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + super().validate() + + if not self.condition: + raise TypeError('missing condition') + #else: + # for cond in self.condition: + # if not cond: + # raise ValueError(f'missing condition in {self.condition}') + # cond.validate() + # if self.kind in ('ifdef', 'ifndef'): + # if len(self.condition) != 1: + # raise ValueError('too many condition') + # if self.kind == 'ifdef': + # if not self.condition[0].startswith('defined '): + # raise ValueError('bad condition') + # else: + # if not self.condition[0].startswith('! defined '): + # raise ValueError('bad condition') + + +class Include(PreprocessorDirective, + namedtuple('Include', 'kind file')): + """A single "include" directive. + + Supported "file" values are either follow the bracket style + (<stdio>) or double quotes ("spam.h"). + """ + + __slots__ = () + + def __new__(cls, file): + self = super().__new__( + cls, + kind='include', + file=_coerce_str(file) or None, + ) + return self + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + super().validate() + + if not self.file: + raise TypeError('missing file') + + +class OtherDirective(PreprocessorDirective, + namedtuple('OtherDirective', 'kind text')): + """A single directive not covered by another class. + + This includes the "else", "endif", and "undef" directives, which are + otherwise inherently related to the directives covered by the + Constant, Macro, and IfCondition classes. + + Note that all directives must have a text value, except for "else" + and "endif" (which must have no text). + """ + + __slots__ = () + + KINDS = PreprocessorDirective.KINDS - {'include', 'define'} - IfDirective.KINDS + + def __new__(cls, kind, text): + self = super().__new__( + cls, + kind=_coerce_str(kind) or None, + text=_coerce_str(text) or None, + ) + return self + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + super().validate() + + if self.text: + if self.kind in ('else', 'endif'): + raise ValueError('unexpected text in directive') + elif self.kind not in ('else', 'endif'): + raise TypeError('missing text') + + +############################# +# iterating lines + +def _recompute_conditions(directive, ifstack): + if directive.kind in ('if', 'ifdef', 'ifndef'): + ifstack.append( + ([], directive.condition)) + elif directive.kind == 'elseif': + if ifstack: + negated, active = ifstack.pop() + if active: + negated.append(active) + else: + negated = [] + ifstack.append( + (negated, directive.condition)) + elif directive.kind == 'else': + if ifstack: + negated, active = ifstack.pop() + if active: + negated.append(active) + ifstack.append( + (negated, None)) + elif directive.kind == 'endif': + if ifstack: + ifstack.pop() + + conditions = [] + for negated, active in ifstack: + for condition in negated: + conditions.append(f'! ({condition})') + if active: + conditions.append(active) + return tuple(conditions) + + +def _iter_clean_lines(lines): + lines = iter(enumerate(lines, 1)) + for lno, line in lines: + # Handle line continuations. + while line.endswith(CONTINUATION): + try: + lno, _line = next(lines) + except StopIteration: + break + line = line[:-len(CONTINUATION)] + ' ' + _line + + # Deal with comments. + after = line + line = '' + while True: + # Look for a comment. + before, begin, remainder = after.partition('/*') + if '//' in before: + before, _, _ = before.partition('//') + line += before + ' ' # per the C99 spec + break + line += before + if not begin: + break + line += ' ' # per the C99 spec + + # Go until we find the end of the comment. + _, end, after = remainder.partition('*/') + while not end: + try: + lno, remainder = next(lines) + except StopIteration: + raise Exception('unterminated comment') + _, end, after = remainder.partition('*/') + + yield lno, line + + +def iter_lines(lines, *, + _iter_clean_lines=_iter_clean_lines, + _parse_directive=_parse_directive, + _recompute_conditions=_recompute_conditions, + ): + """Yield (lno, line, directive, active conditions) for each given line. + + This is effectively a subset of the operations taking place in + translation phases 2-4 from the C99 spec (ISO/IEC 9899:TC2); see + section 5.1.1.2. Line continuations are removed and comments + replaced with a single space. (In both cases "lno" will be the last + line involved.) Otherwise each line is returned as-is. + + "lno" is the (1-indexed) line number for the line. + + "directive" will be a PreprocessorDirective or None, depending on + whether or not there is a directive on the line. + + "active conditions" is the set of preprocessor conditions (e.g. + "defined()") under which the current line of code will be included + in compilation. That set is derived from every conditional + directive block (e.g. "if defined()", "ifdef", "else") containing + that line. That includes nested directives. Note that the + current line does not affect the active conditions for iteself. + It only impacts subsequent lines. That applies to directives + that close blocks (e.g. "endif") just as much as conditional + directvies. Also note that "else" and "elseif" directives + update the active conditions (for later lines), rather than + adding to them. + """ + ifstack = [] + conditions = () + for lno, line in _iter_clean_lines(lines): + stripped = line.strip() + if not stripped.startswith('#'): + yield lno, line, None, conditions + continue + + directive = '#' + stripped[1:].lstrip() + while ' ' in directive: + directive = directive.replace(' ', ' ') + directive = _parse_directive(directive) + yield lno, line, directive, conditions + + if directive.kind in ('else', 'endif'): + conditions = _recompute_conditions(directive, ifstack) + elif isinstance(directive, IfDirective): + conditions = _recompute_conditions(directive, ifstack) + + +############################# +# running (platform-specific?) + +def _gcc(filename, *, + _get_argv=(lambda: _get_gcc_argv()), + _run=util.run_cmd, + ): + argv = _get_argv() + argv.extend([ + '-E', filename, + ]) + output = _run(argv) + return output + + +def _get_gcc_argv(*, + _open=open, + _run=util.run_cmd, + ): + with _open('/tmp/print.mk', 'w') as tmpfile: + tmpfile.write('print-%:\n') + #tmpfile.write('\t@echo $* = $($*)\n') + tmpfile.write('\t@echo $($*)\n') + argv = ['/usr/bin/make', + '-f', 'Makefile', + '-f', '/tmp/print.mk', + 'print-CC', + 'print-PY_CORE_CFLAGS', + ] + output = _run(argv) + gcc, cflags = output.strip().splitlines() + argv = shlex.split(gcc.strip()) + cflags = shlex.split(cflags.strip()) + return argv + cflags + + +def run(filename, *, + _gcc=_gcc, + ): + """Return the text of the given file after running the preprocessor.""" + return _gcc(filename) |