author     Eric Snow <ericsnowcurrently@gmail.com>    2020-10-23 00:42:51 (GMT)
committer  GitHub <noreply@github.com>                2020-10-23 00:42:51 (GMT)
commit     345cd37abe324ad4f60f80e2c3133b8849e54e9b (patch)
tree       5d965e662dca9dcac19e7eddd63a3d9d0b816fed /Tools/c-analyzer/c_parser/preprocessor
parent     ec388cfb4ede56dace2bb78851ff6f38fa2a6abe (diff)
bpo-36876: Fix the C analyzer tool. (GH-22841)
The original tool wasn't working right and it was simpler to create a new one, partially re-using some of the old code. At this point the tool runs properly on the master branch. (Try: ./python Tools/c-analyzer/c-analyzer.py analyze.) It takes ~40 seconds on my machine to analyze the full CPython code base.

Note that we'll need to iron out some OS-specific stuff (e.g. preprocessor). We're okay though since this tool isn't used yet in our workflow. We will also need to verify the analysis results in detail before activating the check in CI, though I'm pretty sure it's close.

https://bugs.python.org/issue36876
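For context, the new c_parser.preprocessor package added below exposes a small Python API of its own. A minimal sketch of driving it directly (the target file, include dirs, and macros are illustrative only; it assumes the repo root as the CWD, as the preprocess() docstring requires, and Tools/c-analyzer on sys.path):

    import sys
    sys.path.insert(0, 'Tools/c-analyzer')   # assumption: run from the CPython repo root
    from c_parser.preprocessor import preprocess

    # Preprocess one translation unit with the default tool (gcc on "unix" builds);
    # each yielded item is a SourceLine (re-exported at the bottom of __init__.py below).
    for line in preprocess('Parser/token.c',
                           incldirs=['Include', '.'],        # illustrative values
                           macros=[('Py_BUILD_CORE', '1')],  # illustrative values
                           ):
        print(line.kind, line.data)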
Diffstat (limited to 'Tools/c-analyzer/c_parser/preprocessor')
-rw-r--r--   Tools/c-analyzer/c_parser/preprocessor/__init__.py   190
-rw-r--r--   Tools/c-analyzer/c_parser/preprocessor/__main__.py   196
-rw-r--r--   Tools/c-analyzer/c_parser/preprocessor/common.py     173
-rw-r--r--   Tools/c-analyzer/c_parser/preprocessor/errors.py     110
-rw-r--r--   Tools/c-analyzer/c_parser/preprocessor/gcc.py        123
-rw-r--r--   Tools/c-analyzer/c_parser/preprocessor/pure.py        23
6 files changed, 815 insertions, 0 deletions
diff --git a/Tools/c-analyzer/c_parser/preprocessor/__init__.py b/Tools/c-analyzer/c_parser/preprocessor/__init__.py
new file mode 100644
index 0000000..f206f69
--- /dev/null
+++ b/Tools/c-analyzer/c_parser/preprocessor/__init__.py
@@ -0,0 +1,190 @@
+import contextlib
+import distutils.ccompiler
+import logging
+import os.path
+
+from c_common.fsutil import match_glob as _match_glob
+from c_common.tables import parse_table as _parse_table
+from ..source import (
+ resolve as _resolve_source,
+ good_file as _good_file,
+)
+from . import errors as _errors
+from . import (
+ pure as _pure,
+ gcc as _gcc,
+)
+
+
+logger = logging.getLogger(__name__)
+
+
+# Supported "source":
+# * filename (string)
+# * lines (iterable)
+# * text (string)
+# Supported return values:
+# * iterator of SourceLine
+# * sequence of SourceLine
+# * text (string)
+# * something that combines all those
+# XXX Add the missing support from above.
+# XXX Add more low-level functions to handle permutations?
+
+def preprocess(source, *,
+ incldirs=None,
+ macros=None,
+ samefiles=None,
+ filename=None,
+ tool=True,
+ ):
+ """...
+
+ CWD should be the project root and "source" should be relative.
+ """
+ if tool:
+ logger.debug(f'CWD: {os.getcwd()!r}')
+ logger.debug(f'incldirs: {incldirs!r}')
+ logger.debug(f'macros: {macros!r}')
+ logger.debug(f'samefiles: {samefiles!r}')
+ _preprocess = _get_preprocessor(tool)
+ with _good_file(source, filename) as source:
+ return _preprocess(source, incldirs, macros, samefiles) or ()
+ else:
+ source, filename = _resolve_source(source, filename)
+ # We ignore "includes", "macros", etc.
+ return _pure.preprocess(source, filename)
+
+ # if _run() returns just the lines:
+# text = _run(source)
+# lines = [line + os.linesep for line in text.splitlines()]
+# lines[-1] = lines[-1].splitlines()[0]
+#
+# conditions = None
+# for lno, line in enumerate(lines, 1):
+# kind = 'source'
+# directive = None
+# data = line
+# yield lno, kind, data, conditions
+
+
+def get_preprocessor(*,
+ file_macros=None,
+ file_incldirs=None,
+ file_same=None,
+ ignore_exc=False,
+ log_err=None,
+ ):
+ _preprocess = preprocess
+ if file_macros:
+ file_macros = tuple(_parse_macros(file_macros))
+ if file_incldirs:
+ file_incldirs = tuple(_parse_incldirs(file_incldirs))
+ if file_same:
+ file_same = tuple(file_same)
+ if not callable(ignore_exc):
+ ignore_exc = (lambda exc, _ig=ignore_exc: _ig)
+
+ def get_file_preprocessor(filename):
+ filename = filename.strip()
+ if file_macros:
+ macros = list(_resolve_file_values(filename, file_macros))
+ if file_incldirs:
+ incldirs = [v for v, in _resolve_file_values(filename, file_incldirs)]
+
+ def preprocess(**kwargs):
+ if file_macros and 'macros' not in kwargs:
+ kwargs['macros'] = macros
+ if file_incldirs and 'incldirs' not in kwargs:
+            kwargs['incldirs'] = incldirs
+        if file_same and 'samefiles' not in kwargs:
+ kwargs['samefiles'] = file_same
+ kwargs.setdefault('filename', filename)
+ with handling_errors(ignore_exc, log_err=log_err):
+ return _preprocess(filename, **kwargs)
+ return preprocess
+ return get_file_preprocessor
+
+
+def _resolve_file_values(filename, file_values):
+ # We expect the filename and all patterns to be absolute paths.
+ for pattern, *value in file_values or ():
+ if _match_glob(filename, pattern):
+ yield value
+
+
+def _parse_macros(macros):
+ for row, srcfile in _parse_table(macros, '\t', 'glob\tname\tvalue', rawsep='=', default=None):
+ yield row
+
+
+def _parse_incldirs(incldirs):
+ for row, srcfile in _parse_table(incldirs, '\t', 'glob\tdirname', default=None):
+ glob, dirname = row
+ if dirname is None:
+ # Match all files.
+ dirname = glob
+ row = ('*', dirname.strip())
+ yield row
+
+
+@contextlib.contextmanager
+def handling_errors(ignore_exc=None, *, log_err=None):
+ try:
+ yield
+ except _errors.OSMismatchError as exc:
+ if not ignore_exc(exc):
+ raise # re-raise
+ if log_err is not None:
+ log_err(f'<OS mismatch (expected {" or ".join(exc.expected)})>')
+ return None
+ except _errors.MissingDependenciesError as exc:
+ if not ignore_exc(exc):
+ raise # re-raise
+ if log_err is not None:
+            log_err(f'<missing dependency {exc.missing}>')
+ return None
+ except _errors.ErrorDirectiveError as exc:
+ if not ignore_exc(exc):
+ raise # re-raise
+ if log_err is not None:
+ log_err(exc)
+ return None
+
+
+##################################
+# tools
+
+_COMPILERS = {
+    # matching distutils.ccompiler.compiler_class:
+ 'unix': _gcc.preprocess,
+ 'msvc': None,
+ 'cygwin': None,
+ 'mingw32': None,
+ 'bcpp': None,
+ # aliases/extras:
+ 'gcc': _gcc.preprocess,
+ 'clang': None,
+}
+
+
+def _get_preprocessor(tool):
+ if tool is True:
+ tool = distutils.ccompiler.get_default_compiler()
+ preprocess = _COMPILERS.get(tool)
+ if preprocess is None:
+ raise ValueError(f'unsupported tool {tool}')
+ return preprocess
+
+
+##################################
+# aliases
+
+from .errors import (
+ PreprocessorError,
+ PreprocessorFailure,
+ ErrorDirectiveError,
+ MissingDependenciesError,
+ OSMismatchError,
+)
+from .common import FileInfo, SourceLine
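The get_preprocessor() factory above is the piece the analyzer (and the CLI in __main__.py below) actually uses: it returns a per-file preprocess() closure with the error handling baked in. A rough sketch, with a hypothetical target file:

    from c_parser.preprocessor import get_preprocessor

    # ignore_exc=True swallows the known failure modes (OS mismatch, missing
    # dependency, #error directive) and logs them via log_err instead of raising.
    get_file_preprocessor = get_preprocessor(ignore_exc=True, log_err=print)

    preprocess_file = get_file_preprocessor('Modules/_ssl.c')   # hypothetical target
    for line in preprocess_file() or ():    # None when a failure was ignored
        print(f'{line.kind:10} | {line.data}')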
diff --git a/Tools/c-analyzer/c_parser/preprocessor/__main__.py b/Tools/c-analyzer/c_parser/preprocessor/__main__.py
new file mode 100644
index 0000000..a605430
--- /dev/null
+++ b/Tools/c-analyzer/c_parser/preprocessor/__main__.py
@@ -0,0 +1,196 @@
+import logging
+import sys
+
+from c_common.scriptutil import (
+ CLIArgSpec as Arg,
+ add_verbosity_cli,
+ add_traceback_cli,
+ add_kind_filtering_cli,
+ add_files_cli,
+ add_failure_filtering_cli,
+ add_commands_cli,
+ process_args_by_key,
+ configure_logger,
+ get_prog,
+ main_for_filenames,
+)
+from . import (
+ errors as _errors,
+ get_preprocessor as _get_preprocessor,
+)
+
+
+FAIL = {
+ 'err': _errors.ErrorDirectiveError,
+ 'deps': _errors.MissingDependenciesError,
+ 'os': _errors.OSMismatchError,
+}
+FAIL_DEFAULT = tuple(v for v in FAIL if v != 'os')
+
+
+logger = logging.getLogger(__name__)
+
+
+##################################
+# CLI helpers
+
+def add_common_cli(parser, *, get_preprocessor=_get_preprocessor):
+ parser.add_argument('--macros', action='append')
+ parser.add_argument('--incldirs', action='append')
+ parser.add_argument('--same', action='append')
+ process_fail_arg = add_failure_filtering_cli(parser, FAIL)
+
+ def process_args(args):
+ ns = vars(args)
+
+ process_fail_arg(args)
+ ignore_exc = ns.pop('ignore_exc')
+ # We later pass ignore_exc to _get_preprocessor().
+
+ args.get_file_preprocessor = get_preprocessor(
+ file_macros=ns.pop('macros'),
+ file_incldirs=ns.pop('incldirs'),
+ file_same=ns.pop('same'),
+ ignore_exc=ignore_exc,
+ log_err=print,
+ )
+ return process_args
+
+
+def _iter_preprocessed(filename, *,
+ get_preprocessor,
+ match_kind=None,
+ pure=False,
+ ):
+ preprocess = get_preprocessor(filename)
+ for line in preprocess(tool=not pure) or ():
+ if match_kind is not None and not match_kind(line.kind):
+ continue
+ yield line
+
+
+#######################################
+# the commands
+
+def _cli_preprocess(parser, excluded=None, **prepr_kwargs):
+ parser.add_argument('--pure', action='store_true')
+ parser.add_argument('--no-pure', dest='pure', action='store_const', const=False)
+ process_kinds = add_kind_filtering_cli(parser)
+ process_common = add_common_cli(parser, **prepr_kwargs)
+ parser.add_argument('--raw', action='store_true')
+ process_files = add_files_cli(parser, excluded=excluded)
+
+ return [
+ process_kinds,
+ process_common,
+ process_files,
+ ]
+
+
+def cmd_preprocess(filenames, *,
+ raw=False,
+ iter_filenames=None,
+ **kwargs
+ ):
+ if 'get_file_preprocessor' not in kwargs:
+ kwargs['get_file_preprocessor'] = _get_preprocessor()
+ if raw:
+ def show_file(filename, lines):
+ for line in lines:
+ print(line)
+ #print(line.raw)
+ else:
+ def show_file(filename, lines):
+ for line in lines:
+ linefile = ''
+ if line.filename != filename:
+ linefile = f' ({line.filename})'
+ text = line.data
+ if line.kind == 'comment':
+ text = '/* ' + line.data.splitlines()[0]
+                    text += r'\n... */' if '\n' in line.data else ' */'
+ print(f' {line.lno:>4} {line.kind:10} | {text}')
+
+ filenames = main_for_filenames(filenames, iter_filenames)
+ for filename in filenames:
+ lines = _iter_preprocessed(filename, **kwargs)
+ show_file(filename, lines)
+
+
+def _cli_data(parser):
+ ...
+
+ return None
+
+
+def cmd_data(filenames,
+ **kwargs
+ ):
+ # XXX
+ raise NotImplementedError
+
+
+COMMANDS = {
+ 'preprocess': (
+ 'preprocess the given C source & header files',
+ [_cli_preprocess],
+ cmd_preprocess,
+ ),
+ 'data': (
+ 'check/manage local data (e.g. excludes, macros)',
+ [_cli_data],
+ cmd_data,
+ ),
+}
+
+
+#######################################
+# the script
+
+def parse_args(argv=sys.argv[1:], prog=sys.argv[0], *,
+ subset='preprocess',
+ excluded=None,
+ **prepr_kwargs
+ ):
+ import argparse
+ parser = argparse.ArgumentParser(
+ prog=prog or get_prog(),
+ )
+
+ processors = add_commands_cli(
+ parser,
+ commands={k: v[1] for k, v in COMMANDS.items()},
+ commonspecs=[
+ add_verbosity_cli,
+ add_traceback_cli,
+ ],
+ subset=subset,
+ )
+
+ args = parser.parse_args(argv)
+ ns = vars(args)
+
+ cmd = ns.pop('cmd')
+
+ verbosity, traceback_cm = process_args_by_key(
+ args,
+ processors[cmd],
+ ['verbosity', 'traceback_cm'],
+ )
+
+ return cmd, ns, verbosity, traceback_cm
+
+
+def main(cmd, cmd_kwargs):
+ try:
+        run_cmd = COMMANDS[cmd][-1]
+ except KeyError:
+ raise ValueError(f'unsupported cmd {cmd!r}')
+ run_cmd(**cmd_kwargs)
+
+
+if __name__ == '__main__':
+ cmd, cmd_kwargs, verbosity, traceback_cm = parse_args()
+ configure_logger(verbosity)
+ with traceback_cm:
+ main(cmd, cmd_kwargs)
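For completeness, the same commands can be driven programmatically, mirroring the __main__ block above. The argv here is purely illustrative and assumes the 'preprocess' subcommand takes filenames positionally (that detail lives in add_files_cli() in c_common.scriptutil):

    # --pure makes cmd_preprocess() skip the external tool (see _cli_preprocess above).
    cmd, cmd_kwargs, verbosity, traceback_cm = parse_args(
        ['preprocess', '--pure', 'Parser/token.c'],
        prog='c_parser.preprocessor',
    )
    configure_logger(verbosity)
    with traceback_cm:
        main(cmd, cmd_kwargs)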
diff --git a/Tools/c-analyzer/c_parser/preprocessor/common.py b/Tools/c-analyzer/c_parser/preprocessor/common.py
new file mode 100644
index 0000000..6368102
--- /dev/null
+++ b/Tools/c-analyzer/c_parser/preprocessor/common.py
@@ -0,0 +1,173 @@
+import contextlib
+import distutils.ccompiler
+import logging
+import shlex
+import subprocess
+import sys
+
+from ..info import FileInfo, SourceLine
+from .errors import (
+ PreprocessorFailure,
+ ErrorDirectiveError,
+ MissingDependenciesError,
+ OSMismatchError,
+)
+
+
+logger = logging.getLogger(__name__)
+
+
+# XXX Add aggregate "source" class(es)?
+# * expose all lines as single text string
+# * expose all lines as sequence
+# * iterate all lines
+
+
+def run_cmd(argv, *,
+ #capture_output=True,
+ stdout=subprocess.PIPE,
+ #stderr=subprocess.STDOUT,
+ stderr=subprocess.PIPE,
+ text=True,
+ check=True,
+ **kwargs
+ ):
+ if isinstance(stderr, str) and stderr.lower() == 'stdout':
+ stderr = subprocess.STDOUT
+
+ kw = dict(locals())
+ kw.pop('argv')
+ kw.pop('kwargs')
+ kwargs.update(kw)
+
+ proc = subprocess.run(argv, **kwargs)
+ return proc.stdout
+
+
+def preprocess(tool, filename, **kwargs):
+ argv = _build_argv(tool, filename, **kwargs)
+ logger.debug(' '.join(shlex.quote(v) for v in argv))
+
+ # Make sure the OS is supported for this file.
+ if (_expected := is_os_mismatch(filename)):
+ error = None
+        raise OSMismatchError(filename, _expected, argv, error, tool)
+
+ # Run the command.
+ with converted_error(tool, argv, filename):
+ # We use subprocess directly here, instead of calling the
+ # distutil compiler object's preprocess() method, since that
+ # one writes to stdout/stderr and it's simpler to do it directly
+ # through subprocess.
+ return run_cmd(argv)
+
+
+def _build_argv(
+ tool,
+ filename,
+ incldirs=None,
+ macros=None,
+ preargs=None,
+ postargs=None,
+ executable=None,
+ compiler=None,
+):
+ compiler = distutils.ccompiler.new_compiler(
+ compiler=compiler or tool,
+ )
+ if executable:
+ compiler.set_executable('preprocessor', executable)
+
+ argv = None
+ def _spawn(_argv):
+ nonlocal argv
+ argv = _argv
+ compiler.spawn = _spawn
+ compiler.preprocess(
+ filename,
+ macros=[tuple(v) for v in macros or ()],
+ include_dirs=incldirs or (),
+ extra_preargs=preargs or (),
+ extra_postargs=postargs or (),
+ )
+ return argv
+
+
+@contextlib.contextmanager
+def converted_error(tool, argv, filename):
+ try:
+ yield
+ except subprocess.CalledProcessError as exc:
+ convert_error(
+ tool,
+ argv,
+ filename,
+ exc.stderr,
+ exc.returncode,
+ )
+
+
+def convert_error(tool, argv, filename, stderr, rc):
+ error = (stderr.splitlines()[0], rc)
+ if (_expected := is_os_mismatch(filename, stderr)):
+ logger.debug(stderr.strip())
+ raise OSMismatchError(filename, _expected, argv, error, tool)
+ elif (_missing := is_missing_dep(stderr)):
+ logger.debug(stderr.strip())
+ raise MissingDependenciesError(filename, (_missing,), argv, error, tool)
+ elif '#error' in stderr:
+ # XXX Ignore incompatible files.
+ error = (stderr.splitlines()[1], rc)
+ logger.debug(stderr.strip())
+ raise ErrorDirectiveError(filename, argv, error, tool)
+ else:
+ # Try one more time, with stderr written to the terminal.
+ try:
+ output = run_cmd(argv, stderr=None)
+ except subprocess.CalledProcessError:
+ raise PreprocessorFailure(filename, argv, error, tool)
+
+
+def is_os_mismatch(filename, errtext=None):
+ # See: https://docs.python.org/3/library/sys.html#sys.platform
+ actual = sys.platform
+ if actual == 'unknown':
+ raise NotImplementedError
+
+ if errtext is not None:
+ if (missing := is_missing_dep(errtext)):
+ matching = get_matching_oses(missing, filename)
+ if actual not in matching:
+ return matching
+ return False
+
+
+def get_matching_oses(missing, filename):
+ # OSX
+ if 'darwin' in filename or 'osx' in filename:
+ return ('darwin',)
+ elif missing == 'SystemConfiguration/SystemConfiguration.h':
+ return ('darwin',)
+
+ # Windows
+ elif missing in ('windows.h', 'winsock2.h'):
+ return ('win32',)
+
+ # other
+ elif missing == 'sys/ldr.h':
+ return ('aix',)
+ elif missing == 'dl.h':
+ # XXX The existence of Python/dynload_dl.c implies others...
+        # Note that hpux isn't actually supported any more.
+ return ('hpux', '???')
+
+ # unrecognized
+ else:
+ return ()
+
+
+def is_missing_dep(errtext):
+ if 'No such file or directory' in errtext:
+ missing = errtext.split(': No such file or directory')[0].split()[-1]
+ return missing
+ return False
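To make the heuristics above concrete: given a typical gcc failure message (reproduced by hand here), the classification goes roughly like this:

    stderr = ('Modules/socketmodule.c:10:10: fatal error: '
              'winsock2.h: No such file or directory')
    missing = is_missing_dep(stderr)                             # -> 'winsock2.h'
    oses = get_matching_oses(missing, 'Modules/socketmodule.c')  # -> ('win32',)
    # On a non-Windows host, is_os_mismatch() therefore returns ('win32',),
    # so convert_error() raises OSMismatchError rather than a plain
    # PreprocessorFailure.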
diff --git a/Tools/c-analyzer/c_parser/preprocessor/errors.py b/Tools/c-analyzer/c_parser/preprocessor/errors.py
new file mode 100644
index 0000000..9b66801
--- /dev/null
+++ b/Tools/c-analyzer/c_parser/preprocessor/errors.py
@@ -0,0 +1,110 @@
+import sys
+
+
+OS = sys.platform
+
+
+def _as_tuple(items):
+ if isinstance(items, str):
+ return tuple(items.strip().replace(',', ' ').split())
+ elif items:
+ return tuple(items)
+ else:
+ return ()
+
+
+class PreprocessorError(Exception):
+ """Something preprocessor-related went wrong."""
+
+ @classmethod
+ def _msg(cls, filename, reason, **ignored):
+ msg = 'failure while preprocessing'
+ if reason:
+ msg = f'{msg} ({reason})'
+ return msg
+
+ def __init__(self, filename, preprocessor=None, reason=None):
+ if isinstance(reason, str):
+ reason = reason.strip()
+
+ self.filename = filename
+ self.preprocessor = preprocessor or None
+ self.reason = str(reason) if reason else None
+
+ msg = self._msg(**vars(self))
+ msg = f'({filename}) {msg}'
+ if preprocessor:
+ msg = f'[{preprocessor}] {msg}'
+ super().__init__(msg)
+
+
+class PreprocessorFailure(PreprocessorError):
+ """The preprocessor command failed."""
+
+ @classmethod
+ def _msg(cls, error, **ignored):
+ msg = 'preprocessor command failed'
+ if error:
+ msg = f'{msg} {error}'
+ return msg
+
+ def __init__(self, filename, argv, error=None, preprocessor=None):
+ exitcode = -1
+ if isinstance(error, tuple):
+ if len(error) == 2:
+ error, exitcode = error
+ else:
+ error = str(error)
+ if isinstance(error, str):
+ error = error.strip()
+
+ self.argv = _as_tuple(argv) or None
+ self.error = error if error else None
+ self.exitcode = exitcode
+
+ reason = str(self.error)
+ super().__init__(filename, preprocessor, reason)
+
+
+class ErrorDirectiveError(PreprocessorFailure):
+ """The file hit a #error directive."""
+
+ @classmethod
+ def _msg(cls, error, **ignored):
+ return f'#error directive hit ({error})'
+
+ def __init__(self, filename, argv, error, *args, **kwargs):
+ super().__init__(filename, argv, error, *args, **kwargs)
+
+
+class MissingDependenciesError(PreprocessorFailure):
+ """The preprocessor did not have access to all the target's dependencies."""
+
+ @classmethod
+ def _msg(cls, missing, **ignored):
+ msg = 'preprocessing failed due to missing dependencies'
+ if missing:
+ msg = f'{msg} ({", ".join(missing)})'
+ return msg
+
+ def __init__(self, filename, missing=None, *args, **kwargs):
+ self.missing = _as_tuple(missing) or None
+
+ super().__init__(filename, *args, **kwargs)
+
+
+class OSMismatchError(MissingDependenciesError):
+ """The target is not compatible with the host OS."""
+
+ @classmethod
+ def _msg(cls, expected, **ignored):
+ return f'OS is {OS} but expected {expected or "???"}'
+
+ def __init__(self, filename, expected=None, *args, **kwargs):
+ if isinstance(expected, str):
+ expected = expected.strip()
+
+ self.actual = OS
+ self.expected = expected if expected else None
+
+ super().__init__(filename, None, *args, **kwargs)
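As a rough illustration of how the _msg() hooks compose (every value below is made up; the OS in the message comes from the host's sys.platform):

    exc = OSMismatchError(
        'Modules/_scproxy.c',        # filename (hypothetical)
        'darwin',                    # expected OS
        ['gcc', '-E', '...'],        # argv (placeholder)
        ('fatal error: ...', 1),     # (first stderr line, exit code)
        'gcc',                       # preprocessor/tool
    )
    print(exc)
    # On a Linux host this prints something like:
    #   [gcc] (Modules/_scproxy.c) OS is linux but expected darwin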
diff --git a/Tools/c-analyzer/c_parser/preprocessor/gcc.py b/Tools/c-analyzer/c_parser/preprocessor/gcc.py
new file mode 100644
index 0000000..bb404a4
--- /dev/null
+++ b/Tools/c-analyzer/c_parser/preprocessor/gcc.py
@@ -0,0 +1,123 @@
+import os.path
+import re
+
+from . import common as _common
+
+
+TOOL = 'gcc'
+
+# https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
+LINE_MARKER_RE = re.compile(r'^# (\d+) "([^"]+)"(?: [1234])*$')
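+# A marker line in gcc's output typically looks like this (and would match,
+# giving lno=1 and origfile="./Include/object.h"):
+#   # 1 "./Include/object.h" 1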
+PREPROC_DIRECTIVE_RE = re.compile(r'^\s*#\s*(\w+)\b.*')
+COMPILER_DIRECTIVE_RE = re.compile(r'''
+ ^
+ (.*?) # <before>
+ (__\w+__) # <directive>
+ \s*
+ [(] [(]
+ (
+ [^()]*
+ (?:
+ [(]
+ [^()]*
+ [)]
+ [^()]*
+ )*
+ ) # <args>
+ ( [)] [)] )? # <closed>
+''', re.VERBOSE)
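+# For example, given '__attribute__((visibility("default"))) extern int x;',
+# <directive> is '__attribute__', <args> is 'visibility("default")', and
+# <closed> is '))'.  _strip_directives() below relies on <closed> to tell
+# whether the directive continues on a following line.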
+
+POST_ARGS = (
+ '-pthread',
+ '-std=c99',
+ #'-g',
+ #'-Og',
+ #'-Wno-unused-result',
+ #'-Wsign-compare',
+ #'-Wall',
+ #'-Wextra',
+ '-E',
+)
+
+
+def preprocess(filename, incldirs=None, macros=None, samefiles=None):
+ text = _common.preprocess(
+ TOOL,
+ filename,
+ incldirs=incldirs,
+ macros=macros,
+ #preargs=PRE_ARGS,
+ postargs=POST_ARGS,
+ executable=['gcc'],
+ compiler='unix',
+ )
+ return _iter_lines(text, filename, samefiles)
+
+
+def _iter_lines(text, filename, samefiles, *, raw=False):
+ lines = iter(text.splitlines())
+
+ # Build the lines and filter out directives.
+ partial = 0 # depth
+ origfile = None
+ for line in lines:
+ m = LINE_MARKER_RE.match(line)
+ if m:
+ lno, origfile = m.groups()
+ lno = int(lno)
+ elif _filter_orig_file(origfile, filename, samefiles):
+ if (m := PREPROC_DIRECTIVE_RE.match(line)):
+ name, = m.groups()
+ if name != 'pragma':
+ raise Exception(line)
+ else:
+ if not raw:
+ line, partial = _strip_directives(line, partial=partial)
+ yield _common.SourceLine(
+ _common.FileInfo(filename, lno),
+ 'source',
+ line or '',
+ None,
+ )
+            lno += 1
+
+
+def _strip_directives(line, partial=0):
+ # We assume there are no string literals with parens in directive bodies.
+ while partial > 0:
+        if not (m := re.match(r'[^()]*([()])', line)):
+ return None, partial
+ delim, = m.groups()
+ partial += 1 if delim == '(' else -1 # opened/closed
+ line = line[m.end():]
+
+ line = re.sub(r'__extension__', '', line)
+
+ while (m := COMPILER_DIRECTIVE_RE.match(line)):
+ before, _, _, closed = m.groups()
+ if closed:
+ line = f'{before} {line[m.end():]}'
+ else:
+ after, partial = _strip_directives(line[m.end():], 2)
+ line = f'{before} {after or ""}'
+ if partial:
+ break
+
+ return line, partial
+
+
+def _filter_orig_file(origfile, current, samefiles):
+ if origfile == current:
+ return True
+ if origfile == '<stdin>':
+ return True
+ if os.path.isabs(origfile):
+ return False
+
+ for filename in samefiles or ():
+ if filename.endswith(os.path.sep):
+ filename += os.path.basename(current)
+ if origfile == filename:
+ return True
+
+ return False
diff --git a/Tools/c-analyzer/c_parser/preprocessor/pure.py b/Tools/c-analyzer/c_parser/preprocessor/pure.py
new file mode 100644
index 0000000..e971389
--- /dev/null
+++ b/Tools/c-analyzer/c_parser/preprocessor/pure.py
@@ -0,0 +1,23 @@
+from ..source import (
+ opened as _open_source,
+)
+from . import common as _common
+
+
+def preprocess(lines, filename=None):
+ if isinstance(lines, str):
+ with _open_source(lines, filename) as (lines, filename):
+ yield from preprocess(lines, filename)
+ return
+
+ # XXX actually preprocess...
+ for lno, line in enumerate(lines, 1):
+ kind = 'source'
+ data = line
+ conditions = None
+ yield _common.SourceLine(
+ _common.FileInfo(filename, lno),
+ kind,
+ data,
+ conditions,
+ )