From e4c431ecf50def40eb93c3969c1e4eeaf7bf32f1 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 18 Oct 2019 19:00:04 -0700 Subject: bpo-36876: Re-organize the c-analyzer tool code. (gh-16841) This is partly a cleanup of the code. It also is preparation for getting the variables from the source (cross-platform) rather than from the symbols. The change only touches the tool (and its tests). --- Lib/test/test_check_c_globals.py | 2 +- .../test_c_analyzer_common/__init__.py | 6 - .../test_c_analyzer_common/test_files.py | 470 ------ .../test_c_analyzer_common/test_info.py | 194 --- .../test_c_analyzer_common/test_known.py | 68 - .../test_c_analyzer/test_c_globals/__init__.py | 6 - .../test_c_globals/test___main__.py | 296 ---- .../test_c_analyzer/test_c_globals/test_find.py | 335 ----- .../test_c_globals/test_functional.py | 34 - .../test_c_analyzer/test_c_globals/test_show.py | 52 - .../test_c_globals/test_supported.py | 96 -- .../test_c_analyzer/test_c_parser/__init__.py | 6 - .../test_c_parser/test_declarations.py | 795 ---------- .../test_c_analyzer/test_c_parser/test_info.py | 244 --- .../test_c_parser/test_preprocessor.py | 1562 -------------------- .../test_c_analyzer/test_c_symbols/__init__.py | 6 - .../test_c_analyzer/test_c_symbols/test_info.py | 192 --- .../test_c_analyzer/test_common/__init__.py | 6 + .../test_c_analyzer/test_common/test_files.py | 470 ++++++ .../test_c_analyzer/test_common/test_info.py | 197 +++ .../test_c_analyzer/test_common/test_show.py | 54 + .../test_c_analyzer/test_cpython/__init__.py | 6 + .../test_c_analyzer/test_cpython/test___main__.py | 296 ++++ .../test_cpython/test_functional.py | 34 + .../test_c_analyzer/test_cpython/test_supported.py | 98 ++ .../test_c_analyzer/test_parser/__init__.py | 6 + .../test_parser/test_declarations.py | 795 ++++++++++ .../test_parser/test_preprocessor.py | 1562 ++++++++++++++++++++ .../test_c_analyzer/test_symbols/__init__.py | 6 + .../test_c_analyzer/test_symbols/test_info.py | 192 +++ 
.../test_c_analyzer/test_variables/__init__.py | 6 + .../test_c_analyzer/test_variables/test_find.py | 124 ++ .../test_c_analyzer/test_variables/test_info.py | 244 +++ .../test_c_analyzer/test_variables/test_known.py | 139 ++ Tools/c-analyzer/c-globals.py | 2 +- Tools/c-analyzer/c_analyzer/__init__.py | 0 Tools/c-analyzer/c_analyzer/common/__init__.py | 0 Tools/c-analyzer/c_analyzer/common/files.py | 120 ++ Tools/c-analyzer/c_analyzer/common/info.py | 138 ++ Tools/c-analyzer/c_analyzer/common/show.py | 11 + Tools/c-analyzer/c_analyzer/common/util.py | 243 +++ Tools/c-analyzer/c_analyzer/parser/__init__.py | 0 Tools/c-analyzer/c_analyzer/parser/declarations.py | 339 +++++ Tools/c-analyzer/c_analyzer/parser/find.py | 107 ++ Tools/c-analyzer/c_analyzer/parser/naive.py | 179 +++ Tools/c-analyzer/c_analyzer/parser/preprocessor.py | 511 +++++++ Tools/c-analyzer/c_analyzer/parser/source.py | 34 + Tools/c-analyzer/c_analyzer/symbols/__init__.py | 0 Tools/c-analyzer/c_analyzer/symbols/_nm.py | 117 ++ Tools/c-analyzer/c_analyzer/symbols/find.py | 175 +++ Tools/c-analyzer/c_analyzer/symbols/info.py | 51 + Tools/c-analyzer/c_analyzer/variables/__init__.py | 0 Tools/c-analyzer/c_analyzer/variables/find.py | 75 + Tools/c-analyzer/c_analyzer/variables/info.py | 93 ++ Tools/c-analyzer/c_analyzer/variables/known.py | 91 ++ Tools/c-analyzer/c_analyzer_common/__init__.py | 19 - Tools/c-analyzer/c_analyzer_common/_generate.py | 328 ---- Tools/c-analyzer/c_analyzer_common/files.py | 138 -- Tools/c-analyzer/c_analyzer_common/info.py | 69 - Tools/c-analyzer/c_analyzer_common/known.py | 74 - Tools/c-analyzer/c_analyzer_common/util.py | 243 --- Tools/c-analyzer/c_globals/README | 72 - Tools/c-analyzer/c_globals/__init__.py | 0 Tools/c-analyzer/c_globals/__main__.py | 209 --- Tools/c-analyzer/c_globals/find.py | 95 -- Tools/c-analyzer/c_globals/show.py | 16 - Tools/c-analyzer/c_globals/supported.py | 393 ----- Tools/c-analyzer/c_parser/__init__.py | 0 
Tools/c-analyzer/c_parser/declarations.py | 295 ---- Tools/c-analyzer/c_parser/info.py | 106 -- Tools/c-analyzer/c_parser/naive.py | 180 --- Tools/c-analyzer/c_parser/preprocessor.py | 512 ------- Tools/c-analyzer/c_parser/source.py | 34 - Tools/c-analyzer/c_symbols/__init__.py | 0 Tools/c-analyzer/c_symbols/binary.py | 157 -- Tools/c-analyzer/c_symbols/info.py | 51 - Tools/c-analyzer/c_symbols/resolve.py | 147 -- Tools/c-analyzer/c_symbols/source.py | 58 - Tools/c-analyzer/cpython/README | 72 + Tools/c-analyzer/cpython/__init__.py | 29 + Tools/c-analyzer/cpython/__main__.py | 212 +++ Tools/c-analyzer/cpython/_generate.py | 329 +++++ Tools/c-analyzer/cpython/files.py | 29 + Tools/c-analyzer/cpython/find.py | 101 ++ Tools/c-analyzer/cpython/known.py | 66 + Tools/c-analyzer/cpython/supported.py | 398 +++++ 86 files changed, 7757 insertions(+), 7560 deletions(-) delete mode 100644 Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/__init__.py delete mode 100644 Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_files.py delete mode 100644 Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_info.py delete mode 100644 Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_known.py delete mode 100644 Lib/test/test_tools/test_c_analyzer/test_c_globals/__init__.py delete mode 100644 Lib/test/test_tools/test_c_analyzer/test_c_globals/test___main__.py delete mode 100644 Lib/test/test_tools/test_c_analyzer/test_c_globals/test_find.py delete mode 100644 Lib/test/test_tools/test_c_analyzer/test_c_globals/test_functional.py delete mode 100644 Lib/test/test_tools/test_c_analyzer/test_c_globals/test_show.py delete mode 100644 Lib/test/test_tools/test_c_analyzer/test_c_globals/test_supported.py delete mode 100644 Lib/test/test_tools/test_c_analyzer/test_c_parser/__init__.py delete mode 100644 Lib/test/test_tools/test_c_analyzer/test_c_parser/test_declarations.py delete mode 100644 
Lib/test/test_tools/test_c_analyzer/test_c_parser/test_info.py delete mode 100644 Lib/test/test_tools/test_c_analyzer/test_c_parser/test_preprocessor.py delete mode 100644 Lib/test/test_tools/test_c_analyzer/test_c_symbols/__init__.py delete mode 100644 Lib/test/test_tools/test_c_analyzer/test_c_symbols/test_info.py create mode 100644 Lib/test/test_tools/test_c_analyzer/test_common/__init__.py create mode 100644 Lib/test/test_tools/test_c_analyzer/test_common/test_files.py create mode 100644 Lib/test/test_tools/test_c_analyzer/test_common/test_info.py create mode 100644 Lib/test/test_tools/test_c_analyzer/test_common/test_show.py create mode 100644 Lib/test/test_tools/test_c_analyzer/test_cpython/__init__.py create mode 100644 Lib/test/test_tools/test_c_analyzer/test_cpython/test___main__.py create mode 100644 Lib/test/test_tools/test_c_analyzer/test_cpython/test_functional.py create mode 100644 Lib/test/test_tools/test_c_analyzer/test_cpython/test_supported.py create mode 100644 Lib/test/test_tools/test_c_analyzer/test_parser/__init__.py create mode 100644 Lib/test/test_tools/test_c_analyzer/test_parser/test_declarations.py create mode 100644 Lib/test/test_tools/test_c_analyzer/test_parser/test_preprocessor.py create mode 100644 Lib/test/test_tools/test_c_analyzer/test_symbols/__init__.py create mode 100644 Lib/test/test_tools/test_c_analyzer/test_symbols/test_info.py create mode 100644 Lib/test/test_tools/test_c_analyzer/test_variables/__init__.py create mode 100644 Lib/test/test_tools/test_c_analyzer/test_variables/test_find.py create mode 100644 Lib/test/test_tools/test_c_analyzer/test_variables/test_info.py create mode 100644 Lib/test/test_tools/test_c_analyzer/test_variables/test_known.py create mode 100644 Tools/c-analyzer/c_analyzer/__init__.py create mode 100644 Tools/c-analyzer/c_analyzer/common/__init__.py create mode 100644 Tools/c-analyzer/c_analyzer/common/files.py create mode 100644 Tools/c-analyzer/c_analyzer/common/info.py create mode 100644 
Tools/c-analyzer/c_analyzer/common/show.py create mode 100644 Tools/c-analyzer/c_analyzer/common/util.py create mode 100644 Tools/c-analyzer/c_analyzer/parser/__init__.py create mode 100644 Tools/c-analyzer/c_analyzer/parser/declarations.py create mode 100644 Tools/c-analyzer/c_analyzer/parser/find.py create mode 100644 Tools/c-analyzer/c_analyzer/parser/naive.py create mode 100644 Tools/c-analyzer/c_analyzer/parser/preprocessor.py create mode 100644 Tools/c-analyzer/c_analyzer/parser/source.py create mode 100644 Tools/c-analyzer/c_analyzer/symbols/__init__.py create mode 100644 Tools/c-analyzer/c_analyzer/symbols/_nm.py create mode 100644 Tools/c-analyzer/c_analyzer/symbols/find.py create mode 100644 Tools/c-analyzer/c_analyzer/symbols/info.py create mode 100644 Tools/c-analyzer/c_analyzer/variables/__init__.py create mode 100644 Tools/c-analyzer/c_analyzer/variables/find.py create mode 100644 Tools/c-analyzer/c_analyzer/variables/info.py create mode 100644 Tools/c-analyzer/c_analyzer/variables/known.py delete mode 100644 Tools/c-analyzer/c_analyzer_common/__init__.py delete mode 100644 Tools/c-analyzer/c_analyzer_common/_generate.py delete mode 100644 Tools/c-analyzer/c_analyzer_common/files.py delete mode 100644 Tools/c-analyzer/c_analyzer_common/info.py delete mode 100644 Tools/c-analyzer/c_analyzer_common/known.py delete mode 100644 Tools/c-analyzer/c_analyzer_common/util.py delete mode 100644 Tools/c-analyzer/c_globals/README delete mode 100644 Tools/c-analyzer/c_globals/__init__.py delete mode 100644 Tools/c-analyzer/c_globals/__main__.py delete mode 100644 Tools/c-analyzer/c_globals/find.py delete mode 100644 Tools/c-analyzer/c_globals/show.py delete mode 100644 Tools/c-analyzer/c_globals/supported.py delete mode 100644 Tools/c-analyzer/c_parser/__init__.py delete mode 100644 Tools/c-analyzer/c_parser/declarations.py delete mode 100644 Tools/c-analyzer/c_parser/info.py delete mode 100644 Tools/c-analyzer/c_parser/naive.py delete mode 100644 
Tools/c-analyzer/c_parser/preprocessor.py delete mode 100644 Tools/c-analyzer/c_parser/source.py delete mode 100644 Tools/c-analyzer/c_symbols/__init__.py delete mode 100644 Tools/c-analyzer/c_symbols/binary.py delete mode 100644 Tools/c-analyzer/c_symbols/info.py delete mode 100644 Tools/c-analyzer/c_symbols/resolve.py delete mode 100644 Tools/c-analyzer/c_symbols/source.py create mode 100644 Tools/c-analyzer/cpython/README create mode 100644 Tools/c-analyzer/cpython/__init__.py create mode 100644 Tools/c-analyzer/cpython/__main__.py create mode 100644 Tools/c-analyzer/cpython/_generate.py create mode 100644 Tools/c-analyzer/cpython/files.py create mode 100644 Tools/c-analyzer/cpython/find.py create mode 100644 Tools/c-analyzer/cpython/known.py create mode 100644 Tools/c-analyzer/cpython/supported.py diff --git a/Lib/test/test_check_c_globals.py b/Lib/test/test_check_c_globals.py index a3925f0..030debc 100644 --- a/Lib/test/test_check_c_globals.py +++ b/Lib/test/test_check_c_globals.py @@ -3,7 +3,7 @@ import test.test_tools test.test_tools.skip_if_missing('c-analyzer') with test.test_tools.imports_under_tool('c-analyzer'): - from c_globals.__main__ import main + from cpython.__main__ import main class ActualChecks(unittest.TestCase): diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/__init__.py b/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/__init__.py deleted file mode 100644 index bc502ef..0000000 --- a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -import os.path -from test.support import load_package_tests - - -def load_tests(*args): - return load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_files.py b/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_files.py deleted file mode 100644 index 6d14aea..0000000 --- 
a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_files.py +++ /dev/null @@ -1,470 +0,0 @@ -import os.path -import unittest - -from .. import tool_imports_for_tests -with tool_imports_for_tests(): - from c_analyzer_common.files import ( - iter_files, _walk_tree, glob_tree, - ) - - -def fixpath(filename): - return filename.replace('/', os.path.sep) - - -class IterFilesTests(unittest.TestCase): - - maxDiff = None - - _return_walk = None - - @property - def calls(self): - try: - return self._calls - except AttributeError: - self._calls = [] - return self._calls - - def set_files(self, *filesperroot): - roots = [] - result = [] - for root, files in filesperroot: - root = fixpath(root) - roots.append(root) - result.append([os.path.join(root, fixpath(f)) - for f in files]) - self._return_walk = result - return roots - - def _walk(self, root, *, suffix=None, walk=None): - self.calls.append(('_walk', (root, suffix, walk))) - return iter(self._return_walk.pop(0)) - - def _glob(self, root, *, suffix=None): - self.calls.append(('_glob', (root, suffix))) - return iter(self._return_walk.pop(0)) - - def test_typical(self): - dirnames = self.set_files( - ('spam', ['file1.c', 'file2.c']), - ('eggs', ['ham/file3.h']), - ) - suffixes = ('.c', '.h') - - files = list(iter_files(dirnames, suffixes, - _glob=self._glob, - _walk=self._walk)) - - self.assertEqual(files, [ - fixpath('spam/file1.c'), - fixpath('spam/file2.c'), - fixpath('eggs/ham/file3.h'), - ]) - self.assertEqual(self.calls, [ - ('_walk', ('spam', None, _walk_tree)), - ('_walk', ('eggs', None, _walk_tree)), - ]) - - def test_single_root(self): - self._return_walk = [ - [fixpath('spam/file1.c'), fixpath('spam/file2.c')], - ] - - files = list(iter_files('spam', '.c', - _glob=self._glob, - _walk=self._walk)) - - self.assertEqual(files, [ - fixpath('spam/file1.c'), - fixpath('spam/file2.c'), - ]) - self.assertEqual(self.calls, [ - ('_walk', ('spam', '.c', _walk_tree)), - ]) - - def test_one_root(self): - 
self._return_walk = [ - [fixpath('spam/file1.c'), fixpath('spam/file2.c')], - ] - - files = list(iter_files(['spam'], '.c', - _glob=self._glob, - _walk=self._walk)) - - self.assertEqual(files, [ - fixpath('spam/file1.c'), - fixpath('spam/file2.c'), - ]) - self.assertEqual(self.calls, [ - ('_walk', ('spam', '.c', _walk_tree)), - ]) - - def test_multiple_roots(self): - dirnames = self.set_files( - ('spam', ['file1.c', 'file2.c']), - ('eggs', ['ham/file3.c']), - ) - - files = list(iter_files(dirnames, '.c', - _glob=self._glob, - _walk=self._walk)) - - self.assertEqual(files, [ - fixpath('spam/file1.c'), - fixpath('spam/file2.c'), - fixpath('eggs/ham/file3.c'), - ]) - self.assertEqual(self.calls, [ - ('_walk', ('spam', '.c', _walk_tree)), - ('_walk', ('eggs', '.c', _walk_tree)), - ]) - - def test_no_roots(self): - files = list(iter_files([], '.c', - _glob=self._glob, - _walk=self._walk)) - - self.assertEqual(files, []) - self.assertEqual(self.calls, []) - - def test_single_suffix(self): - self._return_walk = [ - [fixpath('spam/file1.c'), - fixpath('spam/eggs/file3.c'), - ], - ] - - files = list(iter_files('spam', '.c', - _glob=self._glob, - _walk=self._walk)) - - self.assertEqual(files, [ - fixpath('spam/file1.c'), - fixpath('spam/eggs/file3.c'), - ]) - self.assertEqual(self.calls, [ - ('_walk', ('spam', '.c', _walk_tree)), - ]) - - def test_one_suffix(self): - self._return_walk = [ - [fixpath('spam/file1.c'), - fixpath('spam/file1.h'), - fixpath('spam/file1.o'), - fixpath('spam/eggs/file3.c'), - ], - ] - - files = list(iter_files('spam', ['.c'], - _glob=self._glob, - _walk=self._walk)) - - self.assertEqual(files, [ - fixpath('spam/file1.c'), - fixpath('spam/eggs/file3.c'), - ]) - self.assertEqual(self.calls, [ - ('_walk', ('spam', None, _walk_tree)), - ]) - - def test_multiple_suffixes(self): - self._return_walk = [ - [fixpath('spam/file1.c'), - fixpath('spam/file1.h'), - fixpath('spam/file1.o'), - fixpath('spam/eggs/file3.c'), - ], - ] - - files = 
list(iter_files('spam', ('.c', '.h'), - _glob=self._glob, - _walk=self._walk)) - - self.assertEqual(files, [ - fixpath('spam/file1.c'), - fixpath('spam/file1.h'), - fixpath('spam/eggs/file3.c'), - ]) - self.assertEqual(self.calls, [ - ('_walk', ('spam', None, _walk_tree)), - ]) - - def test_no_suffix(self): - expected = [fixpath('spam/file1.c'), - fixpath('spam/file1.h'), - fixpath('spam/file1.o'), - fixpath('spam/eggs/file3.c'), - ] - for suffix in (None, '', ()): - with self.subTest(suffix): - self.calls.clear() - self._return_walk = [list(expected)] - - files = list(iter_files('spam', suffix, - _glob=self._glob, - _walk=self._walk)) - - self.assertEqual(files, expected) - self.assertEqual(self.calls, [ - ('_walk', ('spam', suffix, _walk_tree)), - ]) - - def test_relparent(self): - dirnames = self.set_files( - ('/x/y/z/spam', ['file1.c', 'file2.c']), - ('/x/y/z/eggs', ['ham/file3.c']), - ) - - files = list(iter_files(dirnames, '.c', fixpath('/x/y'), - _glob=self._glob, - _walk=self._walk)) - - self.assertEqual(files, [ - fixpath('z/spam/file1.c'), - fixpath('z/spam/file2.c'), - fixpath('z/eggs/ham/file3.c'), - ]) - self.assertEqual(self.calls, [ - ('_walk', (fixpath('/x/y/z/spam'), '.c', _walk_tree)), - ('_walk', (fixpath('/x/y/z/eggs'), '.c', _walk_tree)), - ]) - - def test_glob(self): - dirnames = self.set_files( - ('spam', ['file1.c', 'file2.c']), - ('eggs', ['ham/file3.c']), - ) - - files = list(iter_files(dirnames, '.c', - get_files=glob_tree, - _walk=self._walk, - _glob=self._glob)) - - self.assertEqual(files, [ - fixpath('spam/file1.c'), - fixpath('spam/file2.c'), - fixpath('eggs/ham/file3.c'), - ]) - self.assertEqual(self.calls, [ - ('_glob', ('spam', '.c')), - ('_glob', ('eggs', '.c')), - ]) - - - def test_alt_walk_func(self): - dirnames = self.set_files( - ('spam', ['file1.c', 'file2.c']), - ('eggs', ['ham/file3.c']), - ) - def get_files(root): - return None - - files = list(iter_files(dirnames, '.c', - get_files=get_files, - _walk=self._walk, - 
_glob=self._glob)) - - self.assertEqual(files, [ - fixpath('spam/file1.c'), - fixpath('spam/file2.c'), - fixpath('eggs/ham/file3.c'), - ]) - self.assertEqual(self.calls, [ - ('_walk', ('spam', '.c', get_files)), - ('_walk', ('eggs', '.c', get_files)), - ]) - - - - - - -# def test_no_dirnames(self): -# dirnames = [] -# filter_by_name = None -# -# files = list(iter_files(dirnames, filter_by_name, -# _walk=self._walk)) -# -# self.assertEqual(files, []) -# self.assertEqual(self.calls, []) -# -# def test_no_filter(self): -# self._return_walk = [ -# [('spam', (), ('file1', 'file2.c', 'file3.h', 'file4.o')), -# ], -# ] -# dirnames = [ -# 'spam', -# ] -# filter_by_name = None -# -# files = list(iter_files(dirnames, filter_by_name, -# _walk=self._walk)) -# -# self.assertEqual(files, [ -# fixpath('spam/file1'), -# fixpath('spam/file2.c'), -# fixpath('spam/file3.h'), -# fixpath('spam/file4.o'), -# ]) -# self.assertEqual(self.calls, [ -# ('_walk', ('spam',)), -# ]) -# -# def test_no_files(self): -# self._return_walk = [ -# [('spam', (), ()), -# ], -# [(fixpath('eggs/ham'), (), ()), -# ], -# ] -# dirnames = [ -# 'spam', -# fixpath('eggs/ham'), -# ] -# filter_by_name = None -# -# files = list(iter_files(dirnames, filter_by_name, -# _walk=self._walk)) -# -# self.assertEqual(files, []) -# self.assertEqual(self.calls, [ -# ('_walk', ('spam',)), -# ('_walk', (fixpath('eggs/ham'),)), -# ]) -# -# def test_tree(self): -# self._return_walk = [ -# [('spam', ('sub1', 'sub2', 'sub3'), ('file1',)), -# (fixpath('spam/sub1'), ('sub1sub1',), ('file2', 'file3')), -# (fixpath('spam/sub1/sub1sub1'), (), ('file4',)), -# (fixpath('spam/sub2'), (), ()), -# (fixpath('spam/sub3'), (), ('file5',)), -# ], -# [(fixpath('eggs/ham'), (), ('file6',)), -# ], -# ] -# dirnames = [ -# 'spam', -# fixpath('eggs/ham'), -# ] -# filter_by_name = None -# -# files = list(iter_files(dirnames, filter_by_name, -# _walk=self._walk)) -# -# self.assertEqual(files, [ -# fixpath('spam/file1'), -# fixpath('spam/sub1/file2'), 
-# fixpath('spam/sub1/file3'), -# fixpath('spam/sub1/sub1sub1/file4'), -# fixpath('spam/sub3/file5'), -# fixpath('eggs/ham/file6'), -# ]) -# self.assertEqual(self.calls, [ -# ('_walk', ('spam',)), -# ('_walk', (fixpath('eggs/ham'),)), -# ]) -# -# def test_filter_suffixes(self): -# self._return_walk = [ -# [('spam', (), ('file1', 'file2.c', 'file3.h', 'file4.o')), -# ], -# ] -# dirnames = [ -# 'spam', -# ] -# filter_by_name = ('.c', '.h') -# -# files = list(iter_files(dirnames, filter_by_name, -# _walk=self._walk)) -# -# self.assertEqual(files, [ -# fixpath('spam/file2.c'), -# fixpath('spam/file3.h'), -# ]) -# self.assertEqual(self.calls, [ -# ('_walk', ('spam',)), -# ]) -# -# def test_some_filtered(self): -# self._return_walk = [ -# [('spam', (), ('file1', 'file2', 'file3', 'file4')), -# ], -# ] -# dirnames = [ -# 'spam', -# ] -# def filter_by_name(filename, results=[False, True, False, True]): -# self.calls.append(('filter_by_name', (filename,))) -# return results.pop(0) -# -# files = list(iter_files(dirnames, filter_by_name, -# _walk=self._walk)) -# -# self.assertEqual(files, [ -# fixpath('spam/file2'), -# fixpath('spam/file4'), -# ]) -# self.assertEqual(self.calls, [ -# ('_walk', ('spam',)), -# ('filter_by_name', ('file1',)), -# ('filter_by_name', ('file2',)), -# ('filter_by_name', ('file3',)), -# ('filter_by_name', ('file4',)), -# ]) -# -# def test_none_filtered(self): -# self._return_walk = [ -# [('spam', (), ('file1', 'file2', 'file3', 'file4')), -# ], -# ] -# dirnames = [ -# 'spam', -# ] -# def filter_by_name(filename, results=[True, True, True, True]): -# self.calls.append(('filter_by_name', (filename,))) -# return results.pop(0) -# -# files = list(iter_files(dirnames, filter_by_name, -# _walk=self._walk)) -# -# self.assertEqual(files, [ -# fixpath('spam/file1'), -# fixpath('spam/file2'), -# fixpath('spam/file3'), -# fixpath('spam/file4'), -# ]) -# self.assertEqual(self.calls, [ -# ('_walk', ('spam',)), -# ('filter_by_name', ('file1',)), -# 
('filter_by_name', ('file2',)), -# ('filter_by_name', ('file3',)), -# ('filter_by_name', ('file4',)), -# ]) -# -# def test_all_filtered(self): -# self._return_walk = [ -# [('spam', (), ('file1', 'file2', 'file3', 'file4')), -# ], -# ] -# dirnames = [ -# 'spam', -# ] -# def filter_by_name(filename, results=[False, False, False, False]): -# self.calls.append(('filter_by_name', (filename,))) -# return results.pop(0) -# -# files = list(iter_files(dirnames, filter_by_name, -# _walk=self._walk)) -# -# self.assertEqual(files, []) -# self.assertEqual(self.calls, [ -# ('_walk', ('spam',)), -# ('filter_by_name', ('file1',)), -# ('filter_by_name', ('file2',)), -# ('filter_by_name', ('file3',)), -# ('filter_by_name', ('file4',)), -# ]) diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_info.py b/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_info.py deleted file mode 100644 index 2d38671..0000000 --- a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_info.py +++ /dev/null @@ -1,194 +0,0 @@ -import string -import unittest - -from ..util import PseudoStr, StrProxy, Object -from .. 
import tool_imports_for_tests -with tool_imports_for_tests(): - from c_analyzer_common.info import ID - - -class IDTests(unittest.TestCase): - - VALID_ARGS = ( - 'x/y/z/spam.c', - 'func', - 'eggs', - ) - VALID_KWARGS = dict(zip(ID._fields, VALID_ARGS)) - VALID_EXPECTED = VALID_ARGS - - def test_from_raw(self): - tests = [ - ('', None), - (None, None), - ('spam', (None, None, 'spam')), - (('spam',), (None, None, 'spam')), - (('x/y/z/spam.c', 'spam'), ('x/y/z/spam.c', None, 'spam')), - (self.VALID_ARGS, self.VALID_EXPECTED), - (self.VALID_KWARGS, self.VALID_EXPECTED), - ] - for raw, expected in tests: - with self.subTest(raw): - id = ID.from_raw(raw) - - self.assertEqual(id, expected) - - def test_minimal(self): - id = ID( - filename=None, - funcname=None, - name='eggs', - ) - - self.assertEqual(id, ( - None, - None, - 'eggs', - )) - - def test_init_typical_global(self): - id = ID( - filename='x/y/z/spam.c', - funcname=None, - name='eggs', - ) - - self.assertEqual(id, ( - 'x/y/z/spam.c', - None, - 'eggs', - )) - - def test_init_typical_local(self): - id = ID( - filename='x/y/z/spam.c', - funcname='func', - name='eggs', - ) - - self.assertEqual(id, ( - 'x/y/z/spam.c', - 'func', - 'eggs', - )) - - def test_init_all_missing(self): - for value in ('', None): - with self.subTest(repr(value)): - id = ID( - filename=value, - funcname=value, - name=value, - ) - - self.assertEqual(id, ( - None, - None, - None, - )) - - def test_init_all_coerced(self): - tests = [ - ('str subclass', - dict( - filename=PseudoStr('x/y/z/spam.c'), - funcname=PseudoStr('func'), - name=PseudoStr('eggs'), - ), - ('x/y/z/spam.c', - 'func', - 'eggs', - )), - ('non-str', - dict( - filename=StrProxy('x/y/z/spam.c'), - funcname=Object(), - name=('a', 'b', 'c'), - ), - ('x/y/z/spam.c', - '', - "('a', 'b', 'c')", - )), - ] - for summary, kwargs, expected in tests: - with self.subTest(summary): - id = ID(**kwargs) - - for field in ID._fields: - value = getattr(id, field) - self.assertIs(type(value), str) - 
self.assertEqual(tuple(id), expected) - - def test_iterable(self): - id = ID(**self.VALID_KWARGS) - - filename, funcname, name = id - - values = (filename, funcname, name) - for value, expected in zip(values, self.VALID_EXPECTED): - self.assertEqual(value, expected) - - def test_fields(self): - id = ID('a', 'b', 'z') - - self.assertEqual(id.filename, 'a') - self.assertEqual(id.funcname, 'b') - self.assertEqual(id.name, 'z') - - def test_validate_typical(self): - id = ID( - filename='x/y/z/spam.c', - funcname='func', - name='eggs', - ) - - id.validate() # This does not fail. - - def test_validate_missing_field(self): - for field in ID._fields: - with self.subTest(field): - id = ID(**self.VALID_KWARGS) - id = id._replace(**{field: None}) - - if field == 'funcname': - id.validate() # The field can be missing (not set). - id = id._replace(filename=None) - id.validate() # Both fields can be missing (not set). - continue - - with self.assertRaises(TypeError): - id.validate() - - def test_validate_bad_field(self): - badch = tuple(c for c in string.punctuation + string.digits) - notnames = ( - '1a', - 'a.b', - 'a-b', - '&a', - 'a++', - ) + badch - tests = [ - ('filename', ()), # Any non-empty str is okay. - ('funcname', notnames), - ('name', notnames), - ] - seen = set() - for field, invalid in tests: - for value in invalid: - seen.add(value) - with self.subTest(f'{field}={value!r}'): - id = ID(**self.VALID_KWARGS) - id = id._replace(**{field: value}) - - with self.assertRaises(ValueError): - id.validate() - - for field, invalid in tests: - valid = seen - set(invalid) - for value in valid: - with self.subTest(f'{field}={value!r}'): - id = ID(**self.VALID_KWARGS) - id = id._replace(**{field: value}) - - id.validate() # This does not fail. 
diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_known.py b/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_known.py deleted file mode 100644 index 215023d..0000000 --- a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_known.py +++ /dev/null @@ -1,68 +0,0 @@ -import re -import textwrap -import unittest - -from .. import tool_imports_for_tests -with tool_imports_for_tests(): - from c_parser.info import Variable - from c_analyzer_common.info import ID - from c_analyzer_common.known import from_file - - -class FromFileTests(unittest.TestCase): - - maxDiff = None - - _return_read_tsv = () - - @property - def calls(self): - try: - return self._calls - except AttributeError: - self._calls = [] - return self._calls - - def _read_tsv(self, *args): - self.calls.append(('_read_tsv', args)) - return self._return_read_tsv - - def test_typical(self): - lines = textwrap.dedent(''' - filename funcname name kind declaration - file1.c - var1 variable static int - file1.c func1 local1 variable static int - file1.c - var2 variable int - file1.c func2 local2 variable char * - file2.c - var1 variable char * - ''').strip().splitlines() - lines = [re.sub(r'\s+', '\t', line, 4) for line in lines] - self._return_read_tsv = [tuple(v.strip() for v in line.split('\t')) - for line in lines[1:]] - - known = from_file('spam.c', _read_tsv=self._read_tsv) - - self.assertEqual(known, { - 'variables': {v.id: v for v in [ - Variable.from_parts('file1.c', '', 'var1', 'static int'), - Variable.from_parts('file1.c', 'func1', 'local1', 'static int'), - Variable.from_parts('file1.c', '', 'var2', 'int'), - Variable.from_parts('file1.c', 'func2', 'local2', 'char *'), - Variable.from_parts('file2.c', '', 'var1', 'char *'), - ]}, - }) - self.assertEqual(self.calls, [ - ('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\tdeclaration')), - ]) - - def test_empty(self): - self._return_read_tsv = [] - - known = from_file('spam.c', 
_read_tsv=self._read_tsv) - - self.assertEqual(known, { - 'variables': {}, - }) - self.assertEqual(self.calls, [ - ('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\tdeclaration')), - ]) diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_globals/__init__.py b/Lib/test/test_tools/test_c_analyzer/test_c_globals/__init__.py deleted file mode 100644 index bc502ef..0000000 --- a/Lib/test/test_tools/test_c_analyzer/test_c_globals/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -import os.path -from test.support import load_package_tests - - -def load_tests(*args): - return load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test___main__.py b/Lib/test/test_tools/test_c_analyzer/test_c_globals/test___main__.py deleted file mode 100644 index 5f52c58..0000000 --- a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test___main__.py +++ /dev/null @@ -1,296 +0,0 @@ -import sys -import unittest - -from .. import tool_imports_for_tests -with tool_imports_for_tests(): - from c_analyzer_common import SOURCE_DIRS - from c_analyzer_common.known import DATA_FILE as KNOWN_FILE - from c_parser import info - import c_globals as cg - from c_globals.supported import IGNORED_FILE - from c_globals.__main__ import cmd_check, cmd_show, parse_args, main - - -TYPICAL = [ - (info.Variable.from_parts('src1/spam.c', None, 'var1', 'const char *'), - True, - ), - (info.Variable.from_parts('src1/spam.c', 'ham', 'initialized', 'int'), - True, - ), - (info.Variable.from_parts('src1/spam.c', None, 'var2', 'PyObject *'), - False, - ), - (info.Variable.from_parts('src1/eggs.c', 'tofu', 'ready', 'int'), - True, - ), - (info.Variable.from_parts('src1/spam.c', None, 'freelist', '(PyTupleObject *)[10]'), - False, - ), - (info.Variable.from_parts('src1/sub/ham.c', None, 'var1', 'const char const *'), - True, - ), - (info.Variable.from_parts('src2/jam.c', None, 'var1', 'int'), - True, - ), - (info.Variable.from_parts('src2/jam.c', None, 
'var2', 'MyObject *'), - False, - ), - (info.Variable.from_parts('Include/spam.h', None, 'data', 'const int'), - True, - ), - ] - - -class CMDBase(unittest.TestCase): - - maxDiff = None - - _return_find = () - - @property - def calls(self): - try: - return self._calls - except AttributeError: - self._calls = [] - return self._calls - - def _find(self, *args): - self.calls.append(('_find', args)) - return self._return_find - - def _show(self, *args): - self.calls.append(('_show', args)) - - def _print(self, *args): - self.calls.append(('_print', args)) - - -class CheckTests(CMDBase): - - def test_defaults(self): - self._return_find = [] - - cmd_check('check', - _find=self._find, - _show=self._show, - _print=self._print, - ) - - self.assertEqual(self.calls[0], ( - '_find', ( - SOURCE_DIRS, - KNOWN_FILE, - IGNORED_FILE, - ), - )) - - def test_all_supported(self): - self._return_find = [(v, s) for v, s in TYPICAL if s] - dirs = ['src1', 'src2', 'Include'] - - cmd_check('check', - dirs, - ignored='ignored.tsv', - known='known.tsv', - _find=self._find, - _show=self._show, - _print=self._print, - ) - - self.assertEqual(self.calls, [ - ('_find', (dirs, 'known.tsv', 'ignored.tsv')), - #('_print', ('okay',)), - ]) - - def test_some_unsupported(self): - self._return_find = TYPICAL - dirs = ['src1', 'src2', 'Include'] - - with self.assertRaises(SystemExit) as cm: - cmd_check('check', - dirs, - ignored='ignored.tsv', - known='known.tsv', - _find=self._find, - _show=self._show, - _print=self._print, - ) - - unsupported = [v for v, s in TYPICAL if not s] - self.assertEqual(self.calls, [ - ('_find', (dirs, 'known.tsv', 'ignored.tsv')), - ('_print', ('ERROR: found unsupported global variables',)), - ('_print', ()), - ('_show', (sorted(unsupported),)), - ('_print', (' (3 total)',)), - ]) - self.assertEqual(cm.exception.code, 1) - - -class ShowTests(CMDBase): - - def test_defaults(self): - self._return_find = [] - - cmd_show('show', - _find=self._find, - _show=self._show, - 
_print=self._print, - ) - - self.assertEqual(self.calls[0], ( - '_find', ( - SOURCE_DIRS, - KNOWN_FILE, - IGNORED_FILE, - ), - )) - - def test_typical(self): - self._return_find = TYPICAL - dirs = ['src1', 'src2', 'Include'] - - cmd_show('show', - dirs, - known='known.tsv', - ignored='ignored.tsv', - _find=self._find, - _show=self._show, - _print=self._print, - ) - - supported = [v for v, s in TYPICAL if s] - unsupported = [v for v, s in TYPICAL if not s] - self.assertEqual(self.calls, [ - ('_find', (dirs, 'known.tsv', 'ignored.tsv')), - ('_print', ('supported:',)), - ('_print', ('----------',)), - ('_show', (sorted(supported),)), - ('_print', (' (6 total)',)), - ('_print', ()), - ('_print', ('unsupported:',)), - ('_print', ('------------',)), - ('_show', (sorted(unsupported),)), - ('_print', (' (3 total)',)), - ]) - - -class ParseArgsTests(unittest.TestCase): - - maxDiff = None - - def test_no_args(self): - self.errmsg = None - def fail(msg): - self.errmsg = msg - sys.exit(msg) - - with self.assertRaises(SystemExit): - parse_args('cg', [], _fail=fail) - - self.assertEqual(self.errmsg, 'missing command') - - def test_check_no_args(self): - cmd, cmdkwargs = parse_args('cg', [ - 'check', - ]) - - self.assertEqual(cmd, 'check') - self.assertEqual(cmdkwargs, { - 'ignored': IGNORED_FILE, - 'known': KNOWN_FILE, - 'dirs': SOURCE_DIRS, - }) - - def test_check_full_args(self): - cmd, cmdkwargs = parse_args('cg', [ - 'check', - '--ignored', 'spam.tsv', - '--known', 'eggs.tsv', - 'dir1', - 'dir2', - 'dir3', - ]) - - self.assertEqual(cmd, 'check') - self.assertEqual(cmdkwargs, { - 'ignored': 'spam.tsv', - 'known': 'eggs.tsv', - 'dirs': ['dir1', 'dir2', 'dir3'] - }) - - def test_show_no_args(self): - cmd, cmdkwargs = parse_args('cg', [ - 'show', - ]) - - self.assertEqual(cmd, 'show') - self.assertEqual(cmdkwargs, { - 'ignored': IGNORED_FILE, - 'known': KNOWN_FILE, - 'dirs': SOURCE_DIRS, - 'skip_objects': False, - }) - - def test_show_full_args(self): - cmd, cmdkwargs = 
parse_args('cg', [ - 'show', - '--ignored', 'spam.tsv', - '--known', 'eggs.tsv', - 'dir1', - 'dir2', - 'dir3', - ]) - - self.assertEqual(cmd, 'show') - self.assertEqual(cmdkwargs, { - 'ignored': 'spam.tsv', - 'known': 'eggs.tsv', - 'dirs': ['dir1', 'dir2', 'dir3'], - 'skip_objects': False, - }) - - -def new_stub_commands(*names): - calls = [] - def cmdfunc(cmd, **kwargs): - calls.append((cmd, kwargs)) - commands = {name: cmdfunc for name in names} - return commands, calls - - -class MainTests(unittest.TestCase): - - def test_no_command(self): - with self.assertRaises(ValueError): - main(None, {}) - - def test_check(self): - commands, calls = new_stub_commands('check', 'show') - - cmdkwargs = { - 'ignored': 'spam.tsv', - 'known': 'eggs.tsv', - 'dirs': ['dir1', 'dir2', 'dir3'], - } - main('check', cmdkwargs, _COMMANDS=commands) - - self.assertEqual(calls, [ - ('check', cmdkwargs), - ]) - - def test_show(self): - commands, calls = new_stub_commands('check', 'show') - - cmdkwargs = { - 'ignored': 'spam.tsv', - 'known': 'eggs.tsv', - 'dirs': ['dir1', 'dir2', 'dir3'], - } - main('show', cmdkwargs, _COMMANDS=commands) - - self.assertEqual(calls, [ - ('show', cmdkwargs), - ]) diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_find.py b/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_find.py deleted file mode 100644 index 8288992..0000000 --- a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_find.py +++ /dev/null @@ -1,335 +0,0 @@ -import unittest - -from .. 
import tool_imports_for_tests -with tool_imports_for_tests(): - from c_parser import info - from c_globals.find import globals_from_binary, globals - - -class _Base(unittest.TestCase): - - maxDiff = None - - @property - def calls(self): - try: - return self._calls - except AttributeError: - self._calls = [] - return self._calls - - -class StaticsFromBinaryTests(_Base): - - _return_iter_symbols = () - _return_resolve_symbols = () - _return_get_symbol_resolver = None - - def setUp(self): - super().setUp() - - self.kwargs = dict( - _iter_symbols=self._iter_symbols, - _resolve=self._resolve_symbols, - _get_symbol_resolver=self._get_symbol_resolver, - ) - - def _iter_symbols(self, binfile, find_local_symbol): - self.calls.append(('_iter_symbols', (binfile, find_local_symbol))) - return self._return_iter_symbols - - def _resolve_symbols(self, symbols, resolve): - self.calls.append(('_resolve_symbols', (symbols, resolve,))) - return self._return_resolve_symbols - - def _get_symbol_resolver(self, knownvars, dirnames=None): - self.calls.append(('_get_symbol_resolver', (knownvars, dirnames))) - return self._return_get_symbol_resolver - - def test_typical(self): - symbols = self._return_iter_symbols = () - resolver = self._return_get_symbol_resolver = object() - variables = self._return_resolve_symbols = [ - info.Variable.from_parts('dir1/spam.c', None, 'var1', 'int'), - info.Variable.from_parts('dir1/spam.c', None, 'var2', 'static int'), - info.Variable.from_parts('dir1/spam.c', None, 'var3', 'char *'), - info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', 'const char *'), - info.Variable.from_parts('dir1/eggs.c', None, 'var1', 'static int'), - info.Variable.from_parts('dir1/eggs.c', 'func1', 'var2', 'static char *'), - ] - knownvars = object() - - found = list(globals_from_binary('python', - knownvars=knownvars, - **self.kwargs)) - - self.assertEqual(found, [ - info.Variable.from_parts('dir1/spam.c', None, 'var1', 'int'), - info.Variable.from_parts('dir1/spam.c', None, 
'var2', 'static int'), - info.Variable.from_parts('dir1/spam.c', None, 'var3', 'char *'), - info.Variable.from_parts('dir1/eggs.c', None, 'var1', 'static int'), - info.Variable.from_parts('dir1/eggs.c', 'func1', 'var2', 'static char *'), - ]) - self.assertEqual(self.calls, [ - ('_iter_symbols', ('python', None)), - ('_get_symbol_resolver', (knownvars, None)), - ('_resolve_symbols', (symbols, resolver)), - ]) - -# self._return_iter_symbols = [ -# s_info.Symbol(('dir1/spam.c', None, 'var1'), 'variable', False), -# s_info.Symbol(('dir1/spam.c', None, 'var2'), 'variable', False), -# s_info.Symbol(('dir1/spam.c', None, 'func1'), 'function', False), -# s_info.Symbol(('dir1/spam.c', None, 'func2'), 'function', True), -# s_info.Symbol(('dir1/spam.c', None, 'var3'), 'variable', False), -# s_info.Symbol(('dir1/spam.c', 'func2', 'var4'), 'variable', False), -# s_info.Symbol(('dir1/ham.c', None, 'var1'), 'variable', True), -# s_info.Symbol(('dir1/eggs.c', None, 'var1'), 'variable', False), -# s_info.Symbol(('dir1/eggs.c', None, 'xyz'), 'other', False), -# s_info.Symbol(('dir1/eggs.c', '???', 'var2'), 'variable', False), -# s_info.Symbol(('???', None, 'var_x'), 'variable', False), -# s_info.Symbol(('???', '???', 'var_y'), 'variable', False), -# s_info.Symbol((None, None, '???'), 'other', False), -# ] -# known = object() -# -# globals_from_binary('python', knownvars=known, **this.kwargs) -# found = list(globals_from_symbols(['dir1'], self.iter_symbols)) -# -# self.assertEqual(found, [ -# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'), -# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'), -# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'), -# ]) -# self.assertEqual(self.calls, [ -# ('iter_symbols', (['dir1'],)), -# ]) -# -# def test_no_symbols(self): -# self._return_iter_symbols = [] -# -# found = 
list(globals_from_symbols(['dir1'], self.iter_symbols)) -# -# self.assertEqual(found, []) -# self.assertEqual(self.calls, [ -# ('iter_symbols', (['dir1'],)), -# ]) - - # XXX need functional test - - -#class StaticFromDeclarationsTests(_Base): -# -# _return_iter_declarations = () -# -# def iter_declarations(self, dirnames): -# self.calls.append(('iter_declarations', (dirnames,))) -# return iter(self._return_iter_declarations) -# -# def test_typical(self): -# self._return_iter_declarations = [ -# None, -# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'), -# object(), -# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'), -# object(), -# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'), -# object(), -# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'), -# object(), -# ] -# -# found = list(globals_from_declarations(['dir1'], self.iter_declarations)) -# -# self.assertEqual(found, [ -# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'), -# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'), -# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'), -# ]) -# self.assertEqual(self.calls, [ -# ('iter_declarations', (['dir1'],)), -# ]) -# -# def test_no_declarations(self): -# self._return_iter_declarations = [] -# -# found = list(globals_from_declarations(['dir1'], self.iter_declarations)) -# -# self.assertEqual(found, []) -# self.assertEqual(self.calls, [ -# ('iter_declarations', (['dir1'],)), -# ]) - - -#class IterVariablesTests(_Base): -# -# _return_from_symbols = () -# _return_from_declarations = () -# -# def _from_symbols(self, dirnames, iter_symbols): -# self.calls.append(('_from_symbols', (dirnames, iter_symbols))) -# return iter(self._return_from_symbols) -# -# def _from_declarations(self, dirnames, 
iter_declarations): -# self.calls.append(('_from_declarations', (dirnames, iter_declarations))) -# return iter(self._return_from_declarations) -# -# def test_typical(self): -# expected = [ -# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'), -# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'), -# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'), -# ] -# self._return_from_symbols = expected -# -# found = list(iter_variables(['dir1'], -# _from_symbols=self._from_symbols, -# _from_declarations=self._from_declarations)) -# -# self.assertEqual(found, expected) -# self.assertEqual(self.calls, [ -# ('_from_symbols', (['dir1'], b_symbols.iter_symbols)), -# ]) -# -# def test_no_symbols(self): -# self._return_from_symbols = [] -# -# found = list(iter_variables(['dir1'], -# _from_symbols=self._from_symbols, -# _from_declarations=self._from_declarations)) -# -# self.assertEqual(found, []) -# self.assertEqual(self.calls, [ -# ('_from_symbols', (['dir1'], b_symbols.iter_symbols)), -# ]) -# -# def test_from_binary(self): -# expected = [ -# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'), -# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'), -# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'), -# ] -# self._return_from_symbols = expected -# -# found = list(iter_variables(['dir1'], 'platform', -# _from_symbols=self._from_symbols, -# _from_declarations=self._from_declarations)) -# -# self.assertEqual(found, expected) -# self.assertEqual(self.calls, [ -# ('_from_symbols', (['dir1'], b_symbols.iter_symbols)), -# ]) -# -# def test_from_symbols(self): -# expected = [ -# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'), -# 
info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'), -# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'), -# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'), -# ] -# self._return_from_symbols = expected -# -# found = list(iter_variables(['dir1'], 'symbols', -# _from_symbols=self._from_symbols, -# _from_declarations=self._from_declarations)) -# -# self.assertEqual(found, expected) -# self.assertEqual(self.calls, [ -# ('_from_symbols', (['dir1'], s_symbols.iter_symbols)), -# ]) -# -# def test_from_declarations(self): -# expected = [ -# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'), -# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'), -# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'), -# ] -# self._return_from_declarations = expected -# -# found = list(iter_variables(['dir1'], 'declarations', -# _from_symbols=self._from_symbols, -# _from_declarations=self._from_declarations)) -# -# self.assertEqual(found, expected) -# self.assertEqual(self.calls, [ -# ('_from_declarations', (['dir1'], declarations.iter_all)), -# ]) -# -# def test_from_preprocessed(self): -# expected = [ -# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'), -# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'), -# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'), -# ] -# self._return_from_declarations = expected -# -# found = list(iter_variables(['dir1'], 'preprocessed', -# _from_symbols=self._from_symbols, -# _from_declarations=self._from_declarations)) -# -# self.assertEqual(found, expected) -# self.assertEqual(self.calls, [ -# ('_from_declarations', (['dir1'], 
declarations.iter_preprocessed)), -# ]) - - -class StaticsTest(_Base): - - _return_iter_variables = None - - def _iter_variables(self, kind, *, known, dirnames): - self.calls.append( - ('_iter_variables', (kind, known, dirnames))) - return iter(self._return_iter_variables or ()) - - def test_typical(self): - self._return_iter_variables = [ - info.Variable.from_parts('src1/spam.c', None, 'var1', 'static const char *'), - info.Variable.from_parts('src1/spam.c', None, 'var1b', 'const char *'), - info.Variable.from_parts('src1/spam.c', 'ham', 'initialized', 'static int'), - info.Variable.from_parts('src1/spam.c', 'ham', 'result', 'int'), # skipped - info.Variable.from_parts('src1/spam.c', None, 'var2', 'static PyObject *'), - info.Variable.from_parts('src1/eggs.c', 'tofu', 'ready', 'static int'), - info.Variable.from_parts('src1/spam.c', None, 'freelist', 'static (PyTupleObject *)[10]'), - info.Variable.from_parts('src1/sub/ham.c', None, 'var1', 'static const char const *'), - info.Variable.from_parts('src2/jam.c', None, 'var1', 'static int'), - info.Variable.from_parts('src2/jam.c', None, 'var2', 'static MyObject *'), - info.Variable.from_parts('Include/spam.h', None, 'data', 'static const int'), - ] - dirnames = object() - known = object() - - found = list(globals(dirnames, known, - kind='platform', - _iter_variables=self._iter_variables, - )) - - self.assertEqual(found, [ - info.Variable.from_parts('src1/spam.c', None, 'var1', 'static const char *'), - info.Variable.from_parts('src1/spam.c', None, 'var1b', 'const char *'), - info.Variable.from_parts('src1/spam.c', 'ham', 'initialized', 'static int'), - info.Variable.from_parts('src1/spam.c', None, 'var2', 'static PyObject *'), - info.Variable.from_parts('src1/eggs.c', 'tofu', 'ready', 'static int'), - info.Variable.from_parts('src1/spam.c', None, 'freelist', 'static (PyTupleObject *)[10]'), - info.Variable.from_parts('src1/sub/ham.c', None, 'var1', 'static const char const *'), - 
info.Variable.from_parts('src2/jam.c', None, 'var1', 'static int'), - info.Variable.from_parts('src2/jam.c', None, 'var2', 'static MyObject *'), - info.Variable.from_parts('Include/spam.h', None, 'data', 'static const int'), - ]) - self.assertEqual(self.calls, [ - ('_iter_variables', ('platform', known, dirnames)), - ]) diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_functional.py b/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_functional.py deleted file mode 100644 index 9279790..0000000 --- a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_functional.py +++ /dev/null @@ -1,34 +0,0 @@ -import unittest - -from .. import tool_imports_for_tests -with tool_imports_for_tests(): - pass - - -class SelfCheckTests(unittest.TestCase): - - @unittest.expectedFailure - def test_known(self): - # Make sure known macros & vartypes aren't hiding unknown local types. - # XXX finish! - raise NotImplementedError - - @unittest.expectedFailure - def test_compare_nm_results(self): - # Make sure the "show" results match the statics found by "nm" command. - # XXX Skip if "nm" is not available. - # XXX finish! - raise NotImplementedError - - -class DummySourceTests(unittest.TestCase): - - @unittest.expectedFailure - def test_check(self): - # XXX finish! - raise NotImplementedError - - @unittest.expectedFailure - def test_show(self): - # XXX finish! - raise NotImplementedError diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_show.py b/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_show.py deleted file mode 100644 index ce1dad8..0000000 --- a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_show.py +++ /dev/null @@ -1,52 +0,0 @@ -import unittest - -from .. 
import tool_imports_for_tests -with tool_imports_for_tests(): - from c_parser import info - from c_globals.show import basic - - -TYPICAL = [ - info.Variable.from_parts('src1/spam.c', None, 'var1', 'static const char *'), - info.Variable.from_parts('src1/spam.c', 'ham', 'initialized', 'static int'), - info.Variable.from_parts('src1/spam.c', None, 'var2', 'static PyObject *'), - info.Variable.from_parts('src1/eggs.c', 'tofu', 'ready', 'static int'), - info.Variable.from_parts('src1/spam.c', None, 'freelist', 'static (PyTupleObject *)[10]'), - info.Variable.from_parts('src1/sub/ham.c', None, 'var1', 'static const char const *'), - info.Variable.from_parts('src2/jam.c', None, 'var1', 'static int'), - info.Variable.from_parts('src2/jam.c', None, 'var2', 'static MyObject *'), - info.Variable.from_parts('Include/spam.h', None, 'data', 'static const int'), - ] - - -class BasicTests(unittest.TestCase): - - maxDiff = None - - def setUp(self): - self.lines = [] - - def print(self, line): - self.lines.append(line) - - def test_typical(self): - basic(TYPICAL, - _print=self.print) - - self.assertEqual(self.lines, [ - 'src1/spam.c:var1 static const char *', - 'src1/spam.c:ham():initialized static int', - 'src1/spam.c:var2 static PyObject *', - 'src1/eggs.c:tofu():ready static int', - 'src1/spam.c:freelist static (PyTupleObject *)[10]', - 'src1/sub/ham.c:var1 static const char const *', - 'src2/jam.c:var1 static int', - 'src2/jam.c:var2 static MyObject *', - 'Include/spam.h:data static const int', - ]) - - def test_no_rows(self): - basic([], - _print=self.print) - - self.assertEqual(self.lines, []) diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_supported.py b/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_supported.py deleted file mode 100644 index 1e7d40e..0000000 --- a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_supported.py +++ /dev/null @@ -1,96 +0,0 @@ -import re -import textwrap -import unittest - -from .. 
import tool_imports_for_tests -with tool_imports_for_tests(): - from c_analyzer_common.info import ID - from c_parser import info - from c_globals.supported import is_supported, ignored_from_file - - -class IsSupportedTests(unittest.TestCase): - - @unittest.expectedFailure - def test_supported(self): - statics = [ - info.StaticVar('src1/spam.c', None, 'var1', 'const char *'), - info.StaticVar('src1/spam.c', None, 'var1', 'int'), - ] - for static in statics: - with self.subTest(static): - result = is_supported(static) - - self.assertTrue(result) - - @unittest.expectedFailure - def test_not_supported(self): - statics = [ - info.StaticVar('src1/spam.c', None, 'var1', 'PyObject *'), - info.StaticVar('src1/spam.c', None, 'var1', 'PyObject[10]'), - ] - for static in statics: - with self.subTest(static): - result = is_supported(static) - - self.assertFalse(result) - - -class IgnoredFromFileTests(unittest.TestCase): - - maxDiff = None - - _return_read_tsv = () - - @property - def calls(self): - try: - return self._calls - except AttributeError: - self._calls = [] - return self._calls - - def _read_tsv(self, *args): - self.calls.append(('_read_tsv', args)) - return self._return_read_tsv - - def test_typical(self): - lines = textwrap.dedent(''' - filename funcname name kind reason - file1.c - var1 variable ... - file1.c func1 local1 variable | - file1.c - var2 variable ??? 
- file1.c func2 local2 variable | - file2.c - var1 variable reasons - ''').strip().splitlines() - lines = [re.sub(r'\s{1,8}', '\t', line, 4).replace('|', '') - for line in lines] - self._return_read_tsv = [tuple(v.strip() for v in line.split('\t')) - for line in lines[1:]] - - ignored = ignored_from_file('spam.c', _read_tsv=self._read_tsv) - - self.assertEqual(ignored, { - 'variables': { - ID('file1.c', '', 'var1'): '...', - ID('file1.c', 'func1', 'local1'): '', - ID('file1.c', '', 'var2'): '???', - ID('file1.c', 'func2', 'local2'): '', - ID('file2.c', '', 'var1'): 'reasons', - }, - }) - self.assertEqual(self.calls, [ - ('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\treason')), - ]) - - def test_empty(self): - self._return_read_tsv = [] - - ignored = ignored_from_file('spam.c', _read_tsv=self._read_tsv) - - self.assertEqual(ignored, { - 'variables': {}, - }) - self.assertEqual(self.calls, [ - ('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\treason')), - ]) diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_parser/__init__.py b/Lib/test/test_tools/test_c_analyzer/test_c_parser/__init__.py deleted file mode 100644 index bc502ef..0000000 --- a/Lib/test/test_tools/test_c_analyzer/test_c_parser/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -import os.path -from test.support import load_package_tests - - -def load_tests(*args): - return load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_parser/test_declarations.py b/Lib/test/test_tools/test_c_analyzer/test_c_parser/test_declarations.py deleted file mode 100644 index b68744e..0000000 --- a/Lib/test/test_tools/test_c_analyzer/test_c_parser/test_declarations.py +++ /dev/null @@ -1,795 +0,0 @@ -import textwrap -import unittest - -from .. 
import tool_imports_for_tests -with tool_imports_for_tests(): - from c_parser.declarations import ( - iter_global_declarations, iter_local_statements, - parse_func, parse_var, parse_compound, - iter_variables, - ) - - -class TestCaseBase(unittest.TestCase): - - maxDiff = None - - @property - def calls(self): - try: - return self._calls - except AttributeError: - self._calls = [] - return self._calls - - -class IterGlobalDeclarationsTests(TestCaseBase): - - def test_functions(self): - tests = [ - (textwrap.dedent(''' - void func1() { - return; - } - '''), - textwrap.dedent(''' - void func1() { - return; - } - ''').strip(), - ), - (textwrap.dedent(''' - static unsigned int * _func1( - const char *arg1, - int *arg2 - long long arg3 - ) - { - return _do_something(arg1, arg2, arg3); - } - '''), - textwrap.dedent(''' - static unsigned int * _func1( const char *arg1, int *arg2 long long arg3 ) { - return _do_something(arg1, arg2, arg3); - } - ''').strip(), - ), - (textwrap.dedent(''' - static PyObject * - _func1(const char *arg1, PyObject *arg2) - { - static int initialized = 0; - if (!initialized) { - initialized = 1; - _init(arg1); - } - - PyObject *result = _do_something(arg1, arg2); - Py_INCREF(result); - return result; - } - '''), - textwrap.dedent(''' - static PyObject * _func1(const char *arg1, PyObject *arg2) { - static int initialized = 0; - if (!initialized) { - initialized = 1; - _init(arg1); - } - PyObject *result = _do_something(arg1, arg2); - Py_INCREF(result); - return result; - } - ''').strip(), - ), - ] - for lines, expected in tests: - body = textwrap.dedent( - expected.partition('{')[2].rpartition('}')[0] - ).strip() - expected = (expected, body) - with self.subTest(lines): - lines = lines.splitlines() - - stmts = list(iter_global_declarations(lines)) - - self.assertEqual(stmts, [expected]) - - @unittest.expectedFailure - def test_declarations(self): - tests = [ - 'int spam;', - 'long long spam;', - 'static const int const *spam;', - 'int spam;', - 
'typedef int myint;', - 'typedef PyObject * (*unaryfunc)(PyObject *);', - # typedef struct - # inline struct - # enum - # inline enum - ] - for text in tests: - expected = (text, - ' '.join(l.strip() for l in text.splitlines())) - with self.subTest(lines): - lines = lines.splitlines() - - stmts = list(iter_global_declarations(lines)) - - self.assertEqual(stmts, [expected]) - - @unittest.expectedFailure - def test_declaration_multiple_vars(self): - lines = ['static const int const *spam, *ham=NULL, eggs = 3;'] - - stmts = list(iter_global_declarations(lines)) - - self.assertEqual(stmts, [ - ('static const int const *spam;', None), - ('static const int *ham=NULL;', None), - ('static const int eggs = 3;', None), - ]) - - def test_mixed(self): - lines = textwrap.dedent(''' - int spam; - static const char const *eggs; - - PyObject * start(void) { - static int initialized = 0; - if (initialized) { - initialized = 1; - init(); - } - return _start(); - } - - char* ham; - - static int stop(char *reason) { - ham = reason; - return _stop(); - } - ''').splitlines() - expected = [ - (textwrap.dedent(''' - PyObject * start(void) { - static int initialized = 0; - if (initialized) { - initialized = 1; - init(); - } - return _start(); - } - ''').strip(), - textwrap.dedent(''' - static int initialized = 0; - if (initialized) { - initialized = 1; - init(); - } - return _start(); - ''').strip(), - ), - (textwrap.dedent(''' - static int stop(char *reason) { - ham = reason; - return _stop(); - } - ''').strip(), - textwrap.dedent(''' - ham = reason; - return _stop(); - ''').strip(), - ), - ] - - stmts = list(iter_global_declarations(lines)) - - self.assertEqual(stmts, expected) - #self.assertEqual([stmt for stmt, _ in stmts], - # [stmt for stmt, _ in expected]) - #self.assertEqual([body for _, body in stmts], - # [body for _, body in expected]) - - def test_no_statements(self): - lines = [] - - stmts = list(iter_global_declarations(lines)) - - self.assertEqual(stmts, []) - - def 
test_bogus(self): - tests = [ - (textwrap.dedent(''' - int spam; - static const char const *eggs; - - PyObject * start(void) { - static int initialized = 0; - if (initialized) { - initialized = 1; - init(); - } - return _start(); - } - - char* ham; - - static int _stop(void) { - // missing closing bracket - - static int stop(char *reason) { - ham = reason; - return _stop(); - } - '''), - [(textwrap.dedent(''' - PyObject * start(void) { - static int initialized = 0; - if (initialized) { - initialized = 1; - init(); - } - return _start(); - } - ''').strip(), - textwrap.dedent(''' - static int initialized = 0; - if (initialized) { - initialized = 1; - init(); - } - return _start(); - ''').strip(), - ), - # Neither "stop()" nor "_stop()" are here. - ], - ), - ] - for lines, expected in tests: - with self.subTest(lines): - lines = lines.splitlines() - - stmts = list(iter_global_declarations(lines)) - - self.assertEqual(stmts, expected) - #self.assertEqual([stmt for stmt, _ in stmts], - # [stmt for stmt, _ in expected]) - #self.assertEqual([body for _, body in stmts], - # [body for _, body in expected]) - - def test_ignore_comments(self): - tests = [ - ('// msg', None), - ('// int stmt;', None), - (' // ... ', None), - ('// /*', None), - ('/* int stmt; */', None), - (""" - /** - * ... - * int stmt; - */ - """, None), - ] - for lines, expected in tests: - with self.subTest(lines): - lines = lines.splitlines() - - stmts = list(iter_global_declarations(lines)) - - self.assertEqual(stmts, [expected] if expected else []) - - -class IterLocalStatementsTests(TestCaseBase): - - def test_vars(self): - tests = [ - # POTS - 'int spam;', - 'unsigned int spam;', - 'char spam;', - 'float spam;', - - # typedefs - 'uint spam;', - 'MyType spam;', - - # complex - 'struct myspam spam;', - 'union choice spam;', - # inline struct - # inline union - # enum? 
- ] - # pointers - tests.extend([ - # POTS - 'int * spam;', - 'unsigned int * spam;', - 'char *spam;', - 'char const *spam = "spamspamspam...";', - # typedefs - 'MyType *spam;', - # complex - 'struct myspam *spam;', - 'union choice *spam;', - # packed with details - 'const char const *spam;', - # void pointer - 'void *data = NULL;', - # function pointers - 'int (* func)(char *arg1);', - 'char * (* func)(void);', - ]) - # storage class - tests.extend([ - 'static int spam;', - 'extern int spam;', - 'static unsigned int spam;', - 'static struct myspam spam;', - ]) - # type qualifier - tests.extend([ - 'const int spam;', - 'const unsigned int spam;', - 'const struct myspam spam;', - ]) - # combined - tests.extend([ - 'const char *spam = eggs;', - 'static const char const *spam = "spamspamspam...";', - 'extern const char const *spam;', - 'static void *data = NULL;', - 'static int (const * func)(char *arg1) = func1;', - 'static char * (* func)(void);', - ]) - for line in tests: - expected = line - with self.subTest(line): - stmts = list(iter_local_statements([line])) - - self.assertEqual(stmts, [(expected, None)]) - - @unittest.expectedFailure - def test_vars_multiline_var(self): - lines = textwrap.dedent(''' - PyObject * - spam - = NULL; - ''').splitlines() - expected = 'PyObject * spam = NULL;' - - stmts = list(iter_local_statements(lines)) - - self.assertEqual(stmts, [(expected, None)]) - - @unittest.expectedFailure - def test_declaration_multiple_vars(self): - lines = ['static const int const *spam, *ham=NULL, ham2[]={1, 2, 3}, ham3[2]={1, 2}, eggs = 3;'] - - stmts = list(iter_global_declarations(lines)) - - self.assertEqual(stmts, [ - ('static const int const *spam;', None), - ('static const int *ham=NULL;', None), - ('static const int ham[]={1, 2, 3};', None), - ('static const int ham[2]={1, 2};', None), - ('static const int eggs = 3;', None), - ]) - - @unittest.expectedFailure - def test_other_simple(self): - raise NotImplementedError - - @unittest.expectedFailure 
- def test_compound(self): - raise NotImplementedError - - @unittest.expectedFailure - def test_mixed(self): - raise NotImplementedError - - def test_no_statements(self): - lines = [] - - stmts = list(iter_local_statements(lines)) - - self.assertEqual(stmts, []) - - @unittest.expectedFailure - def test_bogus(self): - raise NotImplementedError - - def test_ignore_comments(self): - tests = [ - ('// msg', None), - ('// int stmt;', None), - (' // ... ', None), - ('// /*', None), - ('/* int stmt; */', None), - (""" - /** - * ... - * int stmt; - */ - """, None), - # mixed with statements - ('int stmt; // ...', ('int stmt;', None)), - ( 'int stmt; /* ... */', ('int stmt;', None)), - ( '/* ... */ int stmt;', ('int stmt;', None)), - ] - for lines, expected in tests: - with self.subTest(lines): - lines = lines.splitlines() - - stmts = list(iter_local_statements(lines)) - - self.assertEqual(stmts, [expected] if expected else []) - - -class ParseFuncTests(TestCaseBase): - - def test_typical(self): - tests = [ - ('PyObject *\nspam(char *a)\n{\nreturn _spam(a);\n}', - 'return _spam(a);', - ('spam', 'PyObject * spam(char *a)'), - ), - ] - for stmt, body, expected in tests: - with self.subTest(stmt): - name, signature = parse_func(stmt, body) - - self.assertEqual((name, signature), expected) - - -class ParseVarTests(TestCaseBase): - - def test_typical(self): - tests = [ - # POTS - ('int spam;', ('spam', 'int')), - ('unsigned int spam;', ('spam', 'unsigned int')), - ('char spam;', ('spam', 'char')), - ('float spam;', ('spam', 'float')), - - # typedefs - ('uint spam;', ('spam', 'uint')), - ('MyType spam;', ('spam', 'MyType')), - - # complex - ('struct myspam spam;', ('spam', 'struct myspam')), - ('union choice spam;', ('spam', 'union choice')), - # inline struct - # inline union - # enum? 
- ] - # pointers - tests.extend([ - # POTS - ('int * spam;', ('spam', 'int *')), - ('unsigned int * spam;', ('spam', 'unsigned int *')), - ('char *spam;', ('spam', 'char *')), - ('char const *spam = "spamspamspam...";', ('spam', 'char const *')), - # typedefs - ('MyType *spam;', ('spam', 'MyType *')), - # complex - ('struct myspam *spam;', ('spam', 'struct myspam *')), - ('union choice *spam;', ('spam', 'union choice *')), - # packed with details - ('const char const *spam;', ('spam', 'const char const *')), - # void pointer - ('void *data = NULL;', ('data', 'void *')), - # function pointers - ('int (* func)(char *);', ('func', 'int (*)(char *)')), - ('char * (* func)(void);', ('func', 'char * (*)(void)')), - ]) - # storage class - tests.extend([ - ('static int spam;', ('spam', 'static int')), - ('extern int spam;', ('spam', 'extern int')), - ('static unsigned int spam;', ('spam', 'static unsigned int')), - ('static struct myspam spam;', ('spam', 'static struct myspam')), - ]) - # type qualifier - tests.extend([ - ('const int spam;', ('spam', 'const int')), - ('const unsigned int spam;', ('spam', 'const unsigned int')), - ('const struct myspam spam;', ('spam', 'const struct myspam')), - ]) - # combined - tests.extend([ - ('const char *spam = eggs;', ('spam', 'const char *')), - ('static const char const *spam = "spamspamspam...";', - ('spam', 'static const char const *')), - ('extern const char const *spam;', - ('spam', 'extern const char const *')), - ('static void *data = NULL;', ('data', 'static void *')), - ('static int (const * func)(char *) = func1;', - ('func', 'static int (const *)(char *)')), - ('static char * (* func)(void);', - ('func', 'static char * (*)(void)')), - ]) - for stmt, expected in tests: - with self.subTest(stmt): - name, vartype = parse_var(stmt) - - self.assertEqual((name, vartype), expected) - - -@unittest.skip('not finished') -class ParseCompoundTests(TestCaseBase): - - def test_typical(self): - headers, bodies = parse_compound(stmt, 
blocks) - ... - - -class IterVariablesTests(TestCaseBase): - - _return_iter_source_lines = None - _return_iter_global = None - _return_iter_local = None - _return_parse_func = None - _return_parse_var = None - _return_parse_compound = None - - def _iter_source_lines(self, filename): - self.calls.append( - ('_iter_source_lines', (filename,))) - return self._return_iter_source_lines.splitlines() - - def _iter_global(self, lines): - self.calls.append( - ('_iter_global', (lines,))) - try: - return self._return_iter_global.pop(0) - except IndexError: - return ('???', None) - - def _iter_local(self, lines): - self.calls.append( - ('_iter_local', (lines,))) - try: - return self._return_iter_local.pop(0) - except IndexError: - return ('???', None) - - def _parse_func(self, stmt, body): - self.calls.append( - ('_parse_func', (stmt, body))) - try: - return self._return_parse_func.pop(0) - except IndexError: - return ('???', '???') - - def _parse_var(self, lines): - self.calls.append( - ('_parse_var', (lines,))) - try: - return self._return_parse_var.pop(0) - except IndexError: - return ('???', '???') - - def _parse_compound(self, stmt, blocks): - self.calls.append( - ('_parse_compound', (stmt, blocks))) - try: - return self._return_parse_compound.pop(0) - except IndexError: - return (['???'], ['???']) - - def test_empty_file(self): - self._return_iter_source_lines = '' - self._return_iter_global = [ - [], - ] - self._return_parse_func = None - self._return_parse_var = None - self._return_parse_compound = None - - srcvars = list(iter_variables('spam.c', - _iter_source_lines=self._iter_source_lines, - _iter_global=self._iter_global, - _iter_local=self._iter_local, - _parse_func=self._parse_func, - _parse_var=self._parse_var, - _parse_compound=self._parse_compound, - )) - - self.assertEqual(srcvars, []) - self.assertEqual(self.calls, [ - ('_iter_source_lines', ('spam.c',)), - ('_iter_global', ([],)), - ]) - - def test_no_statements(self): - content = textwrap.dedent(''' - ... 
- ''') - self._return_iter_source_lines = content - self._return_iter_global = [ - [], - ] - self._return_parse_func = None - self._return_parse_var = None - self._return_parse_compound = None - - srcvars = list(iter_variables('spam.c', - _iter_source_lines=self._iter_source_lines, - _iter_global=self._iter_global, - _iter_local=self._iter_local, - _parse_func=self._parse_func, - _parse_var=self._parse_var, - _parse_compound=self._parse_compound, - )) - - self.assertEqual(srcvars, []) - self.assertEqual(self.calls, [ - ('_iter_source_lines', ('spam.c',)), - ('_iter_global', (content.splitlines(),)), - ]) - - def test_typical(self): - content = textwrap.dedent(''' - ... - ''') - self._return_iter_source_lines = content - self._return_iter_global = [ - [('', None), # var1 - ('', None), # non-var - ('', None), # var2 - ('', ''), # func1 - ('', None), # var4 - ], - ] - self._return_iter_local = [ - # func1 - [('', None), # var3 - ('', [('
', '')]), # if - ('', None), # non-var - ], - # if - [('', None), # var2 ("collision" with global var) - ], - ] - self._return_parse_func = [ - ('func1', ''), - ] - self._return_parse_var = [ - ('var1', ''), - (None, None), - ('var2', ''), - ('var3', ''), - ('var2', ''), - ('var4', ''), - (None, None), - (None, None), - (None, None), - ('var5', ''), - ] - self._return_parse_compound = [ - ([[ - 'if (', - '', - ')', - ], - ], - ['']), - ] - - srcvars = list(iter_variables('spam.c', - _iter_source_lines=self._iter_source_lines, - _iter_global=self._iter_global, - _iter_local=self._iter_local, - _parse_func=self._parse_func, - _parse_var=self._parse_var, - _parse_compound=self._parse_compound, - )) - - self.assertEqual(srcvars, [ - (None, 'var1', ''), - (None, 'var2', ''), - ('func1', 'var3', ''), - ('func1', 'var2', ''), - ('func1', 'var4', ''), - (None, 'var5', ''), - ]) - self.assertEqual(self.calls, [ - ('_iter_source_lines', ('spam.c',)), - ('_iter_global', (content.splitlines(),)), - ('_parse_var', ('',)), - ('_parse_var', ('',)), - ('_parse_var', ('',)), - ('_parse_func', ('', '')), - ('_iter_local', ([''],)), - ('_parse_var', ('',)), - ('_parse_compound', ('', [('
', '')])), - ('_parse_var', ('if (',)), - ('_parse_var', ('',)), - ('_parse_var', (')',)), - ('_parse_var', ('',)), - ('_iter_local', ([''],)), - ('_parse_var', ('',)), - ('_parse_var', ('',)), - ]) - - def test_no_locals(self): - content = textwrap.dedent(''' - ... - ''') - self._return_iter_source_lines = content - self._return_iter_global = [ - [('', None), # var1 - ('', None), # non-var - ('', None), # var2 - ('', ''), # func1 - ], - ] - self._return_iter_local = [ - # func1 - [('', None), # non-var - ('', [('
', '')]), # if - ('', None), # non-var - ], - # if - [('', None), # non-var - ], - ] - self._return_parse_func = [ - ('func1', ''), - ] - self._return_parse_var = [ - ('var1', ''), - (None, None), - ('var2', ''), - (None, None), - (None, None), - (None, None), - (None, None), - (None, None), - (None, None), - ] - self._return_parse_compound = [ - ([[ - 'if (', - '', - ')', - ], - ], - ['']), - ] - - srcvars = list(iter_variables('spam.c', - _iter_source_lines=self._iter_source_lines, - _iter_global=self._iter_global, - _iter_local=self._iter_local, - _parse_func=self._parse_func, - _parse_var=self._parse_var, - _parse_compound=self._parse_compound, - )) - - self.assertEqual(srcvars, [ - (None, 'var1', ''), - (None, 'var2', ''), - ]) - self.assertEqual(self.calls, [ - ('_iter_source_lines', ('spam.c',)), - ('_iter_global', (content.splitlines(),)), - ('_parse_var', ('',)), - ('_parse_var', ('',)), - ('_parse_var', ('',)), - ('_parse_func', ('', '')), - ('_iter_local', ([''],)), - ('_parse_var', ('',)), - ('_parse_compound', ('', [('
', '')])), - ('_parse_var', ('if (',)), - ('_parse_var', ('',)), - ('_parse_var', (')',)), - ('_parse_var', ('',)), - ('_iter_local', ([''],)), - ('_parse_var', ('',)), - ]) diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_parser/test_info.py b/Lib/test/test_tools/test_c_analyzer/test_c_parser/test_info.py deleted file mode 100644 index d1a966c..0000000 --- a/Lib/test/test_tools/test_c_analyzer/test_c_parser/test_info.py +++ /dev/null @@ -1,244 +0,0 @@ -import string -import unittest - -from ..util import PseudoStr, StrProxy, Object -from .. import tool_imports_for_tests -with tool_imports_for_tests(): - from c_analyzer_common.info import ID, UNKNOWN - from c_parser.info import ( - normalize_vartype, Variable, - ) - - -class NormalizeVartypeTests(unittest.TestCase): - - def test_basic(self): - tests = [ - (None, None), - ('', ''), - ('int', 'int'), - (PseudoStr('int'), 'int'), - (StrProxy('int'), 'int'), - ] - for vartype, expected in tests: - with self.subTest(vartype): - normalized = normalize_vartype(vartype) - - self.assertEqual(normalized, expected) - - -class VariableTests(unittest.TestCase): - - VALID_ARGS = ( - ('x/y/z/spam.c', 'func', 'eggs'), - 'static', - 'int', - ) - VALID_KWARGS = dict(zip(Variable._fields, VALID_ARGS)) - VALID_EXPECTED = VALID_ARGS - - def test_init_typical_global(self): - for storage in ('static', 'extern', 'implicit'): - with self.subTest(storage): - static = Variable( - id=ID( - filename='x/y/z/spam.c', - funcname=None, - name='eggs', - ), - storage=storage, - vartype='int', - ) - - self.assertEqual(static, ( - ('x/y/z/spam.c', None, 'eggs'), - storage, - 'int', - )) - - def test_init_typical_local(self): - for storage in ('static', 'local'): - with self.subTest(storage): - static = Variable( - id=ID( - filename='x/y/z/spam.c', - funcname='func', - name='eggs', - ), - storage=storage, - vartype='int', - ) - - self.assertEqual(static, ( - ('x/y/z/spam.c', 'func', 'eggs'), - storage, - 'int', - )) - - def 
test_init_all_missing(self): - for value in ('', None): - with self.subTest(repr(value)): - static = Variable( - id=value, - storage=value, - vartype=value, - ) - - self.assertEqual(static, ( - None, - None, - None, - )) - - def test_init_all_coerced(self): - id = ID('x/y/z/spam.c', 'func', 'spam') - tests = [ - ('str subclass', - dict( - id=( - PseudoStr('x/y/z/spam.c'), - PseudoStr('func'), - PseudoStr('spam'), - ), - storage=PseudoStr('static'), - vartype=PseudoStr('int'), - ), - (id, - 'static', - 'int', - )), - ('non-str 1', - dict( - id=id, - storage=Object(), - vartype=Object(), - ), - (id, - '', - '', - )), - ('non-str 2', - dict( - id=id, - storage=StrProxy('static'), - vartype=StrProxy('variable'), - ), - (id, - 'static', - 'variable', - )), - ('non-str', - dict( - id=id, - storage=('a', 'b', 'c'), - vartype=('x', 'y', 'z'), - ), - (id, - "('a', 'b', 'c')", - "('x', 'y', 'z')", - )), - ] - for summary, kwargs, expected in tests: - with self.subTest(summary): - static = Variable(**kwargs) - - for field in Variable._fields: - value = getattr(static, field) - if field == 'id': - self.assertIs(type(value), ID) - else: - self.assertIs(type(value), str) - self.assertEqual(tuple(static), expected) - - def test_iterable(self): - static = Variable(**self.VALID_KWARGS) - - id, storage, vartype = static - - values = (id, storage, vartype) - for value, expected in zip(values, self.VALID_EXPECTED): - self.assertEqual(value, expected) - - def test_fields(self): - static = Variable(('a', 'b', 'z'), 'x', 'y') - - self.assertEqual(static.id, ('a', 'b', 'z')) - self.assertEqual(static.storage, 'x') - self.assertEqual(static.vartype, 'y') - - def test___getattr__(self): - static = Variable(('a', 'b', 'z'), 'x', 'y') - - self.assertEqual(static.filename, 'a') - self.assertEqual(static.funcname, 'b') - self.assertEqual(static.name, 'z') - - def test_validate_typical(self): - validstorage = ('static', 'extern', 'implicit', 'local') - self.assertEqual(set(validstorage), 
set(Variable.STORAGE)) - - for storage in validstorage: - with self.subTest(storage): - static = Variable( - id=ID( - filename='x/y/z/spam.c', - funcname='func', - name='eggs', - ), - storage=storage, - vartype='int', - ) - - static.validate() # This does not fail. - - def test_validate_missing_field(self): - for field in Variable._fields: - with self.subTest(field): - static = Variable(**self.VALID_KWARGS) - static = static._replace(**{field: None}) - - with self.assertRaises(TypeError): - static.validate() - for field in ('storage', 'vartype'): - with self.subTest(field): - static = Variable(**self.VALID_KWARGS) - static = static._replace(**{field: UNKNOWN}) - - with self.assertRaises(TypeError): - static.validate() - - def test_validate_bad_field(self): - badch = tuple(c for c in string.punctuation + string.digits) - notnames = ( - '1a', - 'a.b', - 'a-b', - '&a', - 'a++', - ) + badch - tests = [ - ('id', ()), # Any non-empty str is okay. - ('storage', ('external', 'global') + notnames), - ('vartype', ()), # Any non-empty str is okay. - ] - seen = set() - for field, invalid in tests: - for value in invalid: - seen.add(value) - with self.subTest(f'{field}={value!r}'): - static = Variable(**self.VALID_KWARGS) - static = static._replace(**{field: value}) - - with self.assertRaises(ValueError): - static.validate() - - for field, invalid in tests: - if field == 'id': - continue - valid = seen - set(invalid) - for value in valid: - with self.subTest(f'{field}={value!r}'): - static = Variable(**self.VALID_KWARGS) - static = static._replace(**{field: value}) - - static.validate() # This does not fail. 
diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_parser/test_preprocessor.py b/Lib/test/test_tools/test_c_analyzer/test_c_parser/test_preprocessor.py deleted file mode 100644 index 89e1557..0000000 --- a/Lib/test/test_tools/test_c_analyzer/test_c_parser/test_preprocessor.py +++ /dev/null @@ -1,1562 +0,0 @@ -import itertools -import textwrap -import unittest -import sys - -from ..util import wrapped_arg_combos, StrProxy -from .. import tool_imports_for_tests -with tool_imports_for_tests(): - from c_parser.preprocessor import ( - iter_lines, - # directives - parse_directive, PreprocessorDirective, - Constant, Macro, IfDirective, Include, OtherDirective, - ) - - -class TestCaseBase(unittest.TestCase): - - maxDiff = None - - def reset(self): - self._calls = [] - self.errors = None - - @property - def calls(self): - try: - return self._calls - except AttributeError: - self._calls = [] - return self._calls - - errors = None - - def try_next_exc(self): - if not self.errors: - return - if exc := self.errors.pop(0): - raise exc - - def check_calls(self, *expected): - self.assertEqual(self.calls, list(expected)) - self.assertEqual(self.errors or [], []) - - -class IterLinesTests(TestCaseBase): - - parsed = None - - def check_calls(self, *expected): - super().check_calls(*expected) - self.assertEqual(self.parsed or [], []) - - def _parse_directive(self, line): - self.calls.append( - ('_parse_directive', line)) - self.try_next_exc() - return self.parsed.pop(0) - - def test_no_lines(self): - lines = [] - - results = list( - iter_lines(lines, _parse_directive=self._parse_directive)) - - self.assertEqual(results, []) - self.check_calls() - - def test_no_directives(self): - lines = textwrap.dedent(''' - - // xyz - typedef enum { - SPAM - EGGS - } kind; - - struct info { - kind kind; - int status; - }; - - typedef struct spam { - struct info info; - } myspam; - - static int spam = 0; - - /** - * ... 
- */ - static char * - get_name(int arg, - char *default, - ) - { - return default - } - - int check(void) { - return 0; - } - - ''')[1:-1].splitlines() - expected = [(lno, line, None, ()) - for lno, line in enumerate(lines, 1)] - expected[1] = (2, ' ', None, ()) - expected[20] = (21, ' ', None, ()) - del expected[19] - del expected[18] - - results = list( - iter_lines(lines, _parse_directive=self._parse_directive)) - - self.assertEqual(results, expected) - self.check_calls() - - def test_single_directives(self): - tests = [ - ('#include ', Include('')), - ('#define SPAM 1', Constant('SPAM', '1')), - ('#define SPAM() 1', Macro('SPAM', (), '1')), - ('#define SPAM(a, b) a = b;', Macro('SPAM', ('a', 'b'), 'a = b;')), - ('#if defined(SPAM)', IfDirective('if', 'defined(SPAM)')), - ('#ifdef SPAM', IfDirective('ifdef', 'SPAM')), - ('#ifndef SPAM', IfDirective('ifndef', 'SPAM')), - ('#elseif defined(SPAM)', IfDirective('elseif', 'defined(SPAM)')), - ('#else', OtherDirective('else', None)), - ('#endif', OtherDirective('endif', None)), - ('#error ...', OtherDirective('error', '...')), - ('#warning ...', OtherDirective('warning', '...')), - ('#__FILE__ ...', OtherDirective('__FILE__', '...')), - ('#__LINE__ ...', OtherDirective('__LINE__', '...')), - ('#__DATE__ ...', OtherDirective('__DATE__', '...')), - ('#__TIME__ ...', OtherDirective('__TIME__', '...')), - ('#__TIMESTAMP__ ...', OtherDirective('__TIMESTAMP__', '...')), - ] - for line, directive in tests: - with self.subTest(line): - self.reset() - self.parsed = [ - directive, - ] - text = textwrap.dedent(''' - static int spam = 0; - {} - static char buffer[256]; - ''').strip().format(line) - lines = text.strip().splitlines() - - results = list( - iter_lines(lines, _parse_directive=self._parse_directive)) - - self.assertEqual(results, [ - (1, 'static int spam = 0;', None, ()), - (2, line, directive, ()), - ((3, 'static char buffer[256];', None, ('defined(SPAM)',)) - if directive.kind in ('if', 'ifdef', 'elseif') - else (3, 
'static char buffer[256];', None, ('! defined(SPAM)',)) - if directive.kind == 'ifndef' - else (3, 'static char buffer[256];', None, ())), - ]) - self.check_calls( - ('_parse_directive', line), - ) - - def test_directive_whitespace(self): - line = ' # define eggs ( a , b ) { a = b ; } ' - directive = Macro('eggs', ('a', 'b'), '{ a = b; }') - self.parsed = [ - directive, - ] - lines = [line] - - results = list( - iter_lines(lines, _parse_directive=self._parse_directive)) - - self.assertEqual(results, [ - (1, line, directive, ()), - ]) - self.check_calls( - ('_parse_directive', '#define eggs ( a , b ) { a = b ; }'), - ) - - @unittest.skipIf(sys.platform == 'win32', 'needs fix under Windows') - def test_split_lines(self): - directive = Macro('eggs', ('a', 'b'), '{ a = b; }') - self.parsed = [ - directive, - ] - text = textwrap.dedent(r''' - static int spam = 0; - #define eggs(a, b) \ - { \ - a = b; \ - } - static char buffer[256]; - ''').strip() - lines = [line + '\n' for line in text.splitlines()] - lines[-1] = lines[-1][:-1] - - results = list( - iter_lines(lines, _parse_directive=self._parse_directive)) - - self.assertEqual(results, [ - (1, 'static int spam = 0;\n', None, ()), - (5, '#define eggs(a, b) { a = b; }\n', directive, ()), - (6, 'static char buffer[256];', None, ()), - ]) - self.check_calls( - ('_parse_directive', '#define eggs(a, b) { a = b; }'), - ) - - def test_nested_conditions(self): - directives = [ - IfDirective('ifdef', 'SPAM'), - IfDirective('if', 'SPAM == 1'), - IfDirective('elseif', 'SPAM == 2'), - OtherDirective('else', None), - OtherDirective('endif', None), - OtherDirective('endif', None), - ] - self.parsed = list(directives) - text = textwrap.dedent(r''' - static int spam = 0; - - #ifdef SPAM - static int start = 0; - # if SPAM == 1 - static char buffer[10]; - # elif SPAM == 2 - static char buffer[100]; - # else - static char buffer[256]; - # endif - static int end = 0; - #endif - - static int eggs = 0; - ''').strip() - lines = [line for 
line in text.splitlines() if line.strip()] - - results = list( - iter_lines(lines, _parse_directive=self._parse_directive)) - - self.assertEqual(results, [ - (1, 'static int spam = 0;', None, ()), - (2, '#ifdef SPAM', directives[0], ()), - (3, 'static int start = 0;', None, ('defined(SPAM)',)), - (4, '# if SPAM == 1', directives[1], ('defined(SPAM)',)), - (5, 'static char buffer[10];', None, ('defined(SPAM)', 'SPAM == 1')), - (6, '# elif SPAM == 2', directives[2], ('defined(SPAM)', 'SPAM == 1')), - (7, 'static char buffer[100];', None, ('defined(SPAM)', '! (SPAM == 1)', 'SPAM == 2')), - (8, '# else', directives[3], ('defined(SPAM)', '! (SPAM == 1)', 'SPAM == 2')), - (9, 'static char buffer[256];', None, ('defined(SPAM)', '! (SPAM == 1)', '! (SPAM == 2)')), - (10, '# endif', directives[4], ('defined(SPAM)', '! (SPAM == 1)', '! (SPAM == 2)')), - (11, 'static int end = 0;', None, ('defined(SPAM)',)), - (12, '#endif', directives[5], ('defined(SPAM)',)), - (13, 'static int eggs = 0;', None, ()), - ]) - self.check_calls( - ('_parse_directive', '#ifdef SPAM'), - ('_parse_directive', '#if SPAM == 1'), - ('_parse_directive', '#elif SPAM == 2'), - ('_parse_directive', '#else'), - ('_parse_directive', '#endif'), - ('_parse_directive', '#endif'), - ) - - def test_split_blocks(self): - directives = [ - IfDirective('ifdef', 'SPAM'), - OtherDirective('else', None), - OtherDirective('endif', None), - ] - self.parsed = list(directives) - text = textwrap.dedent(r''' - void str_copy(char *buffer, *orig); - - int init(char *name) { - static int initialized = 0; - if (initialized) { - return 0; - } - #ifdef SPAM - static char buffer[10]; - str_copy(buffer, char); - } - - void copy(char *buffer, *orig) { - strncpy(buffer, orig, 9); - buffer[9] = 0; - } - - #else - static char buffer[256]; - str_copy(buffer, char); - } - - void copy(char *buffer, *orig) { - strcpy(buffer, orig); - } - - #endif - ''').strip() - lines = [line for line in text.splitlines() if line.strip()] - - results = 
list( - iter_lines(lines, _parse_directive=self._parse_directive)) - - self.assertEqual(results, [ - (1, 'void str_copy(char *buffer, *orig);', None, ()), - (2, 'int init(char *name) {', None, ()), - (3, ' static int initialized = 0;', None, ()), - (4, ' if (initialized) {', None, ()), - (5, ' return 0;', None, ()), - (6, ' }', None, ()), - - (7, '#ifdef SPAM', directives[0], ()), - - (8, ' static char buffer[10];', None, ('defined(SPAM)',)), - (9, ' str_copy(buffer, char);', None, ('defined(SPAM)',)), - (10, '}', None, ('defined(SPAM)',)), - (11, 'void copy(char *buffer, *orig) {', None, ('defined(SPAM)',)), - (12, ' strncpy(buffer, orig, 9);', None, ('defined(SPAM)',)), - (13, ' buffer[9] = 0;', None, ('defined(SPAM)',)), - (14, '}', None, ('defined(SPAM)',)), - - (15, '#else', directives[1], ('defined(SPAM)',)), - - (16, ' static char buffer[256];', None, ('! (defined(SPAM))',)), - (17, ' str_copy(buffer, char);', None, ('! (defined(SPAM))',)), - (18, '}', None, ('! (defined(SPAM))',)), - (19, 'void copy(char *buffer, *orig) {', None, ('! (defined(SPAM))',)), - (20, ' strcpy(buffer, orig);', None, ('! (defined(SPAM))',)), - (21, '}', None, ('! (defined(SPAM))',)), - - (22, '#endif', directives[2], ('! (defined(SPAM))',)), - ]) - self.check_calls( - ('_parse_directive', '#ifdef SPAM'), - ('_parse_directive', '#else'), - ('_parse_directive', '#endif'), - ) - - @unittest.skipIf(sys.platform == 'win32', 'needs fix under Windows') - def test_basic(self): - directives = [ - Include(''), - IfDirective('ifdef', 'SPAM'), - IfDirective('if', '! defined(HAM) || !HAM'), - Constant('HAM', '0'), - IfDirective('elseif', 'HAM < 0'), - Constant('HAM', '-1'), - OtherDirective('else', None), - OtherDirective('endif', None), - OtherDirective('endif', None), - IfDirective('if', 'defined(HAM) && (HAM < 0 || ! 
HAM)'), - OtherDirective('undef', 'HAM'), - OtherDirective('endif', None), - IfDirective('ifndef', 'HAM'), - OtherDirective('endif', None), - ] - self.parsed = list(directives) - text = textwrap.dedent(r''' - #include - print("begin"); - #ifdef SPAM - print("spam"); - #if ! defined(HAM) || !HAM - # DEFINE HAM 0 - #elseif HAM < 0 - # DEFINE HAM -1 - #else - print("ham HAM"); - #endif - #endif - - #if defined(HAM) && \ - (HAM < 0 || ! HAM) - print("ham?"); - #undef HAM - # endif - - #ifndef HAM - print("no ham"); - #endif - print("end"); - ''')[1:-1] - lines = [line + '\n' for line in text.splitlines()] - lines[-1] = lines[-1][:-1] - - results = list( - iter_lines(lines, _parse_directive=self._parse_directive)) - - self.assertEqual(results, [ - (1, '#include \n', Include(''), ()), - (2, 'print("begin");\n', None, ()), - # - (3, '#ifdef SPAM\n', - IfDirective('ifdef', 'SPAM'), - ()), - (4, ' print("spam");\n', - None, - ('defined(SPAM)',)), - (5, ' #if ! defined(HAM) || !HAM\n', - IfDirective('if', '! defined(HAM) || !HAM'), - ('defined(SPAM)',)), - (6, '# DEFINE HAM 0\n', - Constant('HAM', '0'), - ('defined(SPAM)', '! defined(HAM) || !HAM')), - (7, ' #elseif HAM < 0\n', - IfDirective('elseif', 'HAM < 0'), - ('defined(SPAM)', '! defined(HAM) || !HAM')), - (8, '# DEFINE HAM -1\n', - Constant('HAM', '-1'), - ('defined(SPAM)', '! (! defined(HAM) || !HAM)', 'HAM < 0')), - (9, ' #else\n', - OtherDirective('else', None), - ('defined(SPAM)', '! (! defined(HAM) || !HAM)', 'HAM < 0')), - (10, ' print("ham HAM");\n', - None, - ('defined(SPAM)', '! (! defined(HAM) || !HAM)', '! (HAM < 0)')), - (11, ' #endif\n', - OtherDirective('endif', None), - ('defined(SPAM)', '! (! defined(HAM) || !HAM)', '! (HAM < 0)')), - (12, '#endif\n', - OtherDirective('endif', None), - ('defined(SPAM)',)), - # - (13, '\n', None, ()), - # - (15, '#if defined(HAM) && (HAM < 0 || ! HAM)\n', - IfDirective('if', 'defined(HAM) && (HAM < 0 || ! 
HAM)'), - ()), - (16, ' print("ham?");\n', - None, - ('defined(HAM) && (HAM < 0 || ! HAM)',)), - (17, ' #undef HAM\n', - OtherDirective('undef', 'HAM'), - ('defined(HAM) && (HAM < 0 || ! HAM)',)), - (18, '# endif\n', - OtherDirective('endif', None), - ('defined(HAM) && (HAM < 0 || ! HAM)',)), - # - (19, '\n', None, ()), - # - (20, '#ifndef HAM\n', - IfDirective('ifndef', 'HAM'), - ()), - (21, ' print("no ham");\n', - None, - ('! defined(HAM)',)), - (22, '#endif\n', - OtherDirective('endif', None), - ('! defined(HAM)',)), - # - (23, 'print("end");', None, ()), - ]) - - @unittest.skipIf(sys.platform == 'win32', 'needs fix under Windows') - def test_typical(self): - # We use Include/compile.h from commit 66c4f3f38b86. It has - # a good enough mix of code without being too large. - directives = [ - IfDirective('ifndef', 'Py_COMPILE_H'), - Constant('Py_COMPILE_H', None), - - IfDirective('ifndef', 'Py_LIMITED_API'), - - Include('"code.h"'), - - IfDirective('ifdef', '__cplusplus'), - OtherDirective('endif', None), - - Constant('PyCF_MASK', '(CO_FUTURE_DIVISION | CO_FUTURE_ABSOLUTE_IMPORT | CO_FUTURE_WITH_STATEMENT | CO_FUTURE_PRINT_FUNCTION | CO_FUTURE_UNICODE_LITERALS | CO_FUTURE_BARRY_AS_BDFL | CO_FUTURE_GENERATOR_STOP | CO_FUTURE_ANNOTATIONS)'), - Constant('PyCF_MASK_OBSOLETE', '(CO_NESTED)'), - Constant('PyCF_SOURCE_IS_UTF8', ' 0x0100'), - Constant('PyCF_DONT_IMPLY_DEDENT', '0x0200'), - Constant('PyCF_ONLY_AST', '0x0400'), - Constant('PyCF_IGNORE_COOKIE', '0x0800'), - Constant('PyCF_TYPE_COMMENTS', '0x1000'), - Constant('PyCF_ALLOW_TOP_LEVEL_AWAIT', '0x2000'), - - IfDirective('ifndef', 'Py_LIMITED_API'), - OtherDirective('endif', None), - - Constant('FUTURE_NESTED_SCOPES', '"nested_scopes"'), - Constant('FUTURE_GENERATORS', '"generators"'), - Constant('FUTURE_DIVISION', '"division"'), - Constant('FUTURE_ABSOLUTE_IMPORT', '"absolute_import"'), - Constant('FUTURE_WITH_STATEMENT', '"with_statement"'), - Constant('FUTURE_PRINT_FUNCTION', '"print_function"'), - 
Constant('FUTURE_UNICODE_LITERALS', '"unicode_literals"'), - Constant('FUTURE_BARRY_AS_BDFL', '"barry_as_FLUFL"'), - Constant('FUTURE_GENERATOR_STOP', '"generator_stop"'), - Constant('FUTURE_ANNOTATIONS', '"annotations"'), - - Macro('PyAST_Compile', ('mod', 's', 'f', 'ar'), 'PyAST_CompileEx(mod, s, f, -1, ar)'), - - Constant('PY_INVALID_STACK_EFFECT', 'INT_MAX'), - - IfDirective('ifdef', '__cplusplus'), - OtherDirective('endif', None), - - OtherDirective('endif', None), # ifndef Py_LIMITED_API - - Constant('Py_single_input', '256'), - Constant('Py_file_input', '257'), - Constant('Py_eval_input', '258'), - Constant('Py_func_type_input', '345'), - - OtherDirective('endif', None), # ifndef Py_COMPILE_H - ] - self.parsed = list(directives) - text = textwrap.dedent(r''' - #ifndef Py_COMPILE_H - #define Py_COMPILE_H - - #ifndef Py_LIMITED_API - #include "code.h" - - #ifdef __cplusplus - extern "C" { - #endif - - /* Public interface */ - struct _node; /* Declare the existence of this type */ - PyAPI_FUNC(PyCodeObject *) PyNode_Compile(struct _node *, const char *); - /* XXX (ncoghlan): Unprefixed type name in a public API! 
*/ - - #define PyCF_MASK (CO_FUTURE_DIVISION | CO_FUTURE_ABSOLUTE_IMPORT | \ - CO_FUTURE_WITH_STATEMENT | CO_FUTURE_PRINT_FUNCTION | \ - CO_FUTURE_UNICODE_LITERALS | CO_FUTURE_BARRY_AS_BDFL | \ - CO_FUTURE_GENERATOR_STOP | CO_FUTURE_ANNOTATIONS) - #define PyCF_MASK_OBSOLETE (CO_NESTED) - #define PyCF_SOURCE_IS_UTF8 0x0100 - #define PyCF_DONT_IMPLY_DEDENT 0x0200 - #define PyCF_ONLY_AST 0x0400 - #define PyCF_IGNORE_COOKIE 0x0800 - #define PyCF_TYPE_COMMENTS 0x1000 - #define PyCF_ALLOW_TOP_LEVEL_AWAIT 0x2000 - - #ifndef Py_LIMITED_API - typedef struct { - int cf_flags; /* bitmask of CO_xxx flags relevant to future */ - int cf_feature_version; /* minor Python version (PyCF_ONLY_AST) */ - } PyCompilerFlags; - #endif - - /* Future feature support */ - - typedef struct { - int ff_features; /* flags set by future statements */ - int ff_lineno; /* line number of last future statement */ - } PyFutureFeatures; - - #define FUTURE_NESTED_SCOPES "nested_scopes" - #define FUTURE_GENERATORS "generators" - #define FUTURE_DIVISION "division" - #define FUTURE_ABSOLUTE_IMPORT "absolute_import" - #define FUTURE_WITH_STATEMENT "with_statement" - #define FUTURE_PRINT_FUNCTION "print_function" - #define FUTURE_UNICODE_LITERALS "unicode_literals" - #define FUTURE_BARRY_AS_BDFL "barry_as_FLUFL" - #define FUTURE_GENERATOR_STOP "generator_stop" - #define FUTURE_ANNOTATIONS "annotations" - - struct _mod; /* Declare the existence of this type */ - #define PyAST_Compile(mod, s, f, ar) PyAST_CompileEx(mod, s, f, -1, ar) - PyAPI_FUNC(PyCodeObject *) PyAST_CompileEx( - struct _mod *mod, - const char *filename, /* decoded from the filesystem encoding */ - PyCompilerFlags *flags, - int optimize, - PyArena *arena); - PyAPI_FUNC(PyCodeObject *) PyAST_CompileObject( - struct _mod *mod, - PyObject *filename, - PyCompilerFlags *flags, - int optimize, - PyArena *arena); - PyAPI_FUNC(PyFutureFeatures *) PyFuture_FromAST( - struct _mod * mod, - const char *filename /* decoded from the filesystem encoding */ 
- ); - PyAPI_FUNC(PyFutureFeatures *) PyFuture_FromASTObject( - struct _mod * mod, - PyObject *filename - ); - - /* _Py_Mangle is defined in compile.c */ - PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name); - - #define PY_INVALID_STACK_EFFECT INT_MAX - PyAPI_FUNC(int) PyCompile_OpcodeStackEffect(int opcode, int oparg); - PyAPI_FUNC(int) PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump); - - PyAPI_FUNC(int) _PyAST_Optimize(struct _mod *, PyArena *arena, int optimize); - - #ifdef __cplusplus - } - #endif - - #endif /* !Py_LIMITED_API */ - - /* These definitions must match corresponding definitions in graminit.h. */ - #define Py_single_input 256 - #define Py_file_input 257 - #define Py_eval_input 258 - #define Py_func_type_input 345 - - #endif /* !Py_COMPILE_H */ - ''').strip() - lines = [line + '\n' for line in text.splitlines()] - lines[-1] = lines[-1][:-1] - - results = list( - iter_lines(lines, _parse_directive=self._parse_directive)) - - self.assertEqual(results, [ - (1, '#ifndef Py_COMPILE_H\n', - IfDirective('ifndef', 'Py_COMPILE_H'), - ()), - (2, '#define Py_COMPILE_H\n', - Constant('Py_COMPILE_H', None), - ('! defined(Py_COMPILE_H)',)), - (3, '\n', - None, - ('! defined(Py_COMPILE_H)',)), - (4, '#ifndef Py_LIMITED_API\n', - IfDirective('ifndef', 'Py_LIMITED_API'), - ('! defined(Py_COMPILE_H)',)), - (5, '#include "code.h"\n', - Include('"code.h"'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (6, '\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (7, '#ifdef __cplusplus\n', - IfDirective('ifdef', '__cplusplus'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (8, 'extern "C" {\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)', 'defined(__cplusplus)')), - (9, '#endif\n', - OtherDirective('endif', None), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)', 'defined(__cplusplus)')), - (10, '\n', - None, - ('! defined(Py_COMPILE_H)', '! 
defined(Py_LIMITED_API)')), - (11, ' \n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (12, 'struct _node; \n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (13, 'PyAPI_FUNC(PyCodeObject *) PyNode_Compile(struct _node *, const char *);\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (14, ' \n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (15, '\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (19, '#define PyCF_MASK (CO_FUTURE_DIVISION | CO_FUTURE_ABSOLUTE_IMPORT | CO_FUTURE_WITH_STATEMENT | CO_FUTURE_PRINT_FUNCTION | CO_FUTURE_UNICODE_LITERALS | CO_FUTURE_BARRY_AS_BDFL | CO_FUTURE_GENERATOR_STOP | CO_FUTURE_ANNOTATIONS)\n', - Constant('PyCF_MASK', '(CO_FUTURE_DIVISION | CO_FUTURE_ABSOLUTE_IMPORT | CO_FUTURE_WITH_STATEMENT | CO_FUTURE_PRINT_FUNCTION | CO_FUTURE_UNICODE_LITERALS | CO_FUTURE_BARRY_AS_BDFL | CO_FUTURE_GENERATOR_STOP | CO_FUTURE_ANNOTATIONS)'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (20, '#define PyCF_MASK_OBSOLETE (CO_NESTED)\n', - Constant('PyCF_MASK_OBSOLETE', '(CO_NESTED)'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (21, '#define PyCF_SOURCE_IS_UTF8 0x0100\n', - Constant('PyCF_SOURCE_IS_UTF8', ' 0x0100'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (22, '#define PyCF_DONT_IMPLY_DEDENT 0x0200\n', - Constant('PyCF_DONT_IMPLY_DEDENT', '0x0200'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (23, '#define PyCF_ONLY_AST 0x0400\n', - Constant('PyCF_ONLY_AST', '0x0400'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (24, '#define PyCF_IGNORE_COOKIE 0x0800\n', - Constant('PyCF_IGNORE_COOKIE', '0x0800'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (25, '#define PyCF_TYPE_COMMENTS 0x1000\n', - Constant('PyCF_TYPE_COMMENTS', '0x1000'), - ('! defined(Py_COMPILE_H)', '! 
defined(Py_LIMITED_API)')), - (26, '#define PyCF_ALLOW_TOP_LEVEL_AWAIT 0x2000\n', - Constant('PyCF_ALLOW_TOP_LEVEL_AWAIT', '0x2000'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (27, '\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (28, '#ifndef Py_LIMITED_API\n', - IfDirective('ifndef', 'Py_LIMITED_API'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (29, 'typedef struct {\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)', '! defined(Py_LIMITED_API)')), - (30, ' int cf_flags; \n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)', '! defined(Py_LIMITED_API)')), - (31, ' int cf_feature_version; \n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)', '! defined(Py_LIMITED_API)')), - (32, '} PyCompilerFlags;\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)', '! defined(Py_LIMITED_API)')), - (33, '#endif\n', - OtherDirective('endif', None), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)', '! defined(Py_LIMITED_API)')), - (34, '\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (35, ' \n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (36, '\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (37, 'typedef struct {\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (38, ' int ff_features; \n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (39, ' int ff_lineno; \n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (40, '} PyFutureFeatures;\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (41, '\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (42, '#define FUTURE_NESTED_SCOPES "nested_scopes"\n', - Constant('FUTURE_NESTED_SCOPES', '"nested_scopes"'), - ('! defined(Py_COMPILE_H)', '! 
defined(Py_LIMITED_API)')), - (43, '#define FUTURE_GENERATORS "generators"\n', - Constant('FUTURE_GENERATORS', '"generators"'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (44, '#define FUTURE_DIVISION "division"\n', - Constant('FUTURE_DIVISION', '"division"'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (45, '#define FUTURE_ABSOLUTE_IMPORT "absolute_import"\n', - Constant('FUTURE_ABSOLUTE_IMPORT', '"absolute_import"'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (46, '#define FUTURE_WITH_STATEMENT "with_statement"\n', - Constant('FUTURE_WITH_STATEMENT', '"with_statement"'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (47, '#define FUTURE_PRINT_FUNCTION "print_function"\n', - Constant('FUTURE_PRINT_FUNCTION', '"print_function"'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (48, '#define FUTURE_UNICODE_LITERALS "unicode_literals"\n', - Constant('FUTURE_UNICODE_LITERALS', '"unicode_literals"'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (49, '#define FUTURE_BARRY_AS_BDFL "barry_as_FLUFL"\n', - Constant('FUTURE_BARRY_AS_BDFL', '"barry_as_FLUFL"'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (50, '#define FUTURE_GENERATOR_STOP "generator_stop"\n', - Constant('FUTURE_GENERATOR_STOP', '"generator_stop"'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (51, '#define FUTURE_ANNOTATIONS "annotations"\n', - Constant('FUTURE_ANNOTATIONS', '"annotations"'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (52, '\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (53, 'struct _mod; \n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (54, '#define PyAST_Compile(mod, s, f, ar) PyAST_CompileEx(mod, s, f, -1, ar)\n', - Macro('PyAST_Compile', ('mod', 's', 'f', 'ar'), 'PyAST_CompileEx(mod, s, f, -1, ar)'), - ('! defined(Py_COMPILE_H)', '! 
defined(Py_LIMITED_API)')), - (55, 'PyAPI_FUNC(PyCodeObject *) PyAST_CompileEx(\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (56, ' struct _mod *mod,\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (57, ' const char *filename, \n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (58, ' PyCompilerFlags *flags,\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (59, ' int optimize,\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (60, ' PyArena *arena);\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (61, 'PyAPI_FUNC(PyCodeObject *) PyAST_CompileObject(\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (62, ' struct _mod *mod,\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (63, ' PyObject *filename,\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (64, ' PyCompilerFlags *flags,\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (65, ' int optimize,\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (66, ' PyArena *arena);\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (67, 'PyAPI_FUNC(PyFutureFeatures *) PyFuture_FromAST(\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (68, ' struct _mod * mod,\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (69, ' const char *filename \n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (70, ' );\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (71, 'PyAPI_FUNC(PyFutureFeatures *) PyFuture_FromASTObject(\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (72, ' struct _mod * mod,\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (73, ' PyObject *filename\n', - None, - ('! 
defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (74, ' );\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (75, '\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (76, ' \n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (77, 'PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name);\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (78, '\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (79, '#define PY_INVALID_STACK_EFFECT INT_MAX\n', - Constant('PY_INVALID_STACK_EFFECT', 'INT_MAX'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (80, 'PyAPI_FUNC(int) PyCompile_OpcodeStackEffect(int opcode, int oparg);\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (81, 'PyAPI_FUNC(int) PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump);\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (82, '\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (83, 'PyAPI_FUNC(int) _PyAST_Optimize(struct _mod *, PyArena *arena, int optimize);\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (84, '\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (85, '#ifdef __cplusplus\n', - IfDirective('ifdef', '__cplusplus'), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (86, '}\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)', 'defined(__cplusplus)')), - (87, '#endif\n', - OtherDirective('endif', None), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)', 'defined(__cplusplus)')), - (88, '\n', - None, - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (89, '#endif \n', - OtherDirective('endif', None), - ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), - (90, '\n', - None, - ('! defined(Py_COMPILE_H)',)), - (91, ' \n', - None, - ('! 
defined(Py_COMPILE_H)',)), - (92, '#define Py_single_input 256\n', - Constant('Py_single_input', '256'), - ('! defined(Py_COMPILE_H)',)), - (93, '#define Py_file_input 257\n', - Constant('Py_file_input', '257'), - ('! defined(Py_COMPILE_H)',)), - (94, '#define Py_eval_input 258\n', - Constant('Py_eval_input', '258'), - ('! defined(Py_COMPILE_H)',)), - (95, '#define Py_func_type_input 345\n', - Constant('Py_func_type_input', '345'), - ('! defined(Py_COMPILE_H)',)), - (96, '\n', - None, - ('! defined(Py_COMPILE_H)',)), - (97, '#endif ', - OtherDirective('endif', None), - ('! defined(Py_COMPILE_H)',)), - ]) - self.check_calls( - ('_parse_directive', '#ifndef Py_COMPILE_H'), - ('_parse_directive', '#define Py_COMPILE_H'), - ('_parse_directive', '#ifndef Py_LIMITED_API'), - ('_parse_directive', '#include "code.h"'), - ('_parse_directive', '#ifdef __cplusplus'), - ('_parse_directive', '#endif'), - ('_parse_directive', '#define PyCF_MASK (CO_FUTURE_DIVISION | CO_FUTURE_ABSOLUTE_IMPORT | CO_FUTURE_WITH_STATEMENT | CO_FUTURE_PRINT_FUNCTION | CO_FUTURE_UNICODE_LITERALS | CO_FUTURE_BARRY_AS_BDFL | CO_FUTURE_GENERATOR_STOP | CO_FUTURE_ANNOTATIONS)'), - ('_parse_directive', '#define PyCF_MASK_OBSOLETE (CO_NESTED)'), - ('_parse_directive', '#define PyCF_SOURCE_IS_UTF8 0x0100'), - ('_parse_directive', '#define PyCF_DONT_IMPLY_DEDENT 0x0200'), - ('_parse_directive', '#define PyCF_ONLY_AST 0x0400'), - ('_parse_directive', '#define PyCF_IGNORE_COOKIE 0x0800'), - ('_parse_directive', '#define PyCF_TYPE_COMMENTS 0x1000'), - ('_parse_directive', '#define PyCF_ALLOW_TOP_LEVEL_AWAIT 0x2000'), - ('_parse_directive', '#ifndef Py_LIMITED_API'), - ('_parse_directive', '#endif'), - ('_parse_directive', '#define FUTURE_NESTED_SCOPES "nested_scopes"'), - ('_parse_directive', '#define FUTURE_GENERATORS "generators"'), - ('_parse_directive', '#define FUTURE_DIVISION "division"'), - ('_parse_directive', '#define FUTURE_ABSOLUTE_IMPORT "absolute_import"'), - ('_parse_directive', '#define 
FUTURE_WITH_STATEMENT "with_statement"'), - ('_parse_directive', '#define FUTURE_PRINT_FUNCTION "print_function"'), - ('_parse_directive', '#define FUTURE_UNICODE_LITERALS "unicode_literals"'), - ('_parse_directive', '#define FUTURE_BARRY_AS_BDFL "barry_as_FLUFL"'), - ('_parse_directive', '#define FUTURE_GENERATOR_STOP "generator_stop"'), - ('_parse_directive', '#define FUTURE_ANNOTATIONS "annotations"'), - ('_parse_directive', '#define PyAST_Compile(mod, s, f, ar) PyAST_CompileEx(mod, s, f, -1, ar)'), - ('_parse_directive', '#define PY_INVALID_STACK_EFFECT INT_MAX'), - ('_parse_directive', '#ifdef __cplusplus'), - ('_parse_directive', '#endif'), - ('_parse_directive', '#endif'), - ('_parse_directive', '#define Py_single_input 256'), - ('_parse_directive', '#define Py_file_input 257'), - ('_parse_directive', '#define Py_eval_input 258'), - ('_parse_directive', '#define Py_func_type_input 345'), - ('_parse_directive', '#endif'), - ) - - -class ParseDirectiveTests(unittest.TestCase): - - def test_directives(self): - tests = [ - # includes - ('#include "internal/pycore_pystate.h"', Include('"internal/pycore_pystate.h"')), - ('#include ', Include('')), - - # defines - ('#define SPAM int', Constant('SPAM', 'int')), - ('#define SPAM', Constant('SPAM', '')), - ('#define SPAM(x, y) run(x, y)', Macro('SPAM', ('x', 'y'), 'run(x, y)')), - ('#undef SPAM', None), - - # conditionals - ('#if SPAM', IfDirective('if', 'SPAM')), - # XXX complex conditionls - ('#ifdef SPAM', IfDirective('ifdef', 'SPAM')), - ('#ifndef SPAM', IfDirective('ifndef', 'SPAM')), - ('#elseif SPAM', IfDirective('elseif', 'SPAM')), - # XXX complex conditionls - ('#else', OtherDirective('else', '')), - ('#endif', OtherDirective('endif', '')), - - # other - ('#error oops!', None), - ('#warning oops!', None), - ('#pragma ...', None), - ('#__FILE__ ...', None), - ('#__LINE__ ...', None), - ('#__DATE__ ...', None), - ('#__TIME__ ...', None), - ('#__TIMESTAMP__ ...', None), - - # extra whitespace - (' # include ', 
Include('')), - ('#else ', OtherDirective('else', '')), - ('#endif ', OtherDirective('endif', '')), - ('#define SPAM int ', Constant('SPAM', 'int')), - ('#define SPAM ', Constant('SPAM', '')), - ] - for line, expected in tests: - if expected is None: - kind, _, text = line[1:].partition(' ') - expected = OtherDirective(kind, text) - with self.subTest(line): - directive = parse_directive(line) - - self.assertEqual(directive, expected) - - def test_bad_directives(self): - tests = [ - # valid directives with bad text - '#define 123', - '#else spam', - '#endif spam', - ] - for kind in PreprocessorDirective.KINDS: - # missing leading "#" - tests.append(kind) - if kind in ('else', 'endif'): - continue - # valid directives with missing text - tests.append('#' + kind) - tests.append('#' + kind + ' ') - for line in tests: - with self.subTest(line): - with self.assertRaises(ValueError): - parse_directive(line) - - def test_not_directives(self): - tests = [ - '', - ' ', - 'directive', - 'directive?', - '???', - ] - for line in tests: - with self.subTest(line): - with self.assertRaises(ValueError): - parse_directive(line) - - -class ConstantTests(unittest.TestCase): - - def test_type(self): - directive = Constant('SPAM', '123') - - self.assertIs(type(directive), Constant) - self.assertIsInstance(directive, PreprocessorDirective) - - def test_attrs(self): - d = Constant('SPAM', '123') - kind, name, value = d.kind, d.name, d.value - - self.assertEqual(kind, 'define') - self.assertEqual(name, 'SPAM') - self.assertEqual(value, '123') - - def test_text(self): - tests = [ - (('SPAM', '123'), 'SPAM 123'), - (('SPAM',), 'SPAM'), - ] - for args, expected in tests: - with self.subTest(args): - d = Constant(*args) - text = d.text - - self.assertEqual(text, expected) - - def test_iter(self): - kind, name, value = Constant('SPAM', '123') - - self.assertEqual(kind, 'define') - self.assertEqual(name, 'SPAM') - self.assertEqual(value, '123') - - def test_defaults(self): - kind, name, value = 
Constant('SPAM') - - self.assertEqual(kind, 'define') - self.assertEqual(name, 'SPAM') - self.assertIs(value, None) - - def test_coerce(self): - tests = [] - # coerced name, value - for args in wrapped_arg_combos('SPAM', '123'): - tests.append((args, ('SPAM', '123'))) - # missing name, value - for name in ('', ' ', None, StrProxy(' '), ()): - for value in ('', ' ', None, StrProxy(' '), ()): - tests.append( - ((name, value), (None, None))) - # whitespace - tests.extend([ - ((' SPAM ', ' 123 '), ('SPAM', '123')), - ]) - - for args, expected in tests: - with self.subTest(args): - d = Constant(*args) - - self.assertEqual(d[1:], expected) - for i, exp in enumerate(expected, start=1): - if exp is not None: - self.assertIs(type(d[i]), str) - - def test_valid(self): - tests = [ - ('SPAM', '123'), - # unusual name - ('_SPAM_', '123'), - ('X_1', '123'), - # unusual value - ('SPAM', None), - ] - for args in tests: - with self.subTest(args): - directive = Constant(*args) - - directive.validate() - - def test_invalid(self): - tests = [ - # invalid name - ((None, '123'), TypeError), - (('_', '123'), ValueError), - (('1', '123'), ValueError), - (('_1_', '123'), ValueError), - # There is no invalid value (including None). 
- ] - for args, exctype in tests: - with self.subTest(args): - directive = Constant(*args) - - with self.assertRaises(exctype): - directive.validate() - - -class MacroTests(unittest.TestCase): - - def test_type(self): - directive = Macro('SPAM', ('x', 'y'), '123') - - self.assertIs(type(directive), Macro) - self.assertIsInstance(directive, PreprocessorDirective) - - def test_attrs(self): - d = Macro('SPAM', ('x', 'y'), '123') - kind, name, args, body = d.kind, d.name, d.args, d.body - - self.assertEqual(kind, 'define') - self.assertEqual(name, 'SPAM') - self.assertEqual(args, ('x', 'y')) - self.assertEqual(body, '123') - - def test_text(self): - tests = [ - (('SPAM', ('x', 'y'), '123'), 'SPAM(x, y) 123'), - (('SPAM', ('x', 'y'),), 'SPAM(x, y)'), - ] - for args, expected in tests: - with self.subTest(args): - d = Macro(*args) - text = d.text - - self.assertEqual(text, expected) - - def test_iter(self): - kind, name, args, body = Macro('SPAM', ('x', 'y'), '123') - - self.assertEqual(kind, 'define') - self.assertEqual(name, 'SPAM') - self.assertEqual(args, ('x', 'y')) - self.assertEqual(body, '123') - - def test_defaults(self): - kind, name, args, body = Macro('SPAM', ('x', 'y')) - - self.assertEqual(kind, 'define') - self.assertEqual(name, 'SPAM') - self.assertEqual(args, ('x', 'y')) - self.assertIs(body, None) - - def test_coerce(self): - tests = [] - # coerce name and body - for args in wrapped_arg_combos('SPAM', ('x', 'y'), '123'): - tests.append( - (args, ('SPAM', ('x', 'y'), '123'))) - # coerce args - tests.extend([ - (('SPAM', 'x', '123'), - ('SPAM', ('x',), '123')), - (('SPAM', 'x,y', '123'), - ('SPAM', ('x', 'y'), '123')), - ]) - # coerce arg names - for argnames in wrapped_arg_combos('x', 'y'): - tests.append( - (('SPAM', argnames, '123'), - ('SPAM', ('x', 'y'), '123'))) - # missing name, body - for name in ('', ' ', None, StrProxy(' '), ()): - for argnames in (None, ()): - for body in ('', ' ', None, StrProxy(' '), ()): - tests.append( - ((name, argnames, 
body), - (None, (), None))) - # missing args - tests.extend([ - (('SPAM', None, '123'), - ('SPAM', (), '123')), - (('SPAM', (), '123'), - ('SPAM', (), '123')), - ]) - # missing arg names - for arg in ('', ' ', None, StrProxy(' '), ()): - tests.append( - (('SPAM', (arg,), '123'), - ('SPAM', (None,), '123'))) - tests.extend([ - (('SPAM', ('x', '', 'z'), '123'), - ('SPAM', ('x', None, 'z'), '123')), - ]) - # whitespace - tests.extend([ - ((' SPAM ', (' x ', ' y '), ' 123 '), - ('SPAM', ('x', 'y'), '123')), - (('SPAM', 'x, y', '123'), - ('SPAM', ('x', 'y'), '123')), - ]) - - for args, expected in tests: - with self.subTest(args): - d = Macro(*args) - - self.assertEqual(d[1:], expected) - for i, exp in enumerate(expected, start=1): - if i == 2: - self.assertIs(type(d[i]), tuple) - elif exp is not None: - self.assertIs(type(d[i]), str) - - def test_init_bad_args(self): - tests = [ - ('SPAM', StrProxy('x'), '123'), - ('SPAM', object(), '123'), - ] - for args in tests: - with self.subTest(args): - with self.assertRaises(TypeError): - Macro(*args) - - def test_valid(self): - tests = [ - # unusual name - ('SPAM', ('x', 'y'), 'run(x, y)'), - ('_SPAM_', ('x', 'y'), 'run(x, y)'), - ('X_1', ('x', 'y'), 'run(x, y)'), - # unusual args - ('SPAM', (), 'run(x, y)'), - ('SPAM', ('_x_', 'y_1'), 'run(x, y)'), - ('SPAM', 'x', 'run(x, y)'), - ('SPAM', 'x, y', 'run(x, y)'), - # unusual body - ('SPAM', ('x', 'y'), None), - ] - for args in tests: - with self.subTest(args): - directive = Macro(*args) - - directive.validate() - - def test_invalid(self): - tests = [ - # invalid name - ((None, ('x', 'y'), '123'), TypeError), - (('_', ('x', 'y'), '123'), ValueError), - (('1', ('x', 'y'), '123'), ValueError), - (('_1', ('x', 'y'), '123'), ValueError), - # invalid args - (('SPAM', (None, 'y'), '123'), ValueError), - (('SPAM', ('x', '_'), '123'), ValueError), - (('SPAM', ('x', '1'), '123'), ValueError), - (('SPAM', ('x', '_1_'), '123'), ValueError), - # There is no invalid body (including None). 
- ] - for args, exctype in tests: - with self.subTest(args): - directive = Macro(*args) - - with self.assertRaises(exctype): - directive.validate() - - -class IfDirectiveTests(unittest.TestCase): - - def test_type(self): - directive = IfDirective('if', '1') - - self.assertIs(type(directive), IfDirective) - self.assertIsInstance(directive, PreprocessorDirective) - - def test_attrs(self): - d = IfDirective('if', '1') - kind, condition = d.kind, d.condition - - self.assertEqual(kind, 'if') - self.assertEqual(condition, '1') - #self.assertEqual(condition, (ArithmeticCondition('1'),)) - - def test_text(self): - tests = [ - (('if', 'defined(SPAM) && 1 || (EGGS > 3 && defined(HAM))'), - 'defined(SPAM) && 1 || (EGGS > 3 && defined(HAM))'), - ] - for kind in IfDirective.KINDS: - tests.append( - ((kind, 'SPAM'), 'SPAM')) - for args, expected in tests: - with self.subTest(args): - d = IfDirective(*args) - text = d.text - - self.assertEqual(text, expected) - - def test_iter(self): - kind, condition = IfDirective('if', '1') - - self.assertEqual(kind, 'if') - self.assertEqual(condition, '1') - #self.assertEqual(condition, (ArithmeticCondition('1'),)) - - #def test_complex_conditions(self): - # ... - - def test_coerce(self): - tests = [] - for kind in IfDirective.KINDS: - if kind == 'ifdef': - cond = 'defined(SPAM)' - elif kind == 'ifndef': - cond = '! 
defined(SPAM)' - else: - cond = 'SPAM' - for args in wrapped_arg_combos(kind, 'SPAM'): - tests.append((args, (kind, cond))) - tests.extend([ - ((' ' + kind + ' ', ' SPAM '), (kind, cond)), - ]) - for raw in ('', ' ', None, StrProxy(' '), ()): - tests.append(((kind, raw), (kind, None))) - for kind in ('', ' ', None, StrProxy(' '), ()): - tests.append(((kind, 'SPAM'), (None, 'SPAM'))) - for args, expected in tests: - with self.subTest(args): - d = IfDirective(*args) - - self.assertEqual(tuple(d), expected) - for i, exp in enumerate(expected): - if exp is not None: - self.assertIs(type(d[i]), str) - - def test_valid(self): - tests = [] - for kind in IfDirective.KINDS: - tests.extend([ - (kind, 'SPAM'), - (kind, '_SPAM_'), - (kind, 'X_1'), - (kind, '()'), - (kind, '--'), - (kind, '???'), - ]) - for args in tests: - with self.subTest(args): - directive = IfDirective(*args) - - directive.validate() - - def test_invalid(self): - tests = [] - # kind - tests.extend([ - ((None, 'SPAM'), TypeError), - (('_', 'SPAM'), ValueError), - (('-', 'SPAM'), ValueError), - (('spam', 'SPAM'), ValueError), - ]) - for kind in PreprocessorDirective.KINDS: - if kind in IfDirective.KINDS: - continue - tests.append( - ((kind, 'SPAM'), ValueError)) - # condition - for kind in IfDirective.KINDS: - tests.extend([ - ((kind, None), TypeError), - # Any other condition is valid. 
- ]) - for args, exctype in tests: - with self.subTest(args): - directive = IfDirective(*args) - - with self.assertRaises(exctype): - directive.validate() - - -class IncludeTests(unittest.TestCase): - - def test_type(self): - directive = Include('') - - self.assertIs(type(directive), Include) - self.assertIsInstance(directive, PreprocessorDirective) - - def test_attrs(self): - d = Include('') - kind, file, text = d.kind, d.file, d.text - - self.assertEqual(kind, 'include') - self.assertEqual(file, '') - self.assertEqual(text, '') - - def test_iter(self): - kind, file = Include('') - - self.assertEqual(kind, 'include') - self.assertEqual(file, '') - - def test_coerce(self): - tests = [] - for arg, in wrapped_arg_combos(''): - tests.append((arg, '')) - tests.extend([ - (' ', ''), - ]) - for arg in ('', ' ', None, StrProxy(' '), ()): - tests.append((arg, None )) - for arg, expected in tests: - with self.subTest(arg): - _, file = Include(arg) - - self.assertEqual(file, expected) - if expected is not None: - self.assertIs(type(file), str) - - def test_valid(self): - tests = [ - '', - '"spam.h"', - '"internal/pycore_pystate.h"', - ] - for arg in tests: - with self.subTest(arg): - directive = Include(arg) - - directive.validate() - - def test_invalid(self): - tests = [ - (None, TypeError), - # We currently don't check the file. 
- ] - for arg, exctype in tests: - with self.subTest(arg): - directive = Include(arg) - - with self.assertRaises(exctype): - directive.validate() - - -class OtherDirectiveTests(unittest.TestCase): - - def test_type(self): - directive = OtherDirective('undef', 'SPAM') - - self.assertIs(type(directive), OtherDirective) - self.assertIsInstance(directive, PreprocessorDirective) - - def test_attrs(self): - d = OtherDirective('undef', 'SPAM') - kind, text = d.kind, d.text - - self.assertEqual(kind, 'undef') - self.assertEqual(text, 'SPAM') - - def test_iter(self): - kind, text = OtherDirective('undef', 'SPAM') - - self.assertEqual(kind, 'undef') - self.assertEqual(text, 'SPAM') - - def test_coerce(self): - tests = [] - for kind in OtherDirective.KINDS: - if kind in ('else', 'endif'): - continue - for args in wrapped_arg_combos(kind, '...'): - tests.append((args, (kind, '...'))) - tests.extend([ - ((' ' + kind + ' ', ' ... '), (kind, '...')), - ]) - for raw in ('', ' ', None, StrProxy(' '), ()): - tests.append(((kind, raw), (kind, None))) - for kind in ('else', 'endif'): - for args in wrapped_arg_combos(kind, None): - tests.append((args, (kind, None))) - tests.extend([ - ((' ' + kind + ' ', None), (kind, None)), - ]) - for kind in ('', ' ', None, StrProxy(' '), ()): - tests.append(((kind, '...'), (None, '...'))) - for args, expected in tests: - with self.subTest(args): - d = OtherDirective(*args) - - self.assertEqual(tuple(d), expected) - for i, exp in enumerate(expected): - if exp is not None: - self.assertIs(type(d[i]), str) - - def test_valid(self): - tests = [] - for kind in OtherDirective.KINDS: - if kind in ('else', 'endif'): - continue - tests.extend([ - (kind, '...'), - (kind, '???'), - (kind, 'SPAM'), - (kind, '1 + 1'), - ]) - for kind in ('else', 'endif'): - tests.append((kind, None)) - for args in tests: - with self.subTest(args): - directive = OtherDirective(*args) - - directive.validate() - - def test_invalid(self): - tests = [] - # kind - tests.extend([ - 
((None, '...'), TypeError), - (('_', '...'), ValueError), - (('-', '...'), ValueError), - (('spam', '...'), ValueError), - ]) - for kind in PreprocessorDirective.KINDS: - if kind in OtherDirective.KINDS: - continue - tests.append( - ((kind, None), ValueError)) - # text - for kind in OtherDirective.KINDS: - if kind in ('else', 'endif'): - tests.extend([ - # Any text is invalid. - ((kind, 'SPAM'), ValueError), - ((kind, '...'), ValueError), - ]) - else: - tests.extend([ - ((kind, None), TypeError), - # Any other text is valid. - ]) - for args, exctype in tests: - with self.subTest(args): - directive = OtherDirective(*args) - - with self.assertRaises(exctype): - directive.validate() diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_symbols/__init__.py b/Lib/test/test_tools/test_c_analyzer/test_c_symbols/__init__.py deleted file mode 100644 index bc502ef..0000000 --- a/Lib/test/test_tools/test_c_analyzer/test_c_symbols/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -import os.path -from test.support import load_package_tests - - -def load_tests(*args): - return load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_symbols/test_info.py b/Lib/test/test_tools/test_c_analyzer/test_c_symbols/test_info.py deleted file mode 100644 index e029dcf..0000000 --- a/Lib/test/test_tools/test_c_analyzer/test_c_symbols/test_info.py +++ /dev/null @@ -1,192 +0,0 @@ -import string -import unittest - -from ..util import PseudoStr, StrProxy, Object -from .. 
import tool_imports_for_tests -with tool_imports_for_tests(): - from c_analyzer_common.info import ID - from c_symbols.info import Symbol - - -class SymbolTests(unittest.TestCase): - - VALID_ARGS = ( - ID('x/y/z/spam.c', 'func', 'eggs'), - Symbol.KIND.VARIABLE, - False, - ) - VALID_KWARGS = dict(zip(Symbol._fields, VALID_ARGS)) - VALID_EXPECTED = VALID_ARGS - - def test_init_typical_binary_local(self): - id = ID(None, None, 'spam') - symbol = Symbol( - id=id, - kind=Symbol.KIND.VARIABLE, - external=False, - ) - - self.assertEqual(symbol, ( - id, - Symbol.KIND.VARIABLE, - False, - )) - - def test_init_typical_binary_global(self): - id = ID('Python/ceval.c', None, 'spam') - symbol = Symbol( - id=id, - kind=Symbol.KIND.VARIABLE, - external=False, - ) - - self.assertEqual(symbol, ( - id, - Symbol.KIND.VARIABLE, - False, - )) - - def test_init_coercion(self): - tests = [ - ('str subclass', - dict( - id=PseudoStr('eggs'), - kind=PseudoStr('variable'), - external=0, - ), - (ID(None, None, 'eggs'), - Symbol.KIND.VARIABLE, - False, - )), - ('with filename', - dict( - id=('x/y/z/spam.c', 'eggs'), - kind=PseudoStr('variable'), - external=0, - ), - (ID('x/y/z/spam.c', None, 'eggs'), - Symbol.KIND.VARIABLE, - False, - )), - ('non-str 1', - dict( - id=('a', 'b', 'c'), - kind=StrProxy('variable'), - external=0, - ), - (ID('a', 'b', 'c'), - Symbol.KIND.VARIABLE, - False, - )), - ('non-str 2', - dict( - id=('a', 'b', 'c'), - kind=Object(), - external=0, - ), - (ID('a', 'b', 'c'), - '', - False, - )), - ] - for summary, kwargs, expected in tests: - with self.subTest(summary): - symbol = Symbol(**kwargs) - - for field in Symbol._fields: - value = getattr(symbol, field) - if field == 'external': - self.assertIs(type(value), bool) - elif field == 'id': - self.assertIs(type(value), ID) - else: - self.assertIs(type(value), str) - self.assertEqual(tuple(symbol), expected) - - def test_init_all_missing(self): - id = ID(None, None, 'spam') - - symbol = Symbol(id) - - 
self.assertEqual(symbol, ( - id, - Symbol.KIND.VARIABLE, - None, - )) - - def test_fields(self): - id = ID('z', 'x', 'a') - - symbol = Symbol(id, 'b', False) - - self.assertEqual(symbol.id, id) - self.assertEqual(symbol.kind, 'b') - self.assertIs(symbol.external, False) - - def test___getattr__(self): - id = ID('z', 'x', 'a') - symbol = Symbol(id, 'b', False) - - filename = symbol.filename - funcname = symbol.funcname - name = symbol.name - - self.assertEqual(filename, 'z') - self.assertEqual(funcname, 'x') - self.assertEqual(name, 'a') - - def test_validate_typical(self): - id = ID('z', 'x', 'a') - - symbol = Symbol( - id=id, - kind=Symbol.KIND.VARIABLE, - external=False, - ) - - symbol.validate() # This does not fail. - - def test_validate_missing_field(self): - for field in Symbol._fields: - with self.subTest(field): - symbol = Symbol(**self.VALID_KWARGS) - symbol = symbol._replace(**{field: None}) - - with self.assertRaises(TypeError): - symbol.validate() - - def test_validate_bad_field(self): - badch = tuple(c for c in string.punctuation + string.digits) - notnames = ( - '1a', - 'a.b', - 'a-b', - '&a', - 'a++', - ) + badch - tests = [ - ('id', notnames), - ('kind', ('bogus',)), - ] - seen = set() - for field, invalid in tests: - for value in invalid: - if field != 'kind': - seen.add(value) - with self.subTest(f'{field}={value!r}'): - symbol = Symbol(**self.VALID_KWARGS) - symbol = symbol._replace(**{field: value}) - - with self.assertRaises(ValueError): - symbol.validate() - - for field, invalid in tests: - if field == 'kind': - continue - valid = seen - set(invalid) - for value in valid: - with self.subTest(f'{field}={value!r}'): - symbol = Symbol(**self.VALID_KWARGS) - symbol = symbol._replace(**{field: value}) - - symbol.validate() # This does not fail. 
diff --git a/Lib/test/test_tools/test_c_analyzer/test_common/__init__.py b/Lib/test/test_tools/test_c_analyzer/test_common/__init__.py new file mode 100644 index 0000000..bc502ef --- /dev/null +++ b/Lib/test/test_tools/test_c_analyzer/test_common/__init__.py @@ -0,0 +1,6 @@ +import os.path +from test.support import load_package_tests + + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_tools/test_c_analyzer/test_common/test_files.py b/Lib/test/test_tools/test_c_analyzer/test_common/test_files.py new file mode 100644 index 0000000..0c97d2a --- /dev/null +++ b/Lib/test/test_tools/test_c_analyzer/test_common/test_files.py @@ -0,0 +1,470 @@ +import os.path +import unittest + +from .. import tool_imports_for_tests +with tool_imports_for_tests(): + from c_analyzer.common.files import ( + iter_files, _walk_tree, glob_tree, + ) + + +def fixpath(filename): + return filename.replace('/', os.path.sep) + + +class IterFilesTests(unittest.TestCase): + + maxDiff = None + + _return_walk = None + + @property + def calls(self): + try: + return self._calls + except AttributeError: + self._calls = [] + return self._calls + + def set_files(self, *filesperroot): + roots = [] + result = [] + for root, files in filesperroot: + root = fixpath(root) + roots.append(root) + result.append([os.path.join(root, fixpath(f)) + for f in files]) + self._return_walk = result + return roots + + def _walk(self, root, *, suffix=None, walk=None): + self.calls.append(('_walk', (root, suffix, walk))) + return iter(self._return_walk.pop(0)) + + def _glob(self, root, *, suffix=None): + self.calls.append(('_glob', (root, suffix))) + return iter(self._return_walk.pop(0)) + + def test_typical(self): + dirnames = self.set_files( + ('spam', ['file1.c', 'file2.c']), + ('eggs', ['ham/file3.h']), + ) + suffixes = ('.c', '.h') + + files = list(iter_files(dirnames, suffixes, + _glob=self._glob, + _walk=self._walk)) + + self.assertEqual(files, [ + 
fixpath('spam/file1.c'), + fixpath('spam/file2.c'), + fixpath('eggs/ham/file3.h'), + ]) + self.assertEqual(self.calls, [ + ('_walk', ('spam', None, _walk_tree)), + ('_walk', ('eggs', None, _walk_tree)), + ]) + + def test_single_root(self): + self._return_walk = [ + [fixpath('spam/file1.c'), fixpath('spam/file2.c')], + ] + + files = list(iter_files('spam', '.c', + _glob=self._glob, + _walk=self._walk)) + + self.assertEqual(files, [ + fixpath('spam/file1.c'), + fixpath('spam/file2.c'), + ]) + self.assertEqual(self.calls, [ + ('_walk', ('spam', '.c', _walk_tree)), + ]) + + def test_one_root(self): + self._return_walk = [ + [fixpath('spam/file1.c'), fixpath('spam/file2.c')], + ] + + files = list(iter_files(['spam'], '.c', + _glob=self._glob, + _walk=self._walk)) + + self.assertEqual(files, [ + fixpath('spam/file1.c'), + fixpath('spam/file2.c'), + ]) + self.assertEqual(self.calls, [ + ('_walk', ('spam', '.c', _walk_tree)), + ]) + + def test_multiple_roots(self): + dirnames = self.set_files( + ('spam', ['file1.c', 'file2.c']), + ('eggs', ['ham/file3.c']), + ) + + files = list(iter_files(dirnames, '.c', + _glob=self._glob, + _walk=self._walk)) + + self.assertEqual(files, [ + fixpath('spam/file1.c'), + fixpath('spam/file2.c'), + fixpath('eggs/ham/file3.c'), + ]) + self.assertEqual(self.calls, [ + ('_walk', ('spam', '.c', _walk_tree)), + ('_walk', ('eggs', '.c', _walk_tree)), + ]) + + def test_no_roots(self): + files = list(iter_files([], '.c', + _glob=self._glob, + _walk=self._walk)) + + self.assertEqual(files, []) + self.assertEqual(self.calls, []) + + def test_single_suffix(self): + self._return_walk = [ + [fixpath('spam/file1.c'), + fixpath('spam/eggs/file3.c'), + ], + ] + + files = list(iter_files('spam', '.c', + _glob=self._glob, + _walk=self._walk)) + + self.assertEqual(files, [ + fixpath('spam/file1.c'), + fixpath('spam/eggs/file3.c'), + ]) + self.assertEqual(self.calls, [ + ('_walk', ('spam', '.c', _walk_tree)), + ]) + + def test_one_suffix(self): + 
self._return_walk = [ + [fixpath('spam/file1.c'), + fixpath('spam/file1.h'), + fixpath('spam/file1.o'), + fixpath('spam/eggs/file3.c'), + ], + ] + + files = list(iter_files('spam', ['.c'], + _glob=self._glob, + _walk=self._walk)) + + self.assertEqual(files, [ + fixpath('spam/file1.c'), + fixpath('spam/eggs/file3.c'), + ]) + self.assertEqual(self.calls, [ + ('_walk', ('spam', None, _walk_tree)), + ]) + + def test_multiple_suffixes(self): + self._return_walk = [ + [fixpath('spam/file1.c'), + fixpath('spam/file1.h'), + fixpath('spam/file1.o'), + fixpath('spam/eggs/file3.c'), + ], + ] + + files = list(iter_files('spam', ('.c', '.h'), + _glob=self._glob, + _walk=self._walk)) + + self.assertEqual(files, [ + fixpath('spam/file1.c'), + fixpath('spam/file1.h'), + fixpath('spam/eggs/file3.c'), + ]) + self.assertEqual(self.calls, [ + ('_walk', ('spam', None, _walk_tree)), + ]) + + def test_no_suffix(self): + expected = [fixpath('spam/file1.c'), + fixpath('spam/file1.h'), + fixpath('spam/file1.o'), + fixpath('spam/eggs/file3.c'), + ] + for suffix in (None, '', ()): + with self.subTest(suffix): + self.calls.clear() + self._return_walk = [list(expected)] + + files = list(iter_files('spam', suffix, + _glob=self._glob, + _walk=self._walk)) + + self.assertEqual(files, expected) + self.assertEqual(self.calls, [ + ('_walk', ('spam', suffix, _walk_tree)), + ]) + + def test_relparent(self): + dirnames = self.set_files( + ('/x/y/z/spam', ['file1.c', 'file2.c']), + ('/x/y/z/eggs', ['ham/file3.c']), + ) + + files = list(iter_files(dirnames, '.c', fixpath('/x/y'), + _glob=self._glob, + _walk=self._walk)) + + self.assertEqual(files, [ + fixpath('z/spam/file1.c'), + fixpath('z/spam/file2.c'), + fixpath('z/eggs/ham/file3.c'), + ]) + self.assertEqual(self.calls, [ + ('_walk', (fixpath('/x/y/z/spam'), '.c', _walk_tree)), + ('_walk', (fixpath('/x/y/z/eggs'), '.c', _walk_tree)), + ]) + + def test_glob(self): + dirnames = self.set_files( + ('spam', ['file1.c', 'file2.c']), + ('eggs', 
['ham/file3.c']), + ) + + files = list(iter_files(dirnames, '.c', + get_files=glob_tree, + _walk=self._walk, + _glob=self._glob)) + + self.assertEqual(files, [ + fixpath('spam/file1.c'), + fixpath('spam/file2.c'), + fixpath('eggs/ham/file3.c'), + ]) + self.assertEqual(self.calls, [ + ('_glob', ('spam', '.c')), + ('_glob', ('eggs', '.c')), + ]) + + + def test_alt_walk_func(self): + dirnames = self.set_files( + ('spam', ['file1.c', 'file2.c']), + ('eggs', ['ham/file3.c']), + ) + def get_files(root): + return None + + files = list(iter_files(dirnames, '.c', + get_files=get_files, + _walk=self._walk, + _glob=self._glob)) + + self.assertEqual(files, [ + fixpath('spam/file1.c'), + fixpath('spam/file2.c'), + fixpath('eggs/ham/file3.c'), + ]) + self.assertEqual(self.calls, [ + ('_walk', ('spam', '.c', get_files)), + ('_walk', ('eggs', '.c', get_files)), + ]) + + + + + + +# def test_no_dirnames(self): +# dirnames = [] +# filter_by_name = None +# +# files = list(iter_files(dirnames, filter_by_name, +# _walk=self._walk)) +# +# self.assertEqual(files, []) +# self.assertEqual(self.calls, []) +# +# def test_no_filter(self): +# self._return_walk = [ +# [('spam', (), ('file1', 'file2.c', 'file3.h', 'file4.o')), +# ], +# ] +# dirnames = [ +# 'spam', +# ] +# filter_by_name = None +# +# files = list(iter_files(dirnames, filter_by_name, +# _walk=self._walk)) +# +# self.assertEqual(files, [ +# fixpath('spam/file1'), +# fixpath('spam/file2.c'), +# fixpath('spam/file3.h'), +# fixpath('spam/file4.o'), +# ]) +# self.assertEqual(self.calls, [ +# ('_walk', ('spam',)), +# ]) +# +# def test_no_files(self): +# self._return_walk = [ +# [('spam', (), ()), +# ], +# [(fixpath('eggs/ham'), (), ()), +# ], +# ] +# dirnames = [ +# 'spam', +# fixpath('eggs/ham'), +# ] +# filter_by_name = None +# +# files = list(iter_files(dirnames, filter_by_name, +# _walk=self._walk)) +# +# self.assertEqual(files, []) +# self.assertEqual(self.calls, [ +# ('_walk', ('spam',)), +# ('_walk', (fixpath('eggs/ham'),)), +# ]) 
+# +# def test_tree(self): +# self._return_walk = [ +# [('spam', ('sub1', 'sub2', 'sub3'), ('file1',)), +# (fixpath('spam/sub1'), ('sub1sub1',), ('file2', 'file3')), +# (fixpath('spam/sub1/sub1sub1'), (), ('file4',)), +# (fixpath('spam/sub2'), (), ()), +# (fixpath('spam/sub3'), (), ('file5',)), +# ], +# [(fixpath('eggs/ham'), (), ('file6',)), +# ], +# ] +# dirnames = [ +# 'spam', +# fixpath('eggs/ham'), +# ] +# filter_by_name = None +# +# files = list(iter_files(dirnames, filter_by_name, +# _walk=self._walk)) +# +# self.assertEqual(files, [ +# fixpath('spam/file1'), +# fixpath('spam/sub1/file2'), +# fixpath('spam/sub1/file3'), +# fixpath('spam/sub1/sub1sub1/file4'), +# fixpath('spam/sub3/file5'), +# fixpath('eggs/ham/file6'), +# ]) +# self.assertEqual(self.calls, [ +# ('_walk', ('spam',)), +# ('_walk', (fixpath('eggs/ham'),)), +# ]) +# +# def test_filter_suffixes(self): +# self._return_walk = [ +# [('spam', (), ('file1', 'file2.c', 'file3.h', 'file4.o')), +# ], +# ] +# dirnames = [ +# 'spam', +# ] +# filter_by_name = ('.c', '.h') +# +# files = list(iter_files(dirnames, filter_by_name, +# _walk=self._walk)) +# +# self.assertEqual(files, [ +# fixpath('spam/file2.c'), +# fixpath('spam/file3.h'), +# ]) +# self.assertEqual(self.calls, [ +# ('_walk', ('spam',)), +# ]) +# +# def test_some_filtered(self): +# self._return_walk = [ +# [('spam', (), ('file1', 'file2', 'file3', 'file4')), +# ], +# ] +# dirnames = [ +# 'spam', +# ] +# def filter_by_name(filename, results=[False, True, False, True]): +# self.calls.append(('filter_by_name', (filename,))) +# return results.pop(0) +# +# files = list(iter_files(dirnames, filter_by_name, +# _walk=self._walk)) +# +# self.assertEqual(files, [ +# fixpath('spam/file2'), +# fixpath('spam/file4'), +# ]) +# self.assertEqual(self.calls, [ +# ('_walk', ('spam',)), +# ('filter_by_name', ('file1',)), +# ('filter_by_name', ('file2',)), +# ('filter_by_name', ('file3',)), +# ('filter_by_name', ('file4',)), +# ]) +# +# def test_none_filtered(self): 
+# self._return_walk = [ +# [('spam', (), ('file1', 'file2', 'file3', 'file4')), +# ], +# ] +# dirnames = [ +# 'spam', +# ] +# def filter_by_name(filename, results=[True, True, True, True]): +# self.calls.append(('filter_by_name', (filename,))) +# return results.pop(0) +# +# files = list(iter_files(dirnames, filter_by_name, +# _walk=self._walk)) +# +# self.assertEqual(files, [ +# fixpath('spam/file1'), +# fixpath('spam/file2'), +# fixpath('spam/file3'), +# fixpath('spam/file4'), +# ]) +# self.assertEqual(self.calls, [ +# ('_walk', ('spam',)), +# ('filter_by_name', ('file1',)), +# ('filter_by_name', ('file2',)), +# ('filter_by_name', ('file3',)), +# ('filter_by_name', ('file4',)), +# ]) +# +# def test_all_filtered(self): +# self._return_walk = [ +# [('spam', (), ('file1', 'file2', 'file3', 'file4')), +# ], +# ] +# dirnames = [ +# 'spam', +# ] +# def filter_by_name(filename, results=[False, False, False, False]): +# self.calls.append(('filter_by_name', (filename,))) +# return results.pop(0) +# +# files = list(iter_files(dirnames, filter_by_name, +# _walk=self._walk)) +# +# self.assertEqual(files, []) +# self.assertEqual(self.calls, [ +# ('_walk', ('spam',)), +# ('filter_by_name', ('file1',)), +# ('filter_by_name', ('file2',)), +# ('filter_by_name', ('file3',)), +# ('filter_by_name', ('file4',)), +# ]) diff --git a/Lib/test/test_tools/test_c_analyzer/test_common/test_info.py b/Lib/test/test_tools/test_c_analyzer/test_common/test_info.py new file mode 100644 index 0000000..69dbb58 --- /dev/null +++ b/Lib/test/test_tools/test_c_analyzer/test_common/test_info.py @@ -0,0 +1,197 @@ +import string +import unittest + +from ..util import PseudoStr, StrProxy, Object +from .. 
import tool_imports_for_tests +with tool_imports_for_tests(): + from c_analyzer.common.info import ( + UNKNOWN, + ID, + ) + + +class IDTests(unittest.TestCase): + + VALID_ARGS = ( + 'x/y/z/spam.c', + 'func', + 'eggs', + ) + VALID_KWARGS = dict(zip(ID._fields, VALID_ARGS)) + VALID_EXPECTED = VALID_ARGS + + def test_from_raw(self): + tests = [ + ('', None), + (None, None), + ('spam', (None, None, 'spam')), + (('spam',), (None, None, 'spam')), + (('x/y/z/spam.c', 'spam'), ('x/y/z/spam.c', None, 'spam')), + (self.VALID_ARGS, self.VALID_EXPECTED), + (self.VALID_KWARGS, self.VALID_EXPECTED), + ] + for raw, expected in tests: + with self.subTest(raw): + id = ID.from_raw(raw) + + self.assertEqual(id, expected) + + def test_minimal(self): + id = ID( + filename=None, + funcname=None, + name='eggs', + ) + + self.assertEqual(id, ( + None, + None, + 'eggs', + )) + + def test_init_typical_global(self): + id = ID( + filename='x/y/z/spam.c', + funcname=None, + name='eggs', + ) + + self.assertEqual(id, ( + 'x/y/z/spam.c', + None, + 'eggs', + )) + + def test_init_typical_local(self): + id = ID( + filename='x/y/z/spam.c', + funcname='func', + name='eggs', + ) + + self.assertEqual(id, ( + 'x/y/z/spam.c', + 'func', + 'eggs', + )) + + def test_init_all_missing(self): + for value in ('', None): + with self.subTest(repr(value)): + id = ID( + filename=value, + funcname=value, + name=value, + ) + + self.assertEqual(id, ( + None, + None, + None, + )) + + def test_init_all_coerced(self): + tests = [ + ('str subclass', + dict( + filename=PseudoStr('x/y/z/spam.c'), + funcname=PseudoStr('func'), + name=PseudoStr('eggs'), + ), + ('x/y/z/spam.c', + 'func', + 'eggs', + )), + ('non-str', + dict( + filename=StrProxy('x/y/z/spam.c'), + funcname=Object(), + name=('a', 'b', 'c'), + ), + ('x/y/z/spam.c', + '', + "('a', 'b', 'c')", + )), + ] + for summary, kwargs, expected in tests: + with self.subTest(summary): + id = ID(**kwargs) + + for field in ID._fields: + value = getattr(id, field) + 
self.assertIs(type(value), str) + self.assertEqual(tuple(id), expected) + + def test_iterable(self): + id = ID(**self.VALID_KWARGS) + + filename, funcname, name = id + + values = (filename, funcname, name) + for value, expected in zip(values, self.VALID_EXPECTED): + self.assertEqual(value, expected) + + def test_fields(self): + id = ID('a', 'b', 'z') + + self.assertEqual(id.filename, 'a') + self.assertEqual(id.funcname, 'b') + self.assertEqual(id.name, 'z') + + def test_validate_typical(self): + id = ID( + filename='x/y/z/spam.c', + funcname='func', + name='eggs', + ) + + id.validate() # This does not fail. + + def test_validate_missing_field(self): + for field in ID._fields: + with self.subTest(field): + id = ID(**self.VALID_KWARGS) + id = id._replace(**{field: None}) + + if field == 'funcname': + id.validate() # The field can be missing (not set). + id = id._replace(filename=None) + id.validate() # Both fields can be missing (not set). + continue + + with self.assertRaises(TypeError): + id.validate() + + def test_validate_bad_field(self): + badch = tuple(c for c in string.punctuation + string.digits) + notnames = ( + '1a', + 'a.b', + 'a-b', + '&a', + 'a++', + ) + badch + tests = [ + ('filename', ()), # Any non-empty str is okay. + ('funcname', notnames), + ('name', notnames), + ] + seen = set() + for field, invalid in tests: + for value in invalid: + seen.add(value) + with self.subTest(f'{field}={value!r}'): + id = ID(**self.VALID_KWARGS) + id = id._replace(**{field: value}) + + with self.assertRaises(ValueError): + id.validate() + + for field, invalid in tests: + valid = seen - set(invalid) + for value in valid: + with self.subTest(f'{field}={value!r}'): + id = ID(**self.VALID_KWARGS) + id = id._replace(**{field: value}) + + id.validate() # This does not fail. 
diff --git a/Lib/test/test_tools/test_c_analyzer/test_common/test_show.py b/Lib/test/test_tools/test_c_analyzer/test_common/test_show.py new file mode 100644 index 0000000..91ca2f3 --- /dev/null +++ b/Lib/test/test_tools/test_c_analyzer/test_common/test_show.py @@ -0,0 +1,54 @@ +import unittest + +from .. import tool_imports_for_tests +with tool_imports_for_tests(): + from c_analyzer.variables import info + from c_analyzer.common.show import ( + basic, + ) + + +TYPICAL = [ + info.Variable.from_parts('src1/spam.c', None, 'var1', 'static const char *'), + info.Variable.from_parts('src1/spam.c', 'ham', 'initialized', 'static int'), + info.Variable.from_parts('src1/spam.c', None, 'var2', 'static PyObject *'), + info.Variable.from_parts('src1/eggs.c', 'tofu', 'ready', 'static int'), + info.Variable.from_parts('src1/spam.c', None, 'freelist', 'static (PyTupleObject *)[10]'), + info.Variable.from_parts('src1/sub/ham.c', None, 'var1', 'static const char const *'), + info.Variable.from_parts('src2/jam.c', None, 'var1', 'static int'), + info.Variable.from_parts('src2/jam.c', None, 'var2', 'static MyObject *'), + info.Variable.from_parts('Include/spam.h', None, 'data', 'static const int'), + ] + + +class BasicTests(unittest.TestCase): + + maxDiff = None + + def setUp(self): + self.lines = [] + + def print(self, line): + self.lines.append(line) + + def test_typical(self): + basic(TYPICAL, + _print=self.print) + + self.assertEqual(self.lines, [ + 'src1/spam.c:var1 static const char *', + 'src1/spam.c:ham():initialized static int', + 'src1/spam.c:var2 static PyObject *', + 'src1/eggs.c:tofu():ready static int', + 'src1/spam.c:freelist static (PyTupleObject *)[10]', + 'src1/sub/ham.c:var1 static const char const *', + 'src2/jam.c:var1 static int', + 'src2/jam.c:var2 static MyObject *', + 'Include/spam.h:data static const int', + ]) + + def test_no_rows(self): + basic([], + _print=self.print) + + self.assertEqual(self.lines, []) diff --git 
a/Lib/test/test_tools/test_c_analyzer/test_cpython/__init__.py b/Lib/test/test_tools/test_c_analyzer/test_cpython/__init__.py new file mode 100644 index 0000000..bc502ef --- /dev/null +++ b/Lib/test/test_tools/test_c_analyzer/test_cpython/__init__.py @@ -0,0 +1,6 @@ +import os.path +from test.support import load_package_tests + + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_tools/test_c_analyzer/test_cpython/test___main__.py b/Lib/test/test_tools/test_c_analyzer/test_cpython/test___main__.py new file mode 100644 index 0000000..6d69ed7 --- /dev/null +++ b/Lib/test/test_tools/test_c_analyzer/test_cpython/test___main__.py @@ -0,0 +1,296 @@ +import sys +import unittest + +from .. import tool_imports_for_tests +with tool_imports_for_tests(): + from c_analyzer.variables import info + from cpython import SOURCE_DIRS + from cpython.supported import IGNORED_FILE + from cpython.known import DATA_FILE as KNOWN_FILE + from cpython.__main__ import ( + cmd_check, cmd_show, parse_args, main, + ) + + +TYPICAL = [ + (info.Variable.from_parts('src1/spam.c', None, 'var1', 'const char *'), + True, + ), + (info.Variable.from_parts('src1/spam.c', 'ham', 'initialized', 'int'), + True, + ), + (info.Variable.from_parts('src1/spam.c', None, 'var2', 'PyObject *'), + False, + ), + (info.Variable.from_parts('src1/eggs.c', 'tofu', 'ready', 'int'), + True, + ), + (info.Variable.from_parts('src1/spam.c', None, 'freelist', '(PyTupleObject *)[10]'), + False, + ), + (info.Variable.from_parts('src1/sub/ham.c', None, 'var1', 'const char const *'), + True, + ), + (info.Variable.from_parts('src2/jam.c', None, 'var1', 'int'), + True, + ), + (info.Variable.from_parts('src2/jam.c', None, 'var2', 'MyObject *'), + False, + ), + (info.Variable.from_parts('Include/spam.h', None, 'data', 'const int'), + True, + ), + ] + + +class CMDBase(unittest.TestCase): + + maxDiff = None + +# _return_known_from_file = None +# _return_ignored_from_file = 
None + _return_find = () + + @property + def calls(self): + try: + return self._calls + except AttributeError: + self._calls = [] + return self._calls + +# def _known_from_file(self, *args): +# self.calls.append(('_known_from_file', args)) +# return self._return_known_from_file or {} +# +# def _ignored_from_file(self, *args): +# self.calls.append(('_ignored_from_file', args)) +# return self._return_ignored_from_file or {} + + def _find(self, known, ignored, skip_objects=False): + self.calls.append(('_find', (known, ignored, skip_objects))) + return self._return_find + + def _show(self, *args): + self.calls.append(('_show', args)) + + def _print(self, *args): + self.calls.append(('_print', args)) + + +class CheckTests(CMDBase): + + def test_defaults(self): + self._return_find = [] + + cmd_check('check', + _find=self._find, + _show=self._show, + _print=self._print, + ) + + self.assertEqual( + self.calls[0], + ('_find', (KNOWN_FILE, IGNORED_FILE, False)), + ) + + def test_all_supported(self): + self._return_find = [(v, s) for v, s in TYPICAL if s] + dirs = ['src1', 'src2', 'Include'] + + cmd_check('check', + known='known.tsv', + ignored='ignored.tsv', + _find=self._find, + _show=self._show, + _print=self._print, + ) + + self.assertEqual(self.calls, [ + ('_find', ('known.tsv', 'ignored.tsv', False)), + #('_print', ('okay',)), + ]) + + def test_some_unsupported(self): + self._return_find = TYPICAL + + with self.assertRaises(SystemExit) as cm: + cmd_check('check', + known='known.tsv', + ignored='ignored.tsv', + _find=self._find, + _show=self._show, + _print=self._print, + ) + + unsupported = [v for v, s in TYPICAL if not s] + self.assertEqual(self.calls, [ + ('_find', ('known.tsv', 'ignored.tsv', False)), + ('_print', ('ERROR: found unsupported global variables',)), + ('_print', ()), + ('_show', (sorted(unsupported),)), + ('_print', (' (3 total)',)), + ]) + self.assertEqual(cm.exception.code, 1) + + +class ShowTests(CMDBase): + + def test_defaults(self): + 
self._return_find = [] + + cmd_show('show', + _find=self._find, + _show=self._show, + _print=self._print, + ) + + self.assertEqual( + self.calls[0], + ('_find', (KNOWN_FILE, IGNORED_FILE, False)), + ) + + def test_typical(self): + self._return_find = TYPICAL + + cmd_show('show', + known='known.tsv', + ignored='ignored.tsv', + _find=self._find, + _show=self._show, + _print=self._print, + ) + + supported = [v for v, s in TYPICAL if s] + unsupported = [v for v, s in TYPICAL if not s] + self.assertEqual(self.calls, [ + ('_find', ('known.tsv', 'ignored.tsv', False)), + ('_print', ('supported:',)), + ('_print', ('----------',)), + ('_show', (sorted(supported),)), + ('_print', (' (6 total)',)), + ('_print', ()), + ('_print', ('unsupported:',)), + ('_print', ('------------',)), + ('_show', (sorted(unsupported),)), + ('_print', (' (3 total)',)), + ]) + + +class ParseArgsTests(unittest.TestCase): + + maxDiff = None + + def test_no_args(self): + self.errmsg = None + def fail(msg): + self.errmsg = msg + sys.exit(msg) + + with self.assertRaises(SystemExit): + parse_args('cg', [], _fail=fail) + + self.assertEqual(self.errmsg, 'missing command') + + def test_check_no_args(self): + cmd, cmdkwargs = parse_args('cg', [ + 'check', + ]) + + self.assertEqual(cmd, 'check') + self.assertEqual(cmdkwargs, { + 'ignored': IGNORED_FILE, + 'known': KNOWN_FILE, + #'dirs': SOURCE_DIRS, + }) + + def test_check_full_args(self): + cmd, cmdkwargs = parse_args('cg', [ + 'check', + '--ignored', 'spam.tsv', + '--known', 'eggs.tsv', + #'dir1', + #'dir2', + #'dir3', + ]) + + self.assertEqual(cmd, 'check') + self.assertEqual(cmdkwargs, { + 'ignored': 'spam.tsv', + 'known': 'eggs.tsv', + #'dirs': ['dir1', 'dir2', 'dir3'] + }) + + def test_show_no_args(self): + cmd, cmdkwargs = parse_args('cg', [ + 'show', + ]) + + self.assertEqual(cmd, 'show') + self.assertEqual(cmdkwargs, { + 'ignored': IGNORED_FILE, + 'known': KNOWN_FILE, + #'dirs': SOURCE_DIRS, + 'skip_objects': False, + }) + + def 
test_show_full_args(self): + cmd, cmdkwargs = parse_args('cg', [ + 'show', + '--ignored', 'spam.tsv', + '--known', 'eggs.tsv', + #'dir1', + #'dir2', + #'dir3', + ]) + + self.assertEqual(cmd, 'show') + self.assertEqual(cmdkwargs, { + 'ignored': 'spam.tsv', + 'known': 'eggs.tsv', + #'dirs': ['dir1', 'dir2', 'dir3'], + 'skip_objects': False, + }) + + +def new_stub_commands(*names): + calls = [] + def cmdfunc(cmd, **kwargs): + calls.append((cmd, kwargs)) + commands = {name: cmdfunc for name in names} + return commands, calls + + +class MainTests(unittest.TestCase): + + def test_no_command(self): + with self.assertRaises(ValueError): + main(None, {}) + + def test_check(self): + commands, calls = new_stub_commands('check', 'show') + + cmdkwargs = { + 'ignored': 'spam.tsv', + 'known': 'eggs.tsv', + 'dirs': ['dir1', 'dir2', 'dir3'], + } + main('check', cmdkwargs, _COMMANDS=commands) + + self.assertEqual(calls, [ + ('check', cmdkwargs), + ]) + + def test_show(self): + commands, calls = new_stub_commands('check', 'show') + + cmdkwargs = { + 'ignored': 'spam.tsv', + 'known': 'eggs.tsv', + 'dirs': ['dir1', 'dir2', 'dir3'], + } + main('show', cmdkwargs, _COMMANDS=commands) + + self.assertEqual(calls, [ + ('show', cmdkwargs), + ]) diff --git a/Lib/test/test_tools/test_c_analyzer/test_cpython/test_functional.py b/Lib/test/test_tools/test_c_analyzer/test_cpython/test_functional.py new file mode 100644 index 0000000..9279790 --- /dev/null +++ b/Lib/test/test_tools/test_c_analyzer/test_cpython/test_functional.py @@ -0,0 +1,34 @@ +import unittest + +from .. import tool_imports_for_tests +with tool_imports_for_tests(): + pass + + +class SelfCheckTests(unittest.TestCase): + + @unittest.expectedFailure + def test_known(self): + # Make sure known macros & vartypes aren't hiding unknown local types. + # XXX finish! + raise NotImplementedError + + @unittest.expectedFailure + def test_compare_nm_results(self): + # Make sure the "show" results match the statics found by "nm" command. 
+ # XXX Skip if "nm" is not available. + # XXX finish! + raise NotImplementedError + + +class DummySourceTests(unittest.TestCase): + + @unittest.expectedFailure + def test_check(self): + # XXX finish! + raise NotImplementedError + + @unittest.expectedFailure + def test_show(self): + # XXX finish! + raise NotImplementedError diff --git a/Lib/test/test_tools/test_c_analyzer/test_cpython/test_supported.py b/Lib/test/test_tools/test_c_analyzer/test_cpython/test_supported.py new file mode 100644 index 0000000..a244b97 --- /dev/null +++ b/Lib/test/test_tools/test_c_analyzer/test_cpython/test_supported.py @@ -0,0 +1,98 @@ +import re +import textwrap +import unittest + +from .. import tool_imports_for_tests +with tool_imports_for_tests(): + from c_analyzer.common.info import ID + from c_analyzer.variables.info import Variable + from cpython.supported import ( + is_supported, ignored_from_file, + ) + + +class IsSupportedTests(unittest.TestCase): + + @unittest.expectedFailure + def test_supported(self): + statics = [ + Variable('src1/spam.c', None, 'var1', 'const char *'), + Variable('src1/spam.c', None, 'var1', 'int'), + ] + for static in statics: + with self.subTest(static): + result = is_supported(static) + + self.assertTrue(result) + + @unittest.expectedFailure + def test_not_supported(self): + statics = [ + Variable('src1/spam.c', None, 'var1', 'PyObject *'), + Variable('src1/spam.c', None, 'var1', 'PyObject[10]'), + ] + for static in statics: + with self.subTest(static): + result = is_supported(static) + + self.assertFalse(result) + + +class IgnoredFromFileTests(unittest.TestCase): + + maxDiff = None + + _return_read_tsv = () + + @property + def calls(self): + try: + return self._calls + except AttributeError: + self._calls = [] + return self._calls + + def _read_tsv(self, *args): + self.calls.append(('_read_tsv', args)) + return self._return_read_tsv + + def test_typical(self): + lines = textwrap.dedent(''' + filename funcname name kind reason + file1.c - var1 
variable ... + file1.c func1 local1 variable | + file1.c - var2 variable ??? + file1.c func2 local2 variable | + file2.c - var1 variable reasons + ''').strip().splitlines() + lines = [re.sub(r'\s{1,8}', '\t', line, 4).replace('|', '') + for line in lines] + self._return_read_tsv = [tuple(v.strip() for v in line.split('\t')) + for line in lines[1:]] + + ignored = ignored_from_file('spam.c', _read_tsv=self._read_tsv) + + self.assertEqual(ignored, { + 'variables': { + ID('file1.c', '', 'var1'): '...', + ID('file1.c', 'func1', 'local1'): '', + ID('file1.c', '', 'var2'): '???', + ID('file1.c', 'func2', 'local2'): '', + ID('file2.c', '', 'var1'): 'reasons', + }, + }) + self.assertEqual(self.calls, [ + ('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\treason')), + ]) + + def test_empty(self): + self._return_read_tsv = [] + + ignored = ignored_from_file('spam.c', _read_tsv=self._read_tsv) + + self.assertEqual(ignored, { + 'variables': {}, + }) + self.assertEqual(self.calls, [ + ('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\treason')), + ]) diff --git a/Lib/test/test_tools/test_c_analyzer/test_parser/__init__.py b/Lib/test/test_tools/test_c_analyzer/test_parser/__init__.py new file mode 100644 index 0000000..bc502ef --- /dev/null +++ b/Lib/test/test_tools/test_c_analyzer/test_parser/__init__.py @@ -0,0 +1,6 @@ +import os.path +from test.support import load_package_tests + + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_tools/test_c_analyzer/test_parser/test_declarations.py b/Lib/test/test_tools/test_c_analyzer/test_parser/test_declarations.py new file mode 100644 index 0000000..674fcb1 --- /dev/null +++ b/Lib/test/test_tools/test_c_analyzer/test_parser/test_declarations.py @@ -0,0 +1,795 @@ +import textwrap +import unittest + +from .. 
import tool_imports_for_tests +with tool_imports_for_tests(): + from c_analyzer.parser.declarations import ( + iter_global_declarations, iter_local_statements, + parse_func, _parse_var, parse_compound, + iter_variables, + ) + + +class TestCaseBase(unittest.TestCase): + + maxDiff = None + + @property + def calls(self): + try: + return self._calls + except AttributeError: + self._calls = [] + return self._calls + + +class IterGlobalDeclarationsTests(TestCaseBase): + + def test_functions(self): + tests = [ + (textwrap.dedent(''' + void func1() { + return; + } + '''), + textwrap.dedent(''' + void func1() { + return; + } + ''').strip(), + ), + (textwrap.dedent(''' + static unsigned int * _func1( + const char *arg1, + int *arg2 + long long arg3 + ) + { + return _do_something(arg1, arg2, arg3); + } + '''), + textwrap.dedent(''' + static unsigned int * _func1( const char *arg1, int *arg2 long long arg3 ) { + return _do_something(arg1, arg2, arg3); + } + ''').strip(), + ), + (textwrap.dedent(''' + static PyObject * + _func1(const char *arg1, PyObject *arg2) + { + static int initialized = 0; + if (!initialized) { + initialized = 1; + _init(arg1); + } + + PyObject *result = _do_something(arg1, arg2); + Py_INCREF(result); + return result; + } + '''), + textwrap.dedent(''' + static PyObject * _func1(const char *arg1, PyObject *arg2) { + static int initialized = 0; + if (!initialized) { + initialized = 1; + _init(arg1); + } + PyObject *result = _do_something(arg1, arg2); + Py_INCREF(result); + return result; + } + ''').strip(), + ), + ] + for lines, expected in tests: + body = textwrap.dedent( + expected.partition('{')[2].rpartition('}')[0] + ).strip() + expected = (expected, body) + with self.subTest(lines): + lines = lines.splitlines() + + stmts = list(iter_global_declarations(lines)) + + self.assertEqual(stmts, [expected]) + + @unittest.expectedFailure + def test_declarations(self): + tests = [ + 'int spam;', + 'long long spam;', + 'static const int const *spam;', + 'int 
spam;', + 'typedef int myint;', + 'typedef PyObject * (*unaryfunc)(PyObject *);', + # typedef struct + # inline struct + # enum + # inline enum + ] + for text in tests: + expected = (text, + ' '.join(l.strip() for l in text.splitlines())) + with self.subTest(lines): + lines = lines.splitlines() + + stmts = list(iter_global_declarations(lines)) + + self.assertEqual(stmts, [expected]) + + @unittest.expectedFailure + def test_declaration_multiple_vars(self): + lines = ['static const int const *spam, *ham=NULL, eggs = 3;'] + + stmts = list(iter_global_declarations(lines)) + + self.assertEqual(stmts, [ + ('static const int const *spam;', None), + ('static const int *ham=NULL;', None), + ('static const int eggs = 3;', None), + ]) + + def test_mixed(self): + lines = textwrap.dedent(''' + int spam; + static const char const *eggs; + + PyObject * start(void) { + static int initialized = 0; + if (initialized) { + initialized = 1; + init(); + } + return _start(); + } + + char* ham; + + static int stop(char *reason) { + ham = reason; + return _stop(); + } + ''').splitlines() + expected = [ + (textwrap.dedent(''' + PyObject * start(void) { + static int initialized = 0; + if (initialized) { + initialized = 1; + init(); + } + return _start(); + } + ''').strip(), + textwrap.dedent(''' + static int initialized = 0; + if (initialized) { + initialized = 1; + init(); + } + return _start(); + ''').strip(), + ), + (textwrap.dedent(''' + static int stop(char *reason) { + ham = reason; + return _stop(); + } + ''').strip(), + textwrap.dedent(''' + ham = reason; + return _stop(); + ''').strip(), + ), + ] + + stmts = list(iter_global_declarations(lines)) + + self.assertEqual(stmts, expected) + #self.assertEqual([stmt for stmt, _ in stmts], + # [stmt for stmt, _ in expected]) + #self.assertEqual([body for _, body in stmts], + # [body for _, body in expected]) + + def test_no_statements(self): + lines = [] + + stmts = list(iter_global_declarations(lines)) + + self.assertEqual(stmts, []) + + 
def test_bogus(self): + tests = [ + (textwrap.dedent(''' + int spam; + static const char const *eggs; + + PyObject * start(void) { + static int initialized = 0; + if (initialized) { + initialized = 1; + init(); + } + return _start(); + } + + char* ham; + + static int _stop(void) { + // missing closing bracket + + static int stop(char *reason) { + ham = reason; + return _stop(); + } + '''), + [(textwrap.dedent(''' + PyObject * start(void) { + static int initialized = 0; + if (initialized) { + initialized = 1; + init(); + } + return _start(); + } + ''').strip(), + textwrap.dedent(''' + static int initialized = 0; + if (initialized) { + initialized = 1; + init(); + } + return _start(); + ''').strip(), + ), + # Neither "stop()" nor "_stop()" are here. + ], + ), + ] + for lines, expected in tests: + with self.subTest(lines): + lines = lines.splitlines() + + stmts = list(iter_global_declarations(lines)) + + self.assertEqual(stmts, expected) + #self.assertEqual([stmt for stmt, _ in stmts], + # [stmt for stmt, _ in expected]) + #self.assertEqual([body for _, body in stmts], + # [body for _, body in expected]) + + def test_ignore_comments(self): + tests = [ + ('// msg', None), + ('// int stmt;', None), + (' // ... ', None), + ('// /*', None), + ('/* int stmt; */', None), + (""" + /** + * ... + * int stmt; + */ + """, None), + ] + for lines, expected in tests: + with self.subTest(lines): + lines = lines.splitlines() + + stmts = list(iter_global_declarations(lines)) + + self.assertEqual(stmts, [expected] if expected else []) + + +class IterLocalStatementsTests(TestCaseBase): + + def test_vars(self): + tests = [ + # POTS + 'int spam;', + 'unsigned int spam;', + 'char spam;', + 'float spam;', + + # typedefs + 'uint spam;', + 'MyType spam;', + + # complex + 'struct myspam spam;', + 'union choice spam;', + # inline struct + # inline union + # enum? 
+ ] + # pointers + tests.extend([ + # POTS + 'int * spam;', + 'unsigned int * spam;', + 'char *spam;', + 'char const *spam = "spamspamspam...";', + # typedefs + 'MyType *spam;', + # complex + 'struct myspam *spam;', + 'union choice *spam;', + # packed with details + 'const char const *spam;', + # void pointer + 'void *data = NULL;', + # function pointers + 'int (* func)(char *arg1);', + 'char * (* func)(void);', + ]) + # storage class + tests.extend([ + 'static int spam;', + 'extern int spam;', + 'static unsigned int spam;', + 'static struct myspam spam;', + ]) + # type qualifier + tests.extend([ + 'const int spam;', + 'const unsigned int spam;', + 'const struct myspam spam;', + ]) + # combined + tests.extend([ + 'const char *spam = eggs;', + 'static const char const *spam = "spamspamspam...";', + 'extern const char const *spam;', + 'static void *data = NULL;', + 'static int (const * func)(char *arg1) = func1;', + 'static char * (* func)(void);', + ]) + for line in tests: + expected = line + with self.subTest(line): + stmts = list(iter_local_statements([line])) + + self.assertEqual(stmts, [(expected, None)]) + + @unittest.expectedFailure + def test_vars_multiline_var(self): + lines = textwrap.dedent(''' + PyObject * + spam + = NULL; + ''').splitlines() + expected = 'PyObject * spam = NULL;' + + stmts = list(iter_local_statements(lines)) + + self.assertEqual(stmts, [(expected, None)]) + + @unittest.expectedFailure + def test_declaration_multiple_vars(self): + lines = ['static const int const *spam, *ham=NULL, ham2[]={1, 2, 3}, ham3[2]={1, 2}, eggs = 3;'] + + stmts = list(iter_global_declarations(lines)) + + self.assertEqual(stmts, [ + ('static const int const *spam;', None), + ('static const int *ham=NULL;', None), + ('static const int ham[]={1, 2, 3};', None), + ('static const int ham[2]={1, 2};', None), + ('static const int eggs = 3;', None), + ]) + + @unittest.expectedFailure + def test_other_simple(self): + raise NotImplementedError + + @unittest.expectedFailure 
+ def test_compound(self): + raise NotImplementedError + + @unittest.expectedFailure + def test_mixed(self): + raise NotImplementedError + + def test_no_statements(self): + lines = [] + + stmts = list(iter_local_statements(lines)) + + self.assertEqual(stmts, []) + + @unittest.expectedFailure + def test_bogus(self): + raise NotImplementedError + + def test_ignore_comments(self): + tests = [ + ('// msg', None), + ('// int stmt;', None), + (' // ... ', None), + ('// /*', None), + ('/* int stmt; */', None), + (""" + /** + * ... + * int stmt; + */ + """, None), + # mixed with statements + ('int stmt; // ...', ('int stmt;', None)), + ( 'int stmt; /* ... */', ('int stmt;', None)), + ( '/* ... */ int stmt;', ('int stmt;', None)), + ] + for lines, expected in tests: + with self.subTest(lines): + lines = lines.splitlines() + + stmts = list(iter_local_statements(lines)) + + self.assertEqual(stmts, [expected] if expected else []) + + +class ParseFuncTests(TestCaseBase): + + def test_typical(self): + tests = [ + ('PyObject *\nspam(char *a)\n{\nreturn _spam(a);\n}', + 'return _spam(a);', + ('spam', 'PyObject * spam(char *a)'), + ), + ] + for stmt, body, expected in tests: + with self.subTest(stmt): + name, signature = parse_func(stmt, body) + + self.assertEqual((name, signature), expected) + + +class ParseVarTests(TestCaseBase): + + def test_typical(self): + tests = [ + # POTS + ('int spam;', ('spam', 'int')), + ('unsigned int spam;', ('spam', 'unsigned int')), + ('char spam;', ('spam', 'char')), + ('float spam;', ('spam', 'float')), + + # typedefs + ('uint spam;', ('spam', 'uint')), + ('MyType spam;', ('spam', 'MyType')), + + # complex + ('struct myspam spam;', ('spam', 'struct myspam')), + ('union choice spam;', ('spam', 'union choice')), + # inline struct + # inline union + # enum? 
+ ] + # pointers + tests.extend([ + # POTS + ('int * spam;', ('spam', 'int *')), + ('unsigned int * spam;', ('spam', 'unsigned int *')), + ('char *spam;', ('spam', 'char *')), + ('char const *spam = "spamspamspam...";', ('spam', 'char const *')), + # typedefs + ('MyType *spam;', ('spam', 'MyType *')), + # complex + ('struct myspam *spam;', ('spam', 'struct myspam *')), + ('union choice *spam;', ('spam', 'union choice *')), + # packed with details + ('const char const *spam;', ('spam', 'const char const *')), + # void pointer + ('void *data = NULL;', ('data', 'void *')), + # function pointers + ('int (* func)(char *);', ('func', 'int (*)(char *)')), + ('char * (* func)(void);', ('func', 'char * (*)(void)')), + ]) + # storage class + tests.extend([ + ('static int spam;', ('spam', 'static int')), + ('extern int spam;', ('spam', 'extern int')), + ('static unsigned int spam;', ('spam', 'static unsigned int')), + ('static struct myspam spam;', ('spam', 'static struct myspam')), + ]) + # type qualifier + tests.extend([ + ('const int spam;', ('spam', 'const int')), + ('const unsigned int spam;', ('spam', 'const unsigned int')), + ('const struct myspam spam;', ('spam', 'const struct myspam')), + ]) + # combined + tests.extend([ + ('const char *spam = eggs;', ('spam', 'const char *')), + ('static const char const *spam = "spamspamspam...";', + ('spam', 'static const char const *')), + ('extern const char const *spam;', + ('spam', 'extern const char const *')), + ('static void *data = NULL;', ('data', 'static void *')), + ('static int (const * func)(char *) = func1;', + ('func', 'static int (const *)(char *)')), + ('static char * (* func)(void);', + ('func', 'static char * (*)(void)')), + ]) + for stmt, expected in tests: + with self.subTest(stmt): + name, vartype = _parse_var(stmt) + + self.assertEqual((name, vartype), expected) + + +@unittest.skip('not finished') +class ParseCompoundTests(TestCaseBase): + + def test_typical(self): + headers, bodies = parse_compound(stmt, 
blocks) + ... + + +class IterVariablesTests(TestCaseBase): + + _return_iter_source_lines = None + _return_iter_global = None + _return_iter_local = None + _return_parse_func = None + _return_parse_var = None + _return_parse_compound = None + + def _iter_source_lines(self, filename): + self.calls.append( + ('_iter_source_lines', (filename,))) + return self._return_iter_source_lines.splitlines() + + def _iter_global(self, lines): + self.calls.append( + ('_iter_global', (lines,))) + try: + return self._return_iter_global.pop(0) + except IndexError: + return ('???', None) + + def _iter_local(self, lines): + self.calls.append( + ('_iter_local', (lines,))) + try: + return self._return_iter_local.pop(0) + except IndexError: + return ('???', None) + + def _parse_func(self, stmt, body): + self.calls.append( + ('_parse_func', (stmt, body))) + try: + return self._return_parse_func.pop(0) + except IndexError: + return ('???', '???') + + def _parse_var(self, lines): + self.calls.append( + ('_parse_var', (lines,))) + try: + return self._return_parse_var.pop(0) + except IndexError: + return ('???', '???') + + def _parse_compound(self, stmt, blocks): + self.calls.append( + ('_parse_compound', (stmt, blocks))) + try: + return self._return_parse_compound.pop(0) + except IndexError: + return (['???'], ['???']) + + def test_empty_file(self): + self._return_iter_source_lines = '' + self._return_iter_global = [ + [], + ] + self._return_parse_func = None + self._return_parse_var = None + self._return_parse_compound = None + + srcvars = list(iter_variables('spam.c', + _iter_source_lines=self._iter_source_lines, + _iter_global=self._iter_global, + _iter_local=self._iter_local, + _parse_func=self._parse_func, + _parse_var=self._parse_var, + _parse_compound=self._parse_compound, + )) + + self.assertEqual(srcvars, []) + self.assertEqual(self.calls, [ + ('_iter_source_lines', ('spam.c',)), + ('_iter_global', ([],)), + ]) + + def test_no_statements(self): + content = textwrap.dedent(''' + ... 
+ ''') + self._return_iter_source_lines = content + self._return_iter_global = [ + [], + ] + self._return_parse_func = None + self._return_parse_var = None + self._return_parse_compound = None + + srcvars = list(iter_variables('spam.c', + _iter_source_lines=self._iter_source_lines, + _iter_global=self._iter_global, + _iter_local=self._iter_local, + _parse_func=self._parse_func, + _parse_var=self._parse_var, + _parse_compound=self._parse_compound, + )) + + self.assertEqual(srcvars, []) + self.assertEqual(self.calls, [ + ('_iter_source_lines', ('spam.c',)), + ('_iter_global', (content.splitlines(),)), + ]) + + def test_typical(self): + content = textwrap.dedent(''' + ... + ''') + self._return_iter_source_lines = content + self._return_iter_global = [ + [('', None), # var1 + ('', None), # non-var + ('', None), # var2 + ('', ''), # func1 + ('', None), # var4 + ], + ] + self._return_iter_local = [ + # func1 + [('', None), # var3 + ('', [('
', '')]), # if + ('', None), # non-var + ], + # if + [('', None), # var2 ("collision" with global var) + ], + ] + self._return_parse_func = [ + ('func1', ''), + ] + self._return_parse_var = [ + ('var1', ''), + (None, None), + ('var2', ''), + ('var3', ''), + ('var2', ''), + ('var4', ''), + (None, None), + (None, None), + (None, None), + ('var5', ''), + ] + self._return_parse_compound = [ + ([[ + 'if (', + '', + ')', + ], + ], + ['']), + ] + + srcvars = list(iter_variables('spam.c', + _iter_source_lines=self._iter_source_lines, + _iter_global=self._iter_global, + _iter_local=self._iter_local, + _parse_func=self._parse_func, + _parse_var=self._parse_var, + _parse_compound=self._parse_compound, + )) + + self.assertEqual(srcvars, [ + (None, 'var1', ''), + (None, 'var2', ''), + ('func1', 'var3', ''), + ('func1', 'var2', ''), + ('func1', 'var4', ''), + (None, 'var5', ''), + ]) + self.assertEqual(self.calls, [ + ('_iter_source_lines', ('spam.c',)), + ('_iter_global', (content.splitlines(),)), + ('_parse_var', ('',)), + ('_parse_var', ('',)), + ('_parse_var', ('',)), + ('_parse_func', ('', '')), + ('_iter_local', ([''],)), + ('_parse_var', ('',)), + ('_parse_compound', ('', [('
', '')])), + ('_parse_var', ('if (',)), + ('_parse_var', ('',)), + ('_parse_var', (')',)), + ('_parse_var', ('',)), + ('_iter_local', ([''],)), + ('_parse_var', ('',)), + ('_parse_var', ('',)), + ]) + + def test_no_locals(self): + content = textwrap.dedent(''' + ... + ''') + self._return_iter_source_lines = content + self._return_iter_global = [ + [('', None), # var1 + ('', None), # non-var + ('', None), # var2 + ('', ''), # func1 + ], + ] + self._return_iter_local = [ + # func1 + [('', None), # non-var + ('', [('
', '')]), # if + ('', None), # non-var + ], + # if + [('', None), # non-var + ], + ] + self._return_parse_func = [ + ('func1', ''), + ] + self._return_parse_var = [ + ('var1', ''), + (None, None), + ('var2', ''), + (None, None), + (None, None), + (None, None), + (None, None), + (None, None), + (None, None), + ] + self._return_parse_compound = [ + ([[ + 'if (', + '', + ')', + ], + ], + ['']), + ] + + srcvars = list(iter_variables('spam.c', + _iter_source_lines=self._iter_source_lines, + _iter_global=self._iter_global, + _iter_local=self._iter_local, + _parse_func=self._parse_func, + _parse_var=self._parse_var, + _parse_compound=self._parse_compound, + )) + + self.assertEqual(srcvars, [ + (None, 'var1', ''), + (None, 'var2', ''), + ]) + self.assertEqual(self.calls, [ + ('_iter_source_lines', ('spam.c',)), + ('_iter_global', (content.splitlines(),)), + ('_parse_var', ('',)), + ('_parse_var', ('',)), + ('_parse_var', ('',)), + ('_parse_func', ('', '')), + ('_iter_local', ([''],)), + ('_parse_var', ('',)), + ('_parse_compound', ('', [('
', '')])), + ('_parse_var', ('if (',)), + ('_parse_var', ('',)), + ('_parse_var', (')',)), + ('_parse_var', ('',)), + ('_iter_local', ([''],)), + ('_parse_var', ('',)), + ]) diff --git a/Lib/test/test_tools/test_c_analyzer/test_parser/test_preprocessor.py b/Lib/test/test_tools/test_c_analyzer/test_parser/test_preprocessor.py new file mode 100644 index 0000000..56a1c9c --- /dev/null +++ b/Lib/test/test_tools/test_c_analyzer/test_parser/test_preprocessor.py @@ -0,0 +1,1562 @@ +import itertools +import textwrap +import unittest +import sys + +from ..util import wrapped_arg_combos, StrProxy +from .. import tool_imports_for_tests +with tool_imports_for_tests(): + from c_analyzer.parser.preprocessor import ( + iter_lines, + # directives + parse_directive, PreprocessorDirective, + Constant, Macro, IfDirective, Include, OtherDirective, + ) + + +class TestCaseBase(unittest.TestCase): + + maxDiff = None + + def reset(self): + self._calls = [] + self.errors = None + + @property + def calls(self): + try: + return self._calls + except AttributeError: + self._calls = [] + return self._calls + + errors = None + + def try_next_exc(self): + if not self.errors: + return + if exc := self.errors.pop(0): + raise exc + + def check_calls(self, *expected): + self.assertEqual(self.calls, list(expected)) + self.assertEqual(self.errors or [], []) + + +class IterLinesTests(TestCaseBase): + + parsed = None + + def check_calls(self, *expected): + super().check_calls(*expected) + self.assertEqual(self.parsed or [], []) + + def _parse_directive(self, line): + self.calls.append( + ('_parse_directive', line)) + self.try_next_exc() + return self.parsed.pop(0) + + def test_no_lines(self): + lines = [] + + results = list( + iter_lines(lines, _parse_directive=self._parse_directive)) + + self.assertEqual(results, []) + self.check_calls() + + def test_no_directives(self): + lines = textwrap.dedent(''' + + // xyz + typedef enum { + SPAM + EGGS + } kind; + + struct info { + kind kind; + int status; + }; + 
+ typedef struct spam { + struct info info; + } myspam; + + static int spam = 0; + + /** + * ... + */ + static char * + get_name(int arg, + char *default, + ) + { + return default + } + + int check(void) { + return 0; + } + + ''')[1:-1].splitlines() + expected = [(lno, line, None, ()) + for lno, line in enumerate(lines, 1)] + expected[1] = (2, ' ', None, ()) + expected[20] = (21, ' ', None, ()) + del expected[19] + del expected[18] + + results = list( + iter_lines(lines, _parse_directive=self._parse_directive)) + + self.assertEqual(results, expected) + self.check_calls() + + def test_single_directives(self): + tests = [ + ('#include ', Include('')), + ('#define SPAM 1', Constant('SPAM', '1')), + ('#define SPAM() 1', Macro('SPAM', (), '1')), + ('#define SPAM(a, b) a = b;', Macro('SPAM', ('a', 'b'), 'a = b;')), + ('#if defined(SPAM)', IfDirective('if', 'defined(SPAM)')), + ('#ifdef SPAM', IfDirective('ifdef', 'SPAM')), + ('#ifndef SPAM', IfDirective('ifndef', 'SPAM')), + ('#elseif defined(SPAM)', IfDirective('elseif', 'defined(SPAM)')), + ('#else', OtherDirective('else', None)), + ('#endif', OtherDirective('endif', None)), + ('#error ...', OtherDirective('error', '...')), + ('#warning ...', OtherDirective('warning', '...')), + ('#__FILE__ ...', OtherDirective('__FILE__', '...')), + ('#__LINE__ ...', OtherDirective('__LINE__', '...')), + ('#__DATE__ ...', OtherDirective('__DATE__', '...')), + ('#__TIME__ ...', OtherDirective('__TIME__', '...')), + ('#__TIMESTAMP__ ...', OtherDirective('__TIMESTAMP__', '...')), + ] + for line, directive in tests: + with self.subTest(line): + self.reset() + self.parsed = [ + directive, + ] + text = textwrap.dedent(''' + static int spam = 0; + {} + static char buffer[256]; + ''').strip().format(line) + lines = text.strip().splitlines() + + results = list( + iter_lines(lines, _parse_directive=self._parse_directive)) + + self.assertEqual(results, [ + (1, 'static int spam = 0;', None, ()), + (2, line, directive, ()), + ((3, 'static char 
buffer[256];', None, ('defined(SPAM)',)) + if directive.kind in ('if', 'ifdef', 'elseif') + else (3, 'static char buffer[256];', None, ('! defined(SPAM)',)) + if directive.kind == 'ifndef' + else (3, 'static char buffer[256];', None, ())), + ]) + self.check_calls( + ('_parse_directive', line), + ) + + def test_directive_whitespace(self): + line = ' # define eggs ( a , b ) { a = b ; } ' + directive = Macro('eggs', ('a', 'b'), '{ a = b; }') + self.parsed = [ + directive, + ] + lines = [line] + + results = list( + iter_lines(lines, _parse_directive=self._parse_directive)) + + self.assertEqual(results, [ + (1, line, directive, ()), + ]) + self.check_calls( + ('_parse_directive', '#define eggs ( a , b ) { a = b ; }'), + ) + + @unittest.skipIf(sys.platform == 'win32', 'needs fix under Windows') + def test_split_lines(self): + directive = Macro('eggs', ('a', 'b'), '{ a = b; }') + self.parsed = [ + directive, + ] + text = textwrap.dedent(r''' + static int spam = 0; + #define eggs(a, b) \ + { \ + a = b; \ + } + static char buffer[256]; + ''').strip() + lines = [line + '\n' for line in text.splitlines()] + lines[-1] = lines[-1][:-1] + + results = list( + iter_lines(lines, _parse_directive=self._parse_directive)) + + self.assertEqual(results, [ + (1, 'static int spam = 0;\n', None, ()), + (5, '#define eggs(a, b) { a = b; }\n', directive, ()), + (6, 'static char buffer[256];', None, ()), + ]) + self.check_calls( + ('_parse_directive', '#define eggs(a, b) { a = b; }'), + ) + + def test_nested_conditions(self): + directives = [ + IfDirective('ifdef', 'SPAM'), + IfDirective('if', 'SPAM == 1'), + IfDirective('elseif', 'SPAM == 2'), + OtherDirective('else', None), + OtherDirective('endif', None), + OtherDirective('endif', None), + ] + self.parsed = list(directives) + text = textwrap.dedent(r''' + static int spam = 0; + + #ifdef SPAM + static int start = 0; + # if SPAM == 1 + static char buffer[10]; + # elif SPAM == 2 + static char buffer[100]; + # else + static char buffer[256]; + 
# endif + static int end = 0; + #endif + + static int eggs = 0; + ''').strip() + lines = [line for line in text.splitlines() if line.strip()] + + results = list( + iter_lines(lines, _parse_directive=self._parse_directive)) + + self.assertEqual(results, [ + (1, 'static int spam = 0;', None, ()), + (2, '#ifdef SPAM', directives[0], ()), + (3, 'static int start = 0;', None, ('defined(SPAM)',)), + (4, '# if SPAM == 1', directives[1], ('defined(SPAM)',)), + (5, 'static char buffer[10];', None, ('defined(SPAM)', 'SPAM == 1')), + (6, '# elif SPAM == 2', directives[2], ('defined(SPAM)', 'SPAM == 1')), + (7, 'static char buffer[100];', None, ('defined(SPAM)', '! (SPAM == 1)', 'SPAM == 2')), + (8, '# else', directives[3], ('defined(SPAM)', '! (SPAM == 1)', 'SPAM == 2')), + (9, 'static char buffer[256];', None, ('defined(SPAM)', '! (SPAM == 1)', '! (SPAM == 2)')), + (10, '# endif', directives[4], ('defined(SPAM)', '! (SPAM == 1)', '! (SPAM == 2)')), + (11, 'static int end = 0;', None, ('defined(SPAM)',)), + (12, '#endif', directives[5], ('defined(SPAM)',)), + (13, 'static int eggs = 0;', None, ()), + ]) + self.check_calls( + ('_parse_directive', '#ifdef SPAM'), + ('_parse_directive', '#if SPAM == 1'), + ('_parse_directive', '#elif SPAM == 2'), + ('_parse_directive', '#else'), + ('_parse_directive', '#endif'), + ('_parse_directive', '#endif'), + ) + + def test_split_blocks(self): + directives = [ + IfDirective('ifdef', 'SPAM'), + OtherDirective('else', None), + OtherDirective('endif', None), + ] + self.parsed = list(directives) + text = textwrap.dedent(r''' + void str_copy(char *buffer, *orig); + + int init(char *name) { + static int initialized = 0; + if (initialized) { + return 0; + } + #ifdef SPAM + static char buffer[10]; + str_copy(buffer, char); + } + + void copy(char *buffer, *orig) { + strncpy(buffer, orig, 9); + buffer[9] = 0; + } + + #else + static char buffer[256]; + str_copy(buffer, char); + } + + void copy(char *buffer, *orig) { + strcpy(buffer, orig); + } + + 
#endif + ''').strip() + lines = [line for line in text.splitlines() if line.strip()] + + results = list( + iter_lines(lines, _parse_directive=self._parse_directive)) + + self.assertEqual(results, [ + (1, 'void str_copy(char *buffer, *orig);', None, ()), + (2, 'int init(char *name) {', None, ()), + (3, ' static int initialized = 0;', None, ()), + (4, ' if (initialized) {', None, ()), + (5, ' return 0;', None, ()), + (6, ' }', None, ()), + + (7, '#ifdef SPAM', directives[0], ()), + + (8, ' static char buffer[10];', None, ('defined(SPAM)',)), + (9, ' str_copy(buffer, char);', None, ('defined(SPAM)',)), + (10, '}', None, ('defined(SPAM)',)), + (11, 'void copy(char *buffer, *orig) {', None, ('defined(SPAM)',)), + (12, ' strncpy(buffer, orig, 9);', None, ('defined(SPAM)',)), + (13, ' buffer[9] = 0;', None, ('defined(SPAM)',)), + (14, '}', None, ('defined(SPAM)',)), + + (15, '#else', directives[1], ('defined(SPAM)',)), + + (16, ' static char buffer[256];', None, ('! (defined(SPAM))',)), + (17, ' str_copy(buffer, char);', None, ('! (defined(SPAM))',)), + (18, '}', None, ('! (defined(SPAM))',)), + (19, 'void copy(char *buffer, *orig) {', None, ('! (defined(SPAM))',)), + (20, ' strcpy(buffer, orig);', None, ('! (defined(SPAM))',)), + (21, '}', None, ('! (defined(SPAM))',)), + + (22, '#endif', directives[2], ('! (defined(SPAM))',)), + ]) + self.check_calls( + ('_parse_directive', '#ifdef SPAM'), + ('_parse_directive', '#else'), + ('_parse_directive', '#endif'), + ) + + @unittest.skipIf(sys.platform == 'win32', 'needs fix under Windows') + def test_basic(self): + directives = [ + Include(''), + IfDirective('ifdef', 'SPAM'), + IfDirective('if', '! defined(HAM) || !HAM'), + Constant('HAM', '0'), + IfDirective('elseif', 'HAM < 0'), + Constant('HAM', '-1'), + OtherDirective('else', None), + OtherDirective('endif', None), + OtherDirective('endif', None), + IfDirective('if', 'defined(HAM) && (HAM < 0 || ! 
HAM)'), + OtherDirective('undef', 'HAM'), + OtherDirective('endif', None), + IfDirective('ifndef', 'HAM'), + OtherDirective('endif', None), + ] + self.parsed = list(directives) + text = textwrap.dedent(r''' + #include + print("begin"); + #ifdef SPAM + print("spam"); + #if ! defined(HAM) || !HAM + # DEFINE HAM 0 + #elseif HAM < 0 + # DEFINE HAM -1 + #else + print("ham HAM"); + #endif + #endif + + #if defined(HAM) && \ + (HAM < 0 || ! HAM) + print("ham?"); + #undef HAM + # endif + + #ifndef HAM + print("no ham"); + #endif + print("end"); + ''')[1:-1] + lines = [line + '\n' for line in text.splitlines()] + lines[-1] = lines[-1][:-1] + + results = list( + iter_lines(lines, _parse_directive=self._parse_directive)) + + self.assertEqual(results, [ + (1, '#include \n', Include(''), ()), + (2, 'print("begin");\n', None, ()), + # + (3, '#ifdef SPAM\n', + IfDirective('ifdef', 'SPAM'), + ()), + (4, ' print("spam");\n', + None, + ('defined(SPAM)',)), + (5, ' #if ! defined(HAM) || !HAM\n', + IfDirective('if', '! defined(HAM) || !HAM'), + ('defined(SPAM)',)), + (6, '# DEFINE HAM 0\n', + Constant('HAM', '0'), + ('defined(SPAM)', '! defined(HAM) || !HAM')), + (7, ' #elseif HAM < 0\n', + IfDirective('elseif', 'HAM < 0'), + ('defined(SPAM)', '! defined(HAM) || !HAM')), + (8, '# DEFINE HAM -1\n', + Constant('HAM', '-1'), + ('defined(SPAM)', '! (! defined(HAM) || !HAM)', 'HAM < 0')), + (9, ' #else\n', + OtherDirective('else', None), + ('defined(SPAM)', '! (! defined(HAM) || !HAM)', 'HAM < 0')), + (10, ' print("ham HAM");\n', + None, + ('defined(SPAM)', '! (! defined(HAM) || !HAM)', '! (HAM < 0)')), + (11, ' #endif\n', + OtherDirective('endif', None), + ('defined(SPAM)', '! (! defined(HAM) || !HAM)', '! (HAM < 0)')), + (12, '#endif\n', + OtherDirective('endif', None), + ('defined(SPAM)',)), + # + (13, '\n', None, ()), + # + (15, '#if defined(HAM) && (HAM < 0 || ! HAM)\n', + IfDirective('if', 'defined(HAM) && (HAM < 0 || ! 
HAM)'), + ()), + (16, ' print("ham?");\n', + None, + ('defined(HAM) && (HAM < 0 || ! HAM)',)), + (17, ' #undef HAM\n', + OtherDirective('undef', 'HAM'), + ('defined(HAM) && (HAM < 0 || ! HAM)',)), + (18, '# endif\n', + OtherDirective('endif', None), + ('defined(HAM) && (HAM < 0 || ! HAM)',)), + # + (19, '\n', None, ()), + # + (20, '#ifndef HAM\n', + IfDirective('ifndef', 'HAM'), + ()), + (21, ' print("no ham");\n', + None, + ('! defined(HAM)',)), + (22, '#endif\n', + OtherDirective('endif', None), + ('! defined(HAM)',)), + # + (23, 'print("end");', None, ()), + ]) + + @unittest.skipIf(sys.platform == 'win32', 'needs fix under Windows') + def test_typical(self): + # We use Include/compile.h from commit 66c4f3f38b86. It has + # a good enough mix of code without being too large. + directives = [ + IfDirective('ifndef', 'Py_COMPILE_H'), + Constant('Py_COMPILE_H', None), + + IfDirective('ifndef', 'Py_LIMITED_API'), + + Include('"code.h"'), + + IfDirective('ifdef', '__cplusplus'), + OtherDirective('endif', None), + + Constant('PyCF_MASK', '(CO_FUTURE_DIVISION | CO_FUTURE_ABSOLUTE_IMPORT | CO_FUTURE_WITH_STATEMENT | CO_FUTURE_PRINT_FUNCTION | CO_FUTURE_UNICODE_LITERALS | CO_FUTURE_BARRY_AS_BDFL | CO_FUTURE_GENERATOR_STOP | CO_FUTURE_ANNOTATIONS)'), + Constant('PyCF_MASK_OBSOLETE', '(CO_NESTED)'), + Constant('PyCF_SOURCE_IS_UTF8', ' 0x0100'), + Constant('PyCF_DONT_IMPLY_DEDENT', '0x0200'), + Constant('PyCF_ONLY_AST', '0x0400'), + Constant('PyCF_IGNORE_COOKIE', '0x0800'), + Constant('PyCF_TYPE_COMMENTS', '0x1000'), + Constant('PyCF_ALLOW_TOP_LEVEL_AWAIT', '0x2000'), + + IfDirective('ifndef', 'Py_LIMITED_API'), + OtherDirective('endif', None), + + Constant('FUTURE_NESTED_SCOPES', '"nested_scopes"'), + Constant('FUTURE_GENERATORS', '"generators"'), + Constant('FUTURE_DIVISION', '"division"'), + Constant('FUTURE_ABSOLUTE_IMPORT', '"absolute_import"'), + Constant('FUTURE_WITH_STATEMENT', '"with_statement"'), + Constant('FUTURE_PRINT_FUNCTION', '"print_function"'), + 
Constant('FUTURE_UNICODE_LITERALS', '"unicode_literals"'), + Constant('FUTURE_BARRY_AS_BDFL', '"barry_as_FLUFL"'), + Constant('FUTURE_GENERATOR_STOP', '"generator_stop"'), + Constant('FUTURE_ANNOTATIONS', '"annotations"'), + + Macro('PyAST_Compile', ('mod', 's', 'f', 'ar'), 'PyAST_CompileEx(mod, s, f, -1, ar)'), + + Constant('PY_INVALID_STACK_EFFECT', 'INT_MAX'), + + IfDirective('ifdef', '__cplusplus'), + OtherDirective('endif', None), + + OtherDirective('endif', None), # ifndef Py_LIMITED_API + + Constant('Py_single_input', '256'), + Constant('Py_file_input', '257'), + Constant('Py_eval_input', '258'), + Constant('Py_func_type_input', '345'), + + OtherDirective('endif', None), # ifndef Py_COMPILE_H + ] + self.parsed = list(directives) + text = textwrap.dedent(r''' + #ifndef Py_COMPILE_H + #define Py_COMPILE_H + + #ifndef Py_LIMITED_API + #include "code.h" + + #ifdef __cplusplus + extern "C" { + #endif + + /* Public interface */ + struct _node; /* Declare the existence of this type */ + PyAPI_FUNC(PyCodeObject *) PyNode_Compile(struct _node *, const char *); + /* XXX (ncoghlan): Unprefixed type name in a public API! 
*/ + + #define PyCF_MASK (CO_FUTURE_DIVISION | CO_FUTURE_ABSOLUTE_IMPORT | \ + CO_FUTURE_WITH_STATEMENT | CO_FUTURE_PRINT_FUNCTION | \ + CO_FUTURE_UNICODE_LITERALS | CO_FUTURE_BARRY_AS_BDFL | \ + CO_FUTURE_GENERATOR_STOP | CO_FUTURE_ANNOTATIONS) + #define PyCF_MASK_OBSOLETE (CO_NESTED) + #define PyCF_SOURCE_IS_UTF8 0x0100 + #define PyCF_DONT_IMPLY_DEDENT 0x0200 + #define PyCF_ONLY_AST 0x0400 + #define PyCF_IGNORE_COOKIE 0x0800 + #define PyCF_TYPE_COMMENTS 0x1000 + #define PyCF_ALLOW_TOP_LEVEL_AWAIT 0x2000 + + #ifndef Py_LIMITED_API + typedef struct { + int cf_flags; /* bitmask of CO_xxx flags relevant to future */ + int cf_feature_version; /* minor Python version (PyCF_ONLY_AST) */ + } PyCompilerFlags; + #endif + + /* Future feature support */ + + typedef struct { + int ff_features; /* flags set by future statements */ + int ff_lineno; /* line number of last future statement */ + } PyFutureFeatures; + + #define FUTURE_NESTED_SCOPES "nested_scopes" + #define FUTURE_GENERATORS "generators" + #define FUTURE_DIVISION "division" + #define FUTURE_ABSOLUTE_IMPORT "absolute_import" + #define FUTURE_WITH_STATEMENT "with_statement" + #define FUTURE_PRINT_FUNCTION "print_function" + #define FUTURE_UNICODE_LITERALS "unicode_literals" + #define FUTURE_BARRY_AS_BDFL "barry_as_FLUFL" + #define FUTURE_GENERATOR_STOP "generator_stop" + #define FUTURE_ANNOTATIONS "annotations" + + struct _mod; /* Declare the existence of this type */ + #define PyAST_Compile(mod, s, f, ar) PyAST_CompileEx(mod, s, f, -1, ar) + PyAPI_FUNC(PyCodeObject *) PyAST_CompileEx( + struct _mod *mod, + const char *filename, /* decoded from the filesystem encoding */ + PyCompilerFlags *flags, + int optimize, + PyArena *arena); + PyAPI_FUNC(PyCodeObject *) PyAST_CompileObject( + struct _mod *mod, + PyObject *filename, + PyCompilerFlags *flags, + int optimize, + PyArena *arena); + PyAPI_FUNC(PyFutureFeatures *) PyFuture_FromAST( + struct _mod * mod, + const char *filename /* decoded from the filesystem encoding */ 
+ ); + PyAPI_FUNC(PyFutureFeatures *) PyFuture_FromASTObject( + struct _mod * mod, + PyObject *filename + ); + + /* _Py_Mangle is defined in compile.c */ + PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name); + + #define PY_INVALID_STACK_EFFECT INT_MAX + PyAPI_FUNC(int) PyCompile_OpcodeStackEffect(int opcode, int oparg); + PyAPI_FUNC(int) PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump); + + PyAPI_FUNC(int) _PyAST_Optimize(struct _mod *, PyArena *arena, int optimize); + + #ifdef __cplusplus + } + #endif + + #endif /* !Py_LIMITED_API */ + + /* These definitions must match corresponding definitions in graminit.h. */ + #define Py_single_input 256 + #define Py_file_input 257 + #define Py_eval_input 258 + #define Py_func_type_input 345 + + #endif /* !Py_COMPILE_H */ + ''').strip() + lines = [line + '\n' for line in text.splitlines()] + lines[-1] = lines[-1][:-1] + + results = list( + iter_lines(lines, _parse_directive=self._parse_directive)) + + self.assertEqual(results, [ + (1, '#ifndef Py_COMPILE_H\n', + IfDirective('ifndef', 'Py_COMPILE_H'), + ()), + (2, '#define Py_COMPILE_H\n', + Constant('Py_COMPILE_H', None), + ('! defined(Py_COMPILE_H)',)), + (3, '\n', + None, + ('! defined(Py_COMPILE_H)',)), + (4, '#ifndef Py_LIMITED_API\n', + IfDirective('ifndef', 'Py_LIMITED_API'), + ('! defined(Py_COMPILE_H)',)), + (5, '#include "code.h"\n', + Include('"code.h"'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (6, '\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (7, '#ifdef __cplusplus\n', + IfDirective('ifdef', '__cplusplus'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (8, 'extern "C" {\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)', 'defined(__cplusplus)')), + (9, '#endif\n', + OtherDirective('endif', None), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)', 'defined(__cplusplus)')), + (10, '\n', + None, + ('! defined(Py_COMPILE_H)', '! 
defined(Py_LIMITED_API)')), + (11, ' \n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (12, 'struct _node; \n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (13, 'PyAPI_FUNC(PyCodeObject *) PyNode_Compile(struct _node *, const char *);\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (14, ' \n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (15, '\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (19, '#define PyCF_MASK (CO_FUTURE_DIVISION | CO_FUTURE_ABSOLUTE_IMPORT | CO_FUTURE_WITH_STATEMENT | CO_FUTURE_PRINT_FUNCTION | CO_FUTURE_UNICODE_LITERALS | CO_FUTURE_BARRY_AS_BDFL | CO_FUTURE_GENERATOR_STOP | CO_FUTURE_ANNOTATIONS)\n', + Constant('PyCF_MASK', '(CO_FUTURE_DIVISION | CO_FUTURE_ABSOLUTE_IMPORT | CO_FUTURE_WITH_STATEMENT | CO_FUTURE_PRINT_FUNCTION | CO_FUTURE_UNICODE_LITERALS | CO_FUTURE_BARRY_AS_BDFL | CO_FUTURE_GENERATOR_STOP | CO_FUTURE_ANNOTATIONS)'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (20, '#define PyCF_MASK_OBSOLETE (CO_NESTED)\n', + Constant('PyCF_MASK_OBSOLETE', '(CO_NESTED)'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (21, '#define PyCF_SOURCE_IS_UTF8 0x0100\n', + Constant('PyCF_SOURCE_IS_UTF8', ' 0x0100'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (22, '#define PyCF_DONT_IMPLY_DEDENT 0x0200\n', + Constant('PyCF_DONT_IMPLY_DEDENT', '0x0200'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (23, '#define PyCF_ONLY_AST 0x0400\n', + Constant('PyCF_ONLY_AST', '0x0400'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (24, '#define PyCF_IGNORE_COOKIE 0x0800\n', + Constant('PyCF_IGNORE_COOKIE', '0x0800'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (25, '#define PyCF_TYPE_COMMENTS 0x1000\n', + Constant('PyCF_TYPE_COMMENTS', '0x1000'), + ('! defined(Py_COMPILE_H)', '! 
defined(Py_LIMITED_API)')), + (26, '#define PyCF_ALLOW_TOP_LEVEL_AWAIT 0x2000\n', + Constant('PyCF_ALLOW_TOP_LEVEL_AWAIT', '0x2000'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (27, '\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (28, '#ifndef Py_LIMITED_API\n', + IfDirective('ifndef', 'Py_LIMITED_API'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (29, 'typedef struct {\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)', '! defined(Py_LIMITED_API)')), + (30, ' int cf_flags; \n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)', '! defined(Py_LIMITED_API)')), + (31, ' int cf_feature_version; \n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)', '! defined(Py_LIMITED_API)')), + (32, '} PyCompilerFlags;\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)', '! defined(Py_LIMITED_API)')), + (33, '#endif\n', + OtherDirective('endif', None), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)', '! defined(Py_LIMITED_API)')), + (34, '\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (35, ' \n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (36, '\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (37, 'typedef struct {\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (38, ' int ff_features; \n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (39, ' int ff_lineno; \n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (40, '} PyFutureFeatures;\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (41, '\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (42, '#define FUTURE_NESTED_SCOPES "nested_scopes"\n', + Constant('FUTURE_NESTED_SCOPES', '"nested_scopes"'), + ('! defined(Py_COMPILE_H)', '! 
defined(Py_LIMITED_API)')), + (43, '#define FUTURE_GENERATORS "generators"\n', + Constant('FUTURE_GENERATORS', '"generators"'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (44, '#define FUTURE_DIVISION "division"\n', + Constant('FUTURE_DIVISION', '"division"'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (45, '#define FUTURE_ABSOLUTE_IMPORT "absolute_import"\n', + Constant('FUTURE_ABSOLUTE_IMPORT', '"absolute_import"'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (46, '#define FUTURE_WITH_STATEMENT "with_statement"\n', + Constant('FUTURE_WITH_STATEMENT', '"with_statement"'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (47, '#define FUTURE_PRINT_FUNCTION "print_function"\n', + Constant('FUTURE_PRINT_FUNCTION', '"print_function"'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (48, '#define FUTURE_UNICODE_LITERALS "unicode_literals"\n', + Constant('FUTURE_UNICODE_LITERALS', '"unicode_literals"'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (49, '#define FUTURE_BARRY_AS_BDFL "barry_as_FLUFL"\n', + Constant('FUTURE_BARRY_AS_BDFL', '"barry_as_FLUFL"'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (50, '#define FUTURE_GENERATOR_STOP "generator_stop"\n', + Constant('FUTURE_GENERATOR_STOP', '"generator_stop"'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (51, '#define FUTURE_ANNOTATIONS "annotations"\n', + Constant('FUTURE_ANNOTATIONS', '"annotations"'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (52, '\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (53, 'struct _mod; \n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (54, '#define PyAST_Compile(mod, s, f, ar) PyAST_CompileEx(mod, s, f, -1, ar)\n', + Macro('PyAST_Compile', ('mod', 's', 'f', 'ar'), 'PyAST_CompileEx(mod, s, f, -1, ar)'), + ('! defined(Py_COMPILE_H)', '! 
defined(Py_LIMITED_API)')), + (55, 'PyAPI_FUNC(PyCodeObject *) PyAST_CompileEx(\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (56, ' struct _mod *mod,\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (57, ' const char *filename, \n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (58, ' PyCompilerFlags *flags,\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (59, ' int optimize,\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (60, ' PyArena *arena);\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (61, 'PyAPI_FUNC(PyCodeObject *) PyAST_CompileObject(\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (62, ' struct _mod *mod,\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (63, ' PyObject *filename,\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (64, ' PyCompilerFlags *flags,\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (65, ' int optimize,\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (66, ' PyArena *arena);\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (67, 'PyAPI_FUNC(PyFutureFeatures *) PyFuture_FromAST(\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (68, ' struct _mod * mod,\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (69, ' const char *filename \n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (70, ' );\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (71, 'PyAPI_FUNC(PyFutureFeatures *) PyFuture_FromASTObject(\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (72, ' struct _mod * mod,\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (73, ' PyObject *filename\n', + None, + ('! 
defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (74, ' );\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (75, '\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (76, ' \n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (77, 'PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name);\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (78, '\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (79, '#define PY_INVALID_STACK_EFFECT INT_MAX\n', + Constant('PY_INVALID_STACK_EFFECT', 'INT_MAX'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (80, 'PyAPI_FUNC(int) PyCompile_OpcodeStackEffect(int opcode, int oparg);\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (81, 'PyAPI_FUNC(int) PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump);\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (82, '\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (83, 'PyAPI_FUNC(int) _PyAST_Optimize(struct _mod *, PyArena *arena, int optimize);\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (84, '\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (85, '#ifdef __cplusplus\n', + IfDirective('ifdef', '__cplusplus'), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (86, '}\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)', 'defined(__cplusplus)')), + (87, '#endif\n', + OtherDirective('endif', None), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)', 'defined(__cplusplus)')), + (88, '\n', + None, + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (89, '#endif \n', + OtherDirective('endif', None), + ('! defined(Py_COMPILE_H)', '! defined(Py_LIMITED_API)')), + (90, '\n', + None, + ('! defined(Py_COMPILE_H)',)), + (91, ' \n', + None, + ('! 
defined(Py_COMPILE_H)',)), + (92, '#define Py_single_input 256\n', + Constant('Py_single_input', '256'), + ('! defined(Py_COMPILE_H)',)), + (93, '#define Py_file_input 257\n', + Constant('Py_file_input', '257'), + ('! defined(Py_COMPILE_H)',)), + (94, '#define Py_eval_input 258\n', + Constant('Py_eval_input', '258'), + ('! defined(Py_COMPILE_H)',)), + (95, '#define Py_func_type_input 345\n', + Constant('Py_func_type_input', '345'), + ('! defined(Py_COMPILE_H)',)), + (96, '\n', + None, + ('! defined(Py_COMPILE_H)',)), + (97, '#endif ', + OtherDirective('endif', None), + ('! defined(Py_COMPILE_H)',)), + ]) + self.check_calls( + ('_parse_directive', '#ifndef Py_COMPILE_H'), + ('_parse_directive', '#define Py_COMPILE_H'), + ('_parse_directive', '#ifndef Py_LIMITED_API'), + ('_parse_directive', '#include "code.h"'), + ('_parse_directive', '#ifdef __cplusplus'), + ('_parse_directive', '#endif'), + ('_parse_directive', '#define PyCF_MASK (CO_FUTURE_DIVISION | CO_FUTURE_ABSOLUTE_IMPORT | CO_FUTURE_WITH_STATEMENT | CO_FUTURE_PRINT_FUNCTION | CO_FUTURE_UNICODE_LITERALS | CO_FUTURE_BARRY_AS_BDFL | CO_FUTURE_GENERATOR_STOP | CO_FUTURE_ANNOTATIONS)'), + ('_parse_directive', '#define PyCF_MASK_OBSOLETE (CO_NESTED)'), + ('_parse_directive', '#define PyCF_SOURCE_IS_UTF8 0x0100'), + ('_parse_directive', '#define PyCF_DONT_IMPLY_DEDENT 0x0200'), + ('_parse_directive', '#define PyCF_ONLY_AST 0x0400'), + ('_parse_directive', '#define PyCF_IGNORE_COOKIE 0x0800'), + ('_parse_directive', '#define PyCF_TYPE_COMMENTS 0x1000'), + ('_parse_directive', '#define PyCF_ALLOW_TOP_LEVEL_AWAIT 0x2000'), + ('_parse_directive', '#ifndef Py_LIMITED_API'), + ('_parse_directive', '#endif'), + ('_parse_directive', '#define FUTURE_NESTED_SCOPES "nested_scopes"'), + ('_parse_directive', '#define FUTURE_GENERATORS "generators"'), + ('_parse_directive', '#define FUTURE_DIVISION "division"'), + ('_parse_directive', '#define FUTURE_ABSOLUTE_IMPORT "absolute_import"'), + ('_parse_directive', '#define 
FUTURE_WITH_STATEMENT "with_statement"'), + ('_parse_directive', '#define FUTURE_PRINT_FUNCTION "print_function"'), + ('_parse_directive', '#define FUTURE_UNICODE_LITERALS "unicode_literals"'), + ('_parse_directive', '#define FUTURE_BARRY_AS_BDFL "barry_as_FLUFL"'), + ('_parse_directive', '#define FUTURE_GENERATOR_STOP "generator_stop"'), + ('_parse_directive', '#define FUTURE_ANNOTATIONS "annotations"'), + ('_parse_directive', '#define PyAST_Compile(mod, s, f, ar) PyAST_CompileEx(mod, s, f, -1, ar)'), + ('_parse_directive', '#define PY_INVALID_STACK_EFFECT INT_MAX'), + ('_parse_directive', '#ifdef __cplusplus'), + ('_parse_directive', '#endif'), + ('_parse_directive', '#endif'), + ('_parse_directive', '#define Py_single_input 256'), + ('_parse_directive', '#define Py_file_input 257'), + ('_parse_directive', '#define Py_eval_input 258'), + ('_parse_directive', '#define Py_func_type_input 345'), + ('_parse_directive', '#endif'), + ) + + +class ParseDirectiveTests(unittest.TestCase): + + def test_directives(self): + tests = [ + # includes + ('#include "internal/pycore_pystate.h"', Include('"internal/pycore_pystate.h"')), + ('#include ', Include('')), + + # defines + ('#define SPAM int', Constant('SPAM', 'int')), + ('#define SPAM', Constant('SPAM', '')), + ('#define SPAM(x, y) run(x, y)', Macro('SPAM', ('x', 'y'), 'run(x, y)')), + ('#undef SPAM', None), + + # conditionals + ('#if SPAM', IfDirective('if', 'SPAM')), + # XXX complex conditionls + ('#ifdef SPAM', IfDirective('ifdef', 'SPAM')), + ('#ifndef SPAM', IfDirective('ifndef', 'SPAM')), + ('#elseif SPAM', IfDirective('elseif', 'SPAM')), + # XXX complex conditionls + ('#else', OtherDirective('else', '')), + ('#endif', OtherDirective('endif', '')), + + # other + ('#error oops!', None), + ('#warning oops!', None), + ('#pragma ...', None), + ('#__FILE__ ...', None), + ('#__LINE__ ...', None), + ('#__DATE__ ...', None), + ('#__TIME__ ...', None), + ('#__TIMESTAMP__ ...', None), + + # extra whitespace + (' # include ', 
Include('')), + ('#else ', OtherDirective('else', '')), + ('#endif ', OtherDirective('endif', '')), + ('#define SPAM int ', Constant('SPAM', 'int')), + ('#define SPAM ', Constant('SPAM', '')), + ] + for line, expected in tests: + if expected is None: + kind, _, text = line[1:].partition(' ') + expected = OtherDirective(kind, text) + with self.subTest(line): + directive = parse_directive(line) + + self.assertEqual(directive, expected) + + def test_bad_directives(self): + tests = [ + # valid directives with bad text + '#define 123', + '#else spam', + '#endif spam', + ] + for kind in PreprocessorDirective.KINDS: + # missing leading "#" + tests.append(kind) + if kind in ('else', 'endif'): + continue + # valid directives with missing text + tests.append('#' + kind) + tests.append('#' + kind + ' ') + for line in tests: + with self.subTest(line): + with self.assertRaises(ValueError): + parse_directive(line) + + def test_not_directives(self): + tests = [ + '', + ' ', + 'directive', + 'directive?', + '???', + ] + for line in tests: + with self.subTest(line): + with self.assertRaises(ValueError): + parse_directive(line) + + +class ConstantTests(unittest.TestCase): + + def test_type(self): + directive = Constant('SPAM', '123') + + self.assertIs(type(directive), Constant) + self.assertIsInstance(directive, PreprocessorDirective) + + def test_attrs(self): + d = Constant('SPAM', '123') + kind, name, value = d.kind, d.name, d.value + + self.assertEqual(kind, 'define') + self.assertEqual(name, 'SPAM') + self.assertEqual(value, '123') + + def test_text(self): + tests = [ + (('SPAM', '123'), 'SPAM 123'), + (('SPAM',), 'SPAM'), + ] + for args, expected in tests: + with self.subTest(args): + d = Constant(*args) + text = d.text + + self.assertEqual(text, expected) + + def test_iter(self): + kind, name, value = Constant('SPAM', '123') + + self.assertEqual(kind, 'define') + self.assertEqual(name, 'SPAM') + self.assertEqual(value, '123') + + def test_defaults(self): + kind, name, value = 
Constant('SPAM') + + self.assertEqual(kind, 'define') + self.assertEqual(name, 'SPAM') + self.assertIs(value, None) + + def test_coerce(self): + tests = [] + # coerced name, value + for args in wrapped_arg_combos('SPAM', '123'): + tests.append((args, ('SPAM', '123'))) + # missing name, value + for name in ('', ' ', None, StrProxy(' '), ()): + for value in ('', ' ', None, StrProxy(' '), ()): + tests.append( + ((name, value), (None, None))) + # whitespace + tests.extend([ + ((' SPAM ', ' 123 '), ('SPAM', '123')), + ]) + + for args, expected in tests: + with self.subTest(args): + d = Constant(*args) + + self.assertEqual(d[1:], expected) + for i, exp in enumerate(expected, start=1): + if exp is not None: + self.assertIs(type(d[i]), str) + + def test_valid(self): + tests = [ + ('SPAM', '123'), + # unusual name + ('_SPAM_', '123'), + ('X_1', '123'), + # unusual value + ('SPAM', None), + ] + for args in tests: + with self.subTest(args): + directive = Constant(*args) + + directive.validate() + + def test_invalid(self): + tests = [ + # invalid name + ((None, '123'), TypeError), + (('_', '123'), ValueError), + (('1', '123'), ValueError), + (('_1_', '123'), ValueError), + # There is no invalid value (including None). 
+ ] + for args, exctype in tests: + with self.subTest(args): + directive = Constant(*args) + + with self.assertRaises(exctype): + directive.validate() + + +class MacroTests(unittest.TestCase): + + def test_type(self): + directive = Macro('SPAM', ('x', 'y'), '123') + + self.assertIs(type(directive), Macro) + self.assertIsInstance(directive, PreprocessorDirective) + + def test_attrs(self): + d = Macro('SPAM', ('x', 'y'), '123') + kind, name, args, body = d.kind, d.name, d.args, d.body + + self.assertEqual(kind, 'define') + self.assertEqual(name, 'SPAM') + self.assertEqual(args, ('x', 'y')) + self.assertEqual(body, '123') + + def test_text(self): + tests = [ + (('SPAM', ('x', 'y'), '123'), 'SPAM(x, y) 123'), + (('SPAM', ('x', 'y'),), 'SPAM(x, y)'), + ] + for args, expected in tests: + with self.subTest(args): + d = Macro(*args) + text = d.text + + self.assertEqual(text, expected) + + def test_iter(self): + kind, name, args, body = Macro('SPAM', ('x', 'y'), '123') + + self.assertEqual(kind, 'define') + self.assertEqual(name, 'SPAM') + self.assertEqual(args, ('x', 'y')) + self.assertEqual(body, '123') + + def test_defaults(self): + kind, name, args, body = Macro('SPAM', ('x', 'y')) + + self.assertEqual(kind, 'define') + self.assertEqual(name, 'SPAM') + self.assertEqual(args, ('x', 'y')) + self.assertIs(body, None) + + def test_coerce(self): + tests = [] + # coerce name and body + for args in wrapped_arg_combos('SPAM', ('x', 'y'), '123'): + tests.append( + (args, ('SPAM', ('x', 'y'), '123'))) + # coerce args + tests.extend([ + (('SPAM', 'x', '123'), + ('SPAM', ('x',), '123')), + (('SPAM', 'x,y', '123'), + ('SPAM', ('x', 'y'), '123')), + ]) + # coerce arg names + for argnames in wrapped_arg_combos('x', 'y'): + tests.append( + (('SPAM', argnames, '123'), + ('SPAM', ('x', 'y'), '123'))) + # missing name, body + for name in ('', ' ', None, StrProxy(' '), ()): + for argnames in (None, ()): + for body in ('', ' ', None, StrProxy(' '), ()): + tests.append( + ((name, argnames, 
body), + (None, (), None))) + # missing args + tests.extend([ + (('SPAM', None, '123'), + ('SPAM', (), '123')), + (('SPAM', (), '123'), + ('SPAM', (), '123')), + ]) + # missing arg names + for arg in ('', ' ', None, StrProxy(' '), ()): + tests.append( + (('SPAM', (arg,), '123'), + ('SPAM', (None,), '123'))) + tests.extend([ + (('SPAM', ('x', '', 'z'), '123'), + ('SPAM', ('x', None, 'z'), '123')), + ]) + # whitespace + tests.extend([ + ((' SPAM ', (' x ', ' y '), ' 123 '), + ('SPAM', ('x', 'y'), '123')), + (('SPAM', 'x, y', '123'), + ('SPAM', ('x', 'y'), '123')), + ]) + + for args, expected in tests: + with self.subTest(args): + d = Macro(*args) + + self.assertEqual(d[1:], expected) + for i, exp in enumerate(expected, start=1): + if i == 2: + self.assertIs(type(d[i]), tuple) + elif exp is not None: + self.assertIs(type(d[i]), str) + + def test_init_bad_args(self): + tests = [ + ('SPAM', StrProxy('x'), '123'), + ('SPAM', object(), '123'), + ] + for args in tests: + with self.subTest(args): + with self.assertRaises(TypeError): + Macro(*args) + + def test_valid(self): + tests = [ + # unusual name + ('SPAM', ('x', 'y'), 'run(x, y)'), + ('_SPAM_', ('x', 'y'), 'run(x, y)'), + ('X_1', ('x', 'y'), 'run(x, y)'), + # unusual args + ('SPAM', (), 'run(x, y)'), + ('SPAM', ('_x_', 'y_1'), 'run(x, y)'), + ('SPAM', 'x', 'run(x, y)'), + ('SPAM', 'x, y', 'run(x, y)'), + # unusual body + ('SPAM', ('x', 'y'), None), + ] + for args in tests: + with self.subTest(args): + directive = Macro(*args) + + directive.validate() + + def test_invalid(self): + tests = [ + # invalid name + ((None, ('x', 'y'), '123'), TypeError), + (('_', ('x', 'y'), '123'), ValueError), + (('1', ('x', 'y'), '123'), ValueError), + (('_1', ('x', 'y'), '123'), ValueError), + # invalid args + (('SPAM', (None, 'y'), '123'), ValueError), + (('SPAM', ('x', '_'), '123'), ValueError), + (('SPAM', ('x', '1'), '123'), ValueError), + (('SPAM', ('x', '_1_'), '123'), ValueError), + # There is no invalid body (including None). 
+ ] + for args, exctype in tests: + with self.subTest(args): + directive = Macro(*args) + + with self.assertRaises(exctype): + directive.validate() + + +class IfDirectiveTests(unittest.TestCase): + + def test_type(self): + directive = IfDirective('if', '1') + + self.assertIs(type(directive), IfDirective) + self.assertIsInstance(directive, PreprocessorDirective) + + def test_attrs(self): + d = IfDirective('if', '1') + kind, condition = d.kind, d.condition + + self.assertEqual(kind, 'if') + self.assertEqual(condition, '1') + #self.assertEqual(condition, (ArithmeticCondition('1'),)) + + def test_text(self): + tests = [ + (('if', 'defined(SPAM) && 1 || (EGGS > 3 && defined(HAM))'), + 'defined(SPAM) && 1 || (EGGS > 3 && defined(HAM))'), + ] + for kind in IfDirective.KINDS: + tests.append( + ((kind, 'SPAM'), 'SPAM')) + for args, expected in tests: + with self.subTest(args): + d = IfDirective(*args) + text = d.text + + self.assertEqual(text, expected) + + def test_iter(self): + kind, condition = IfDirective('if', '1') + + self.assertEqual(kind, 'if') + self.assertEqual(condition, '1') + #self.assertEqual(condition, (ArithmeticCondition('1'),)) + + #def test_complex_conditions(self): + # ... + + def test_coerce(self): + tests = [] + for kind in IfDirective.KINDS: + if kind == 'ifdef': + cond = 'defined(SPAM)' + elif kind == 'ifndef': + cond = '! 
defined(SPAM)' + else: + cond = 'SPAM' + for args in wrapped_arg_combos(kind, 'SPAM'): + tests.append((args, (kind, cond))) + tests.extend([ + ((' ' + kind + ' ', ' SPAM '), (kind, cond)), + ]) + for raw in ('', ' ', None, StrProxy(' '), ()): + tests.append(((kind, raw), (kind, None))) + for kind in ('', ' ', None, StrProxy(' '), ()): + tests.append(((kind, 'SPAM'), (None, 'SPAM'))) + for args, expected in tests: + with self.subTest(args): + d = IfDirective(*args) + + self.assertEqual(tuple(d), expected) + for i, exp in enumerate(expected): + if exp is not None: + self.assertIs(type(d[i]), str) + + def test_valid(self): + tests = [] + for kind in IfDirective.KINDS: + tests.extend([ + (kind, 'SPAM'), + (kind, '_SPAM_'), + (kind, 'X_1'), + (kind, '()'), + (kind, '--'), + (kind, '???'), + ]) + for args in tests: + with self.subTest(args): + directive = IfDirective(*args) + + directive.validate() + + def test_invalid(self): + tests = [] + # kind + tests.extend([ + ((None, 'SPAM'), TypeError), + (('_', 'SPAM'), ValueError), + (('-', 'SPAM'), ValueError), + (('spam', 'SPAM'), ValueError), + ]) + for kind in PreprocessorDirective.KINDS: + if kind in IfDirective.KINDS: + continue + tests.append( + ((kind, 'SPAM'), ValueError)) + # condition + for kind in IfDirective.KINDS: + tests.extend([ + ((kind, None), TypeError), + # Any other condition is valid. 
+ ]) + for args, exctype in tests: + with self.subTest(args): + directive = IfDirective(*args) + + with self.assertRaises(exctype): + directive.validate() + + +class IncludeTests(unittest.TestCase): + + def test_type(self): + directive = Include('') + + self.assertIs(type(directive), Include) + self.assertIsInstance(directive, PreprocessorDirective) + + def test_attrs(self): + d = Include('') + kind, file, text = d.kind, d.file, d.text + + self.assertEqual(kind, 'include') + self.assertEqual(file, '') + self.assertEqual(text, '') + + def test_iter(self): + kind, file = Include('') + + self.assertEqual(kind, 'include') + self.assertEqual(file, '') + + def test_coerce(self): + tests = [] + for arg, in wrapped_arg_combos(''): + tests.append((arg, '')) + tests.extend([ + (' ', ''), + ]) + for arg in ('', ' ', None, StrProxy(' '), ()): + tests.append((arg, None )) + for arg, expected in tests: + with self.subTest(arg): + _, file = Include(arg) + + self.assertEqual(file, expected) + if expected is not None: + self.assertIs(type(file), str) + + def test_valid(self): + tests = [ + '', + '"spam.h"', + '"internal/pycore_pystate.h"', + ] + for arg in tests: + with self.subTest(arg): + directive = Include(arg) + + directive.validate() + + def test_invalid(self): + tests = [ + (None, TypeError), + # We currently don't check the file. 
+ ] + for arg, exctype in tests: + with self.subTest(arg): + directive = Include(arg) + + with self.assertRaises(exctype): + directive.validate() + + +class OtherDirectiveTests(unittest.TestCase): + + def test_type(self): + directive = OtherDirective('undef', 'SPAM') + + self.assertIs(type(directive), OtherDirective) + self.assertIsInstance(directive, PreprocessorDirective) + + def test_attrs(self): + d = OtherDirective('undef', 'SPAM') + kind, text = d.kind, d.text + + self.assertEqual(kind, 'undef') + self.assertEqual(text, 'SPAM') + + def test_iter(self): + kind, text = OtherDirective('undef', 'SPAM') + + self.assertEqual(kind, 'undef') + self.assertEqual(text, 'SPAM') + + def test_coerce(self): + tests = [] + for kind in OtherDirective.KINDS: + if kind in ('else', 'endif'): + continue + for args in wrapped_arg_combos(kind, '...'): + tests.append((args, (kind, '...'))) + tests.extend([ + ((' ' + kind + ' ', ' ... '), (kind, '...')), + ]) + for raw in ('', ' ', None, StrProxy(' '), ()): + tests.append(((kind, raw), (kind, None))) + for kind in ('else', 'endif'): + for args in wrapped_arg_combos(kind, None): + tests.append((args, (kind, None))) + tests.extend([ + ((' ' + kind + ' ', None), (kind, None)), + ]) + for kind in ('', ' ', None, StrProxy(' '), ()): + tests.append(((kind, '...'), (None, '...'))) + for args, expected in tests: + with self.subTest(args): + d = OtherDirective(*args) + + self.assertEqual(tuple(d), expected) + for i, exp in enumerate(expected): + if exp is not None: + self.assertIs(type(d[i]), str) + + def test_valid(self): + tests = [] + for kind in OtherDirective.KINDS: + if kind in ('else', 'endif'): + continue + tests.extend([ + (kind, '...'), + (kind, '???'), + (kind, 'SPAM'), + (kind, '1 + 1'), + ]) + for kind in ('else', 'endif'): + tests.append((kind, None)) + for args in tests: + with self.subTest(args): + directive = OtherDirective(*args) + + directive.validate() + + def test_invalid(self): + tests = [] + # kind + tests.extend([ + 
((None, '...'), TypeError), + (('_', '...'), ValueError), + (('-', '...'), ValueError), + (('spam', '...'), ValueError), + ]) + for kind in PreprocessorDirective.KINDS: + if kind in OtherDirective.KINDS: + continue + tests.append( + ((kind, None), ValueError)) + # text + for kind in OtherDirective.KINDS: + if kind in ('else', 'endif'): + tests.extend([ + # Any text is invalid. + ((kind, 'SPAM'), ValueError), + ((kind, '...'), ValueError), + ]) + else: + tests.extend([ + ((kind, None), TypeError), + # Any other text is valid. + ]) + for args, exctype in tests: + with self.subTest(args): + directive = OtherDirective(*args) + + with self.assertRaises(exctype): + directive.validate() diff --git a/Lib/test/test_tools/test_c_analyzer/test_symbols/__init__.py b/Lib/test/test_tools/test_c_analyzer/test_symbols/__init__.py new file mode 100644 index 0000000..bc502ef --- /dev/null +++ b/Lib/test/test_tools/test_c_analyzer/test_symbols/__init__.py @@ -0,0 +1,6 @@ +import os.path +from test.support import load_package_tests + + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_tools/test_c_analyzer/test_symbols/test_info.py b/Lib/test/test_tools/test_c_analyzer/test_symbols/test_info.py new file mode 100644 index 0000000..1282a89 --- /dev/null +++ b/Lib/test/test_tools/test_c_analyzer/test_symbols/test_info.py @@ -0,0 +1,192 @@ +import string +import unittest + +from ..util import PseudoStr, StrProxy, Object +from .. 
class SymbolTests(unittest.TestCase):
    """Exercise construction, coercion and validation of Symbol."""

    VALID_ARGS = (
        ID('x/y/z/spam.c', 'func', 'eggs'),
        Symbol.KIND.VARIABLE,
        False,
    )
    VALID_KWARGS = dict(zip(Symbol._fields, VALID_ARGS))
    VALID_EXPECTED = VALID_ARGS

    def _check_typical_init(self, symbol_id):
        # Shared body of the two "typical binary" tests below.
        made = Symbol(
            id=symbol_id,
            kind=Symbol.KIND.VARIABLE,
            external=False,
        )

        self.assertEqual(made, (symbol_id, Symbol.KIND.VARIABLE, False))

    def test_init_typical_binary_local(self):
        self._check_typical_init(ID(None, None, 'spam'))

    def test_init_typical_binary_global(self):
        self._check_typical_init(ID('Python/ceval.c', None, 'spam'))

    def test_init_coercion(self):
        # Each case is (summary, constructor kwargs, expected tuple).
        cases = [
            ('str subclass',
             dict(id=PseudoStr('eggs'),
                  kind=PseudoStr('variable'),
                  external=0),
             (ID(None, None, 'eggs'), Symbol.KIND.VARIABLE, False)),
            ('with filename',
             dict(id=('x/y/z/spam.c', 'eggs'),
                  kind=PseudoStr('variable'),
                  external=0),
             (ID('x/y/z/spam.c', None, 'eggs'), Symbol.KIND.VARIABLE, False)),
            ('non-str 1',
             dict(id=('a', 'b', 'c'),
                  kind=StrProxy('variable'),
                  external=0),
             (ID('a', 'b', 'c'), Symbol.KIND.VARIABLE, False)),
            # NOTE(review): the expected kind is presumably str(Object());
            # confirm that the helper really stringifies to ''.
            ('non-str 2',
             dict(id=('a', 'b', 'c'),
                  kind=Object(),
                  external=0),
             (ID('a', 'b', 'c'), '', False)),
        ]
        # The exact type each field must have after coercion.
        special_types = {'external': bool, 'id': ID}

        for summary, kwargs, expected in cases:
            with self.subTest(summary):
                made = Symbol(**kwargs)

                for field in Symbol._fields:
                    actual = getattr(made, field)
                    self.assertIs(type(actual),
                                  special_types.get(field, str))
                self.assertEqual(tuple(made), expected)

    def test_init_all_missing(self):
        # Only the ID is given; the other fields take their defaults.
        symbol_id = ID(None, None, 'spam')

        made = Symbol(symbol_id)

        self.assertEqual(made, (symbol_id, Symbol.KIND.VARIABLE, None))

    def test_fields(self):
        symbol_id = ID('z', 'x', 'a')

        made = Symbol(symbol_id, 'b', False)

        self.assertEqual(made.id, symbol_id)
        self.assertEqual(made.kind, 'b')
        self.assertIs(made.external, False)

    def test___getattr__(self):
        # The ID's fields are reachable directly on the symbol.
        made = Symbol(ID('z', 'x', 'a'), 'b', False)

        filename = made.filename
        funcname = made.funcname
        name = made.name

        self.assertEqual(filename, 'z')
        self.assertEqual(funcname, 'x')
        self.assertEqual(name, 'a')

    def test_validate_typical(self):
        made = Symbol(
            id=ID('z', 'x', 'a'),
            kind=Symbol.KIND.VARIABLE,
            external=False,
        )

        made.validate()  # This does not fail.

    def test_validate_missing_field(self):
        for field in Symbol._fields:
            with self.subTest(field):
                made = Symbol(**self.VALID_KWARGS)._replace(**{field: None})

                with self.assertRaises(TypeError):
                    made.validate()

    def test_validate_bad_field(self):
        # Every single punctuation/digit character, plus a few compound
        # strings, is not a valid name.
        badch = tuple(string.punctuation + string.digits)
        notnames = ('1a', 'a.b', 'a-b', '&a', 'a++') + badch
        cases = [
            ('id', notnames),
            ('kind', ('bogus',)),
        ]

        # Values rejected for one field, candidates for "valid elsewhere".
        seen = set()
        for field, invalid in cases:
            for value in invalid:
                if field != 'kind':
                    seen.add(value)
                with self.subTest(f'{field}={value!r}'):
                    made = Symbol(**self.VALID_KWARGS)
                    made = made._replace(**{field: value})

                    with self.assertRaises(ValueError):
                        made.validate()

        for field, invalid in cases:
            if field == 'kind':
                continue
            for value in seen - set(invalid):
                with self.subTest(f'{field}={value!r}'):
                    made = Symbol(**self.VALID_KWARGS)
                    made = made._replace(**{field: value})

                    made.validate()  # This does not fail.
class VarsFromBinaryTests(_Base):
    """Check how vars_from_binary() drives its injected collaborators."""

    # Canned return values for the stubs below; tests override them.
    _return_iter_vars = ()
    _return_get_symbol_resolver = None

    def setUp(self):
        super().setUp()

        # Keyword arguments that swap the real helpers for recording stubs.
        self.kwargs = {
            '_iter_vars': self._iter_vars,
            '_get_symbol_resolver': self._get_symbol_resolver,
        }

    def _iter_vars(self, binfile, resolve, handle_id):
        # Stub: record the call and pair each canned variable with its ID.
        self.calls.append(('_iter_vars', (binfile, resolve, handle_id)))
        return [(var, var.id) for var in self._return_iter_vars]

    def _get_symbol_resolver(self, known=None, dirnames=(), *,
                             handle_var,
                             filenames=None,
                             check_filename=None,
                             perfilecache=None,
                             ):
        # Stub: record the call and hand back the canned resolver.
        recorded = (known, dirnames, handle_var, filenames,
                    check_filename, perfilecache)
        self.calls.append(('_get_symbol_resolver', recorded))
        return self._return_get_symbol_resolver

    def test_typical(self):
        rows = [
            ('dir1/spam.c', None, 'var1', 'int'),
            ('dir1/spam.c', None, 'var2', 'static int'),
            ('dir1/spam.c', None, 'var3', 'char *'),
            ('dir1/spam.c', 'func2', 'var4', 'const char *'),
            ('dir1/eggs.c', None, 'var1', 'static int'),
            ('dir1/eggs.c', 'func1', 'var2', 'static char *'),
        ]
        resolver = object()
        self._return_get_symbol_resolver = resolver
        self._return_iter_vars = [info.Variable.from_parts(*row)
                                  for row in rows]
        known = object()
        filenames = object()

        found = list(vars_from_binary('python',
                                      known=known,
                                      filenames=filenames,
                                      **self.kwargs))

        self.assertEqual(found, [info.Variable.from_parts(*row)
                                 for row in rows])
        self.assertEqual(self.calls, [
            ('_get_symbol_resolver',
             (filenames, known, info.Variable.from_id, None, None, {})),
            ('_iter_vars', ('python', resolver, None)),
        ])

    # XXX The symbol-based scenarios from the old globals_from_symbols()
    # tests (mixed symbol kinds, unknown filename/funcname, no symbols at
    # all) have not been ported to vars_from_binary() yet.

    # XXX need functional test
class VariableTests(unittest.TestCase):
    """Exercise construction, coercion and validation of Variable."""

    VALID_ARGS = (
        ('x/y/z/spam.c', 'func', 'eggs'),
        'static',
        'int',
    )
    VALID_KWARGS = dict(zip(Variable._fields, VALID_ARGS))
    VALID_EXPECTED = VALID_ARGS

    def _check_typical_init(self, storages, funcname):
        # Shared body of the "typical" init tests: one subtest per storage.
        for storage in storages:
            with self.subTest(storage):
                made = Variable(
                    id=ID(
                        filename='x/y/z/spam.c',
                        funcname=funcname,
                        name='eggs',
                    ),
                    storage=storage,
                    vartype='int',
                )

                self.assertEqual(made, (
                    ('x/y/z/spam.c', funcname, 'eggs'),
                    storage,
                    'int',
                ))

    def test_init_typical_global(self):
        self._check_typical_init(('static', 'extern', 'implicit'), None)

    def test_init_typical_local(self):
        self._check_typical_init(('static', 'local'), 'func')

    def test_init_all_missing(self):
        # Empty and None inputs both normalize to None in every field.
        for blank in ('', None):
            with self.subTest(repr(blank)):
                made = Variable(id=blank, storage=blank, vartype=blank)

                self.assertEqual(made, (None, None, None))

    def test_init_all_coerced(self):
        var_id = ID('x/y/z/spam.c', 'func', 'spam')
        # Each case is (summary, constructor kwargs, expected tuple).
        cases = [
            ('str subclass',
             dict(id=(PseudoStr('x/y/z/spam.c'),
                      PseudoStr('func'),
                      PseudoStr('spam')),
                  storage=PseudoStr('static'),
                  vartype=PseudoStr('int')),
             (var_id, 'static', 'int')),
            # NOTE(review): the expected strings are presumably
            # str(Object()); confirm the helper stringifies to ''.
            ('non-str 1',
             dict(id=var_id,
                  storage=Object(),
                  vartype=Object()),
             (var_id, '', '')),
            ('non-str 2',
             dict(id=var_id,
                  storage=StrProxy('static'),
                  vartype=StrProxy('variable')),
             (var_id, 'static', 'variable')),
            ('non-str',
             dict(id=var_id,
                  storage=('a', 'b', 'c'),
                  vartype=('x', 'y', 'z')),
             (var_id, "('a', 'b', 'c')", "('x', 'y', 'z')")),
        ]

        for summary, kwargs, expected in cases:
            with self.subTest(summary):
                made = Variable(**kwargs)

                for field in Variable._fields:
                    wanted_type = ID if field == 'id' else str
                    self.assertIs(type(getattr(made, field)), wanted_type)
                self.assertEqual(tuple(made), expected)

    def test_iterable(self):
        made = Variable(**self.VALID_KWARGS)

        var_id, storage, vartype = made

        unpacked = (var_id, storage, vartype)
        for actual, expected in zip(unpacked, self.VALID_EXPECTED):
            self.assertEqual(actual, expected)

    def test_fields(self):
        made = Variable(('a', 'b', 'z'), 'x', 'y')

        self.assertEqual(made.id, ('a', 'b', 'z'))
        self.assertEqual(made.storage, 'x')
        self.assertEqual(made.vartype, 'y')

    def test___getattr__(self):
        # The ID's fields are reachable directly on the variable.
        made = Variable(('a', 'b', 'z'), 'x', 'y')

        self.assertEqual(made.filename, 'a')
        self.assertEqual(made.funcname, 'b')
        self.assertEqual(made.name, 'z')

    def test_validate_typical(self):
        validstorage = ('static', 'extern', 'implicit', 'local')
        # Guard against this list drifting from the class's own list.
        self.assertEqual(set(validstorage), set(Variable.STORAGE))

        for storage in validstorage:
            with self.subTest(storage):
                made = Variable(
                    id=ID(
                        filename='x/y/z/spam.c',
                        funcname='func',
                        name='eggs',
                    ),
                    storage=storage,
                    vartype='int',
                )

                made.validate()  # This does not fail.

    def test_validate_missing_field(self):
        # None is "missing" for every field; UNKNOWN additionally counts as
        # missing for storage and vartype.
        replacements = [(field, None) for field in Variable._fields]
        replacements += [(field, UNKNOWN) for field in ('storage', 'vartype')]

        for field, value in replacements:
            with self.subTest(field):
                made = Variable(**self.VALID_KWARGS)
                made = made._replace(**{field: value})

                with self.assertRaises(TypeError):
                    made.validate()

    def test_validate_bad_field(self):
        badch = tuple(string.punctuation + string.digits)
        notnames = ('1a', 'a.b', 'a-b', '&a', 'a++') + badch
        cases = [
            ('id', ()),  # Any non-empty str is okay.
            ('storage', ('external', 'global') + notnames),
            ('vartype', ()),  # Any non-empty str is okay.
        ]

        # Values rejected for one field, candidates for "valid elsewhere".
        seen = set()
        for field, invalid in cases:
            for value in invalid:
                seen.add(value)
                with self.subTest(f'{field}={value!r}'):
                    made = Variable(**self.VALID_KWARGS)
                    made = made._replace(**{field: value})

                    with self.assertRaises(ValueError):
                        made.validate()

        for field, invalid in cases:
            if field == 'id':
                continue
            for value in seen - set(invalid):
                with self.subTest(f'{field}={value!r}'):
                    made = Variable(**self.VALID_KWARGS)
                    made = made._replace(**{field: value})

                    made.validate()  # This does not fail.
class FromFileTests(_BaseTests):
    """Check how from_file() combines its reader and its var handler."""

    # Canned return values for the stubs below; tests override them.
    _return_read_file = ()
    _return_handle_var = ()

    def _read_file(self, infile):
        # Stub: record the call and replay the canned rows.
        self.calls.append(('_read_file', (infile,)))
        return iter(self._return_read_file)

    def _handle_var(self, varid, decl):
        # Stub: record the call and hand out the canned variables in order.
        self.calls.append(('_handle_var', (varid, decl)))
        return self._return_handle_var.pop(0)

    def _load(self):
        # Run from_file() against the stubs.
        return from_file('known.tsv',
                         handle_var=self._handle_var,
                         _read_file=self._read_file,
                         )

    def test_typical(self):
        rows = [
            ('file1.c', '', 'var1', 'static int'),
            ('file1.c', 'func1', 'local1', 'static int'),
            ('file1.c', '', 'var2', 'int'),
            ('file1.c', 'func2', 'local2', 'char *'),
            ('file2.c', '', 'var1', 'char *'),
        ]
        expected = [Variable.from_parts(*row) for row in rows]
        self._return_read_file = [('variable', var.id, var.vartype)
                                  for var in expected]
        # The stub consumes its list, so give it a copy.
        self._return_handle_var = list(expected)

        known = self._load()

        self.assertEqual(known, {
            'variables': {var.id: var for var in expected},
        })
        handled = [('_handle_var', (var.id, var.vartype))
                   for var in expected]
        self.assertEqual(self.calls,
                         [('_read_file', ('known.tsv',))] + handled)

    def test_empty(self):
        self._return_read_file = []

        known = self._load()

        self.assertEqual(known, {
            'variables': {},
        })
        self.assertEqual(self.calls, [
            ('_read_file', ('known.tsv',)),
        ])
def glob_tree(root, *,
              suffix=None,
              _glob=glob.iglob,
              ):
    """Yield each file in the tree under the given directory name.

    If "suffix" is provided then only files with that suffix will
    be included.

    The whole tree is searched, however deep.  "_glob" is called as
    _glob(pattern, recursive=True), matching glob.iglob().

    ValueError is raised (on first iteration) if "suffix" is given
    but is not a string.
    """
    suffix = suffix or ''
    if not isinstance(suffix, str):
        raise ValueError('suffix must be a string')

    # "**" only means "any number of directories" when recursive=True;
    # without the flag it behaves like "*" and the search stops one
    # level below the root.  In recursive mode "**" also matches zero
    # directories, so this single pattern covers the files directly
    # under the root too and nothing is yielded twice.
    yield from _glob(f'{root}/**/*{suffix}', recursive=True)
class ID(_NTBase, namedtuple('ID', 'filename funcname name')):
    """A unique ID for a single symbol or declaration.

    The fields are (filename, funcname, name).  Falsy values are stored
    as None and everything else is coerced to str.
    """

    __slots__ = ()
    # XXX Add optional conditions (tuple of strings) field.
    #conditions = Slot()

    @classonly
    def from_raw(cls, raw):
        """Return an ID built from a loosely-typed value.

        A falsy value gives None.  A str is used as the name.  A
        1-item iterable is (name,) and a 2-item iterable is
        (filename, name).  Anything else is handed to the base class.
        """
        if not raw:
            return None
        if isinstance(raw, str):
            return cls(None, None, raw)
        try:
            name, = raw
            filename = None
        except ValueError:
            try:
                filename, name = raw
            except ValueError:
                # Neither 1 nor 2 items, so let the base class decide.
                return super().from_raw(raw)
        return cls(filename, None, name)

    def __new__(cls, filename, funcname, name):
        self = super().__new__(
                cls,
                filename=str(filename) if filename else None,
                funcname=str(funcname) if funcname else None,
                name=str(name) if name else None,
                )
        #cls.conditions.set(self, tuple(str(s) if s else None
        #                               for s in conditions or ()))
        return self

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data).

        TypeError is raised for a missing name, or for a funcname
        without a filename.  ValueError is raised if the name or
        funcname is not an identifier (UNKNOWN is accepted for the
        funcname).
        """
        if not self.name:
            raise TypeError('missing name')
        if not NAME_RE.match(self.name):
            raise ValueError(
                    f'name must be an identifier, got {self.name!r}')

        # Symbols from a binary might not have filename/funcname info.

        if self.funcname:
            if not self.filename:
                raise TypeError('missing filename')
            if not NAME_RE.match(self.funcname) and self.funcname != UNKNOWN:
                # Name the field that is actually wrong.
                raise ValueError(
                        f'funcname must be an identifier, got {self.funcname!r}')

        # XXX Require the filename (at least UNKNOWN)?
        # XXX Check the filename?

    @property
    def islocal(self):
        """True if the ID has a funcname, i.e. is scoped to a function."""
        return self.funcname is not None

    def match(self, other, *,
              match_files=(lambda f1, f2: f1 == f2),
              ):
        """Return True if the two match.

        At least one of the two must be completely valid (no UNKNOWN
        anywhere).  Otherwise False is returned.  The remaining one
        *may* have UNKNOWN for both funcname and filename.  It must
        have a valid name though.

        The caller is responsible for knowing which of the two is valid
        (and which to use if both are valid).

        "match_files" is called with the two filenames when both are
        known; it defaults to plain equality.  NotImplementedError is
        raised when one filename is UNKNOWN but that same ID's funcname
        is known.
        """
        # First check the name.
        if self.name is None:
            return False
        if other.name != self.name:
            return False

        # Then check the filename.
        if self.filename is None:
            return False
        if other.filename is None:
            return False
        if self.filename == UNKNOWN:
            # "other" must be the valid one.
            if other.funcname == UNKNOWN:
                return False
            elif self.funcname != UNKNOWN:
                # XXX Try matching funcname even though we don't
                # know the filename?
                raise NotImplementedError
            else:
                return True
        elif other.filename == UNKNOWN:
            # "self" must be the valid one.
            if self.funcname == UNKNOWN:
                return False
            elif other.funcname != UNKNOWN:
                # XXX Try matching funcname even though we don't
                # know the filename?
                raise NotImplementedError
            else:
                return True
        elif not match_files(self.filename, other.filename):
            return False

        # Finally, check the funcname.
        if self.funcname == UNKNOWN:
            # "other" must be the valid one.
            if other.funcname == UNKNOWN:
                return False
            return other.funcname is not None
        if other.funcname == UNKNOWN:
            # "self" must be the valid one; its funcname is already
            # known not to be UNKNOWN here.
            return self.funcname is not None
        # Both are valid.
        return self.funcname == other.funcname
class Slot:
    """A descriptor that provides a slot.

    This is useful for types that can't have slots via __slots__,
    e.g. tuple subclasses.

    Values are kept in a per-descriptor dict keyed by id(instance).
    A __del__ hook is installed on the owning class so that an
    instance's entries are dropped when the instance is finalized.
    """

    __slots__ = ('initial', 'default', 'readonly', 'instances', 'name')

    def __init__(self, initial=_NOT_SET, *,
                 default=_NOT_SET,
                 readonly=False,
                 ):
        """Set up the slot.

        "initial" is the value first seen for an instance that has not
        been given one.  "default" is used when there is no "initial"
        and after the attribute has been deleted.  If the resulting
        value is unset then reading the attribute raises
        AttributeError.  A "readonly" slot rejects normal assignment
        and deletion (see set()).
        """
        self.initial = initial
        self.default = default
        self.readonly = readonly

        # The instance cache is not inherently tied to the normal
        # lifetime of the instances.  So must do something in order to
        # avoid keeping the instances alive by holding a reference here.
        # Ideally we would use weakref.WeakValueDictionary to do this.
        # However, most builtin types do not support weakrefs.  So
        # instead we monkey-patch __del__ on the attached class to clear
        # the instance.
        self.instances = {}
        self.name = None

    def __set_name__(self, cls, name):
        if self.name is not None:
            raise TypeError('already used')
        self.name = name
        try:
            slotnames = cls.__slot_names__
        except AttributeError:
            slotnames = cls.__slot_names__ = []
        slotnames.append(name)
        self._ensure___del__(cls, slotnames)

    def __get__(self, obj, cls):
        if obj is None:  # called on the class
            return self
        try:
            value = self.instances[id(obj)]
        except KeyError:
            if self.initial is _NOT_SET:
                value = self.default
            else:
                value = self.initial
            self.instances[id(obj)] = value
        if value is _NOT_SET:
            raise AttributeError(self.name)
        # XXX Optionally make a copy?
        return value

    def __set__(self, obj, value):
        if self.readonly:
            raise AttributeError(f'{self.name} is readonly')
        # XXX Optionally coerce?
        self.instances[id(obj)] = value

    def __delete__(self, obj):
        if self.readonly:
            raise AttributeError(f'{self.name} is readonly')
        # Deleting resets to the default (not the initial value).  The
        # entry itself is only removed when the instance is finalized.
        self.instances[id(obj)] = self.default

    def _ensure___del__(self, cls, slotnames):  # See the comment in __init__().
        try:
            old___del__ = cls.__del__
        except AttributeError:
            old___del__ = (lambda s: None)
        else:
            if getattr(old___del__, '_slotted', False):
                return

        def __del__(_self):
            # Drop the cached values directly instead of going through
            # delattr(): Slot.__delete__() raises for read-only slots
            # and only resets the entry, so the entry would never go
            # away and a later object reusing the same id() would
            # inherit stale state.
            for name in slotnames:
                slot = getattr(type(_self), name, None)
                if isinstance(slot, Slot):
                    slot.instances.pop(id(_self), None)
            old___del__(_self)
        __del__._slotted = True
        cls.__del__ = __del__

    def set(self, obj, value):
        """Update the cached value for an object.

        This works even if the descriptor is read-only.  This is
        particularly useful when initializing the object (e.g. in
        its __new__ or __init__).
        """
        self.instances[id(obj)] = value
+ """ + + def __init__(self, value): + self.value = value + self.getter = classmethod(value).__get__ + self.name = None + + def __set_name__(self, cls, name): + if self.name is not None: + raise TypeError('already used') + self.name = name + + def __get__(self, obj, cls): + if obj is not None: + raise AttributeError(self.name) + # called on the class + return self.getter(None, cls) + + +class _NTBase: + + __slots__ = () + + @classonly + def from_raw(cls, raw): + if not raw: + return None + elif isinstance(raw, cls): + return raw + elif isinstance(raw, str): + return cls.from_string(raw) + else: + if hasattr(raw, 'items'): + return cls(**raw) + try: + args = tuple(raw) + except TypeError: + pass + else: + return cls(*args) + raise NotImplementedError + + @classonly + def from_string(cls, value): + """Return a new instance based on the given string.""" + raise NotImplementedError + + @classmethod + def _make(cls, iterable): # The default _make() is not subclass-friendly. + return cls.__new__(cls, *iterable) + + # XXX Always validate? + #def __init__(self, *args, **kwargs): + # self.validate() + + # XXX The default __repr__() is not subclass-friendly (where the name changes). + #def __repr__(self): + # _, _, sig = super().__repr__().partition('(') + # return f'{self.__class__.__name__}({sig}' + + # To make sorting work with None: + def __lt__(self, other): + try: + return super().__lt__(other) + except TypeError: + if None in self: + return True + elif None in other: + return False + else: + raise + + def validate(self): + return + + # XXX Always validate? 
# Regex building blocks for the (naive) C declaration parser.  All of
# them are meant to be embedded in patterns compiled with re.VERBOSE.

# The first alternative is a single ASCII letter.  The range must be
# "A-Z": "A-z" would also admit the punctuation between "Z" and "a".
IDENTIFIER = r'(?:[a-zA-Z]|_+[a-zA-Z0-9]\w*)'

TYPE_QUAL = r'(?:const|volatile)'

# This has to be an f-string so that {IDENTIFIER} is interpolated; in a
# plain raw string the braces end up as literal text in the pattern.
# NOTE(review): the built-in-type group ends with an empty alternative
# (the trailing "|"), so this pattern can match the empty string and the
# alternatives after it are effectively never tried -- confirm that is
# intended before tightening it.
VAR_TYPE_SPEC = rf'''(?:
        void |
        (?:
         (?:(?:un)?signed\s+)?
         (?:
          char |
          short |
          int |
          long |
          long\s+int |
          long\s+long
          ) |
         ) |
        float |
        double |
        {IDENTIFIER} |
        (?:struct|union)\s+{IDENTIFIER}
        )'''

POINTER = rf'''(?:
        (?:\s+const)?\s*[*]
        )'''

#STRUCT = r'''(?:
#        (?:struct|(struct\s+%s))\s*[{]
#            [^}]*
#        [}]
#        )''' % (IDENTIFIER)
#UNION = r'''(?:
#        (?:union|(union\s+%s))\s*[{]
#            [^}]*
#        [}]
#        )''' % (IDENTIFIER)
#DECL_SPEC = rf'''(?:
#        ({VAR_TYPE_SPEC}) |
#        ({STRUCT}) |
#        ({UNION})
#        )'''

# The start of a function definition: optional storage class, then the
# return type.
FUNC_START = rf'''(?:
        (?:
          (?:
            extern |
            static |
            static\s+inline
           )\s+
         )?
        #(?:const\s+)?
        {VAR_TYPE_SPEC}
        )'''
#GLOBAL_VAR_START = rf'''(?:
#        (?:
#          (?:
#            extern |
#            static
#           )\s+
#         )?
#        (?:
#           {TYPE_QUAL}
#           (?:\s+{TYPE_QUAL})?
#         )?\s+
#        {VAR_TYPE_SPEC}
#        )'''
GLOBAL_DECL_START_RE = re.compile(rf'''
        ^
        (?:
            ({FUNC_START})
         )
        ''', re.VERBOSE)

# The start of a local variable declaration: optional storage class,
# optional qualifier(s), the type, and an optional pointer marker.
LOCAL_VAR_START = rf'''(?:
        (?:
          (?:
            register |
            static
           )\s+
         )?
        (?:
          (?:
            {TYPE_QUAL}
            (?:\s+{TYPE_QUAL})?
           )\s+
         )?
        {VAR_TYPE_SPEC}
        {POINTER}?
        )'''
LOCAL_STMT_START_RE = re.compile(rf'''
        ^
        (?:
            ({LOCAL_VAR_START})
         )
        ''', re.VERBOSE)
def extract_storage(decl, *, infunc=None):
    """Return the storage class for the given declaration.

    The result is "static" or "extern" when the declaration starts with
    that keyword.  Otherwise it is "local" if infunc is true and
    "implicit" if not.  If decl is UNKNOWN then it is returned as-is.

    Only the storage is returned; the remaining type text is not split
    off.

    NotImplementedError is raised when "static" or "extern" shows up
    anywhere other than at the very start of the declaration.
    """
    if decl == UNKNOWN:
        return decl
    if decl.startswith('static '):
        return 'static'
    elif decl.startswith('extern '):
        return 'extern'
    # The pattern must be a raw string: in a plain string "\b" is a
    # backspace character rather than a word boundary.
    elif re.match(r'.*\b(static|extern)\b', decl):
        raise NotImplementedError
    elif infunc:
        return 'local'
    else:
        return 'implicit'
def _iter_locals(lines, *,
                 _iter_statements=iter_local_statements,
                 _parse_var=_parse_var,
                 _parse_compound=parse_compound,
                 ):
    """Yield (name, vartype) for each variable found in a function body.

    "lines" is the body text as a single string.  The bodies of compound
    statements are queued and processed after the text they came from.
    """
    pending = [lines]
    while pending:
        text = pending.pop(0)
        for stmt, blocks in _iter_statements(text.splitlines()):
            if blocks:
                # A compound statement: look at every header line now
                # and queue the nested bodies for later.
                headers, bodies = _parse_compound(stmt, blocks)
                candidates = (line for header in headers for line in header)
            else:
                bodies = ()
                candidates = (stmt,)
            for candidate in candidates:
                name, vartype = _parse_var(candidate)
                if name:
                    yield (name, vartype)
            pending.extend(bodies)
def variables(*filenames,
              perfilecache=None,
              preprocessed=False,
              known=None,  # for types
              handle_id=None,
              _iter_vars=_iter_vars,
              ):
    """Yield (varid, decl) for each variable found in the given files.

    The filenames may be passed as separate positional arguments or as
    a single iterable of filenames.

    "preprocessed" is passed through to the underlying iterator
    unchanged.  If "handle_id" is provided then it is forwarded as well
    (it is left out otherwise, so the iterator's own default applies).

    A per-file cache is not supported yet: passing "perfilecache"
    raises NotImplementedError (once iteration starts).
    """
    # A lone non-string argument is taken to be the iterable of filenames.
    if len(filenames) == 1 and not isinstance(filenames[0], str):
        filenames, = filenames

    if perfilecache is None:
        extra = {} if handle_id is None else {'handle_id': handle_id}
        yield from _iter_vars(filenames, preprocessed, **extra)
    else:
        # XXX Cache per-file variables (e.g. `{filename: [(varid, decl)]}`).
        raise NotImplementedError
def parse_variable_declaration(srcline):
    """Return (name, decl) for the given declaration line.

    "decl" is the text before any "=" (or before the trailing ";" when
    there is no initializer), stripped of surrounding whitespace.
    "name" is the last word in it, ignoring one trailing "[...]".
    (None, None) is returned if the line has neither "=" nor a trailing
    ";", or if no name can be found.
    """
    # XXX possible false negatives...
    head, assign, _ = srcline.partition('=')
    if assign:
        decl = head.strip()
    elif srcline.endswith(';'):
        decl = head.strip(';').strip()
    else:
        return None, None

    found = re.match(r'.*\b(\w+)\s*(?:\[[^\]]*\])?$', decl)
    if found is None:
        return None, None
    return found.group(1), decl
def find_variables(varids, filenames=None, *,
                   srccache=_NOT_SET,
                   parse_variable=None,
                   _find_symbol=find_variable,
                   ):
    """Yield (varid, decl) for each ID.

    If the variable is not found then its decl will be UNKNOWN.  That
    way there will be one resulting variable per given ID.

    An ID with a usable filename is only looked up in that file;
    otherwise every file in "filenames" is tried in order.  A variable
    that was already yielded is not matched a second time.
    """
    if srccache is _NOT_SET:
        srccache = {}

    used = set()
    for varid in varids:
        if varid.filename and varid.filename != UNKNOWN:
            srcfiles = [varid.filename]
        else:
            if not filenames:
                yield varid, UNKNOWN
                continue
            srcfiles = filenames
        for filename in srcfiles:
            found = _find_symbol(filename, varid.funcname, varid.name,
                                 ignored=used,
                                 srccache=srccache,
                                 parse_variable=parse_variable,
                                 )
            # A miss may be reported as None or as a pair starting with None.
            if not found:
                continue
            foundid, decl = found
            if foundid:
                yield foundid, decl
                used.add(foundid)
                break
        else:
            # Keep the requested ID intact for the "not found" result.
            yield varid, UNKNOWN
def _parse_directive(line):
    """Return the directive object for an already-normalized line.

    "define" lines become a Macro (when an argument list is captured)
    or a Constant.  Every other supported directive becomes an Include,
    IfDirective or OtherDirective, which is validated before being
    returned.  ValueError is raised for unsupported directives and for
    missing or unexpected directive text.
    """
    defined = DEFINE_RE.match(line)
    if defined is not None:
        name, args, text = defined.groups()
        if not args:
            return Constant(name, text)
        return Macro(name, [a.strip() for a in args.split(',')], text)

    matched = DIRECTIVE_RE.match(line)
    if matched is None:
        raise ValueError(f'unsupported directive {line!r}')
    kind, text = matched.groups()

    # "else" and "endif" are the only kinds that may come without text.
    bare = kind in ('else', 'endif')
    if text:
        if bare or kind == 'define':
            raise ValueError(f'unexpected text in directive {line!r}')
    elif not bare:
        raise ValueError(f'missing text in directive {line!r}')

    if kind == 'include':
        directive = Include(text)
    elif kind in IfDirective.KINDS:
        directive = IfDirective(kind, text)
    else:
        directive = OtherDirective(kind, text)
    directive.validate()
    return directive
class Constant(PreprocessorDirective,
               namedtuple('Constant', 'kind name value')):
    """A "define" directive without an argument list."""

    __slots__ = ()

    def __new__(cls, name, value=None):
        # Both fields are coerced to stripped strings; empty becomes None.
        return super().__new__(
            cls,
            'define',
            name=_coerce_str(name) or None,
            value=_coerce_str(value) or None,
        )

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.name:
            raise TypeError('missing name')
        if IDENTIFIER_RE.match(self.name) is None:
            raise ValueError(f'name must be identifier, got {self.name!r}')

        # value can be anything, including None
class IfDirective(PreprocessorDirective,
                  namedtuple('IfDirective', 'kind condition')):
    """A single conditional directive (e.g. "if", "ifdef").

    Only directives that carry a condition belong here.  The related
    "else" and "endif" directives are handled by OtherDirective.
    """

    __slots__ = ()

    KINDS = frozenset([
        'if',
        'ifdef',
        'ifndef',
        'elseif',
    ])

    @classmethod
    def _condition_from_raw(cls, raw, kind):
        """Return the normalized condition string (or None if empty).

        "ifdef"/"ifndef" names are rewritten as the equivalent
        "defined(...)" expression.
        """
        #return Condition.from_raw(raw, _kind=kind)
        text = _coerce_str(raw)
        if not text:
            return None
        if kind == 'ifdef':
            return f'defined({text})'
        if kind == 'ifndef':
            return f'! defined({text})'
        return text

    def __new__(cls, kind, condition):
        kind = _coerce_str(kind)
        return super().__new__(
            cls,
            kind=kind or None,
            condition=cls._condition_from_raw(condition, kind),
        )

    @property
    def text(self):
        # Undo the rewrite done by _condition_from_raw().
        if self.kind == 'ifdef':
            wrapper = 'defined('
        elif self.kind == 'ifndef':
            wrapper = '! defined('
        else:
            return self.condition
        return self.condition[len(wrapper):-1]

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.condition:
            raise TypeError('missing condition')
        # XXX Per-condition validation is not implemented.
def _recompute_conditions(directive, ifstack):
    """Update "ifstack" for the directive and return the active conditions.

    Each stack entry is (negated, active): the conditions of the earlier
    branches of that block plus the condition of the current branch
    (None for a plain "else").  The result is a tuple of condition
    strings, outermost block first, with each negated condition wrapped
    as "! (...)".
    """
    def close_branch():
        # Retire the current branch: its condition joins the negated ones.
        negated, active = ifstack.pop()
        if active:
            negated.append(active)
        return negated

    kind = directive.kind
    if kind in ('if', 'ifdef', 'ifndef'):
        ifstack.append(([], directive.condition))
    elif kind == 'elseif':
        negated = close_branch() if ifstack else []
        ifstack.append((negated, directive.condition))
    elif kind == 'else':
        if ifstack:
            ifstack.append((close_branch(), None))
    elif kind == 'endif':
        if ifstack:
            ifstack.pop()

    conditions = []
    for negated, active in ifstack:
        conditions.extend(f'! ({condition})' for condition in negated)
        if active:
            conditions.append(active)
    return tuple(conditions)
+ _, end, after = remainder.partition('*/') + while not end: + try: + lno, remainder = next(lines) + except StopIteration: + raise Exception('unterminated comment') + _, end, after = remainder.partition('*/') + + yield lno, line + + +def iter_lines(lines, *, + _iter_clean_lines=_iter_clean_lines, + _parse_directive=_parse_directive, + _recompute_conditions=_recompute_conditions, + ): + """Yield (lno, line, directive, active conditions) for each given line. + + This is effectively a subset of the operations taking place in + translation phases 2-4 from the C99 spec (ISO/IEC 9899:TC2); see + section 5.1.1.2. Line continuations are removed and comments + replaced with a single space. (In both cases "lno" will be the last + line involved.) Otherwise each line is returned as-is. + + "lno" is the (1-indexed) line number for the line. + + "directive" will be a PreprocessorDirective or None, depending on + whether or not there is a directive on the line. + + "active conditions" is the set of preprocessor conditions (e.g. + "defined()") under which the current line of code will be included + in compilation. That set is derived from every conditional + directive block (e.g. "if defined()", "ifdef", "else") containing + that line. That includes nested directives. Note that the + current line does not affect the active conditions for iteself. + It only impacts subsequent lines. That applies to directives + that close blocks (e.g. "endif") just as much as conditional + directvies. Also note that "else" and "elseif" directives + update the active conditions (for later lines), rather than + adding to them. 
def iter_clean_lines(lines):
    """Yield each of the given lines with comments and padding removed.

    Both "//" comments and "/* ... */" comments (including ones that
    span several lines) are dropped.  Each block comment that is closed
    is replaced by a single space.  Code that precedes the start of a
    block comment or follows its end on the same line is kept.  Lines
    that end up blank are skipped.

    Comment markers inside string literals are not recognized as such.
    """
    incomment = False
    for line in lines:
        kept = []
        rest = line
        while rest:
            if incomment:
                # Skip ahead to the end of the block comment, if any.
                _, closed, rest = rest.partition('*/')
                if not closed:
                    break
                incomment = False
                kept.append(' ')
                continue

            # Whichever marker comes first decides how to proceed.
            block = rest.find('/*')
            eol = rest.find('//')
            if eol >= 0 and (block < 0 or eol < block):
                kept.append(rest[:eol])
                break
            if block < 0:
                kept.append(rest)
                break
            kept.append(rest[:block])
            rest = rest[block + 2:]
            incomment = True

        # Ignore blank lines and leading/trailing whitespace.
        line = ''.join(kept).strip()
        if line:
            yield line
def iter_symbols(binfile, *,
                 nm=None,
                 handle_id=None,
                 _which=shutil.which,
                 _run=util.run_cmd,
                 ):
    """Yield a Symbol for each relevant entry reported by the "nm" command.

    "nm" is the command to run; if it is None then it is looked up with
    _which() and NotImplementedError is raised when it cannot be found.
    "handle_id" builds the symbol ID from (filename, funcname, name) and
    defaults to info.ID.

    Only variables are yielded, and special symbols are skipped.  Any
    failure while running the command propagates unchanged.
    """
    if nm is None:
        nm = _which('nm')
        if not nm:
            # XXX Use dumpbin.exe /SYMBOLS on Windows.
            raise NotImplementedError
    if handle_id is None:
        handle_id = info.ID

    argv = [nm,
            '--line-numbers',
            binfile,
            ]
    # By this point nm is never None, so no fallback applies on failure.
    output = _run(argv)
    for line in output.splitlines():
        (name, kind, external, filename, funcname,
         ) = _parse_nm_line(line)
        if kind != Symbol.KIND.VARIABLE:
            continue
        elif _is_special_symbol(name):
            continue
        yield Symbol(
            id=handle_id(filename, funcname, name),
            kind=kind,
            external=external,
        )
def _resolve_known(symbol, knownvars):
    """Remove and return the known variable that matches the symbol.

    "knownvars" maps IDs to variables.  The first ID accepted by
    symbol.match() is popped from the mapping and its value returned.
    None is returned (and the mapping left alone) if nothing matches.
    """
    for varid in knownvars:
        if symbol.match(varid):
            # Returning right away, so mutating the mapping here is safe.
            return knownvars.pop(varid)
    return None
def symbol(symbol, filenames, known=None, *,
           perfilecache=None,
           preprocessed=False,
           handle_id=None,
           handle_var=None,
           _get_resolver=get_resolver,
           ):
    """Return a Variable for the one matching the given symbol.

    "symbol" can be one of several objects:

    * Symbol - use the contained info
    * name (str) - look for a global variable with that name
    * (filename, name) - look for named global in file
    * (filename, funcname, name) - look for named local in file

    A name is always required.  If the filename is None, "", or
    "UNKNOWN" then all files will be searched.  If the funcname is
    "" or "UNKNOWN" then only local variables will be searched for.

    "handle_var" is a callable that takes (ID, decl) and returns a
    Variable.  It defaults to Variable.from_id.

    "handle_id" is accepted for backward compatibility only; the
    resolver does not take it, so it is not forwarded.
    """
    # NOTE(review): the resolver built here reads attributes of a Symbol;
    # whether the str/tuple forms listed above are handled downstream is
    # not visible from this function -- confirm before relying on them.
    if handle_var is None:
        # Imported lazily: the "variables" package imports this module.
        from ..variables.info import Variable
        handle_var = Variable.from_id
    # get_resolver() takes (filenames, known) in that order and requires
    # the "handle_var" keyword.
    resolve = _get_resolver(filenames, known,
                            handle_var=handle_var,
                            perfilecache=perfilecache,
                            preprocessed=preprocessed,
                            )
    return resolve(symbol)
return cls(id, kind, external) + + def __new__(cls, id, kind=KIND.VARIABLE, external=None): + self = super().__new__( + cls, + id=ID.from_raw(id), + kind=str(kind) if kind else None, + external=bool(external) if external is not None else None, + ) + return self + + def __hash__(self): + return hash(self.id) + + def __getattr__(self, name): + return getattr(self.id, name) + + def validate(self): + """Fail if the object is invalid (i.e. init with bad data).""" + if not self.id: + raise TypeError('missing id') + else: + self.id.validate() + + if not self.kind: + raise TypeError('missing kind') + elif self.kind not in vars(self.KIND).values(): + raise ValueError(f'unsupported kind {self.kind}') + + if self.external is None: + raise TypeError('missing external') diff --git a/Tools/c-analyzer/c_analyzer/variables/__init__.py b/Tools/c-analyzer/c_analyzer/variables/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Tools/c-analyzer/c_analyzer/variables/find.py b/Tools/c-analyzer/c_analyzer/variables/find.py new file mode 100644 index 0000000..3fe7284 --- /dev/null +++ b/Tools/c-analyzer/c_analyzer/variables/find.py @@ -0,0 +1,75 @@ +from ..common import files +from ..common.info import UNKNOWN +from ..parser import ( + find as p_find, + ) +from ..symbols import ( + info as s_info, + find as s_find, + ) +from .info import Variable + +# XXX need tests: +# * vars_from_source + + +def _remove_cached(cache, var): + if not cache: + return + try: + cached = cache[var.filename] + cached.remove(var) + except (KeyError, IndexError): + pass + + +def vars_from_binary(binfile, *, + known=None, + filenames=None, + handle_id=None, + check_filename=None, + handle_var=Variable.from_id, + _iter_vars=s_find.variables, + _get_symbol_resolver=s_find.get_resolver, + ): + """Yield a Variable for each found Symbol. + + Details are filled in from the given "known" variables and types. 
+ """ + cache = {} + resolve = _get_symbol_resolver(filenames, known, + handle_var=handle_var, + check_filename=check_filename, + perfilecache=cache, + ) + for var, symbol in _iter_vars(binfile, + resolve=resolve, + handle_id=handle_id, + ): + if var is None: + var = Variable(symbol.id, UNKNOWN, UNKNOWN) + yield var + _remove_cached(cache, var) + + +def vars_from_source(filenames, *, + preprocessed=None, + known=None, + handle_id=None, + handle_var=Variable.from_id, + iter_vars=p_find.variables, + ): + """Yield a Variable for each declaration in the raw source code. + + Details are filled in from the given "known" variables and types. + """ + cache = {} + for varid, decl in iter_vars(filenames or (), + perfilecache=cache, + preprocessed=preprocessed, + known=known, + handle_id=handle_id, + ): + var = handle_var(varid, decl) + yield var + _remove_cached(cache, var) diff --git a/Tools/c-analyzer/c_analyzer/variables/info.py b/Tools/c-analyzer/c_analyzer/variables/info.py new file mode 100644 index 0000000..336a523 --- /dev/null +++ b/Tools/c-analyzer/c_analyzer/variables/info.py @@ -0,0 +1,93 @@ +from collections import namedtuple + +from ..common.info import ID, UNKNOWN +from ..common.util import classonly, _NTBase + + +def normalize_vartype(vartype): + """Return the canonical form for a variable type (or func signature).""" + # We allow empty strring through for semantic reasons. + if vartype is None: + return None + + # XXX finish! + # XXX Return (modifiers, type, pointer)? + return str(vartype) + + +# XXX Variable.vartype -> decl (Declaration). 
class Variable(_NTBase,
               namedtuple('Variable', 'id storage vartype')):
    """Information about a single variable declaration.

    Fields:
      id       the ID (filename, funcname, name) of the variable
      storage  one of the STORAGE values (or None if not given)
      vartype  the normalized declaration/type string (or None)

    Attributes that are not found on the Variable itself are looked up
    on its ID (e.g. "filename", "funcname", "name").
    """

    __slots__ = ()

    # The storage values accepted by validate().
    STORAGE = (
            'static',
            'extern',
            'implicit',
            'local',
            )

    @classonly
    def from_parts(cls, filename, funcname, name, decl, storage=None):
        """Return a new Variable built from the individual ID parts.

        If "storage" is not given then it is derived from "decl"
        (see from_id()).
        """
        varid = ID(filename, funcname, name)
        if storage is None:
            self = cls.from_id(varid, decl)
        else:
            self = cls(varid, storage, decl)
        return self

    @classonly
    def from_id(cls, varid, decl):
        """Return a new Variable, extracting the storage from "decl"."""
        # Imported here rather than at module level.
        from ..parser.declarations import extract_storage
        storage = extract_storage(decl, infunc=varid.funcname)
        return cls(varid, storage, decl)

    def __new__(cls, id, storage, vartype):
        # Coerce each field; falsy storage/vartype are stored as None.
        self = super().__new__(
                cls,
                id=ID.from_raw(id),
                storage=str(storage) if storage else None,
                vartype=normalize_vartype(vartype) if vartype else None,
                )
        return self

    def __hash__(self):
        # Variables hash by ID only.
        return hash(self.id)

    def __getattr__(self, name):
        # Only called when normal lookup fails; delegate to the ID.
        return getattr(self.id, name)

    def _validate_id(self):
        """Fail if the ID is missing or lacks a known filename/funcname."""
        if not self.id:
            raise TypeError('missing id')

        if not self.filename or self.filename == UNKNOWN:
            raise TypeError(f'id missing filename ({self.id})')

        if self.funcname and self.funcname == UNKNOWN:
            raise TypeError(f'id missing funcname ({self.id})')

        self.id.validate()

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data).

        TypeError is raised for a missing (or UNKNOWN) field and
        ValueError for a storage that is not in STORAGE.
        """
        self._validate_id()

        if self.storage is None or self.storage == UNKNOWN:
            raise TypeError('missing storage')
        elif self.storage not in self.STORAGE:
            # "!r" is the repr conversion; ":r" is not a valid format
            # spec for str and would fail while building the message.
            raise ValueError(f'unsupported storage {self.storage!r}')

        if self.vartype is None or self.vartype == UNKNOWN:
            raise TypeError('missing vartype')

    @property
    def isglobal(self):
        """True unless the storage is "local"."""
        return self.storage != 'local'

    @property
    def isconst(self):
        """True if "const" is one of the words of the vartype."""
        # vartype is None when no declaration was given.
        return 'const' in (self.vartype or '').split()
def from_file(infile, *,
              handle_var=Variable.from_id,
              _read_file=read_file,
              ):
    """Return the info for known declarations in the given file.

    The result is a dict with a "variables" entry that maps each ID to
    the value returned by handle_var(id, decl).  Each value is validated
    before it is stored.  ValueError is raised for a row whose kind is
    not "variable".
    """
    known = {
        'variables': {},
        #'types': {},
        #'constants': {},
        #'macros': {},
        }
    for kind, varid, decl in _read_file(infile):
        if kind == 'variable':
            values = known['variables']
            value = handle_var(varid, decl)
        else:
            # Only the parsed parts are available here, not the raw row.
            raise ValueError(f'unsupported kind {kind!r} for {varid}')
        value.validate()
        values[varid] = value
    return known


def look_up_variable(varid, knownvars, *,
                     match_files=(lambda f1, f2: f1 == f2),
                     ):
    """Return the known Variable matching the given ID.

    "knownvars" is a mapping of ID to Variable.

    "match_files" is used to verify if two filenames point to
    the same file.

    If no match is found then None is returned.

    When the funcname is UNKNOWN only known locals (entries with a
    funcname) are considered, matched by name and, if the filename is
    known, by file.  NotImplementedError is raised when the funcname is
    known but the filename is not.
    """
    if not knownvars:
        return None

    nofile = not varid.filename or varid.filename == UNKNOWN
    if varid.funcname == UNKNOWN:
        # Use a separate loop variable so that each known ID is compared
        # against the requested one (and not against itself).
        for knownid in knownvars:
            if not knownid.funcname:
                continue
            if not nofile and not match_files(knownid.filename,
                                              varid.filename):
                continue
            if knownid.name == varid.name:
                return knownvars[knownid]
        return None
    elif nofile:
        raise NotImplementedError
    else:
        # Accept either a bare ID or an object wrapping one in ".id".
        return knownvars.get(getattr(varid, 'id', varid))
import SOURCE_DIRS, REPO_ROOT -from .known import DATA_FILE as KNOWN_FILE, HEADER as KNOWN_HEADER -from .info import UNKNOWN, ID -from .util import write_tsv -from .files import iter_cpython_files - - -POTS = ('char ', 'wchar_t ', 'int ', 'Py_ssize_t ') -POTS += tuple('const ' + v for v in POTS) -STRUCTS = ('PyTypeObject', 'PyObject', 'PyMethodDef', 'PyModuleDef', 'grammar') - - -def _parse_global(line, funcname=None): - line = line.strip() - if line.startswith('static '): - if '(' in line and '[' not in line and ' = ' not in line: - return None, None - name, decl = parse_variable_declaration(line) - elif line.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')): - name, decl = parse_variable_declaration(line) - elif line.startswith('_Py_static_string('): - decl = line.strip(';').strip() - name = line.split('(')[1].split(',')[0].strip() - elif line.startswith('_Py_IDENTIFIER('): - decl = line.strip(';').strip() - name = 'PyId_' + line.split('(')[1].split(')')[0].strip() - elif funcname: - return None, None - - # global-only - elif line.startswith('PyAPI_DATA('): # only in .h files - name, decl = parse_variable_declaration(line) - elif line.startswith('extern '): # only in .h files - name, decl = parse_variable_declaration(line) - elif line.startswith('PyDoc_VAR('): - decl = line.strip(';').strip() - name = line.split('(')[1].split(')')[0].strip() - elif line.startswith(POTS): # implied static - if '(' in line and '[' not in line and ' = ' not in line: - return None, None - name, decl = parse_variable_declaration(line) - elif line.startswith(STRUCTS) and line.endswith(' = {'): # implied static - name, decl = parse_variable_declaration(line) - elif line.startswith(STRUCTS) and line.endswith(' = NULL;'): # implied static - name, decl = parse_variable_declaration(line) - elif line.startswith('struct '): - if not line.endswith(' = {'): - return None, None - if not line.partition(' ')[2].startswith(STRUCTS): - return None, None - # implied static - name, decl = 
parse_variable_declaration(line) - - # file-specific - elif line.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')): - # Objects/typeobject.c - funcname = line.split('(')[1].split(',')[0] - return [ - ('op_id', funcname, '_Py_static_string(op_id, OPSTR)'), - ('rop_id', funcname, '_Py_static_string(op_id, OPSTR)'), - ] - elif line.startswith('WRAP_METHOD('): - # Objects/weakrefobject.c - funcname, name = (v.strip() for v in line.split('(')[1].split(')')[0].split(',')) - return [ - ('PyId_' + name, funcname, f'_Py_IDENTIFIER({name})'), - ] - - else: - return None, None - return name, decl - - -def _pop_cached(varcache, filename, funcname, name, *, - _iter_variables=iter_variables, - ): - # Look for the file. - try: - cached = varcache[filename] - except KeyError: - cached = varcache[filename] = {} - for variable in _iter_variables(filename, - parse_variable=_parse_global, - ): - variable._isglobal = True - cached[variable.id] = variable - for var in cached: - print(' ', var) - - # Look for the variable. 
- if funcname == UNKNOWN: - for varid in cached: - if varid.name == name: - break - else: - return None - return cached.pop(varid) - else: - return cached.pop((filename, funcname, name), None) - - -def find_matching_variable(varid, varcache, allfilenames, *, - _pop_cached=_pop_cached, - ): - if varid.filename and varid.filename != UNKNOWN: - filenames = [varid.filename] - else: - filenames = allfilenames - for filename in filenames: - variable = _pop_cached(varcache, filename, varid.funcname, varid.name) - if variable is not None: - return variable - else: - if varid.filename and varid.filename != UNKNOWN and varid.funcname is None: - for filename in allfilenames: - if not filename.endswith('.h'): - continue - variable = _pop_cached(varcache, filename, None, varid.name) - if variable is not None: - return variable - return None - - -MULTILINE = { - # Python/Python-ast.c - 'Load_singleton': 'PyObject *', - 'Store_singleton': 'PyObject *', - 'Del_singleton': 'PyObject *', - 'AugLoad_singleton': 'PyObject *', - 'AugStore_singleton': 'PyObject *', - 'Param_singleton': 'PyObject *', - 'And_singleton': 'PyObject *', - 'Or_singleton': 'PyObject *', - 'Add_singleton': 'static PyObject *', - 'Sub_singleton': 'static PyObject *', - 'Mult_singleton': 'static PyObject *', - 'MatMult_singleton': 'static PyObject *', - 'Div_singleton': 'static PyObject *', - 'Mod_singleton': 'static PyObject *', - 'Pow_singleton': 'static PyObject *', - 'LShift_singleton': 'static PyObject *', - 'RShift_singleton': 'static PyObject *', - 'BitOr_singleton': 'static PyObject *', - 'BitXor_singleton': 'static PyObject *', - 'BitAnd_singleton': 'static PyObject *', - 'FloorDiv_singleton': 'static PyObject *', - 'Invert_singleton': 'static PyObject *', - 'Not_singleton': 'static PyObject *', - 'UAdd_singleton': 'static PyObject *', - 'USub_singleton': 'static PyObject *', - 'Eq_singleton': 'static PyObject *', - 'NotEq_singleton': 'static PyObject *', - 'Lt_singleton': 'static PyObject *', - 
'LtE_singleton': 'static PyObject *', - 'Gt_singleton': 'static PyObject *', - 'GtE_singleton': 'static PyObject *', - 'Is_singleton': 'static PyObject *', - 'IsNot_singleton': 'static PyObject *', - 'In_singleton': 'static PyObject *', - 'NotIn_singleton': 'static PyObject *', - # Python/symtable.c - 'top': 'static identifier ', - 'lambda': 'static identifier ', - 'genexpr': 'static identifier ', - 'listcomp': 'static identifier ', - 'setcomp': 'static identifier ', - 'dictcomp': 'static identifier ', - '__class__': 'static identifier ', - # Python/compile.c - '__doc__': 'static PyObject *', - '__annotations__': 'static PyObject *', - # Objects/floatobject.c - 'double_format': 'static float_format_type ', - 'float_format': 'static float_format_type ', - 'detected_double_format': 'static float_format_type ', - 'detected_float_format': 'static float_format_type ', - # Parser/listnode.c - 'level': 'static int ', - 'atbol': 'static int ', - # Python/dtoa.c - 'private_mem': 'static double private_mem[PRIVATE_mem]', - 'pmem_next': 'static double *', - # Modules/_weakref.c - 'weakref_functions': 'static PyMethodDef ', -} -INLINE = { - # Modules/_tracemalloc.c - 'allocators': 'static struct { PyMemAllocatorEx mem; PyMemAllocatorEx raw; PyMemAllocatorEx obj; } ', - # Modules/faulthandler.c - 'fatal_error': 'static struct { int enabled; PyObject *file; int fd; int all_threads; PyInterpreterState *interp; void *exc_handler; } ', - 'thread': 'static struct { PyObject *file; int fd; PY_TIMEOUT_T timeout_us; int repeat; PyInterpreterState *interp; int exit; char *header; size_t header_len; PyThread_type_lock cancel_event; PyThread_type_lock running; } ', - # Modules/signalmodule.c - 'Handlers': 'static volatile struct { _Py_atomic_int tripped; PyObject *func; } Handlers[NSIG]', - 'wakeup': 'static volatile struct { SOCKET_T fd; int warn_on_full_buffer; int use_send; } ', - # Python/dynload_shlib.c - 'handles': 'static struct { dev_t dev; ino_t ino; void *handle; } 
handles[128]', - # Objects/obmalloc.c - '_PyMem_Debug': 'static struct { debug_alloc_api_t raw; debug_alloc_api_t mem; debug_alloc_api_t obj; } ', - # Python/bootstrap_hash.c - 'urandom_cache': 'static struct { int fd; dev_t st_dev; ino_t st_ino; } ', - } -FUNC = { - # Objects/object.c - '_Py_abstract_hack': 'Py_ssize_t (*_Py_abstract_hack)(PyObject *)', - # Parser/myreadline.c - 'PyOS_InputHook': 'int (*PyOS_InputHook)(void)', - # Python/pylifecycle.c - '_PyOS_mystrnicmp_hack': 'int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t)', - # Parser/myreadline.c - 'PyOS_ReadlineFunctionPointer': 'char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *)', - } -IMPLIED = { - # Objects/boolobject.c - '_Py_FalseStruct': 'static struct _longobject ', - '_Py_TrueStruct': 'static struct _longobject ', - # Modules/config.c - '_PyImport_Inittab': 'struct _inittab _PyImport_Inittab[]', - } -GLOBALS = {} -GLOBALS.update(MULTILINE) -GLOBALS.update(INLINE) -GLOBALS.update(FUNC) -GLOBALS.update(IMPLIED) - -LOCALS = { - 'buildinfo': ('Modules/getbuildinfo.c', - 'Py_GetBuildInfo', - 'static char buildinfo[50 + sizeof(GITVERSION) + ((sizeof(GITTAG) > sizeof(GITBRANCH)) ? 
sizeof(GITTAG) : sizeof(GITBRANCH))]'), - 'methods': ('Python/codecs.c', - '_PyCodecRegistry_Init', - 'static struct { char *name; PyMethodDef def; } methods[]'), - } - - -def _known(symbol): - if symbol.funcname: - if symbol.funcname != UNKNOWN or symbol.filename != UNKNOWN: - raise KeyError(symbol.name) - filename, funcname, decl = LOCALS[symbol.name] - varid = ID(filename, funcname, symbol.name) - elif not symbol.filename or symbol.filename == UNKNOWN: - raise KeyError(symbol.name) - else: - varid = symbol.id - try: - decl = GLOBALS[symbol.name] - except KeyError: - - if symbol.name.endswith('_methods'): - decl = 'static PyMethodDef ' - elif symbol.filename == 'Objects/exceptions.c' and symbol.name.startswith(('PyExc_', '_PyExc_')): - decl = 'static PyTypeObject ' - else: - raise - if symbol.name not in decl: - decl = decl + symbol.name - return Variable(varid, 'static', decl) - - -def known_row(varid, decl): - return ( - varid.filename, - varid.funcname or '-', - varid.name, - 'variable', - decl, - ) - - -def known_rows(symbols, *, - cached=True, - _get_filenames=iter_cpython_files, - _find_match=find_matching_variable, - _find_symbols=find_variables, - _as_known=known_row, - ): - filenames = list(_get_filenames()) - cache = {} - if cached: - for symbol in symbols: - try: - found = _known(symbol) - except KeyError: - found = _find_match(symbol, cache, filenames) - if found is None: - found = Variable(symbol.id, UNKNOWN, UNKNOWN) - yield _as_known(found.id, found.vartype) - else: - raise NotImplementedError # XXX incorporate KNOWN - for variable in _find_symbols(symbols, filenames, - srccache=cache, - parse_variable=_parse_global, - ): - #variable = variable._replace( - # filename=os.path.relpath(variable.filename, REPO_ROOT)) - if variable.funcname == UNKNOWN: - print(variable) - if variable.vartype== UNKNOWN: - print(variable) - yield _as_known(variable.id, variable.vartype) - - -def generate(symbols, filename=None, *, - _generate_rows=known_rows, - 
_write_tsv=write_tsv, - ): - if not filename: - filename = KNOWN_FILE + '.new' - - rows = _generate_rows(symbols) - _write_tsv(filename, KNOWN_HEADER, rows) - - -if __name__ == '__main__': - from c_symbols import binary - symbols = binary.iter_symbols( - binary.PYTHON, - find_local_symbol=None, - ) - generate(symbols) diff --git a/Tools/c-analyzer/c_analyzer_common/files.py b/Tools/c-analyzer/c_analyzer_common/files.py deleted file mode 100644 index b3cd16c..0000000 --- a/Tools/c-analyzer/c_analyzer_common/files.py +++ /dev/null @@ -1,138 +0,0 @@ -import glob -import os -import os.path - -from . import SOURCE_DIRS, REPO_ROOT - - -C_SOURCE_SUFFIXES = ('.c', '.h') - - -def _walk_tree(root, *, - _walk=os.walk, - ): - # A wrapper around os.walk that resolves the filenames. - for parent, _, names in _walk(root): - for name in names: - yield os.path.join(parent, name) - - -def walk_tree(root, *, - suffix=None, - walk=_walk_tree, - ): - """Yield each file in the tree under the given directory name. - - If "suffix" is provided then only files with that suffix will - be included. - """ - if suffix and not isinstance(suffix, str): - raise ValueError('suffix must be a string') - - for filename in walk(root): - if suffix and not filename.endswith(suffix): - continue - yield filename - - -def glob_tree(root, *, - suffix=None, - _glob=glob.iglob, - ): - """Yield each file in the tree under the given directory name. - - If "suffix" is provided then only files with that suffix will - be included. - """ - suffix = suffix or '' - if not isinstance(suffix, str): - raise ValueError('suffix must be a string') - - for filename in _glob(f'{root}/*{suffix}'): - yield filename - for filename in _glob(f'{root}/**/*{suffix}'): - yield filename - - -def iter_files(root, suffix=None, relparent=None, *, - get_files=os.walk, - _glob=glob_tree, - _walk=walk_tree, - ): - """Yield each file in the tree under the given directory name. 
- - If "root" is a non-string iterable then do the same for each of - those trees. - - If "suffix" is provided then only files with that suffix will - be included. - - if "relparent" is provided then it is used to resolve each - filename as a relative path. - """ - if not isinstance(root, str): - roots = root - for root in roots: - yield from iter_files(root, suffix, relparent, - get_files=get_files, - _glob=_glob, _walk=_walk) - return - - # Use the right "walk" function. - if get_files in (glob.glob, glob.iglob, glob_tree): - get_files = _glob - else: - _files = _walk_tree if get_files in (os.walk, walk_tree) else get_files - get_files = (lambda *a, **k: _walk(*a, walk=_files, **k)) - - # Handle a single suffix. - if suffix and not isinstance(suffix, str): - filenames = get_files(root) - suffix = tuple(suffix) - else: - filenames = get_files(root, suffix=suffix) - suffix = None - - for filename in filenames: - if suffix and not isinstance(suffix, str): # multiple suffixes - if not filename.endswith(suffix): - continue - if relparent: - filename = os.path.relpath(filename, relparent) - yield filename - - -def iter_files_by_suffix(root, suffixes, relparent=None, *, - walk=walk_tree, - _iter_files=iter_files, - ): - """Yield each file in the tree that has the given suffixes. - - Unlike iter_files(), the results are in the original suffix order. - """ - if isinstance(suffixes, str): - suffixes = [suffixes] - # XXX Ignore repeated suffixes? 
- for suffix in suffixes: - yield from _iter_files(root, suffix, relparent) - - -def iter_cpython_files(*, - walk=walk_tree, - _files=iter_files_by_suffix, - ): - """Yield each file in the tree for each of the given directory names.""" - excludedtrees = [ - os.path.join('Include', 'cpython', ''), - ] - def is_excluded(filename): - for root in excludedtrees: - if filename.startswith(root): - return True - return False - for filename in _files(SOURCE_DIRS, C_SOURCE_SUFFIXES, REPO_ROOT, - walk=walk, - ): - if is_excluded(filename): - continue - yield filename diff --git a/Tools/c-analyzer/c_analyzer_common/info.py b/Tools/c-analyzer/c_analyzer_common/info.py deleted file mode 100644 index e217380..0000000 --- a/Tools/c-analyzer/c_analyzer_common/info.py +++ /dev/null @@ -1,69 +0,0 @@ -from collections import namedtuple -import re - -from .util import classonly, _NTBase - - -UNKNOWN = '???' - -NAME_RE = re.compile(r'^([a-zA-Z]|_\w*[a-zA-Z]\w*|[a-zA-Z]\w*)$') - - -class ID(_NTBase, namedtuple('ID', 'filename funcname name')): - """A unique ID for a single symbol or declaration.""" - - __slots__ = () - # XXX Add optional conditions (tuple of strings) field. - #conditions = Slot() - - @classonly - def from_raw(cls, raw): - if not raw: - return None - if isinstance(raw, str): - return cls(None, None, raw) - try: - name, = raw - filename = None - except ValueError: - try: - filename, name = raw - except ValueError: - return super().from_raw(raw) - return cls(filename, None, name) - - def __new__(cls, filename, funcname, name): - self = super().__new__( - cls, - filename=str(filename) if filename else None, - funcname=str(funcname) if funcname else None, - name=str(name) if name else None, - ) - #cls.conditions.set(self, tuple(str(s) if s else None - # for s in conditions or ())) - return self - - def validate(self): - """Fail if the object is invalid (i.e. 
init with bad data).""" - if not self.name: - raise TypeError('missing name') - else: - if not NAME_RE.match(self.name): - raise ValueError( - f'name must be an identifier, got {self.name!r}') - - # Symbols from a binary might not have filename/funcname info. - - if self.funcname: - if not self.filename: - raise TypeError('missing filename') - if not NAME_RE.match(self.funcname) and self.funcname != UNKNOWN: - raise ValueError( - f'name must be an identifier, got {self.funcname!r}') - - # XXX Require the filename (at least UNKONWN)? - # XXX Check the filename? - - @property - def islocal(self): - return self.funcname is not None diff --git a/Tools/c-analyzer/c_analyzer_common/known.py b/Tools/c-analyzer/c_analyzer_common/known.py deleted file mode 100644 index dec1e1d..0000000 --- a/Tools/c-analyzer/c_analyzer_common/known.py +++ /dev/null @@ -1,74 +0,0 @@ -import csv -import os.path - -from c_parser.info import Variable - -from . import DATA_DIR -from .info import ID, UNKNOWN -from .util import read_tsv - - -DATA_FILE = os.path.join(DATA_DIR, 'known.tsv') - -COLUMNS = ('filename', 'funcname', 'name', 'kind', 'declaration') -HEADER = '\t'.join(COLUMNS) - - -# XXX need tests: -# * from_file() - -def from_file(infile, *, - _read_tsv=read_tsv, - ): - """Return the info for known declarations in the given file.""" - known = { - 'variables': {}, - #'types': {}, - #'constants': {}, - #'macros': {}, - } - for row in _read_tsv(infile, HEADER): - filename, funcname, name, kind, declaration = row - if not funcname or funcname == '-': - funcname = None - id = ID(filename, funcname, name) - if kind == 'variable': - values = known['variables'] - if funcname: - storage = _get_storage(declaration) or 'local' - else: - storage = _get_storage(declaration) or 'implicit' - value = Variable(id, storage, declaration) - else: - raise ValueError(f'unsupported kind in row {row}') - value.validate() -# if value.name == 'id' and declaration == UNKNOWN: -# # None of these are variables. 
-# declaration = 'int id'; -# else: -# value.validate() - values[id] = value - return known - - -def _get_storage(decl): - # statics - if decl.startswith('static '): - return 'static' - if decl.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')): - return 'static' - if decl.startswith(('_Py_IDENTIFIER(', '_Py_static_string(')): - return 'static' - if decl.startswith('PyDoc_VAR('): - return 'static' - if decl.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')): - return 'static' - if decl.startswith('WRAP_METHOD('): - return 'static' - # public extern - if decl.startswith('extern '): - return 'extern' - if decl.startswith('PyAPI_DATA('): - return 'extern' - # implicit or local - return None diff --git a/Tools/c-analyzer/c_analyzer_common/util.py b/Tools/c-analyzer/c_analyzer_common/util.py deleted file mode 100644 index 43d0bb6..0000000 --- a/Tools/c-analyzer/c_analyzer_common/util.py +++ /dev/null @@ -1,243 +0,0 @@ -import csv -import subprocess - - -_NOT_SET = object() - - -def run_cmd(argv, **kwargs): - proc = subprocess.run( - argv, - #capture_output=True, - #stderr=subprocess.STDOUT, - stdout=subprocess.PIPE, - text=True, - check=True, - **kwargs - ) - return proc.stdout - - -def read_tsv(infile, header, *, - _open=open, - _get_reader=csv.reader, - ): - """Yield each row of the given TSV (tab-separated) file.""" - if isinstance(infile, str): - with _open(infile, newline='') as infile: - yield from read_tsv(infile, header, - _open=_open, - _get_reader=_get_reader, - ) - return - lines = iter(infile) - - # Validate the header. 
- try: - actualheader = next(lines).strip() - except StopIteration: - actualheader = '' - if actualheader != header: - raise ValueError(f'bad header {actualheader!r}') - - for row in _get_reader(lines, delimiter='\t'): - yield tuple(v.strip() for v in row) - - -def write_tsv(outfile, header, rows, *, - _open=open, - _get_writer=csv.writer, - ): - """Write each of the rows to the given TSV (tab-separated) file.""" - if isinstance(outfile, str): - with _open(outfile, 'w', newline='') as outfile: - return write_tsv(outfile, header, rows, - _open=_open, - _get_writer=_get_writer, - ) - - if isinstance(header, str): - header = header.split('\t') - writer = _get_writer(outfile, delimiter='\t') - writer.writerow(header) - for row in rows: - writer.writerow('' if v is None else str(v) - for v in row) - - -class Slot: - """A descriptor that provides a slot. - - This is useful for types that can't have slots via __slots__, - e.g. tuple subclasses. - """ - - __slots__ = ('initial', 'default', 'readonly', 'instances', 'name') - - def __init__(self, initial=_NOT_SET, *, - default=_NOT_SET, - readonly=False, - ): - self.initial = initial - self.default = default - self.readonly = readonly - - # The instance cache is not inherently tied to the normal - # lifetime of the instances. So must do something in order to - # avoid keeping the instances alive by holding a reference here. - # Ideally we would use weakref.WeakValueDictionary to do this. - # However, most builtin types do not support weakrefs. So - # instead we monkey-patch __del__ on the attached class to clear - # the instance. 
- self.instances = {} - self.name = None - - def __set_name__(self, cls, name): - if self.name is not None: - raise TypeError('already used') - self.name = name - try: - slotnames = cls.__slot_names__ - except AttributeError: - slotnames = cls.__slot_names__ = [] - slotnames.append(name) - self._ensure___del__(cls, slotnames) - - def __get__(self, obj, cls): - if obj is None: # called on the class - return self - try: - value = self.instances[id(obj)] - except KeyError: - if self.initial is _NOT_SET: - value = self.default - else: - value = self.initial - self.instances[id(obj)] = value - if value is _NOT_SET: - raise AttributeError(self.name) - # XXX Optionally make a copy? - return value - - def __set__(self, obj, value): - if self.readonly: - raise AttributeError(f'{self.name} is readonly') - # XXX Optionally coerce? - self.instances[id(obj)] = value - - def __delete__(self, obj): - if self.readonly: - raise AttributeError(f'{self.name} is readonly') - self.instances[id(obj)] = self.default # XXX refleak? - - def _ensure___del__(self, cls, slotnames): # See the comment in __init__(). - try: - old___del__ = cls.__del__ - except AttributeError: - old___del__ = (lambda s: None) - else: - if getattr(old___del__, '_slotted', False): - return - - def __del__(_self): - for name in slotnames: - delattr(_self, name) - old___del__(_self) - __del__._slotted = True - cls.__del__ = __del__ - - def set(self, obj, value): - """Update the cached value for an object. - - This works even if the descriptor is read-only. This is - particularly useful when initializing the object (e.g. in - its __new__ or __init__). - """ - self.instances[id(obj)] = value - - -class classonly: - """A non-data descriptor that makes a value only visible on the class. - - This is like the "classmethod" builtin, but does not show up on - instances of the class. It may be used as a decorator. 
- """ - - def __init__(self, value): - self.value = value - self.getter = classmethod(value).__get__ - self.name = None - - def __set_name__(self, cls, name): - if self.name is not None: - raise TypeError('already used') - self.name = name - - def __get__(self, obj, cls): - if obj is not None: - raise AttributeError(self.name) - # called on the class - return self.getter(None, cls) - - -class _NTBase: - - __slots__ = () - - @classonly - def from_raw(cls, raw): - if not raw: - return None - elif isinstance(raw, cls): - return raw - elif isinstance(raw, str): - return cls.from_string(raw) - else: - if hasattr(raw, 'items'): - return cls(**raw) - try: - args = tuple(raw) - except TypeError: - pass - else: - return cls(*args) - raise NotImplementedError - - @classonly - def from_string(cls, value): - """Return a new instance based on the given string.""" - raise NotImplementedError - - @classmethod - def _make(cls, iterable): # The default _make() is not subclass-friendly. - return cls.__new__(cls, *iterable) - - # XXX Always validate? - #def __init__(self, *args, **kwargs): - # self.validate() - - # XXX The default __repr__() is not subclass-friendly (where the name changes). - #def __repr__(self): - # _, _, sig = super().__repr__().partition('(') - # return f'{self.__class__.__name__}({sig}' - - # To make sorting work with None: - def __lt__(self, other): - try: - return super().__lt__(other) - except TypeError: - if None in self: - return True - elif None in other: - return False - else: - raise - - def validate(self): - return - - # XXX Always validate? - #def _replace(self, **kwargs): - # obj = super()._replace(**kwargs) - # obj.validate() - # return obj diff --git a/Tools/c-analyzer/c_globals/README b/Tools/c-analyzer/c_globals/README deleted file mode 100644 index 772b8be..0000000 --- a/Tools/c-analyzer/c_globals/README +++ /dev/null @@ -1,72 +0,0 @@ -####################################### -# C Globals and CPython Runtime State. 
- -CPython's C code makes extensive use of global variables (whether static -globals or static locals). Each such variable falls into one of several -categories: - -* strictly const data -* used exclusively in main or in the REPL -* process-global state (e.g. managing process-level resources - like signals and file descriptors) -* Python "global" runtime state -* per-interpreter runtime state - -The last one can be a problem as soon as anyone creates a second -interpreter (AKA "subinterpreter") in a process. It is definitely a -problem under subinterpreters if they are no longer sharing the GIL, -since the GIL protects us from a lot of race conditions. Keep in mind -that ultimately *all* objects (PyObject) should be treated as -per-interpreter state. This includes "static types", freelists, -_PyIdentifier, and singletons. Take that in for a second. It has -significant implications on where we use static variables! - -Be aware that module-global state (stored in C statics) is a kind of -per-interpreter state. There have been efforts across many years, and -still going, to provide extension module authors mechanisms to store -that state safely (see PEPs 3121, 489, etc.). - -(Note that there has been discussion around support for running multiple -Python runtimes in the same process. That would ends up with the same -problems, relative to static variables, that subinterpreters have.) - -Historically we have been bad at keeping per-interpreter state out of -static variables, mostly because until recently subinterpreters were -not widely used nor even factored in to solutions. However, the -feature is growing in popularity and use in the community. - -Mandate: "Eliminate use of static variables for per-interpreter state." - -The "c-statics.py" script in this directory, along with its accompanying -data files, are part of the effort to resolve existing problems with -our use of static variables and to prevent future problems. 
- -#------------------------- -## statics for actually-global state (and runtime state consolidation) - -In general, holding any kind of state in static variables -increases maintenance burden and increases the complexity of code (e.g. -we use TSS to identify the active thread state). So it is a good idea -to avoid using statics for state even if for the "global" runtime or -for process-global state. - -Relative to maintenance burden, one problem is where the runtime -state is spread throughout the codebase in dozens of individual -globals. Unlike the other globals, the runtime state represents a set -of values that are constantly shifting in a complex way. When they are -spread out it's harder to get a clear picture of what the runtime -involves. Furthermore, when they are spread out it complicates efforts -that change the runtime. - -Consequently, the globals for Python's runtime state have been -consolidated under a single top-level _PyRuntime global. No new globals -should be added for runtime state. Instead, they should be added to -_PyRuntimeState or one of its sub-structs. The tools in this directory -are run as part of the test suite to ensure that no new globals have -been added. The script can be run manually as well: - - ./python Lib/test/test_c_statics/c-statics.py check - -If it reports any globals then they should be resolved. If the globals -are runtime state then they should be folded into _PyRuntimeState. -Otherwise they should be marked as ignored. 
diff --git a/Tools/c-analyzer/c_globals/__init__.py b/Tools/c-analyzer/c_globals/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/Tools/c-analyzer/c_globals/__main__.py b/Tools/c-analyzer/c_globals/__main__.py deleted file mode 100644 index 9570fb6..0000000 --- a/Tools/c-analyzer/c_globals/__main__.py +++ /dev/null @@ -1,209 +0,0 @@ -import argparse -import os.path -import re -import sys - -from c_analyzer_common import SOURCE_DIRS, REPO_ROOT -from c_analyzer_common.info import UNKNOWN -from c_analyzer_common.known import ( - from_file as known_from_file, - DATA_FILE as KNOWN_FILE, - ) -from . import find, show -from .supported import is_supported, ignored_from_file, IGNORED_FILE, _is_object - - -def _match_unused_global(variable, knownvars, used): - found = [] - for varid in knownvars: - if varid in used: - continue - if varid.funcname is not None: - continue - if varid.name != variable.name: - continue - if variable.filename and variable.filename != UNKNOWN: - if variable.filename == varid.filename: - found.append(varid) - else: - found.append(varid) - return found - - -def _check_results(unknown, knownvars, used): - badknown = set() - for variable in sorted(unknown): - msg = None - if variable.funcname != UNKNOWN: - msg = f'could not find global symbol {variable.id}' - elif m := _match_unused_global(variable, knownvars, used): - assert isinstance(m, list) - badknown.update(m) - elif variable.name in ('completed', 'id'): # XXX Figure out where these variables are. - unknown.remove(variable) - else: - msg = f'could not find local symbol {variable.id}' - if msg: - #raise Exception(msg) - print(msg) - if badknown: - print('---') - print(f'{len(badknown)} globals in known.tsv, but may actually be local:') - for varid in sorted(badknown): - print(f'{varid.filename:30} {varid.name}') - unused = sorted(varid - for varid in set(knownvars) - used - if varid.name != 'id') # XXX Figure out where these variables are. 
- if unused: - print('---') - print(f'did not use {len(unused)} known vars:') - for varid in unused: - print(f'{varid.filename:30} {varid.funcname or "-":20} {varid.name}') - raise Exception('not all known symbols used') - if unknown: - print('---') - raise Exception('could not find all symbols') - - -def _find_globals(dirnames, known, ignored): - if dirnames == SOURCE_DIRS: - dirnames = [os.path.relpath(d, REPO_ROOT) for d in dirnames] - - ignored = ignored_from_file(ignored) - known = known_from_file(known) - - used = set() - unknown = set() - knownvars = (known or {}).get('variables') - for variable in find.globals_from_binary(knownvars=knownvars, - dirnames=dirnames): - #for variable in find.globals(dirnames, known, kind='platform'): - if variable.vartype == UNKNOWN: - unknown.add(variable) - continue - yield variable, is_supported(variable, ignored, known) - used.add(variable.id) - - #_check_results(unknown, knownvars, used) - - -def cmd_check(cmd, dirs=SOURCE_DIRS, *, - ignored=IGNORED_FILE, - known=KNOWN_FILE, - _find=_find_globals, - _show=show.basic, - _print=print, - ): - """ - Fail if there are unsupported globals variables. - - In the failure case, the list of unsupported variables - will be printed out. - """ - unsupported = [v for v, s in _find(dirs, known, ignored) if not s] - if not unsupported: - #_print('okay') - return - - _print('ERROR: found unsupported global variables') - _print() - _show(sorted(unsupported)) - _print(f' ({len(unsupported)} total)') - sys.exit(1) - - -def cmd_show(cmd, dirs=SOURCE_DIRS, *, - ignored=IGNORED_FILE, - known=KNOWN_FILE, - skip_objects=False, - _find=_find_globals, - _show=show.basic, - _print=print, - ): - """ - Print out the list of found global variables. - - The variables will be distinguished as "supported" or "unsupported". - """ - allsupported = [] - allunsupported = [] - for found, supported in _find(dirs, known, ignored): - if skip_objects: # XXX Support proper filters instead. 
- if _is_object(found.vartype): - continue - (allsupported if supported else allunsupported - ).append(found) - - _print('supported:') - _print('----------') - _show(sorted(allsupported)) - _print(f' ({len(allsupported)} total)') - _print() - _print('unsupported:') - _print('------------') - _show(sorted(allunsupported)) - _print(f' ({len(allunsupported)} total)') - - -############################# -# the script - -COMMANDS = { - 'check': cmd_check, - 'show': cmd_show, - } - -PROG = sys.argv[0] -PROG = 'c-globals.py' - - -def parse_args(prog=PROG, argv=sys.argv[1:], *, _fail=None): - common = argparse.ArgumentParser(add_help=False) - common.add_argument('--ignored', metavar='FILE', - default=IGNORED_FILE, - help='path to file that lists ignored vars') - common.add_argument('--known', metavar='FILE', - default=KNOWN_FILE, - help='path to file that lists known types') - common.add_argument('dirs', metavar='DIR', nargs='*', - default=SOURCE_DIRS, - help='a directory to check') - - parser = argparse.ArgumentParser( - prog=prog, - ) - subs = parser.add_subparsers(dest='cmd') - - check = subs.add_parser('check', parents=[common]) - - show = subs.add_parser('show', parents=[common]) - show.add_argument('--skip-objects', action='store_true') - - if _fail is None: - def _fail(msg): - parser.error(msg) - - # Now parse the args. 
- args = parser.parse_args(argv) - ns = vars(args) - - cmd = ns.pop('cmd') - if not cmd: - _fail('missing command') - - return cmd, ns - - -def main(cmd, cmdkwargs=None, *, _COMMANDS=COMMANDS): - try: - cmdfunc = _COMMANDS[cmd] - except KeyError: - raise ValueError( - f'unsupported cmd {cmd!r}' if cmd else 'missing cmd') - - cmdfunc(cmd, **cmdkwargs or {}) - - -if __name__ == '__main__': - cmd, cmdkwargs = parse_args() - main(cmd, cmdkwargs) diff --git a/Tools/c-analyzer/c_globals/find.py b/Tools/c-analyzer/c_globals/find.py deleted file mode 100644 index a51b947..0000000 --- a/Tools/c-analyzer/c_globals/find.py +++ /dev/null @@ -1,95 +0,0 @@ -from c_analyzer_common import SOURCE_DIRS -from c_analyzer_common.info import UNKNOWN -from c_symbols import ( - info as s_info, - binary as b_symbols, - source as s_symbols, - resolve, - ) -from c_parser import info, declarations - - -# XXX needs tests: -# * iter_variables - -def globals_from_binary(binfile=b_symbols.PYTHON, *, - knownvars=None, - dirnames=None, - _iter_symbols=b_symbols.iter_symbols, - _resolve=resolve.symbols_to_variables, - _get_symbol_resolver=resolve.get_resolver, - ): - """Yield a Variable for each found Symbol. - - Details are filled in from the given "known" variables and types. - """ - symbols = _iter_symbols(binfile, find_local_symbol=None) - #symbols = list(symbols) - for variable in _resolve(symbols, - resolve=_get_symbol_resolver(knownvars, dirnames), - ): - # Skip each non-global variable (unless we couldn't find it). - # XXX Drop the "UNKNOWN" condition? - if not variable.isglobal and variable.vartype != UNKNOWN: - continue - yield variable - - -def globals_from_declarations(dirnames=SOURCE_DIRS, *, - known=None, - ): - """Yield a Variable for each found declaration. - - Details are filled in from the given "known" variables and types. 
- """ - raise NotImplementedError - - -def iter_variables(kind='platform', *, - known=None, - dirnames=None, - _resolve_symbols=resolve.symbols_to_variables, - _get_symbol_resolver=resolve.get_resolver, - _symbols_from_binary=b_symbols.iter_symbols, - _symbols_from_source=s_symbols.iter_symbols, - _iter_raw=declarations.iter_all, - _iter_preprocessed=declarations.iter_preprocessed, - ): - """Yield a Variable for each one found (e.g. in files).""" - kind = kind or 'platform' - - if kind == 'symbols': - knownvars = (known or {}).get('variables') - yield from _resolve_symbols( - _symbols_from_source(dirnames, known), - resolve=_get_symbol_resolver(knownvars, dirnames), - ) - elif kind == 'platform': - knownvars = (known or {}).get('variables') - yield from _resolve_symbols( - _symbols_from_binary(find_local_symbol=None), - resolve=_get_symbol_resolver(knownvars, dirnames), - ) - elif kind == 'declarations': - for decl in _iter_raw(dirnames): - if not isinstance(decl, info.Variable): - continue - yield decl - elif kind == 'preprocessed': - for decl in _iter_preprocessed(dirnames): - if not isinstance(decl, info.Variable): - continue - yield decl - else: - raise ValueError(f'unsupported kind {kind!r}') - - -def globals(dirnames, known, *, - kind=None, # Use the default. 
- _iter_variables=iter_variables, - ): - """Return a list of (StaticVar, ) for each found global var.""" - for found in _iter_variables(kind, known=known, dirnames=dirnames): - if not found.isglobal: - continue - yield found diff --git a/Tools/c-analyzer/c_globals/show.py b/Tools/c-analyzer/c_globals/show.py deleted file mode 100644 index f4298b1..0000000 --- a/Tools/c-analyzer/c_globals/show.py +++ /dev/null @@ -1,16 +0,0 @@ - -def basic(globals, *, - _print=print): - """Print each row simply.""" - for variable in globals: - if variable.funcname: - line = f'{variable.filename}:{variable.funcname}():{variable.name}' - else: - line = f'{variable.filename}:{variable.name}' - vartype = variable.vartype - #if vartype.startswith('static '): - # vartype = vartype.partition(' ')[2] - #else: - # vartype = '=' + vartype - line = f'{line:<64} {vartype}' - _print(line) diff --git a/Tools/c-analyzer/c_globals/supported.py b/Tools/c-analyzer/c_globals/supported.py deleted file mode 100644 index d185daa..0000000 --- a/Tools/c-analyzer/c_globals/supported.py +++ /dev/null @@ -1,393 +0,0 @@ -import os.path -import re - -from c_analyzer_common import DATA_DIR -from c_analyzer_common.info import ID -from c_analyzer_common.util import read_tsv, write_tsv - - -IGNORED_FILE = os.path.join(DATA_DIR, 'ignored.tsv') - -IGNORED_COLUMNS = ('filename', 'funcname', 'name', 'kind', 'reason') -IGNORED_HEADER = '\t'.join(IGNORED_COLUMNS) - -# XXX Move these to ignored.tsv. 
-IGNORED = { - # global - 'PyImport_FrozenModules': 'process-global', - 'M___hello__': 'process-global', - 'inittab_copy': 'process-global', - 'PyHash_Func': 'process-global', - '_Py_HashSecret_Initialized': 'process-global', - '_TARGET_LOCALES': 'process-global', - - # startup (only changed before/during) - '_PyRuntime': 'runtime startup', - 'runtime_initialized': 'runtime startup', - 'static_arg_parsers': 'runtime startup', - 'orig_argv': 'runtime startup', - 'opt_ptr': 'runtime startup', - '_preinit_warnoptions': 'runtime startup', - '_Py_StandardStreamEncoding': 'runtime startup', - 'Py_FileSystemDefaultEncoding': 'runtime startup', - '_Py_StandardStreamErrors': 'runtime startup', - 'Py_FileSystemDefaultEncodeErrors': 'runtime startup', - 'Py_BytesWarningFlag': 'runtime startup', - 'Py_DebugFlag': 'runtime startup', - 'Py_DontWriteBytecodeFlag': 'runtime startup', - 'Py_FrozenFlag': 'runtime startup', - 'Py_HashRandomizationFlag': 'runtime startup', - 'Py_IgnoreEnvironmentFlag': 'runtime startup', - 'Py_InspectFlag': 'runtime startup', - 'Py_InteractiveFlag': 'runtime startup', - 'Py_IsolatedFlag': 'runtime startup', - 'Py_NoSiteFlag': 'runtime startup', - 'Py_NoUserSiteDirectory': 'runtime startup', - 'Py_OptimizeFlag': 'runtime startup', - 'Py_QuietFlag': 'runtime startup', - 'Py_UTF8Mode': 'runtime startup', - 'Py_UnbufferedStdioFlag': 'runtime startup', - 'Py_VerboseFlag': 'runtime startup', - '_Py_path_config': 'runtime startup', - '_PyOS_optarg': 'runtime startup', - '_PyOS_opterr': 'runtime startup', - '_PyOS_optind': 'runtime startup', - '_Py_HashSecret': 'runtime startup', - - # REPL - '_PyOS_ReadlineLock': 'repl', - '_PyOS_ReadlineTState': 'repl', - - # effectively const - 'tracemalloc_empty_traceback': 'const', - '_empty_bitmap_node': 'const', - 'posix_constants_pathconf': 'const', - 'posix_constants_confstr': 'const', - 'posix_constants_sysconf': 'const', - '_PySys_ImplCacheTag': 'const', - '_PySys_ImplName': 'const', - 'PyImport_Inittab': 'const', 
- '_PyImport_DynLoadFiletab': 'const', - '_PyParser_Grammar': 'const', - 'Py_hexdigits': 'const', - '_PyImport_Inittab': 'const', - '_PyByteArray_empty_string': 'const', - '_PyLong_DigitValue': 'const', - '_Py_SwappedOp': 'const', - 'PyStructSequence_UnnamedField': 'const', - - # signals are main-thread only - 'faulthandler_handlers': 'signals are main-thread only', - 'user_signals': 'signals are main-thread only', - 'wakeup': 'signals are main-thread only', - - # hacks - '_PySet_Dummy': 'only used as a placeholder', - } - -BENIGN = 'races here are benign and unlikely' - - -def is_supported(variable, ignored=None, known=None, *, - _ignored=(lambda *a, **k: _is_ignored(*a, **k)), - _vartype_okay=(lambda *a, **k: _is_vartype_okay(*a, **k)), - ): - """Return True if the given global variable is okay in CPython.""" - if _ignored(variable, - ignored and ignored.get('variables')): - return True - elif _vartype_okay(variable.vartype, - ignored.get('types')): - return True - else: - return False - - -def _is_ignored(variable, ignoredvars=None, *, - _IGNORED=IGNORED, - ): - """Return the reason if the variable is a supported global. - - Return None if the variable is not a supported global. - """ - if ignoredvars and (reason := ignoredvars.get(variable.id)): - return reason - - if variable.funcname is None: - if reason := _IGNORED.get(variable.name): - return reason - - # compiler - if variable.filename == 'Python/graminit.c': - if variable.vartype.startswith('static state '): - return 'compiler' - if variable.filename == 'Python/symtable.c': - if variable.vartype.startswith('static identifier '): - return 'compiler' - if variable.filename == 'Python/Python-ast.c': - # These should be const. - if variable.name.endswith('_field'): - return 'compiler' - if variable.name.endswith('_attribute'): - return 'compiler' - - # other - if variable.filename == 'Python/dtoa.c': - # guarded by lock? - if variable.name in ('p5s', 'freelist'): - return 'dtoa is thread-safe?' 
- if variable.name in ('private_mem', 'pmem_next'): - return 'dtoa is thread-safe?' - if variable.filename == 'Python/thread.c': - # Threads do not become an issue until after these have been set - # and these never get changed after that. - if variable.name in ('initialized', 'thread_debug'): - return 'thread-safe' - if variable.filename == 'Python/getversion.c': - if variable.name == 'version': - # Races are benign here, as well as unlikely. - return BENIGN - if variable.filename == 'Python/fileutils.c': - if variable.name == 'force_ascii': - return BENIGN - if variable.name == 'ioctl_works': - return BENIGN - if variable.name == '_Py_open_cloexec_works': - return BENIGN - if variable.filename == 'Python/codecs.c': - if variable.name == 'ucnhash_CAPI': - return BENIGN - if variable.filename == 'Python/bootstrap_hash.c': - if variable.name == 'getrandom_works': - return BENIGN - if variable.filename == 'Objects/unicodeobject.c': - if variable.name == 'ucnhash_CAPI': - return BENIGN - if variable.name == 'bloom_linebreak': - # *mostly* benign - return BENIGN - if variable.filename == 'Modules/getbuildinfo.c': - if variable.name == 'buildinfo': - # The static is used for pre-allocation. 
- return BENIGN - if variable.filename == 'Modules/posixmodule.c': - if variable.name == 'ticks_per_second': - return BENIGN - if variable.name == 'dup3_works': - return BENIGN - if variable.filename == 'Modules/timemodule.c': - if variable.name == 'ticks_per_second': - return BENIGN - if variable.filename == 'Objects/longobject.c': - if variable.name == 'log_base_BASE': - return BENIGN - if variable.name == 'convwidth_base': - return BENIGN - if variable.name == 'convmultmax_base': - return BENIGN - - return None - - -def _is_vartype_okay(vartype, ignoredtypes=None): - if _is_object(vartype): - return None - - if vartype.startswith('static const '): - return 'const' - if vartype.startswith('const '): - return 'const' - - # components for TypeObject definitions - for name in ('PyMethodDef', 'PyGetSetDef', 'PyMemberDef'): - if name in vartype: - return 'const' - for name in ('PyNumberMethods', 'PySequenceMethods', 'PyMappingMethods', - 'PyBufferProcs', 'PyAsyncMethods'): - if name in vartype: - return 'const' - for name in ('slotdef', 'newfunc'): - if name in vartype: - return 'const' - - # structseq - for name in ('PyStructSequence_Desc', 'PyStructSequence_Field'): - if name in vartype: - return 'const' - - # other definiitions - if 'PyModuleDef' in vartype: - return 'const' - - # thread-safe - if '_Py_atomic_int' in vartype: - return 'thread-safe' - if 'pthread_condattr_t' in vartype: - return 'thread-safe' - - # startup - if '_Py_PreInitEntry' in vartype: - return 'startup' - - # global -# if 'PyMemAllocatorEx' in vartype: -# return True - - # others -# if 'PyThread_type_lock' in vartype: -# return True - - # XXX ??? - # _Py_tss_t - # _Py_hashtable_t - # stack_t - # _PyUnicode_Name_CAPI - - # functions - if '(' in vartype and '[' not in vartype: - return 'function pointer' - - # XXX finish! - # * allow const values? 
- #raise NotImplementedError - return None - - -PYOBJECT_RE = re.compile(r''' - ^ - ( - # must start with "static " - static \s+ - ( - identifier - ) - \b - ) | - ( - # may start with "static " - ( static \s+ )? - ( - .* - ( - PyObject | - PyTypeObject | - _? Py \w+ Object | - _PyArg_Parser | - _Py_Identifier | - traceback_t | - PyAsyncGenASend | - _PyAsyncGenWrappedValue | - PyContext | - method_cache_entry - ) - \b - ) | - ( - ( - _Py_IDENTIFIER | - _Py_static_string - ) - [(] - ) - ) - ''', re.VERBOSE) - - -def _is_object(vartype): - if 'PyDictKeysObject' in vartype: - return False - if PYOBJECT_RE.match(vartype): - return True - if vartype.endswith((' _Py_FalseStruct', ' _Py_TrueStruct')): - return True - - # XXX Add more? - - #for part in vartype.split(): - # # XXX const is automatic True? - # if part == 'PyObject' or part.startswith('PyObject['): - # return True - return False - - -def ignored_from_file(infile, *, - _read_tsv=read_tsv, - ): - """Yield a Variable for each ignored var in the file.""" - ignored = { - 'variables': {}, - #'types': {}, - #'constants': {}, - #'macros': {}, - } - for row in _read_tsv(infile, IGNORED_HEADER): - filename, funcname, name, kind, reason = row - if not funcname or funcname == '-': - funcname = None - id = ID(filename, funcname, name) - if kind == 'variable': - values = ignored['variables'] - else: - raise ValueError(f'unsupported kind in row {row}') - values[id] = reason - return ignored - - -################################## -# generate - -def _get_row(varid, reason): - return ( - varid.filename, - varid.funcname or '-', - varid.name, - 'variable', - str(reason), - ) - - -def _get_rows(variables, ignored=None, *, - _as_row=_get_row, - _is_ignored=_is_ignored, - _vartype_okay=_is_vartype_okay, - ): - count = 0 - for variable in variables: - reason = _is_ignored(variable, - ignored and ignored.get('variables'), - ) - if not reason: - reason = _vartype_okay(variable.vartype, - ignored and ignored.get('types')) - if not 
reason: - continue - - print(' ', variable, repr(reason)) - yield _as_row(variable.id, reason) - count += 1 - print(f'total: {count}') - - -def _generate_ignored_file(variables, filename=None, *, - _generate_rows=_get_rows, - _write_tsv=write_tsv, - ): - if not filename: - filename = IGNORED_FILE + '.new' - rows = _generate_rows(variables) - _write_tsv(filename, IGNORED_HEADER, rows) - - -if __name__ == '__main__': - from c_analyzer_common import SOURCE_DIRS - from c_analyzer_common.known import ( - from_file as known_from_file, - DATA_FILE as KNOWN_FILE, - ) - from . import find - known = known_from_file(KNOWN_FILE) - knownvars = (known or {}).get('variables') - variables = find.globals_from_binary(knownvars=knownvars, - dirnames=SOURCE_DIRS) - - _generate_ignored_file(variables) diff --git a/Tools/c-analyzer/c_parser/__init__.py b/Tools/c-analyzer/c_parser/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/Tools/c-analyzer/c_parser/declarations.py b/Tools/c-analyzer/c_parser/declarations.py deleted file mode 100644 index 19fa3ff..0000000 --- a/Tools/c-analyzer/c_parser/declarations.py +++ /dev/null @@ -1,295 +0,0 @@ -import re -import shlex -import subprocess - -from . import source - - -IDENTIFIER = r'(?:[a-zA-z]|_+[a-zA-Z0-9]\w*)' - -TYPE_QUAL = r'(?:const|volatile)' - -VAR_TYPE_SPEC = r'''(?: - void | - (?: - (?:(?:un)?signed\s+)? - (?: - char | - short | - int | - long | - long\s+int | - long\s+long - ) | - ) | - float | - double | - {IDENTIFIER} | - (?:struct|union)\s+{IDENTIFIER} - )''' - -POINTER = rf'''(?: - (?:\s+const)?\s*[*] - )''' - -#STRUCT = r'''(?: -# (?:struct|(struct\s+%s))\s*[{] -# [^}]* -# [}] -# )''' % (IDENTIFIER) -#UNION = r'''(?: -# (?:union|(union\s+%s))\s*[{] -# [^}]* -# [}] -# )''' % (IDENTIFIER) -#DECL_SPEC = rf'''(?: -# ({VAR_TYPE_SPEC}) | -# ({STRUCT}) | -# ({UNION}) -# )''' - -FUNC_START = rf'''(?: - (?: - (?: - extern | - static | - static\s+inline - )\s+ - )? - #(?:const\s+)? 
- {VAR_TYPE_SPEC} - )''' -#GLOBAL_VAR_START = rf'''(?: -# (?: -# (?: -# extern | -# static -# )\s+ -# )? -# (?: -# {TYPE_QUAL} -# (?:\s+{TYPE_QUAL})? -# )?\s+ -# {VAR_TYPE_SPEC} -# )''' -GLOBAL_DECL_START_RE = re.compile(rf''' - ^ - (?: - ({FUNC_START}) - ) - ''', re.VERBOSE) - -LOCAL_VAR_START = rf'''(?: - (?: - (?: - register | - static - )\s+ - )? - (?: - (?: - {TYPE_QUAL} - (?:\s+{TYPE_QUAL})? - )\s+ - )? - {VAR_TYPE_SPEC} - {POINTER}? - )''' -LOCAL_STMT_START_RE = re.compile(rf''' - ^ - (?: - ({LOCAL_VAR_START}) - ) - ''', re.VERBOSE) - - -def iter_global_declarations(lines): - """Yield (decl, body) for each global declaration in the given lines. - - For function definitions the header is reduced to one line and - the body is provided as-is. For other compound declarations (e.g. - struct) the entire declaration is reduced to one line and "body" - is None. Likewise for simple declarations (e.g. variables). - - Declarations inside function bodies are ignored, though their text - is provided in the function body. - """ - # XXX Bail out upon bogus syntax. - lines = source.iter_clean_lines(lines) - for line in lines: - if not GLOBAL_DECL_START_RE.match(line): - continue - # We only need functions here, since we only need locals for now. - if line.endswith(';'): - continue - if line.endswith('{') and '(' not in line: - continue - - # Capture the function. - # (assume no func is a one-liner) - decl = line - while '{' not in line: # assume no inline structs, etc. - try: - line = next(lines) - except StopIteration: - return - decl += ' ' + line - - body, end = _extract_block(lines) - if end is None: - return - assert end == '}' - yield (f'{decl}\n{body}\n{end}', body) - - -def iter_local_statements(lines): - """Yield (lines, blocks) for each statement in the given lines. - - For simple statements, "blocks" is None and the statement is reduced - to a single line. For compound statements, "blocks" is a pair of - (header, body) for each block in the statement. 
The headers are - reduced to a single line each, but the bpdies are provided as-is. - """ - # XXX Bail out upon bogus syntax. - lines = source.iter_clean_lines(lines) - for line in lines: - if not LOCAL_STMT_START_RE.match(line): - continue - - stmt = line - blocks = None - if not line.endswith(';'): - # XXX Support compound & multiline simple statements. - #blocks = [] - continue - - yield (stmt, blocks) - - -def _extract_block(lines): - end = None - depth = 1 - body = [] - for line in lines: - depth += line.count('{') - line.count('}') - if depth == 0: - end = line - break - body.append(line) - return '\n'.join(body), end - - -def parse_func(stmt, body): - """Return (name, signature) for the given function definition.""" - header, _, end = stmt.partition(body) - assert end.strip() == '}' - assert header.strip().endswith('{') - header, _, _= header.rpartition('{') - - signature = ' '.join(header.strip().splitlines()) - - _, _, name = signature.split('(')[0].strip().rpartition(' ') - assert name - - return name, signature - - -def parse_var(stmt): - """Return (name, vartype) for the given variable declaration.""" - stmt = stmt.rstrip(';') - m = LOCAL_STMT_START_RE.match(stmt) - assert m - vartype = m.group(0) - name = stmt[len(vartype):].partition('=')[0].strip() - - if name.startswith('('): - name, _, after = name[1:].partition(')') - assert after - name = name.replace('*', '* ') - inside, _, name = name.strip().rpartition(' ') - vartype = f'{vartype} ({inside.strip()}){after}' - else: - name = name.replace('*', '* ') - before, _, name = name.rpartition(' ') - vartype = f'{vartype} {before}' - - vartype = vartype.strip() - while ' ' in vartype: - vartype = vartype.replace(' ', ' ') - - return name, vartype - - -def parse_compound(stmt, blocks): - """Return (headers, bodies) for the given compound statement.""" - # XXX Identify declarations inside compound statements - # (if/switch/for/while). 
- raise NotImplementedError - - -def iter_variables(filename, *, - _iter_source_lines=source.iter_lines, - _iter_global=iter_global_declarations, - _iter_local=iter_local_statements, - _parse_func=parse_func, - _parse_var=parse_var, - _parse_compound=parse_compound, - ): - """Yield (funcname, name, vartype) for every variable in the given file.""" - lines = _iter_source_lines(filename) - for stmt, body in _iter_global(lines): - # At the file top-level we only have to worry about vars & funcs. - if not body: - name, vartype = _parse_var(stmt) - if name: - yield (None, name, vartype) - else: - funcname, _ = _parse_func(stmt, body) - localvars = _iter_locals(body, - _iter_statements=_iter_local, - _parse_var=_parse_var, - _parse_compound=_parse_compound, - ) - for name, vartype in localvars: - yield (funcname, name, vartype) - - -def _iter_locals(lines, *, - _iter_statements=iter_local_statements, - _parse_var=parse_var, - _parse_compound=parse_compound, - ): - compound = [lines] - while compound: - body = compound.pop(0) - bodylines = body.splitlines() - for stmt, blocks in _iter_statements(bodylines): - if not blocks: - name, vartype = _parse_var(stmt) - if name: - yield (name, vartype) - else: - headers, bodies = _parse_compound(stmt, blocks) - for header in headers: - for line in header: - name, vartype = _parse_var(line) - if name: - yield (name, vartype) - compound.extend(bodies) - - -def iter_all(dirnames): - """Yield a Declaration for each one found. - - If there are duplicates, due to preprocessor conditionals, then - they are checked to make sure they are the same. - """ - raise NotImplementedError - - -def iter_preprocessed(dirnames): - """Yield a Declaration for each one found. - - All source files are run through the preprocessor first. 
- """ - raise NotImplementedError diff --git a/Tools/c-analyzer/c_parser/info.py b/Tools/c-analyzer/c_parser/info.py deleted file mode 100644 index a4e32d7..0000000 --- a/Tools/c-analyzer/c_parser/info.py +++ /dev/null @@ -1,106 +0,0 @@ -from collections import namedtuple -import re - -from c_analyzer_common import info, util -from c_analyzer_common.util import classonly, _NTBase - - -def normalize_vartype(vartype): - """Return the canonical form for a variable type (or func signature).""" - # We allow empty strring through for semantic reasons. - if vartype is None: - return None - - # XXX finish! - # XXX Return (modifiers, type, pointer)? - return str(vartype) - - -def extract_storage(decl, *, isfunc=False): - """Return (storage, vartype) based on the given declaration. - - The default storage is "implicit" or "local". - """ - if decl == info.UNKNOWN: - return decl, decl - if decl.startswith('static '): - return 'static', decl - #return 'static', decl.partition(' ')[2].strip() - elif decl.startswith('extern '): - return 'extern', decl - #return 'extern', decl.partition(' ')[2].strip() - elif re.match('.*\b(static|extern)\b', decl): - raise NotImplementedError - elif isfunc: - return 'local', decl - else: - return 'implicit', decl - - -class Variable(_NTBase, - namedtuple('Variable', 'id storage vartype')): - """Information about a single variable declaration.""" - - __slots__ = () - - STORAGE = ( - 'static', - 'extern', - 'implicit', - 'local', - ) - - @classonly - def from_parts(cls, filename, funcname, name, decl, storage=None): - if storage is None: - storage, decl = extract_storage(decl, isfunc=funcname) - id = info.ID(filename, funcname, name) - self = cls(id, storage, decl) - return self - - def __new__(cls, id, storage, vartype): - self = super().__new__( - cls, - id=info.ID.from_raw(id), - storage=str(storage) if storage else None, - vartype=normalize_vartype(vartype) if vartype else None, - ) - return self - - def __hash__(self): - return hash(self.id) - 
- def __getattr__(self, name): - return getattr(self.id, name) - - def _validate_id(self): - if not self.id: - raise TypeError('missing id') - - if not self.filename or self.filename == info.UNKNOWN: - raise TypeError(f'id missing filename ({self.id})') - - if self.funcname and self.funcname == info.UNKNOWN: - raise TypeError(f'id missing funcname ({self.id})') - - self.id.validate() - - def validate(self): - """Fail if the object is invalid (i.e. init with bad data).""" - self._validate_id() - - if self.storage is None or self.storage == info.UNKNOWN: - raise TypeError('missing storage') - elif self.storage not in self.STORAGE: - raise ValueError(f'unsupported storage {self.storage:r}') - - if self.vartype is None or self.vartype == info.UNKNOWN: - raise TypeError('missing vartype') - - @property - def isglobal(self): - return self.storage != 'local' - - @property - def isconst(self): - return 'const' in self.vartype.split() diff --git a/Tools/c-analyzer/c_parser/naive.py b/Tools/c-analyzer/c_parser/naive.py deleted file mode 100644 index 160f96c..0000000 --- a/Tools/c-analyzer/c_parser/naive.py +++ /dev/null @@ -1,180 +0,0 @@ -import re - -from c_analyzer_common.info import UNKNOWN - -from .info import Variable -from .preprocessor import _iter_clean_lines - - -_NOT_SET = object() - - -def get_srclines(filename, *, - cache=None, - _open=open, - _iter_lines=_iter_clean_lines, - ): - """Return the file's lines as a list. - - Each line will have trailing whitespace removed (including newline). - - If a cache is given the it is used. 
- """ - if cache is not None: - try: - return cache[filename] - except KeyError: - pass - - with _open(filename) as srcfile: - srclines = [line - for _, line in _iter_lines(srcfile) - if not line.startswith('#')] - for i, line in enumerate(srclines): - srclines[i] = line.rstrip() - - if cache is not None: - cache[filename] = srclines - return srclines - - -def parse_variable_declaration(srcline): - """Return (name, decl) for the given declaration line.""" - # XXX possible false negatives... - decl, sep, _ = srcline.partition('=') - if not sep: - if not srcline.endswith(';'): - return None, None - decl = decl.strip(';') - decl = decl.strip() - m = re.match(r'.*\b(\w+)\s*(?:\[[^\]]*\])?$', decl) - if not m: - return None, None - name = m.group(1) - return name, decl - - -def parse_variable(srcline, funcname=None): - """Return a Variable for the variable declared on the line (or None).""" - line = srcline.strip() - - # XXX Handle more than just static variables. - if line.startswith('static '): - if '(' in line and '[' not in line: - # a function - return None, None - return parse_variable_declaration(line) - else: - return None, None - - -def iter_variables(filename, *, - srccache=None, - parse_variable=None, - _get_srclines=get_srclines, - _default_parse_variable=parse_variable, - ): - """Yield a Variable for each in the given source file.""" - if parse_variable is None: - parse_variable = _default_parse_variable - - indent = '' - prev = '' - funcname = None - for line in _get_srclines(filename, cache=srccache): - # remember current funcname - if funcname: - if line == indent + '}': - funcname = None - continue - else: - if '(' in prev and line == indent + '{': - if not prev.startswith('__attribute__'): - funcname = prev.split('(')[0].split()[-1] - prev = '' - continue - indent = line[:-len(line.lstrip())] - prev = line - - info = parse_variable(line, funcname) - if isinstance(info, list): - for name, _funcname, decl in info: - yield Variable.from_parts(filename, 
_funcname, name, decl) - continue - name, decl = info - - if name is None: - continue - yield Variable.from_parts(filename, funcname, name, decl) - - -def _match_varid(variable, name, funcname, ignored=None): - if ignored and variable in ignored: - return False - - if variable.name != name: - return False - - if funcname == UNKNOWN: - if not variable.funcname: - return False - elif variable.funcname != funcname: - return False - - return True - - -def find_variable(filename, funcname, name, *, - ignored=None, - srccache=None, # {filename: lines} - parse_variable=None, - _iter_variables=iter_variables, - ): - """Return the matching variable. - - Return None if the variable is not found. - """ - for variable in _iter_variables(filename, - srccache=srccache, - parse_variable=parse_variable, - ): - if _match_varid(variable, name, funcname, ignored): - return variable - else: - return None - - -def find_variables(varids, filenames=None, *, - srccache=_NOT_SET, - parse_variable=None, - _find_symbol=find_variable, - ): - """Yield a Variable for each ID. - - If the variable is not found then its decl will be UNKNOWN. That - way there will be one resulting Variable per given ID. 
- """ - if srccache is _NOT_SET: - srccache = {} - - used = set() - for varid in varids: - if varid.filename and varid.filename != UNKNOWN: - srcfiles = [varid.filename] - else: - if not filenames: - yield Variable(varid, UNKNOWN, UNKNOWN) - continue - srcfiles = filenames - for filename in srcfiles: - found = _find_varid(filename, varid.funcname, varid.name, - ignored=used, - srccache=srccache, - parse_variable=parse_variable, - ) - if found: - yield found - used.add(found) - break - else: - yield Variable(varid, UNKNOWN, UNKNOWN) diff --git a/Tools/c-analyzer/c_parser/preprocessor.py b/Tools/c-analyzer/c_parser/preprocessor.py deleted file mode 100644 index 0e2866e..0000000 --- a/Tools/c-analyzer/c_parser/preprocessor.py +++ /dev/null @@ -1,512 +0,0 @@ -from collections import namedtuple -import shlex -import os -import re - -from c_analyzer_common import util -from . import info - - -CONTINUATION = '\\' + os.linesep - -IDENTIFIER = r'(?:\w*[a-zA-Z]\w*)' -IDENTIFIER_RE = re.compile('^' + IDENTIFIER + '$') - - -def _coerce_str(value): - if not value: - return '' - return str(value).strip() - - -############################# -# directives - -DIRECTIVE_START = r''' - (?: - ^ \s* - [#] \s* - )''' -DIRECTIVE_TEXT = r''' - (?: - (?: \s+ ( .*\S ) )? - \s* $ - )''' -DIRECTIVE = rf''' - (?: - {DIRECTIVE_START} - ( - include | - error | warning | - pragma | - define | undef | - if | ifdef | ifndef | elseif | else | endif | - __FILE__ | __LINE__ | __DATE __ | __TIME__ | __TIMESTAMP__ - ) - {DIRECTIVE_TEXT} - )''' -# (?: -# [^\\\n] | -# \\ [^\n] | -# \\ \n -# )+ -# ) \n -# )''' -DIRECTIVE_RE = re.compile(DIRECTIVE, re.VERBOSE) - -DEFINE = rf''' - (?: - {DIRECTIVE_START} define \s+ - (?: - ( \w*[a-zA-Z]\w* ) - (?: \s* [(] ([^)]*) [)] )? 
- ) - {DIRECTIVE_TEXT} - )''' -DEFINE_RE = re.compile(DEFINE, re.VERBOSE) - - -def parse_directive(line): - """Return the appropriate directive for the given line.""" - line = line.strip() - if line.startswith('#'): - line = line[1:].lstrip() - line = '#' + line - directive = line - #directive = '#' + line - while ' ' in directive: - directive = directive.replace(' ', ' ') - return _parse_directive(directive) - - -def _parse_directive(line): - m = DEFINE_RE.match(line) - if m: - name, args, text = m.groups() - if args: - args = [a.strip() for a in args.split(',')] - return Macro(name, args, text) - else: - return Constant(name, text) - - m = DIRECTIVE_RE.match(line) - if not m: - raise ValueError(f'unsupported directive {line!r}') - kind, text = m.groups() - if not text: - if kind not in ('else', 'endif'): - raise ValueError(f'missing text in directive {line!r}') - elif kind in ('else', 'endif', 'define'): - raise ValueError(f'unexpected text in directive {line!r}') - if kind == 'include': - directive = Include(text) - elif kind in IfDirective.KINDS: - directive = IfDirective(kind, text) - else: - directive = OtherDirective(kind, text) - directive.validate() - return directive - - -class PreprocessorDirective(util._NTBase): - """The base class for directives.""" - - __slots__ = () - - KINDS = frozenset([ - 'include', - 'pragma', - 'error', 'warning', - 'define', 'undef', - 'if', 'ifdef', 'ifndef', 'elseif', 'else', 'endif', - '__FILE__', '__DATE__', '__LINE__', '__TIME__', '__TIMESTAMP__', - ]) - - @property - def text(self): - return ' '.join(v for v in self[1:] if v and v.strip()) or None - - def validate(self): - """Fail if the object is invalid (i.e. init with bad data).""" - super().validate() - - if not self.kind: - raise TypeError('missing kind') - elif self.kind not in self.KINDS: - raise ValueError - - # text can be anything, including None. 
- - -class Constant(PreprocessorDirective, - namedtuple('Constant', 'kind name value')): - """A single "constant" directive ("define").""" - - __slots__ = () - - def __new__(cls, name, value=None): - self = super().__new__( - cls, - 'define', - name=_coerce_str(name) or None, - value=_coerce_str(value) or None, - ) - return self - - def validate(self): - """Fail if the object is invalid (i.e. init with bad data).""" - super().validate() - - if not self.name: - raise TypeError('missing name') - elif not IDENTIFIER_RE.match(self.name): - raise ValueError(f'name must be identifier, got {self.name!r}') - - # value can be anything, including None - - -class Macro(PreprocessorDirective, - namedtuple('Macro', 'kind name args body')): - """A single "macro" directive ("define").""" - - __slots__ = () - - def __new__(cls, name, args, body=None): - # "args" must be a string or an iterable of strings (or "empty"). - if isinstance(args, str): - args = [v.strip() for v in args.split(',')] - if args: - args = tuple(_coerce_str(a) or None for a in args) - self = super().__new__( - cls, - kind='define', - name=_coerce_str(name) or None, - args=args if args else (), - body=_coerce_str(body) or None, - ) - return self - - @property - def text(self): - if self.body: - return f'{self.name}({", ".join(self.args)}) {self.body}' - else: - return f'{self.name}({", ".join(self.args)})' - - def validate(self): - """Fail if the object is invalid (i.e. 
init with bad data).""" - super().validate() - - if not self.name: - raise TypeError('missing name') - elif not IDENTIFIER_RE.match(self.name): - raise ValueError(f'name must be identifier, got {self.name!r}') - - for arg in self.args: - if not arg: - raise ValueError(f'missing arg in {self.args}') - elif not IDENTIFIER_RE.match(arg): - raise ValueError(f'arg must be identifier, got {arg!r}') - - # body can be anything, including None - - -class IfDirective(PreprocessorDirective, - namedtuple('IfDirective', 'kind condition')): - """A single conditional directive (e.g. "if", "ifdef"). - - This only includes directives that actually provide conditions. The - related directives "else" and "endif" are covered by OtherDirective - instead. - """ - - __slots__ = () - - KINDS = frozenset([ - 'if', - 'ifdef', - 'ifndef', - 'elseif', - ]) - - @classmethod - def _condition_from_raw(cls, raw, kind): - #return Condition.from_raw(raw, _kind=kind) - condition = _coerce_str(raw) - if not condition: - return None - - if kind == 'ifdef': - condition = f'defined({condition})' - elif kind == 'ifndef': - condition = f'! defined({condition})' - - return condition - - def __new__(cls, kind, condition): - kind = _coerce_str(kind) - self = super().__new__( - cls, - kind=kind or None, - condition=cls._condition_from_raw(condition, kind), - ) - return self - - @property - def text(self): - if self.kind == 'ifdef': - return self.condition[8:-1] # strip "defined(" - elif self.kind == 'ifndef': - return self.condition[10:-1] # strip "! defined(" - else: - return self.condition - #return str(self.condition) - - def validate(self): - """Fail if the object is invalid (i.e. 
init with bad data).""" - super().validate() - - if not self.condition: - raise TypeError('missing condition') - #else: - # for cond in self.condition: - # if not cond: - # raise ValueError(f'missing condition in {self.condition}') - # cond.validate() - # if self.kind in ('ifdef', 'ifndef'): - # if len(self.condition) != 1: - # raise ValueError('too many condition') - # if self.kind == 'ifdef': - # if not self.condition[0].startswith('defined '): - # raise ValueError('bad condition') - # else: - # if not self.condition[0].startswith('! defined '): - # raise ValueError('bad condition') - - -class Include(PreprocessorDirective, - namedtuple('Include', 'kind file')): - """A single "include" directive. - - Supported "file" values are either follow the bracket style - () or double quotes ("spam.h"). - """ - - __slots__ = () - - def __new__(cls, file): - self = super().__new__( - cls, - kind='include', - file=_coerce_str(file) or None, - ) - return self - - def validate(self): - """Fail if the object is invalid (i.e. init with bad data).""" - super().validate() - - if not self.file: - raise TypeError('missing file') - - -class OtherDirective(PreprocessorDirective, - namedtuple('OtherDirective', 'kind text')): - """A single directive not covered by another class. - - This includes the "else", "endif", and "undef" directives, which are - otherwise inherently related to the directives covered by the - Constant, Macro, and IfCondition classes. - - Note that all directives must have a text value, except for "else" - and "endif" (which must have no text). - """ - - __slots__ = () - - KINDS = PreprocessorDirective.KINDS - {'include', 'define'} - IfDirective.KINDS - - def __new__(cls, kind, text): - self = super().__new__( - cls, - kind=_coerce_str(kind) or None, - text=_coerce_str(text) or None, - ) - return self - - def validate(self): - """Fail if the object is invalid (i.e. 
init with bad data).""" - super().validate() - - if self.text: - if self.kind in ('else', 'endif'): - raise ValueError('unexpected text in directive') - elif self.kind not in ('else', 'endif'): - raise TypeError('missing text') - - -############################# -# iterating lines - -def _recompute_conditions(directive, ifstack): - if directive.kind in ('if', 'ifdef', 'ifndef'): - ifstack.append( - ([], directive.condition)) - elif directive.kind == 'elseif': - if ifstack: - negated, active = ifstack.pop() - if active: - negated.append(active) - else: - negated = [] - ifstack.append( - (negated, directive.condition)) - elif directive.kind == 'else': - if ifstack: - negated, active = ifstack.pop() - if active: - negated.append(active) - ifstack.append( - (negated, None)) - elif directive.kind == 'endif': - if ifstack: - ifstack.pop() - - conditions = [] - for negated, active in ifstack: - for condition in negated: - conditions.append(f'! ({condition})') - if active: - conditions.append(active) - return tuple(conditions) - - -def _iter_clean_lines(lines): - lines = iter(enumerate(lines, 1)) - for lno, line in lines: - # Handle line continuations. - while line.endswith(CONTINUATION): - try: - lno, _line = next(lines) - except StopIteration: - break - line = line[:-len(CONTINUATION)] + ' ' + _line - - # Deal with comments. - after = line - line = '' - while True: - # Look for a comment. - before, begin, remainder = after.partition('/*') - if '//' in before: - before, _, _ = before.partition('//') - line += before + ' ' # per the C99 spec - break - line += before - if not begin: - break - line += ' ' # per the C99 spec - - # Go until we find the end of the comment. 
- _, end, after = remainder.partition('*/') - while not end: - try: - lno, remainder = next(lines) - except StopIteration: - raise Exception('unterminated comment') - _, end, after = remainder.partition('*/') - - yield lno, line - - -def iter_lines(lines, *, - _iter_clean_lines=_iter_clean_lines, - _parse_directive=_parse_directive, - _recompute_conditions=_recompute_conditions, - ): - """Yield (lno, line, directive, active conditions) for each given line. - - This is effectively a subset of the operations taking place in - translation phases 2-4 from the C99 spec (ISO/IEC 9899:TC2); see - section 5.1.1.2. Line continuations are removed and comments - replaced with a single space. (In both cases "lno" will be the last - line involved.) Otherwise each line is returned as-is. - - "lno" is the (1-indexed) line number for the line. - - "directive" will be a PreprocessorDirective or None, depending on - whether or not there is a directive on the line. - - "active conditions" is the set of preprocessor conditions (e.g. - "defined()") under which the current line of code will be included - in compilation. That set is derived from every conditional - directive block (e.g. "if defined()", "ifdef", "else") containing - that line. That includes nested directives. Note that the - current line does not affect the active conditions for iteself. - It only impacts subsequent lines. That applies to directives - that close blocks (e.g. "endif") just as much as conditional - directvies. Also note that "else" and "elseif" directives - update the active conditions (for later lines), rather than - adding to them. 
- """ - ifstack = [] - conditions = () - for lno, line in _iter_clean_lines(lines): - stripped = line.strip() - if not stripped.startswith('#'): - yield lno, line, None, conditions - continue - - directive = '#' + stripped[1:].lstrip() - while ' ' in directive: - directive = directive.replace(' ', ' ') - directive = _parse_directive(directive) - yield lno, line, directive, conditions - - if directive.kind in ('else', 'endif'): - conditions = _recompute_conditions(directive, ifstack) - elif isinstance(directive, IfDirective): - conditions = _recompute_conditions(directive, ifstack) - - -############################# -# running (platform-specific?) - -def _gcc(filename, *, - _get_argv=(lambda: _get_gcc_argv()), - _run=util.run_cmd, - ): - argv = _get_argv() - argv.extend([ - '-E', filename, - ]) - output = _run(argv) - return output - - -def _get_gcc_argv(*, - _open=open, - _run=util.run_cmd, - ): - with _open('/tmp/print.mk', 'w') as tmpfile: - tmpfile.write('print-%:\n') - #tmpfile.write('\t@echo $* = $($*)\n') - tmpfile.write('\t@echo $($*)\n') - argv = ['/usr/bin/make', - '-f', 'Makefile', - '-f', '/tmp/print.mk', - 'print-CC', - 'print-PY_CORE_CFLAGS', - ] - output = _run(argv) - gcc, cflags = output.strip().splitlines() - argv = shlex.split(gcc.strip()) - cflags = shlex.split(cflags.strip()) - return argv + cflags - - -def run(filename, *, - _gcc=_gcc, - ): - """Return the text of the given file after running the preprocessor.""" - return _gcc(filename) diff --git a/Tools/c-analyzer/c_parser/source.py b/Tools/c-analyzer/c_parser/source.py deleted file mode 100644 index f8998c8..0000000 --- a/Tools/c-analyzer/c_parser/source.py +++ /dev/null @@ -1,34 +0,0 @@ -from . import preprocessor - - -def iter_clean_lines(lines): - incomment = False - for line in lines: - # Deal with comments. 
- if incomment: - _, sep, line = line.partition('*/') - if sep: - incomment = False - continue - line, _, _ = line.partition('//') - line, sep, remainder = line.partition('/*') - if sep: - _, sep, after = remainder.partition('*/') - if not sep: - incomment = True - continue - line += ' ' + after - - # Ignore blank lines and leading/trailing whitespace. - line = line.strip() - if not line: - continue - - yield line - - -def iter_lines(filename, *, - preprocess=preprocessor.run, - ): - content = preprocess(filename) - return iter(content.splitlines()) diff --git a/Tools/c-analyzer/c_symbols/__init__.py b/Tools/c-analyzer/c_symbols/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/Tools/c-analyzer/c_symbols/binary.py b/Tools/c-analyzer/c_symbols/binary.py deleted file mode 100644 index e125dbd5..0000000 --- a/Tools/c-analyzer/c_symbols/binary.py +++ /dev/null @@ -1,157 +0,0 @@ -import os -import os.path -import shutil -import sys - -from c_analyzer_common import util, info -from . import source -from .info import Symbol - - -#PYTHON = os.path.join(REPO_ROOT, 'python') -PYTHON = sys.executable - - -def iter_symbols(binary=PYTHON, dirnames=None, *, - # Alternately, use look_up_known_symbol() - # from c_globals.supported. - find_local_symbol=source.find_symbol, - _file_exists=os.path.exists, - _iter_symbols_nm=(lambda b, *a: _iter_symbols_nm(b, *a)), - ): - """Yield a Symbol for each symbol found in the binary.""" - if not _file_exists(binary): - raise Exception('executable missing (need to build it first?)') - - if find_local_symbol: - cache = {} - def find_local_symbol(name, *, _find=find_local_symbol): - return _find(name, dirnames, _perfilecache=cache) - else: - find_local_symbol = None - - if os.name == 'nt': - # XXX Support this. - raise NotImplementedError - else: - yield from _iter_symbols_nm(binary, find_local_symbol) - - -############################# -# binary format (e.g. 
ELF) - -SPECIAL_SYMBOLS = { - '__bss_start', - '__data_start', - '__dso_handle', - '_DYNAMIC', - '_edata', - '_end', - '__environ@@GLIBC_2.2.5', - '_GLOBAL_OFFSET_TABLE_', - '__JCR_END__', - '__JCR_LIST__', - '__TMC_END__', - } - - -def _is_special_symbol(name): - if name in SPECIAL_SYMBOLS: - return True - if '@@GLIBC' in name: - return True - return False - - -############################# -# "nm" - -NM_KINDS = { - 'b': Symbol.KIND.VARIABLE, # uninitialized - 'd': Symbol.KIND.VARIABLE, # initialized - #'g': Symbol.KIND.VARIABLE, # uninitialized - #'s': Symbol.KIND.VARIABLE, # initialized - 't': Symbol.KIND.FUNCTION, - } - - -def _iter_symbols_nm(binary, find_local_symbol=None, - *, - _which=shutil.which, - _run=util.run_cmd, - ): - nm = _which('nm') - if not nm: - raise NotImplementedError - argv = [nm, - '--line-numbers', - binary, - ] - try: - output = _run(argv) - except Exception: - if nm is None: - # XXX Use dumpbin.exe /SYMBOLS on Windows. - raise NotImplementedError - raise - for line in output.splitlines(): - (name, kind, external, filename, funcname, vartype, - ) = _parse_nm_line(line, - _find_local_symbol=find_local_symbol, - ) - if kind != Symbol.KIND.VARIABLE: - continue - elif _is_special_symbol(name): - continue - assert vartype is None - yield Symbol( - id=(filename, funcname, name), - kind=kind, - external=external, - ) - - -def _parse_nm_line(line, *, _find_local_symbol=None): - _origline = line - _, _, line = line.partition(' ') # strip off the address - line = line.strip() - - kind, _, line = line.partition(' ') - line = line.strip() - external = kind.isupper() - kind = NM_KINDS.get(kind.lower(), Symbol.KIND.OTHER) - - name, _, filename = line.partition('\t') - name = name.strip() - if filename: - filename = os.path.relpath(filename.partition(':')[0]) - else: - filename = info.UNKNOWN - - vartype = None - name, islocal = _parse_nm_name(name, kind) - if islocal: - funcname = info.UNKNOWN - if _find_local_symbol is not None: - filename, funcname, 
vartype = _find_local_symbol(name) - filename = filename or info.UNKNOWN - funcname = funcname or info.UNKNOWN - else: - funcname = None - # XXX fine filename and vartype? - return name, kind, external, filename, funcname, vartype - - -def _parse_nm_name(name, kind): - if kind != Symbol.KIND.VARIABLE: - return name, None - if _is_special_symbol(name): - return name, None - - actual, sep, digits = name.partition('.') - if not sep: - return name, False - - if not digits.isdigit(): - raise Exception(f'got bogus name {name}') - return actual, True diff --git a/Tools/c-analyzer/c_symbols/info.py b/Tools/c-analyzer/c_symbols/info.py deleted file mode 100644 index f6ed52c..0000000 --- a/Tools/c-analyzer/c_symbols/info.py +++ /dev/null @@ -1,51 +0,0 @@ -from collections import namedtuple - -from c_analyzer_common.info import ID -from c_analyzer_common.util import classonly, _NTBase - - -class Symbol(_NTBase, namedtuple('Symbol', 'id kind external')): - """Info for a single compilation symbol.""" - - __slots__ = () - - class KIND: - VARIABLE = 'variable' - FUNCTION = 'function' - OTHER = 'other' - - @classonly - def from_name(cls, name, filename=None, kind=KIND.VARIABLE, external=None): - """Return a new symbol based on the given name.""" - id = ID(filename, None, name) - return cls(id, kind, external) - - def __new__(cls, id, kind=KIND.VARIABLE, external=None): - self = super().__new__( - cls, - id=ID.from_raw(id), - kind=str(kind) if kind else None, - external=bool(external) if external is not None else None, - ) - return self - - def __hash__(self): - return hash(self.id) - - def __getattr__(self, name): - return getattr(self.id, name) - - def validate(self): - """Fail if the object is invalid (i.e. 
init with bad data).""" - if not self.id: - raise TypeError('missing id') - else: - self.id.validate() - - if not self.kind: - raise TypeError('missing kind') - elif self.kind not in vars(self.KIND).values(): - raise ValueError(f'unsupported kind {self.kind}') - - if self.external is None: - raise TypeError('missing external') diff --git a/Tools/c-analyzer/c_symbols/resolve.py b/Tools/c-analyzer/c_symbols/resolve.py deleted file mode 100644 index 56210ce..0000000 --- a/Tools/c-analyzer/c_symbols/resolve.py +++ /dev/null @@ -1,147 +0,0 @@ -import os.path - -from c_analyzer_common import files -from c_analyzer_common.info import UNKNOWN -from c_parser import declarations, info -from .info import Symbol -from .source import _find_symbol - - -# XXX need tests: -# * look_up_known_symbol() -# * symbol_from_source() -# * get_resolver() -# * symbols_to_variables() - -def look_up_known_symbol(symbol, knownvars, *, - match_files=(lambda f1, f2: f1 == f2), - ): - """Return the known variable matching the given symbol. - - "knownvars" is a mapping of common.ID to parser.Variable. - - "match_files" is used to verify if two filenames point to - the same file. - """ - if not knownvars: - return None - - if symbol.funcname == UNKNOWN: - if not symbol.filename or symbol.filename == UNKNOWN: - for varid in knownvars: - if not varid.funcname: - continue - if varid.name == symbol.name: - return knownvars[varid] - else: - return None - else: - for varid in knownvars: - if not varid.funcname: - continue - if not match_files(varid.filename, symbol.filename): - continue - if varid.name == symbol.name: - return knownvars[varid] - else: - return None - elif not symbol.filename or symbol.filename == UNKNOWN: - raise NotImplementedError - else: - return knownvars.get(symbol.id) - - -def find_in_source(symbol, dirnames, *, - _perfilecache={}, - _find_symbol=_find_symbol, - _iter_files=files.iter_files_by_suffix, - ): - """Return the Variable matching the given Symbol. 
- - If there is no match then return None. - """ - if symbol.filename and symbol.filename != UNKNOWN: - filenames = [symbol.filename] - else: - filenames = _iter_files(dirnames, ('.c', '.h')) - - if symbol.funcname and symbol.funcname != UNKNOWN: - raise NotImplementedError - - (filename, funcname, decl - ) = _find_symbol(symbol.name, filenames, _perfilecache) - if filename == UNKNOWN: - return None - return info.Variable.from_parts(filename, funcname, symbol.name, decl) - - -def get_resolver(knownvars=None, dirnames=None, *, - _look_up_known=look_up_known_symbol, - _from_source=find_in_source, - ): - """Return a "resolver" func for the given known vars and dirnames. - - The func takes a single Symbol and returns a corresponding Variable. - If the symbol was located then the variable will be valid, populated - with the corresponding information. Otherwise None is returned. - """ - if knownvars: - knownvars = dict(knownvars) # a copy - def resolve_known(symbol): - found = _look_up_known(symbol, knownvars) - if found is None: - return None - elif symbol.funcname == UNKNOWN: - knownvars.pop(found.id) - elif not symbol.filename or symbol.filename == UNKNOWN: - knownvars.pop(found.id) - return found - if dirnames: - def resolve(symbol): - found = resolve_known(symbol) - if found is None: - return None - #return _from_source(symbol, dirnames) - else: - for dirname in dirnames: - if not dirname.endswith(os.path.sep): - dirname += os.path.sep - if found.filename.startswith(dirname): - break - else: - return None - return found - else: - resolve = resolve_known - elif dirnames: - def resolve(symbol): - return _from_source(symbol, dirnames) - else: - def resolve(symbol): - return None - return resolve - - -def symbols_to_variables(symbols, *, - resolve=(lambda s: look_up_known_symbol(s, None)), - ): - """Yield the variable the matches each given symbol. - - Use get_resolver() for a "resolve" func to use. 
- """ - for symbol in symbols: - if isinstance(symbol, info.Variable): - # XXX validate? - yield symbol - continue - if symbol.kind != Symbol.KIND.VARIABLE: - continue - resolved = resolve(symbol) - if resolved is None: - #raise NotImplementedError(symbol) - resolved = info.Variable( - id=symbol.id, - storage=UNKNOWN, - vartype=UNKNOWN, - ) - yield resolved diff --git a/Tools/c-analyzer/c_symbols/source.py b/Tools/c-analyzer/c_symbols/source.py deleted file mode 100644 index a724810..0000000 --- a/Tools/c-analyzer/c_symbols/source.py +++ /dev/null @@ -1,58 +0,0 @@ -from c_analyzer_common import files -from c_analyzer_common.info import UNKNOWN -from c_parser import declarations - - -# XXX need tests: -# * find_symbol() - -def find_symbol(name, dirnames, *, - _perfilecache, - _iter_files=files.iter_files_by_suffix, - **kwargs - ): - """Return (filename, funcname, vartype) for the matching Symbol.""" - filenames = _iter_files(dirnames, ('.c', '.h')) - return _find_symbol(name, filenames, _perfilecache, **kwargs) - - -def _get_symbols(filename, *, - _iter_variables=declarations.iter_variables, - ): - """Return the list of Symbols found in the given file.""" - symbols = {} - for funcname, name, vartype in _iter_variables(filename): - if not funcname: - continue - try: - instances = symbols[name] - except KeyError: - instances = symbols[name] = [] - instances.append((funcname, vartype)) - return symbols - - -def _find_symbol(name, filenames, _perfilecache, *, - _get_local_symbols=_get_symbols, - ): - for filename in filenames: - try: - symbols = _perfilecache[filename] - except KeyError: - symbols = _perfilecache[filename] = _get_local_symbols(filename) - - try: - instances = symbols[name] - except KeyError: - continue - - funcname, vartype = instances.pop(0) - if not instances: - symbols.pop(name) - return filename, funcname, vartype - else: - return UNKNOWN, UNKNOWN, UNKNOWN - - -def iter_symbols(): - raise NotImplementedError diff --git 
a/Tools/c-analyzer/cpython/README b/Tools/c-analyzer/cpython/README new file mode 100644 index 0000000..772b8be --- /dev/null +++ b/Tools/c-analyzer/cpython/README @@ -0,0 +1,72 @@ +####################################### +# C Globals and CPython Runtime State. + +CPython's C code makes extensive use of global variables (whether static +globals or static locals). Each such variable falls into one of several +categories: + +* strictly const data +* used exclusively in main or in the REPL +* process-global state (e.g. managing process-level resources + like signals and file descriptors) +* Python "global" runtime state +* per-interpreter runtime state + +The last one can be a problem as soon as anyone creates a second +interpreter (AKA "subinterpreter") in a process. It is definitely a +problem under subinterpreters if they are no longer sharing the GIL, +since the GIL protects us from a lot of race conditions. Keep in mind +that ultimately *all* objects (PyObject) should be treated as +per-interpreter state. This includes "static types", freelists, +_PyIdentifier, and singletons. Take that in for a second. It has +significant implications on where we use static variables! + +Be aware that module-global state (stored in C statics) is a kind of +per-interpreter state. There have been efforts across many years, and +still going, to provide extension module authors mechanisms to store +that state safely (see PEPs 3121, 489, etc.). + +(Note that there has been discussion around support for running multiple +Python runtimes in the same process. That would end up with the same +problems, relative to static variables, that subinterpreters have.) + +Historically we have been bad at keeping per-interpreter state out of +static variables, mostly because until recently subinterpreters were +not widely used nor even factored in to solutions. However, the +feature is growing in popularity and use in the community.
+ +Mandate: "Eliminate use of static variables for per-interpreter state." + +The "c-statics.py" script in this directory, along with its accompanying +data files, are part of the effort to resolve existing problems with +our use of static variables and to prevent future problems. + +#------------------------- +## statics for actually-global state (and runtime state consolidation) + +In general, holding any kind of state in static variables +increases maintenance burden and increases the complexity of code (e.g. +we use TSS to identify the active thread state). So it is a good idea +to avoid using statics for state even if for the "global" runtime or +for process-global state. + +Relative to maintenance burden, one problem is where the runtime +state is spread throughout the codebase in dozens of individual +globals. Unlike the other globals, the runtime state represents a set +of values that are constantly shifting in a complex way. When they are +spread out it's harder to get a clear picture of what the runtime +involves. Furthermore, when they are spread out it complicates efforts +that change the runtime. + +Consequently, the globals for Python's runtime state have been +consolidated under a single top-level _PyRuntime global. No new globals +should be added for runtime state. Instead, they should be added to +_PyRuntimeState or one of its sub-structs. The tools in this directory +are run as part of the test suite to ensure that no new globals have +been added. The script can be run manually as well: + + ./python Lib/test/test_c_statics/c-statics.py check + +If it reports any globals then they should be resolved. If the globals +are runtime state then they should be folded into _PyRuntimeState. +Otherwise they should be marked as ignored. 
diff --git a/Tools/c-analyzer/cpython/__init__.py b/Tools/c-analyzer/cpython/__init__.py new file mode 100644 index 0000000..ae45b42 --- /dev/null +++ b/Tools/c-analyzer/cpython/__init__.py @@ -0,0 +1,29 @@ +import os.path +import sys + + +TOOL_ROOT = os.path.abspath( + os.path.dirname( # c-analyzer/ + os.path.dirname(__file__))) # cpython/ +DATA_DIR = TOOL_ROOT +REPO_ROOT = ( + os.path.dirname( # .. + os.path.dirname(TOOL_ROOT))) # Tools/ + +INCLUDE_DIRS = [os.path.join(REPO_ROOT, name) for name in [ + 'Include', + ]] +SOURCE_DIRS = [os.path.join(REPO_ROOT, name) for name in [ + 'Python', + 'Parser', + 'Objects', + 'Modules', + ]] + +#PYTHON = os.path.join(REPO_ROOT, 'python') +PYTHON = sys.executable + + +# Clean up the namespace. +del sys +del os diff --git a/Tools/c-analyzer/cpython/__main__.py b/Tools/c-analyzer/cpython/__main__.py new file mode 100644 index 0000000..6b0f9bc --- /dev/null +++ b/Tools/c-analyzer/cpython/__main__.py @@ -0,0 +1,212 @@ +import argparse +import re +import sys + +from c_analyzer.common import show +from c_analyzer.common.info import UNKNOWN + +from . 
import SOURCE_DIRS +from .find import supported_vars +from .known import ( + from_file as known_from_file, + DATA_FILE as KNOWN_FILE, + ) +from .supported import IGNORED_FILE + + +def _check_results(unknown, knownvars, used): + def _match_unused_global(variable): + found = [] + for varid in knownvars: + if varid in used: + continue + if varid.funcname is not None: + continue + if varid.name != variable.name: + continue + if variable.filename and variable.filename != UNKNOWN: + if variable.filename == varid.filename: + found.append(varid) + else: + found.append(varid) + return found + + badknown = set() + for variable in sorted(unknown): + msg = None + if variable.funcname != UNKNOWN: + msg = f'could not find global symbol {variable.id}' + elif m := _match_unused_global(variable): + assert isinstance(m, list) + badknown.update(m) + elif variable.name in ('completed', 'id'): # XXX Figure out where these variables are. + unknown.remove(variable) + else: + msg = f'could not find local symbol {variable.id}' + if msg: + #raise Exception(msg) + print(msg) + if badknown: + print('---') + print(f'{len(badknown)} globals in known.tsv, but may actually be local:') + for varid in sorted(badknown): + print(f'{varid.filename:30} {varid.name}') + unused = sorted(varid + for varid in set(knownvars) - used + if varid.name != 'id') # XXX Figure out where these variables are. + if unused: + print('---') + print(f'did not use {len(unused)} known vars:') + for varid in unused: + print(f'{varid.filename:30} {varid.funcname or "-":20} {varid.name}') + raise Exception('not all known symbols used') + if unknown: + print('---') + raise Exception('could not find all symbols') + + +# XXX Move this check to its own command. 
+def cmd_check_cache(cmd, *, + known=KNOWN_FILE, + ignored=IGNORED_FILE, + _known_from_file=known_from_file, + _find=supported_vars, + ): + known = _known_from_file(known) + + used = set() + unknown = set() + for var, supported in _find(known=known, ignored=ignored): + if supported is None: + unknown.add(var) + continue + used.add(var.id) + _check_results(unknown, known['variables'], used) + + +def cmd_check(cmd, *, + known=KNOWN_FILE, + ignored=IGNORED_FILE, + _find=supported_vars, + _show=show.basic, + _print=print, + ): + """ + Fail if there are unsupported globals variables. + + In the failure case, the list of unsupported variables + will be printed out. + """ + unsupported = [] + for var, supported in _find(known=known, ignored=ignored): + if not supported: + unsupported.append(var) + + if not unsupported: + #_print('okay') + return + + _print('ERROR: found unsupported global variables') + _print() + _show(sorted(unsupported)) + _print(f' ({len(unsupported)} total)') + sys.exit(1) + + +def cmd_show(cmd, *, + known=KNOWN_FILE, + ignored=IGNORED_FILE, + skip_objects=False, + _find=supported_vars, + _show=show.basic, + _print=print, + ): + """ + Print out the list of found global variables. + + The variables will be distinguished as "supported" or "unsupported". 
+ """ + allsupported = [] + allunsupported = [] + for found, supported in _find(known=known, + ignored=ignored, + skip_objects=skip_objects, + ): + if supported is None: + continue + (allsupported if supported else allunsupported + ).append(found) + + _print('supported:') + _print('----------') + _show(sorted(allsupported)) + _print(f' ({len(allsupported)} total)') + _print() + _print('unsupported:') + _print('------------') + _show(sorted(allunsupported)) + _print(f' ({len(allunsupported)} total)') + + +############################# +# the script + +COMMANDS = { + 'check': cmd_check, + 'show': cmd_show, + } + +PROG = sys.argv[0] +PROG = 'c-globals.py' + + +def parse_args(prog=PROG, argv=sys.argv[1:], *, _fail=None): + common = argparse.ArgumentParser(add_help=False) + common.add_argument('--ignored', metavar='FILE', + default=IGNORED_FILE, + help='path to file that lists ignored vars') + common.add_argument('--known', metavar='FILE', + default=KNOWN_FILE, + help='path to file that lists known types') + #common.add_argument('dirs', metavar='DIR', nargs='*', + # default=SOURCE_DIRS, + # help='a directory to check') + + parser = argparse.ArgumentParser( + prog=prog, + ) + subs = parser.add_subparsers(dest='cmd') + + check = subs.add_parser('check', parents=[common]) + + show = subs.add_parser('show', parents=[common]) + show.add_argument('--skip-objects', action='store_true') + + if _fail is None: + def _fail(msg): + parser.error(msg) + + # Now parse the args. 
+ args = parser.parse_args(argv) + ns = vars(args) + + cmd = ns.pop('cmd') + if not cmd: + _fail('missing command') + + return cmd, ns + + +def main(cmd, cmdkwargs=None, *, _COMMANDS=COMMANDS): + try: + cmdfunc = _COMMANDS[cmd] + except KeyError: + raise ValueError( + f'unsupported cmd {cmd!r}' if cmd else 'missing cmd') + + cmdfunc(cmd, **cmdkwargs or {}) + + +if __name__ == '__main__': + cmd, cmdkwargs = parse_args() + main(cmd, cmdkwargs) diff --git a/Tools/c-analyzer/cpython/_generate.py b/Tools/c-analyzer/cpython/_generate.py new file mode 100644 index 0000000..4c340ac --- /dev/null +++ b/Tools/c-analyzer/cpython/_generate.py @@ -0,0 +1,329 @@ +# The code here consists of hacks for pre-populating the known.tsv file. + +from c_analyzer.parser.preprocessor import _iter_clean_lines +from c_analyzer.parser.naive import ( + iter_variables, parse_variable_declaration, find_variables, + ) +from c_analyzer.common.known import HEADER as KNOWN_HEADER +from c_analyzer.common.info import UNKNOWN, ID +from c_analyzer.variables import Variable +from c_analyzer.util import write_tsv + +from . 
import SOURCE_DIRS, REPO_ROOT +from .known import DATA_FILE as KNOWN_FILE +from .files import iter_cpython_files + + +POTS = ('char ', 'wchar_t ', 'int ', 'Py_ssize_t ') +POTS += tuple('const ' + v for v in POTS) +STRUCTS = ('PyTypeObject', 'PyObject', 'PyMethodDef', 'PyModuleDef', 'grammar') + + +def _parse_global(line, funcname=None): + line = line.strip() + if line.startswith('static '): + if '(' in line and '[' not in line and ' = ' not in line: + return None, None + name, decl = parse_variable_declaration(line) + elif line.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')): + name, decl = parse_variable_declaration(line) + elif line.startswith('_Py_static_string('): + decl = line.strip(';').strip() + name = line.split('(')[1].split(',')[0].strip() + elif line.startswith('_Py_IDENTIFIER('): + decl = line.strip(';').strip() + name = 'PyId_' + line.split('(')[1].split(')')[0].strip() + elif funcname: + return None, None + + # global-only + elif line.startswith('PyAPI_DATA('): # only in .h files + name, decl = parse_variable_declaration(line) + elif line.startswith('extern '): # only in .h files + name, decl = parse_variable_declaration(line) + elif line.startswith('PyDoc_VAR('): + decl = line.strip(';').strip() + name = line.split('(')[1].split(')')[0].strip() + elif line.startswith(POTS): # implied static + if '(' in line and '[' not in line and ' = ' not in line: + return None, None + name, decl = parse_variable_declaration(line) + elif line.startswith(STRUCTS) and line.endswith(' = {'): # implied static + name, decl = parse_variable_declaration(line) + elif line.startswith(STRUCTS) and line.endswith(' = NULL;'): # implied static + name, decl = parse_variable_declaration(line) + elif line.startswith('struct '): + if not line.endswith(' = {'): + return None, None + if not line.partition(' ')[2].startswith(STRUCTS): + return None, None + # implied static + name, decl = parse_variable_declaration(line) + + # file-specific + elif line.startswith(('SLOT1BINFULL(', 
'SLOT1BIN(')): + # Objects/typeobject.c + funcname = line.split('(')[1].split(',')[0] + return [ + ('op_id', funcname, '_Py_static_string(op_id, OPSTR)'), + ('rop_id', funcname, '_Py_static_string(op_id, OPSTR)'), + ] + elif line.startswith('WRAP_METHOD('): + # Objects/weakrefobject.c + funcname, name = (v.strip() for v in line.split('(')[1].split(')')[0].split(',')) + return [ + ('PyId_' + name, funcname, f'_Py_IDENTIFIER({name})'), + ] + + else: + return None, None + return name, decl + + +def _pop_cached(varcache, filename, funcname, name, *, + _iter_variables=iter_variables, + ): + # Look for the file. + try: + cached = varcache[filename] + except KeyError: + cached = varcache[filename] = {} + for variable in _iter_variables(filename, + parse_variable=_parse_global, + ): + variable._isglobal = True + cached[variable.id] = variable + for var in cached: + print(' ', var) + + # Look for the variable. + if funcname == UNKNOWN: + for varid in cached: + if varid.name == name: + break + else: + return None + return cached.pop(varid) + else: + return cached.pop((filename, funcname, name), None) + + +def find_matching_variable(varid, varcache, allfilenames, *, + _pop_cached=_pop_cached, + ): + if varid.filename and varid.filename != UNKNOWN: + filenames = [varid.filename] + else: + filenames = allfilenames + for filename in filenames: + variable = _pop_cached(varcache, filename, varid.funcname, varid.name) + if variable is not None: + return variable + else: + if varid.filename and varid.filename != UNKNOWN and varid.funcname is None: + for filename in allfilenames: + if not filename.endswith('.h'): + continue + variable = _pop_cached(varcache, filename, None, varid.name) + if variable is not None: + return variable + return None + + +MULTILINE = { + # Python/Python-ast.c + 'Load_singleton': 'PyObject *', + 'Store_singleton': 'PyObject *', + 'Del_singleton': 'PyObject *', + 'AugLoad_singleton': 'PyObject *', + 'AugStore_singleton': 'PyObject *', + 'Param_singleton': 
'PyObject *', + 'And_singleton': 'PyObject *', + 'Or_singleton': 'PyObject *', + 'Add_singleton': 'static PyObject *', + 'Sub_singleton': 'static PyObject *', + 'Mult_singleton': 'static PyObject *', + 'MatMult_singleton': 'static PyObject *', + 'Div_singleton': 'static PyObject *', + 'Mod_singleton': 'static PyObject *', + 'Pow_singleton': 'static PyObject *', + 'LShift_singleton': 'static PyObject *', + 'RShift_singleton': 'static PyObject *', + 'BitOr_singleton': 'static PyObject *', + 'BitXor_singleton': 'static PyObject *', + 'BitAnd_singleton': 'static PyObject *', + 'FloorDiv_singleton': 'static PyObject *', + 'Invert_singleton': 'static PyObject *', + 'Not_singleton': 'static PyObject *', + 'UAdd_singleton': 'static PyObject *', + 'USub_singleton': 'static PyObject *', + 'Eq_singleton': 'static PyObject *', + 'NotEq_singleton': 'static PyObject *', + 'Lt_singleton': 'static PyObject *', + 'LtE_singleton': 'static PyObject *', + 'Gt_singleton': 'static PyObject *', + 'GtE_singleton': 'static PyObject *', + 'Is_singleton': 'static PyObject *', + 'IsNot_singleton': 'static PyObject *', + 'In_singleton': 'static PyObject *', + 'NotIn_singleton': 'static PyObject *', + # Python/symtable.c + 'top': 'static identifier ', + 'lambda': 'static identifier ', + 'genexpr': 'static identifier ', + 'listcomp': 'static identifier ', + 'setcomp': 'static identifier ', + 'dictcomp': 'static identifier ', + '__class__': 'static identifier ', + # Python/compile.c + '__doc__': 'static PyObject *', + '__annotations__': 'static PyObject *', + # Objects/floatobject.c + 'double_format': 'static float_format_type ', + 'float_format': 'static float_format_type ', + 'detected_double_format': 'static float_format_type ', + 'detected_float_format': 'static float_format_type ', + # Parser/listnode.c + 'level': 'static int ', + 'atbol': 'static int ', + # Python/dtoa.c + 'private_mem': 'static double private_mem[PRIVATE_mem]', + 'pmem_next': 'static double *', + # Modules/_weakref.c + 
'weakref_functions': 'static PyMethodDef ', +} +INLINE = { + # Modules/_tracemalloc.c + 'allocators': 'static struct { PyMemAllocatorEx mem; PyMemAllocatorEx raw; PyMemAllocatorEx obj; } ', + # Modules/faulthandler.c + 'fatal_error': 'static struct { int enabled; PyObject *file; int fd; int all_threads; PyInterpreterState *interp; void *exc_handler; } ', + 'thread': 'static struct { PyObject *file; int fd; PY_TIMEOUT_T timeout_us; int repeat; PyInterpreterState *interp; int exit; char *header; size_t header_len; PyThread_type_lock cancel_event; PyThread_type_lock running; } ', + # Modules/signalmodule.c + 'Handlers': 'static volatile struct { _Py_atomic_int tripped; PyObject *func; } Handlers[NSIG]', + 'wakeup': 'static volatile struct { SOCKET_T fd; int warn_on_full_buffer; int use_send; } ', + # Python/dynload_shlib.c + 'handles': 'static struct { dev_t dev; ino_t ino; void *handle; } handles[128]', + # Objects/obmalloc.c + '_PyMem_Debug': 'static struct { debug_alloc_api_t raw; debug_alloc_api_t mem; debug_alloc_api_t obj; } ', + # Python/bootstrap_hash.c + 'urandom_cache': 'static struct { int fd; dev_t st_dev; ino_t st_ino; } ', + } +FUNC = { + # Objects/object.c + '_Py_abstract_hack': 'Py_ssize_t (*_Py_abstract_hack)(PyObject *)', + # Parser/myreadline.c + 'PyOS_InputHook': 'int (*PyOS_InputHook)(void)', + # Python/pylifecycle.c + '_PyOS_mystrnicmp_hack': 'int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t)', + # Parser/myreadline.c + 'PyOS_ReadlineFunctionPointer': 'char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *)', + } +IMPLIED = { + # Objects/boolobject.c + '_Py_FalseStruct': 'static struct _longobject ', + '_Py_TrueStruct': 'static struct _longobject ', + # Modules/config.c + '_PyImport_Inittab': 'struct _inittab _PyImport_Inittab[]', + } +GLOBALS = {} +GLOBALS.update(MULTILINE) +GLOBALS.update(INLINE) +GLOBALS.update(FUNC) +GLOBALS.update(IMPLIED) + +LOCALS = { + 'buildinfo': ('Modules/getbuildinfo.c', + 
'Py_GetBuildInfo', + 'static char buildinfo[50 + sizeof(GITVERSION) + ((sizeof(GITTAG) > sizeof(GITBRANCH)) ? sizeof(GITTAG) : sizeof(GITBRANCH))]'), + 'methods': ('Python/codecs.c', + '_PyCodecRegistry_Init', + 'static struct { char *name; PyMethodDef def; } methods[]'), + } + + +def _known(symbol): + if symbol.funcname: + if symbol.funcname != UNKNOWN or symbol.filename != UNKNOWN: + raise KeyError(symbol.name) + filename, funcname, decl = LOCALS[symbol.name] + varid = ID(filename, funcname, symbol.name) + elif not symbol.filename or symbol.filename == UNKNOWN: + raise KeyError(symbol.name) + else: + varid = symbol.id + try: + decl = GLOBALS[symbol.name] + except KeyError: + + if symbol.name.endswith('_methods'): + decl = 'static PyMethodDef ' + elif symbol.filename == 'Objects/exceptions.c' and symbol.name.startswith(('PyExc_', '_PyExc_')): + decl = 'static PyTypeObject ' + else: + raise + if symbol.name not in decl: + decl = decl + symbol.name + return Variable(varid, 'static', decl) + + +def known_row(varid, decl): + return ( + varid.filename, + varid.funcname or '-', + varid.name, + 'variable', + decl, + ) + + +def known_rows(symbols, *, + cached=True, + _get_filenames=iter_cpython_files, + _find_match=find_matching_variable, + _find_symbols=find_variables, + _as_known=known_row, + ): + filenames = list(_get_filenames()) + cache = {} + if cached: + for symbol in symbols: + try: + found = _known(symbol) + except KeyError: + found = _find_match(symbol, cache, filenames) + if found is None: + found = Variable(symbol.id, UNKNOWN, UNKNOWN) + yield _as_known(found.id, found.vartype) + else: + raise NotImplementedError # XXX incorporate KNOWN + for variable in _find_symbols(symbols, filenames, + srccache=cache, + parse_variable=_parse_global, + ): + #variable = variable._replace( + # filename=os.path.relpath(variable.filename, REPO_ROOT)) + if variable.funcname == UNKNOWN: + print(variable) + if variable.vartype== UNKNOWN: + print(variable) + yield 
_as_known(variable.id, variable.vartype) + + +def generate(symbols, filename=None, *, + _generate_rows=known_rows, + _write_tsv=write_tsv, + ): + if not filename: + filename = KNOWN_FILE + '.new' + + rows = _generate_rows(symbols) + _write_tsv(filename, KNOWN_HEADER, rows) + + +if __name__ == '__main__': + from c_symbols import binary + symbols = binary.iter_symbols( + binary.PYTHON, + find_local_symbol=None, + ) + generate(symbols) diff --git a/Tools/c-analyzer/cpython/files.py b/Tools/c-analyzer/cpython/files.py new file mode 100644 index 0000000..543097a --- /dev/null +++ b/Tools/c-analyzer/cpython/files.py @@ -0,0 +1,29 @@ +from c_analyzer.common.files import ( + C_SOURCE_SUFFIXES, walk_tree, iter_files_by_suffix, + ) + +from . import SOURCE_DIRS, REPO_ROOT + +# XXX need tests: +# * iter_files() + + +def iter_files(*, + walk=walk_tree, + _files=iter_files_by_suffix, + ): + """Yield each file in the tree for each of the given directory names.""" + excludedtrees = [ + os.path.join('Include', 'cpython', ''), + ] + def is_excluded(filename): + for root in excludedtrees: + if filename.startswith(root): + return True + return False + for filename in _files(SOURCE_DIRS, C_SOURCE_SUFFIXES, REPO_ROOT, + walk=walk, + ): + if is_excluded(filename): + continue + yield filename diff --git a/Tools/c-analyzer/cpython/find.py b/Tools/c-analyzer/cpython/find.py new file mode 100644 index 0000000..a7bc0b4 --- /dev/null +++ b/Tools/c-analyzer/cpython/find.py @@ -0,0 +1,101 @@ +import os.path + +from c_analyzer.common import files +from c_analyzer.common.info import UNKNOWN, ID +from c_analyzer.variables import find as _common + +from . 
import SOURCE_DIRS, PYTHON, REPO_ROOT +from .known import ( + from_file as known_from_file, + DATA_FILE as KNOWN_FILE, + ) +from .supported import ( + ignored_from_file, IGNORED_FILE, is_supported, _is_object, + ) + +# XXX need tests: +# * vars_from_binary() +# * vars_from_source() +# * supported_vars() + + +def _handle_id(filename, funcname, name, *, + _relpath=os.path.relpath, + ): + filename = _relpath(filename, REPO_ROOT) + return ID(filename, funcname, name) + + +def vars_from_binary(*, + known=KNOWN_FILE, + _known_from_file=known_from_file, + _iter_files=files.iter_files_by_suffix, + _iter_vars=_common.vars_from_binary, + ): + """Yield a Variable for each found Symbol. + + Details are filled in from the given "known" variables and types. + """ + if isinstance(known, str): + known = _known_from_file(known) + dirnames = SOURCE_DIRS + suffixes = ('.c',) + filenames = _iter_files(dirnames, suffixes) + # XXX For now we only use known variables (no source lookup). + filenames = None + yield from _iter_vars(PYTHON, + known=known, + filenames=filenames, + handle_id=_handle_id, + check_filename=(lambda n: True), + ) + + +def vars_from_source(*, + preprocessed=None, + known=KNOWN_FILE, + _known_from_file=known_from_file, + _iter_files=files.iter_files_by_suffix, + _iter_vars=_common.vars_from_source, + ): + """Yield a Variable for each declaration in the raw source code. + + Details are filled in from the given "known" variables and types. 
+ """ + if isinstance(known, str): + known = _known_from_file(known) + dirnames = SOURCE_DIRS + suffixes = ('.c',) + filenames = _iter_files(dirnames, suffixes) + yield from _iter_vars(filenames, + preprocessed=preprocessed, + known=known, + handle_id=_handle_id, + ) + + +def supported_vars(*, + known=KNOWN_FILE, + ignored=IGNORED_FILE, + skip_objects=False, + _known_from_file=known_from_file, + _ignored_from_file=ignored_from_file, + _iter_vars=vars_from_binary, + _is_supported=is_supported, + ): + """Yield (var, is supported) for each found variable.""" + if isinstance(known, str): + known = _known_from_file(known) + if isinstance(ignored, str): + ignored = _ignored_from_file(ignored) + + for var in _iter_vars(known=known): + if not var.isglobal: + continue + elif var.vartype == UNKNOWN: + yield var, None + # XXX Support proper filters instead. + elif skip_objects and _is_object(found.vartype): + continue + else: + yield var, _is_supported(var, ignored, known) diff --git a/Tools/c-analyzer/cpython/known.py b/Tools/c-analyzer/cpython/known.py new file mode 100644 index 0000000..c3cc2c0 --- /dev/null +++ b/Tools/c-analyzer/cpython/known.py @@ -0,0 +1,66 @@ +import csv +import os.path + +from c_analyzer.parser.declarations import extract_storage +from c_analyzer.variables import known as _common +from c_analyzer.variables.info import Variable + +from . 
import DATA_DIR + + +# XXX need tests: +# * from_file() +# * look_up_variable() + + +DATA_FILE = os.path.join(DATA_DIR, 'known.tsv') + + +def _get_storage(decl, infunc): + # statics + if decl.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')): + return 'static' + if decl.startswith(('_Py_IDENTIFIER(', '_Py_static_string(')): + return 'static' + if decl.startswith('PyDoc_VAR('): + return 'static' + if decl.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')): + return 'static' + if decl.startswith('WRAP_METHOD('): + return 'static' + # public extern + if decl.startswith('PyAPI_DATA('): + return 'extern' + # Fall back to the normal handler. + return extract_storage(decl, infunc=infunc) + + +def _handle_var(varid, decl): +# if varid.name == 'id' and decl == UNKNOWN: +# # None of these are variables. +# decl = 'int id'; + storage = _get_storage(decl, varid.funcname) + return Variable(varid, storage, decl) + + +def from_file(infile=DATA_FILE, *, + _from_file=_common.from_file, + _handle_var=_handle_var, + ): + """Return the info for known declarations in the given file.""" + return _from_file(infile, handle_var=_handle_var) + + +def look_up_variable(varid, knownvars, *, + _lookup=_common.look_up_variable, + ): + """Return the known variable matching the given ID. + + "knownvars" is a mapping of ID to Variable. + + "match_files" is used to verify if two filenames point to + the same file. + + If no match is found then None is returned. + """ + return _lookup(varid, knownvars) diff --git a/Tools/c-analyzer/cpython/supported.py b/Tools/c-analyzer/cpython/supported.py new file mode 100644 index 0000000..18786ee --- /dev/null +++ b/Tools/c-analyzer/cpython/supported.py @@ -0,0 +1,398 @@ +import os.path +import re + +from c_analyzer.common.info import ID +from c_analyzer.common.util import read_tsv, write_tsv + +from . 
# XXX need tests:
# * generate / script


IGNORED_FILE = os.path.join(DATA_DIR, 'ignored.tsv')

IGNORED_COLUMNS = ('filename', 'funcname', 'name', 'kind', 'reason')
IGNORED_HEADER = '\t'.join(IGNORED_COLUMNS)

# Reasons for ignoring specific file-level globals, keyed by name.
# XXX Move these to ignored.tsv.
IGNORED = {
    # global
    'PyImport_FrozenModules': 'process-global',
    'M___hello__': 'process-global',
    'inittab_copy': 'process-global',
    'PyHash_Func': 'process-global',
    '_Py_HashSecret_Initialized': 'process-global',
    '_TARGET_LOCALES': 'process-global',

    # startup (only changed before/during)
    '_PyRuntime': 'runtime startup',
    'runtime_initialized': 'runtime startup',
    'static_arg_parsers': 'runtime startup',
    'orig_argv': 'runtime startup',
    'opt_ptr': 'runtime startup',
    '_preinit_warnoptions': 'runtime startup',
    '_Py_StandardStreamEncoding': 'runtime startup',
    'Py_FileSystemDefaultEncoding': 'runtime startup',
    '_Py_StandardStreamErrors': 'runtime startup',
    'Py_FileSystemDefaultEncodeErrors': 'runtime startup',
    'Py_BytesWarningFlag': 'runtime startup',
    'Py_DebugFlag': 'runtime startup',
    'Py_DontWriteBytecodeFlag': 'runtime startup',
    'Py_FrozenFlag': 'runtime startup',
    'Py_HashRandomizationFlag': 'runtime startup',
    'Py_IgnoreEnvironmentFlag': 'runtime startup',
    'Py_InspectFlag': 'runtime startup',
    'Py_InteractiveFlag': 'runtime startup',
    'Py_IsolatedFlag': 'runtime startup',
    'Py_NoSiteFlag': 'runtime startup',
    'Py_NoUserSiteDirectory': 'runtime startup',
    'Py_OptimizeFlag': 'runtime startup',
    'Py_QuietFlag': 'runtime startup',
    'Py_UTF8Mode': 'runtime startup',
    'Py_UnbufferedStdioFlag': 'runtime startup',
    'Py_VerboseFlag': 'runtime startup',
    '_Py_path_config': 'runtime startup',
    '_PyOS_optarg': 'runtime startup',
    '_PyOS_opterr': 'runtime startup',
    '_PyOS_optind': 'runtime startup',
    '_Py_HashSecret': 'runtime startup',

    # REPL
    '_PyOS_ReadlineLock': 'repl',
    '_PyOS_ReadlineTState': 'repl',

    # effectively const
    'tracemalloc_empty_traceback': 'const',
    '_empty_bitmap_node': 'const',
    'posix_constants_pathconf': 'const',
    'posix_constants_confstr': 'const',
    'posix_constants_sysconf': 'const',
    '_PySys_ImplCacheTag': 'const',
    '_PySys_ImplName': 'const',
    'PyImport_Inittab': 'const',
    '_PyImport_DynLoadFiletab': 'const',
    '_PyParser_Grammar': 'const',
    'Py_hexdigits': 'const',
    '_PyImport_Inittab': 'const',
    '_PyByteArray_empty_string': 'const',
    '_PyLong_DigitValue': 'const',
    '_Py_SwappedOp': 'const',
    'PyStructSequence_UnnamedField': 'const',

    # signals are main-thread only
    'faulthandler_handlers': 'signals are main-thread only',
    'user_signals': 'signals are main-thread only',
    'wakeup': 'signals are main-thread only',

    # hacks
    '_PySet_Dummy': 'only used as a placeholder',
    }

BENIGN = 'races here are benign and unlikely'

# Reasons for ignoring specific variables, keyed by filename and then
# by variable name.  Unlike IGNORED, these apply regardless of whether
# or not the variable is local to a function.
_IGNORED_BY_FILE = {
    'Python/dtoa.c': {
        # guarded by lock?
        'p5s': 'dtoa is thread-safe?',
        'freelist': 'dtoa is thread-safe?',
        'private_mem': 'dtoa is thread-safe?',
        'pmem_next': 'dtoa is thread-safe?',
        },
    'Python/thread.c': {
        # Threads do not become an issue until after these have been set
        # and these never get changed after that.
        'initialized': 'thread-safe',
        'thread_debug': 'thread-safe',
        },
    'Python/getversion.c': {
        # Races are benign here, as well as unlikely.
        'version': BENIGN,
        },
    'Python/fileutils.c': {
        'force_ascii': BENIGN,
        'ioctl_works': BENIGN,
        '_Py_open_cloexec_works': BENIGN,
        },
    'Python/codecs.c': {
        'ucnhash_CAPI': BENIGN,
        },
    'Python/bootstrap_hash.c': {
        'getrandom_works': BENIGN,
        },
    'Objects/unicodeobject.c': {
        'ucnhash_CAPI': BENIGN,
        # *mostly* benign
        'bloom_linebreak': BENIGN,
        },
    'Modules/getbuildinfo.c': {
        # The static is used for pre-allocation.
        'buildinfo': BENIGN,
        },
    'Modules/posixmodule.c': {
        'ticks_per_second': BENIGN,
        'dup3_works': BENIGN,
        },
    'Modules/timemodule.c': {
        'ticks_per_second': BENIGN,
        },
    'Objects/longobject.c': {
        'log_base_BASE': BENIGN,
        'convwidth_base': BENIGN,
        'convmultmax_base': BENIGN,
        },
    }


def is_supported(variable, ignored=None, known=None, *,
                 _ignored=(lambda *a, **k: _is_ignored(*a, **k)),
                 _vartype_okay=(lambda *a, **k: _is_vartype_okay(*a, **k)),
                 ):
    """Return True if the given global variable is okay in CPython.

    "ignored" is the mapping returned by ignored_from_file() (or None).
    Its "variables" and "types" entries, if any, are passed along to
    the respective checks.  "known" is accepted but not used here.

    The variable is okay if it is explicitly ignored or if its type is
    one that is considered safe.
    """
    if _ignored(variable,
                ignored and ignored.get('variables')):
        return True
    # "ignored" defaults to None, so it must be guarded here just like
    # it is above.  Otherwise we would fail with AttributeError.
    if _vartype_okay(variable.vartype,
                     ignored and ignored.get('types')):
        return True
    return False


def _is_ignored(variable, ignoredvars=None, *,
                _IGNORED=IGNORED,
                ):
    """Return the reason if the variable is a supported global.

    Return None if the variable is not a supported global.

    The checks, in order: an explicit entry in "ignoredvars" (keyed by
    the variable's ID), the hard-coded names in IGNORED (file-level
    variables only), compiler-generated data, and finally the per-file
    table of one-off exceptions.
    """
    if ignoredvars and (reason := ignoredvars.get(variable.id)):
        return reason

    if variable.funcname is None:
        if reason := _IGNORED.get(variable.name):
            return reason

    filename = variable.filename

    # compiler
    if filename == 'Python/graminit.c':
        if variable.vartype.startswith('static state '):
            return 'compiler'
    elif filename == 'Python/symtable.c':
        if variable.vartype.startswith('static identifier '):
            return 'compiler'
    elif filename == 'Python/Python-ast.c':
        # These should be const.
        if variable.name.endswith(('_field', '_attribute')):
            return 'compiler'

    # other
    return _IGNORED_BY_FILE.get(filename, {}).get(variable.name)


def _is_vartype_okay(vartype, ignoredtypes=None):
    """Return the reason the given type is safe for a global (or None).

    Types that look like Python objects are never okay.  "ignoredtypes"
    is accepted for symmetry with _is_ignored() but is not used yet.
    """
    if _is_object(vartype):
        return None

    if vartype.startswith(('static const ', 'const ')):
        return 'const'

    const_markers = (
        # components for TypeObject definitions
        'PyMethodDef', 'PyGetSetDef', 'PyMemberDef',
        'PyNumberMethods', 'PySequenceMethods', 'PyMappingMethods',
        'PyBufferProcs', 'PyAsyncMethods',
        'slotdef', 'newfunc',
        # structseq
        'PyStructSequence_Desc', 'PyStructSequence_Field',
        # other definitions
        'PyModuleDef',
        )
    if any(name in vartype for name in const_markers):
        return 'const'

    # thread-safe
    if '_Py_atomic_int' in vartype or 'pthread_condattr_t' in vartype:
        return 'thread-safe'

    # startup
    if '_Py_PreInitEntry' in vartype:
        return 'startup'

    # XXX Not yet decided:
    #  * global: PyMemAllocatorEx
    #  * others: PyThread_type_lock
    #  * ???: _Py_tss_t, _Py_hashtable_t, stack_t, _PyUnicode_Name_CAPI

    # functions
    if '(' in vartype and '[' not in vartype:
        return 'function pointer'

    # XXX finish!
    # * allow const values?
    return None


PYOBJECT_RE = re.compile(r'''
        ^
        (
            # must start with "static "
            static \s+
            (
                identifier
            )
            \b
        ) |
        (
            # may start with "static "
            ( static \s+ )?
            (
                .*
                (
                    PyObject |
                    PyTypeObject |
                    _? Py \w+ Object |
                    _PyArg_Parser |
                    _Py_Identifier |
                    traceback_t |
                    PyAsyncGenASend |
                    _PyAsyncGenWrappedValue |
                    PyContext |
                    method_cache_entry
                )
                \b
            ) |
            (
                (
                    _Py_IDENTIFIER |
                    _Py_static_string
                )
                [(]
            )
        )
        ''', re.VERBOSE)


def _is_object(vartype):
    """Return True if the given type looks like a Python object type."""
    if 'PyDictKeysObject' in vartype:
        return False
    if PYOBJECT_RE.match(vartype):
        return True
    if vartype.endswith((' _Py_FalseStruct', ' _Py_TrueStruct')):
        return True

    # XXX Add more?
    return False


def ignored_from_file(infile, *,
                      _read_tsv=read_tsv,
                      ):
    """Return the ignored vars found in the given TSV file.

    The result is a dict with a single "variables" entry, which maps
    each variable's ID to the reason it is ignored.

    ValueError is raised for any row with a kind other than "variable".
    """
    ignored = {
        'variables': {},
        #'types': {},
        #'constants': {},
        #'macros': {},
        }
    for row in _read_tsv(infile, IGNORED_HEADER):
        filename, funcname, name, kind, reason = row
        # An empty or "-" funcname marks a file-level variable.
        if not funcname or funcname == '-':
            funcname = None
        varid = ID(filename, funcname, name)
        if kind == 'variable':
            values = ignored['variables']
        else:
            raise ValueError(f'unsupported kind in row {row}')
        values[varid] = reason
    return ignored


##################################
# generate

def _get_row(varid, reason):
    """Return the ignored.tsv row (per IGNORED_COLUMNS) for the var."""
    return (
            varid.filename,
            varid.funcname or '-',
            varid.name,
            'variable',
            str(reason),
            )


def _get_rows(variables, ignored=None, *,
              _as_row=_get_row,
              _is_ignored=_is_ignored,
              _vartype_okay=_is_vartype_okay,
              ):
    """Yield a row for each of the variables that should be ignored.

    Each matching variable is also printed, followed at the end by
    the total number of rows.
    """
    count = 0
    for variable in variables:
        reason = _is_ignored(variable,
                             ignored and ignored.get('variables'),
                             )
        if not reason:
            reason = _vartype_okay(variable.vartype,
                                   ignored and ignored.get('types'))
        if not reason:
            continue

        print(' ', variable, repr(reason))
        yield _as_row(variable.id, reason)
        count += 1
    print(f'total: {count}')


def _generate_ignored_file(variables, filename=None, *,
                           _generate_rows=_get_rows,
                           _write_tsv=write_tsv,
                           ):
    """Write the rows for the ignored variables to a TSV file.

    If no filename is given then IGNORED_FILE + ".new" is used.
    """
    if not filename:
        filename = IGNORED_FILE + '.new'
    rows = _generate_rows(variables)
    _write_tsv(filename, IGNORED_HEADER, rows)


if __name__ == '__main__':
    from cpython import SOURCE_DIRS
    from cpython.known import (
        from_file as known_from_file,
        DATA_FILE as KNOWN_FILE,
        )
    # XXX This is wrong!
    from . import find
    known = known_from_file(KNOWN_FILE)
    knownvars = (known or {}).get('variables')
    variables = find.globals_from_binary(knownvars=knownvars,
                                         dirnames=SOURCE_DIRS)

    _generate_ignored_file(variables)