From 7ab9efdd6a2fb21cddca1ccd70175f1ac6bd9168 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 28 Dec 2023 00:20:57 +0100 Subject: gh-113299: Move cpp.py into libclinic (#113526) --- Lib/test/test_clinic.py | 2 +- Tools/clinic/clinic.py | 4 +- Tools/clinic/cpp.py | 191 ----------------------------------------- Tools/clinic/libclinic/cpp.py | 194 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 197 insertions(+), 194 deletions(-) delete mode 100644 Tools/clinic/cpp.py create mode 100644 Tools/clinic/libclinic/cpp.py diff --git a/Lib/test/test_clinic.py b/Lib/test/test_clinic.py index 3d6816d..7323bdd 100644 --- a/Lib/test/test_clinic.py +++ b/Lib/test/test_clinic.py @@ -3920,7 +3920,7 @@ class ClinicReprTests(unittest.TestCase): self.assertEqual(repr(parameter), "") def test_Monitor_repr(self): - monitor = clinic.cpp.Monitor("test.c") + monitor = libclinic.cpp.Monitor("test.c") self.assertRegex(repr(monitor), r"") monitor.line_number = 42 diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py index 82efff5..f6f9558 100755 --- a/Tools/clinic/clinic.py +++ b/Tools/clinic/clinic.py @@ -13,7 +13,6 @@ import builtins as bltns import collections import contextlib import copy -import cpp import dataclasses as dc import enum import functools @@ -53,6 +52,7 @@ from typing import ( # Local imports. import libclinic +import libclinic.cpp from libclinic import ClinicError @@ -648,7 +648,7 @@ class CLanguage(Language): def __init__(self, filename: str) -> None: super().__init__(filename) - self.cpp = cpp.Monitor(filename) + self.cpp = libclinic.cpp.Monitor(filename) def parse_line(self, line: str) -> None: self.cpp.writeline(line) diff --git a/Tools/clinic/cpp.py b/Tools/clinic/cpp.py deleted file mode 100644 index 6590990..0000000 --- a/Tools/clinic/cpp.py +++ /dev/null @@ -1,191 +0,0 @@ -import dataclasses as dc -import re -import sys -from typing import NoReturn - -from libclinic.errors import ParseError - - -TokenAndCondition = tuple[str, str] -TokenStack = list[TokenAndCondition] - -def negate(condition: str) -> str: - """ - Returns a CPP conditional that is the opposite of the conditional passed in. - """ - if condition.startswith('!'): - return condition[1:] - return "!" + condition - - -is_a_simple_defined = re.compile(r'^defined\s*\(\s*[A-Za-z0-9_]+\s*\)$').match - - -@dc.dataclass(repr=False) -class Monitor: - """ - A simple C preprocessor that scans C source and computes, line by line, - what the current C preprocessor #if state is. - - Doesn't handle everything--for example, if you have /* inside a C string, - without a matching */ (also inside a C string), or with a */ inside a C - string but on another line and with preprocessor macros in between... - the parser will get lost. - - Anyway this implementation seems to work well enough for the CPython sources. - """ - filename: str - _: dc.KW_ONLY - verbose: bool = False - - def __post_init__(self) -> None: - self.stack: TokenStack = [] - self.in_comment = False - self.continuation: str | None = None - self.line_number = 0 - - def __repr__(self) -> str: - parts = ( - str(id(self)), - f"line={self.line_number}", - f"condition={self.condition()!r}" - ) - return f"" - - def status(self) -> str: - return str(self.line_number).rjust(4) + ": " + self.condition() - - def condition(self) -> str: - """ - Returns the current preprocessor state, as a single #if condition. - """ - return " && ".join(condition for token, condition in self.stack) - - def fail(self, msg: str) -> NoReturn: - raise ParseError(msg, filename=self.filename, lineno=self.line_number) - - def writeline(self, line: str) -> None: - self.line_number += 1 - line = line.strip() - - def pop_stack() -> TokenAndCondition: - if not self.stack: - self.fail(f"#{token} without matching #if / #ifdef / #ifndef!") - return self.stack.pop() - - if self.continuation: - line = self.continuation + line - self.continuation = None - - if not line: - return - - if line.endswith('\\'): - self.continuation = line[:-1].rstrip() + " " - return - - # we have to ignore preprocessor commands inside comments - # - # we also have to handle this: - # /* start - # ... - # */ /* <-- tricky! - # ... - # */ - # and this: - # /* start - # ... - # */ /* also tricky! */ - if self.in_comment: - if '*/' in line: - # snip out the comment and continue - # - # GCC allows - # /* comment - # */ #include - # maybe other compilers too? - _, _, line = line.partition('*/') - self.in_comment = False - - while True: - if '/*' in line: - if self.in_comment: - self.fail("Nested block comment!") - - before, _, remainder = line.partition('/*') - comment, comment_ends, after = remainder.partition('*/') - if comment_ends: - # snip out the comment - line = before.rstrip() + ' ' + after.lstrip() - continue - # comment continues to eol - self.in_comment = True - line = before.rstrip() - break - - # we actually have some // comments - # (but block comments take precedence) - before, line_comment, comment = line.partition('//') - if line_comment: - line = before.rstrip() - - if not line.startswith('#'): - return - - line = line[1:].lstrip() - assert line - - fields = line.split() - token = fields[0].lower() - condition = ' '.join(fields[1:]).strip() - - if token in {'if', 'ifdef', 'ifndef', 'elif'}: - if not condition: - self.fail(f"Invalid format for #{token} line: no argument!") - if token in {'if', 'elif'}: - if not is_a_simple_defined(condition): - condition = "(" + condition + ")" - if token == 'elif': - previous_token, previous_condition = pop_stack() - self.stack.append((previous_token, negate(previous_condition))) - else: - fields = condition.split() - if len(fields) != 1: - self.fail(f"Invalid format for #{token} line: " - "should be exactly one argument!") - symbol = fields[0] - condition = 'defined(' + symbol + ')' - if token == 'ifndef': - condition = '!' + condition - token = 'if' - - self.stack.append((token, condition)) - - elif token == 'else': - previous_token, previous_condition = pop_stack() - self.stack.append((previous_token, negate(previous_condition))) - - elif token == 'endif': - while pop_stack()[0] != 'if': - pass - - else: - return - - if self.verbose: - print(self.status()) - - -def _main(filenames: list[str] | None = None) -> None: - filenames = filenames or sys.argv[1:] - for filename in filenames: - with open(filename) as f: - cpp = Monitor(filename, verbose=True) - print() - print(filename) - for line in f: - cpp.writeline(line) - - -if __name__ == '__main__': - _main() diff --git a/Tools/clinic/libclinic/cpp.py b/Tools/clinic/libclinic/cpp.py new file mode 100644 index 0000000..e115d65 --- /dev/null +++ b/Tools/clinic/libclinic/cpp.py @@ -0,0 +1,194 @@ +import dataclasses as dc +import re +import sys +from typing import NoReturn + +from .errors import ParseError + + +__all__ = ["Monitor"] + + +TokenAndCondition = tuple[str, str] +TokenStack = list[TokenAndCondition] + +def negate(condition: str) -> str: + """ + Returns a CPP conditional that is the opposite of the conditional passed in. + """ + if condition.startswith('!'): + return condition[1:] + return "!" + condition + + +is_a_simple_defined = re.compile(r'^defined\s*\(\s*[A-Za-z0-9_]+\s*\)$').match + + +@dc.dataclass(repr=False) +class Monitor: + """ + A simple C preprocessor that scans C source and computes, line by line, + what the current C preprocessor #if state is. + + Doesn't handle everything--for example, if you have /* inside a C string, + without a matching */ (also inside a C string), or with a */ inside a C + string but on another line and with preprocessor macros in between... + the parser will get lost. + + Anyway this implementation seems to work well enough for the CPython sources. + """ + filename: str + _: dc.KW_ONLY + verbose: bool = False + + def __post_init__(self) -> None: + self.stack: TokenStack = [] + self.in_comment = False + self.continuation: str | None = None + self.line_number = 0 + + def __repr__(self) -> str: + parts = ( + str(id(self)), + f"line={self.line_number}", + f"condition={self.condition()!r}" + ) + return f"" + + def status(self) -> str: + return str(self.line_number).rjust(4) + ": " + self.condition() + + def condition(self) -> str: + """ + Returns the current preprocessor state, as a single #if condition. + """ + return " && ".join(condition for token, condition in self.stack) + + def fail(self, msg: str) -> NoReturn: + raise ParseError(msg, filename=self.filename, lineno=self.line_number) + + def writeline(self, line: str) -> None: + self.line_number += 1 + line = line.strip() + + def pop_stack() -> TokenAndCondition: + if not self.stack: + self.fail(f"#{token} without matching #if / #ifdef / #ifndef!") + return self.stack.pop() + + if self.continuation: + line = self.continuation + line + self.continuation = None + + if not line: + return + + if line.endswith('\\'): + self.continuation = line[:-1].rstrip() + " " + return + + # we have to ignore preprocessor commands inside comments + # + # we also have to handle this: + # /* start + # ... + # */ /* <-- tricky! + # ... + # */ + # and this: + # /* start + # ... + # */ /* also tricky! */ + if self.in_comment: + if '*/' in line: + # snip out the comment and continue + # + # GCC allows + # /* comment + # */ #include + # maybe other compilers too? + _, _, line = line.partition('*/') + self.in_comment = False + + while True: + if '/*' in line: + if self.in_comment: + self.fail("Nested block comment!") + + before, _, remainder = line.partition('/*') + comment, comment_ends, after = remainder.partition('*/') + if comment_ends: + # snip out the comment + line = before.rstrip() + ' ' + after.lstrip() + continue + # comment continues to eol + self.in_comment = True + line = before.rstrip() + break + + # we actually have some // comments + # (but block comments take precedence) + before, line_comment, comment = line.partition('//') + if line_comment: + line = before.rstrip() + + if not line.startswith('#'): + return + + line = line[1:].lstrip() + assert line + + fields = line.split() + token = fields[0].lower() + condition = ' '.join(fields[1:]).strip() + + if token in {'if', 'ifdef', 'ifndef', 'elif'}: + if not condition: + self.fail(f"Invalid format for #{token} line: no argument!") + if token in {'if', 'elif'}: + if not is_a_simple_defined(condition): + condition = "(" + condition + ")" + if token == 'elif': + previous_token, previous_condition = pop_stack() + self.stack.append((previous_token, negate(previous_condition))) + else: + fields = condition.split() + if len(fields) != 1: + self.fail(f"Invalid format for #{token} line: " + "should be exactly one argument!") + symbol = fields[0] + condition = 'defined(' + symbol + ')' + if token == 'ifndef': + condition = '!' + condition + token = 'if' + + self.stack.append((token, condition)) + + elif token == 'else': + previous_token, previous_condition = pop_stack() + self.stack.append((previous_token, negate(previous_condition))) + + elif token == 'endif': + while pop_stack()[0] != 'if': + pass + + else: + return + + if self.verbose: + print(self.status()) + + +def _main(filenames: list[str] | None = None) -> None: + filenames = filenames or sys.argv[1:] + for filename in filenames: + with open(filename) as f: + cpp = Monitor(filename, verbose=True) + print() + print(filename) + for line in f: + cpp.writeline(line) + + +if __name__ == '__main__': + _main() -- cgit v0.12