summary | refs | log | tree | commit | diff | stats
path: root/Tools/clinic
diff options
context:
space:
mode:
author Victor Stinner <vstinner@python.org> 2024-03-14 16:11:39 (GMT)
committer GitHub <noreply@github.com> 2024-03-14 16:11:39 (GMT)
commit b54d7c87aaf23fbd67171d0dd3e4f4ab736e6a48 (patch)
tree c8f7b54508e5a8c89b331e9e6b96596817000226 /Tools/clinic
parent bae6579b46df50dee4dbb77ea242270d27cd0c9d (diff)
download cpython-b54d7c87aaf23fbd67171d0dd3e4f4ab736e6a48.zip
cpython-b54d7c87aaf23fbd67171d0dd3e4f4ab736e6a48.tar.gz
cpython-b54d7c87aaf23fbd67171d0dd3e4f4ab736e6a48.tar.bz2
gh-113317, AC: Add libclinic.block_parser module (#116819)
* Move Block and BlockParser classes to a new libclinic.block_parser module.
* Move Language and PythonLanguage classes to a new libclinic.language module.
Diffstat (limited to 'Tools/clinic')
-rwxr-xr-x Tools/clinic/clinic.py 338
-rw-r--r-- Tools/clinic/libclinic/block_parser.py 256
-rw-r--r-- Tools/clinic/libclinic/language.py 103
3 files changed, 361 insertions, 336 deletions
diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py
index 6488d91..ac20586 100755
--- a/Tools/clinic/clinic.py
+++ b/Tools/clinic/clinic.py
@@ -6,11 +6,9 @@
#
from __future__ import annotations
-import abc
import argparse
import ast
import builtins as bltns
-import collections
import contextlib
import dataclasses as dc
import enum
@@ -57,6 +55,8 @@ from libclinic.function import (
ClassDict, ModuleDict, FunctionKind,
CALLABLE, STATIC_METHOD, CLASS_METHOD, METHOD_INIT, METHOD_NEW,
GETTER, SETTER)
+from libclinic.language import Language, PythonLanguage
+from libclinic.block_parser import Block, BlockParser
# TODO:
@@ -144,96 +144,6 @@ class CRenderData:
self.unlock: list[str] = []
-class Language(metaclass=abc.ABCMeta):
-
- start_line = ""
- body_prefix = ""
- stop_line = ""
- checksum_line = ""
-
- def __init__(self, filename: str) -> None:
- self.filename = filename
-
- @abc.abstractmethod
- def render(
- self,
- clinic: Clinic,
- signatures: Iterable[Module | Class | Function]
- ) -> str:
- ...
-
- def parse_line(self, line: str) -> None:
- ...
-
- def validate(self) -> None:
- def assert_only_one(
- attr: str,
- *additional_fields: str
- ) -> None:
- """
- Ensures that the string found at getattr(self, attr)
- contains exactly one formatter replacement string for
- each valid field. The list of valid fields is
- ['dsl_name'] extended by additional_fields.
-
- e.g.
- self.fmt = "{dsl_name} {a} {b}"
-
- # this passes
- self.assert_only_one('fmt', 'a', 'b')
-
- # this fails, the format string has a {b} in it
- self.assert_only_one('fmt', 'a')
-
- # this fails, the format string doesn't have a {c} in it
- self.assert_only_one('fmt', 'a', 'b', 'c')
-
- # this fails, the format string has two {a}s in it,
- # it must contain exactly one
- self.fmt2 = '{dsl_name} {a} {a}'
- self.assert_only_one('fmt2', 'a')
-
- """
- fields = ['dsl_name']
- fields.extend(additional_fields)
- line: str = getattr(self, attr)
- fcf = libclinic.FormatCounterFormatter()
- fcf.format(line)
- def local_fail(should_be_there_but_isnt: bool) -> None:
- if should_be_there_but_isnt:
- fail("{} {} must contain {{{}}} exactly once!".format(
- self.__class__.__name__, attr, name))
- else:
- fail("{} {} must not contain {{{}}}!".format(
- self.__class__.__name__, attr, name))
-
- for name, count in fcf.counts.items():
- if name in fields:
- if count > 1:
- local_fail(True)
- else:
- local_fail(False)
- for name in fields:
- if fcf.counts.get(name) != 1:
- local_fail(True)
-
- assert_only_one('start_line')
- assert_only_one('stop_line')
-
- field = "arguments" if "{arguments}" in self.checksum_line else "checksum"
- assert_only_one('checksum_line', field)
-
-
-
-class PythonLanguage(Language):
-
- language = 'Python'
- start_line = "#/*[{dsl_name} input]"
- body_prefix = "#"
- stop_line = "#[{dsl_name} start generated code]*/"
- checksum_line = "#/*[{dsl_name} end generated code: {arguments}]*/"
-
-
ParamTuple = tuple["Parameter", ...]
@@ -1646,250 +1556,6 @@ class CLanguage(Language):
return clinic.get_destination('block').dump()
-@dc.dataclass(slots=True, repr=False)
-class Block:
- r"""
- Represents a single block of text embedded in
- another file. If dsl_name is None, the block represents
- verbatim text, raw original text from the file, in
- which case "input" will be the only non-false member.
- If dsl_name is not None, the block represents a Clinic
- block.
-
- input is always str, with embedded \n characters.
- input represents the original text from the file;
- if it's a Clinic block, it is the original text with
- the body_prefix and redundant leading whitespace removed.
-
- dsl_name is either str or None. If str, it's the text
- found on the start line of the block between the square
- brackets.
-
- signatures is a list.
- It may only contain clinic.Module, clinic.Class, and
- clinic.Function objects. At the moment it should
- contain at most one of each.
-
- output is either str or None. If str, it's the output
- from this block, with embedded '\n' characters.
-
- indent is a str. It's the leading whitespace
- that was found on every line of input. (If body_prefix is
- not empty, this is the indent *after* removing the
- body_prefix.)
-
- "indent" is different from the concept of "preindent"
- (which is not stored as state on Block objects).
- "preindent" is the whitespace that
- was found in front of every line of input *before* the
- "body_prefix" (see the Language object). If body_prefix
- is empty, preindent must always be empty too.
-
- To illustrate the difference between "indent" and "preindent":
-
- Assume that '_' represents whitespace.
- If the block processed was in a Python file, and looked like this:
- ____#/*[python]
- ____#__for a in range(20):
- ____#____print(a)
- ____#[python]*/
- "preindent" would be "____" and "indent" would be "__".
-
- """
- input: str
- dsl_name: str | None = None
- signatures: list[Module | Class | Function] = dc.field(default_factory=list)
- output: Any = None # TODO: Very dynamic; probably untypeable in its current form?
- indent: str = ''
-
- def __repr__(self) -> str:
- dsl_name = self.dsl_name or "text"
- def summarize(s: object) -> str:
- s = repr(s)
- if len(s) > 30:
- return s[:26] + "..." + s[0]
- return s
- parts = (
- repr(dsl_name),
- f"input={summarize(self.input)}",
- f"output={summarize(self.output)}"
- )
- return f"<clinic.Block {' '.join(parts)}>"
-
-
-class BlockParser:
- """
- Block-oriented parser for Argument Clinic.
- Iterator, yields Block objects.
- """
-
- def __init__(
- self,
- input: str,
- language: Language,
- *,
- verify: bool = True
- ) -> None:
- """
- "input" should be a str object
- with embedded \n characters.
-
- "language" should be a Language object.
- """
- language.validate()
-
- self.input = collections.deque(reversed(input.splitlines(keepends=True)))
- self.block_start_line_number = self.line_number = 0
-
- self.language = language
- before, _, after = language.start_line.partition('{dsl_name}')
- assert _ == '{dsl_name}'
- self.find_start_re = libclinic.create_regex(before, after,
- whole_line=False)
- self.start_re = libclinic.create_regex(before, after)
- self.verify = verify
- self.last_checksum_re: re.Pattern[str] | None = None
- self.last_dsl_name: str | None = None
- self.dsl_name: str | None = None
- self.first_block = True
-
- def __iter__(self) -> BlockParser:
- return self
-
- def __next__(self) -> Block:
- while True:
- if not self.input:
- raise StopIteration
-
- if self.dsl_name:
- try:
- return_value = self.parse_clinic_block(self.dsl_name)
- except ClinicError as exc:
- exc.filename = self.language.filename
- exc.lineno = self.line_number
- raise
- self.dsl_name = None
- self.first_block = False
- return return_value
- block = self.parse_verbatim_block()
- if self.first_block and not block.input:
- continue
- self.first_block = False
- return block
-
-
- def is_start_line(self, line: str) -> str | None:
- match = self.start_re.match(line.lstrip())
- return match.group(1) if match else None
-
- def _line(self, lookahead: bool = False) -> str:
- self.line_number += 1
- line = self.input.pop()
- if not lookahead:
- self.language.parse_line(line)
- return line
-
- def parse_verbatim_block(self) -> Block:
- lines = []
- self.block_start_line_number = self.line_number
-
- while self.input:
- line = self._line()
- dsl_name = self.is_start_line(line)
- if dsl_name:
- self.dsl_name = dsl_name
- break
- lines.append(line)
-
- return Block("".join(lines))
-
- def parse_clinic_block(self, dsl_name: str) -> Block:
- in_lines = []
- self.block_start_line_number = self.line_number + 1
- stop_line = self.language.stop_line.format(dsl_name=dsl_name)
- body_prefix = self.language.body_prefix.format(dsl_name=dsl_name)
-
- def is_stop_line(line: str) -> bool:
- # make sure to recognize stop line even if it
- # doesn't end with EOL (it could be the very end of the file)
- if line.startswith(stop_line):
- remainder = line.removeprefix(stop_line)
- if remainder and not remainder.isspace():
- fail(f"Garbage after stop line: {remainder!r}")
- return True
- else:
- # gh-92256: don't allow incorrectly formatted stop lines
- if line.lstrip().startswith(stop_line):
- fail(f"Whitespace is not allowed before the stop line: {line!r}")
- return False
-
- # consume body of program
- while self.input:
- line = self._line()
- if is_stop_line(line) or self.is_start_line(line):
- break
- if body_prefix:
- line = line.lstrip()
- assert line.startswith(body_prefix)
- line = line.removeprefix(body_prefix)
- in_lines.append(line)
-
- # consume output and checksum line, if present.
- if self.last_dsl_name == dsl_name:
- checksum_re = self.last_checksum_re
- else:
- before, _, after = self.language.checksum_line.format(dsl_name=dsl_name, arguments='{arguments}').partition('{arguments}')
- assert _ == '{arguments}'
- checksum_re = libclinic.create_regex(before, after, word=False)
- self.last_dsl_name = dsl_name
- self.last_checksum_re = checksum_re
- assert checksum_re is not None
-
- # scan forward for checksum line
- out_lines = []
- arguments = None
- while self.input:
- line = self._line(lookahead=True)
- match = checksum_re.match(line.lstrip())
- arguments = match.group(1) if match else None
- if arguments:
- break
- out_lines.append(line)
- if self.is_start_line(line):
- break
-
- output: str | None
- output = "".join(out_lines)
- if arguments:
- d = {}
- for field in shlex.split(arguments):
- name, equals, value = field.partition('=')
- if not equals:
- fail(f"Mangled Argument Clinic marker line: {line!r}")
- d[name.strip()] = value.strip()
-
- if self.verify:
- if 'input' in d:
- checksum = d['output']
- else:
- checksum = d['checksum']
-
- computed = libclinic.compute_checksum(output, len(checksum))
- if checksum != computed:
- fail("Checksum mismatch! "
- f"Expected {checksum!r}, computed {computed!r}. "
- "Suggested fix: remove all generated code including "
- "the end marker, or use the '-f' option.")
- else:
- # put back output
- output_lines = output.splitlines(keepends=True)
- self.line_number -= len(output_lines)
- self.input.extend(reversed(output_lines))
- output = None
-
- return Block("".join(in_lines), dsl_name, output=output)
-
-
@dc.dataclass(slots=True, frozen=True)
class Include:
"""
diff --git a/Tools/clinic/libclinic/block_parser.py b/Tools/clinic/libclinic/block_parser.py
new file mode 100644
index 0000000..4c0198b
--- /dev/null
+++ b/Tools/clinic/libclinic/block_parser.py
@@ -0,0 +1,256 @@
+from __future__ import annotations
+import collections
+import dataclasses as dc
+import re
+import shlex
+from typing import Any
+
+import libclinic
+from libclinic import fail, ClinicError
+from libclinic.language import Language
+from libclinic.function import (
+ Module, Class, Function)
+
+
+@dc.dataclass(slots=True, repr=False)
+class Block:
+ r"""
+ Represents a single block of text embedded in
+ another file. If dsl_name is None, the block represents
+ verbatim text, raw original text from the file, in
+ which case "input" will be the only non-false member.
+ If dsl_name is not None, the block represents a Clinic
+ block.
+
+ input is always str, with embedded \n characters.
+ input represents the original text from the file;
+ if it's a Clinic block, it is the original text with
+ the body_prefix and redundant leading whitespace removed.
+
+ dsl_name is either str or None. If str, it's the text
+ found on the start line of the block between the square
+ brackets.
+
+ signatures is a list.
+ It may only contain clinic.Module, clinic.Class, and
+ clinic.Function objects. At the moment it should
+ contain at most one of each.
+
+ output is either str or None. If str, it's the output
+ from this block, with embedded '\n' characters.
+
+ indent is a str. It's the leading whitespace
+ that was found on every line of input. (If body_prefix is
+ not empty, this is the indent *after* removing the
+ body_prefix.)
+
+ "indent" is different from the concept of "preindent"
+ (which is not stored as state on Block objects).
+ "preindent" is the whitespace that
+ was found in front of every line of input *before* the
+ "body_prefix" (see the Language object). If body_prefix
+ is empty, preindent must always be empty too.
+
+ To illustrate the difference between "indent" and "preindent":
+
+ Assume that '_' represents whitespace.
+ If the block processed was in a Python file, and looked like this:
+ ____#/*[python]
+ ____#__for a in range(20):
+ ____#____print(a)
+ ____#[python]*/
+ "preindent" would be "____" and "indent" would be "__".
+
+ """
+ input: str
+ dsl_name: str | None = None
+ signatures: list[Module | Class | Function] = dc.field(default_factory=list)
+ output: Any = None # TODO: Very dynamic; probably untypeable in its current form?
+ indent: str = ''
+
+ def __repr__(self) -> str:
+ dsl_name = self.dsl_name or "text"
+ def summarize(s: object) -> str:
+ s = repr(s)
+ if len(s) > 30:
+ return s[:26] + "..." + s[0]
+ return s
+ parts = (
+ repr(dsl_name),
+ f"input={summarize(self.input)}",
+ f"output={summarize(self.output)}"
+ )
+ return f"<clinic.Block {' '.join(parts)}>"
+
+
+class BlockParser:
+ """
+ Block-oriented parser for Argument Clinic.
+ Iterator, yields Block objects.
+ """
+
+ def __init__(
+ self,
+ input: str,
+ language: Language,
+ *,
+ verify: bool = True
+ ) -> None:
+ """
+ "input" should be a str object
+ with embedded \n characters.
+
+ "language" should be a Language object.
+ """
+ language.validate()
+
+ self.input = collections.deque(reversed(input.splitlines(keepends=True)))
+ self.block_start_line_number = self.line_number = 0
+
+ self.language = language
+ before, _, after = language.start_line.partition('{dsl_name}')
+ assert _ == '{dsl_name}'
+ self.find_start_re = libclinic.create_regex(before, after,
+ whole_line=False)
+ self.start_re = libclinic.create_regex(before, after)
+ self.verify = verify
+ self.last_checksum_re: re.Pattern[str] | None = None
+ self.last_dsl_name: str | None = None
+ self.dsl_name: str | None = None
+ self.first_block = True
+
+ def __iter__(self) -> BlockParser:
+ return self
+
+ def __next__(self) -> Block:
+ while True:
+ if not self.input:
+ raise StopIteration
+
+ if self.dsl_name:
+ try:
+ return_value = self.parse_clinic_block(self.dsl_name)
+ except ClinicError as exc:
+ exc.filename = self.language.filename
+ exc.lineno = self.line_number
+ raise
+ self.dsl_name = None
+ self.first_block = False
+ return return_value
+ block = self.parse_verbatim_block()
+ if self.first_block and not block.input:
+ continue
+ self.first_block = False
+ return block
+
+
+ def is_start_line(self, line: str) -> str | None:
+ match = self.start_re.match(line.lstrip())
+ return match.group(1) if match else None
+
+ def _line(self, lookahead: bool = False) -> str:
+ self.line_number += 1
+ line = self.input.pop()
+ if not lookahead:
+ self.language.parse_line(line)
+ return line
+
+ def parse_verbatim_block(self) -> Block:
+ lines = []
+ self.block_start_line_number = self.line_number
+
+ while self.input:
+ line = self._line()
+ dsl_name = self.is_start_line(line)
+ if dsl_name:
+ self.dsl_name = dsl_name
+ break
+ lines.append(line)
+
+ return Block("".join(lines))
+
+ def parse_clinic_block(self, dsl_name: str) -> Block:
+ in_lines = []
+ self.block_start_line_number = self.line_number + 1
+ stop_line = self.language.stop_line.format(dsl_name=dsl_name)
+ body_prefix = self.language.body_prefix.format(dsl_name=dsl_name)
+
+ def is_stop_line(line: str) -> bool:
+ # make sure to recognize stop line even if it
+ # doesn't end with EOL (it could be the very end of the file)
+ if line.startswith(stop_line):
+ remainder = line.removeprefix(stop_line)
+ if remainder and not remainder.isspace():
+ fail(f"Garbage after stop line: {remainder!r}")
+ return True
+ else:
+ # gh-92256: don't allow incorrectly formatted stop lines
+ if line.lstrip().startswith(stop_line):
+ fail(f"Whitespace is not allowed before the stop line: {line!r}")
+ return False
+
+ # consume body of program
+ while self.input:
+ line = self._line()
+ if is_stop_line(line) or self.is_start_line(line):
+ break
+ if body_prefix:
+ line = line.lstrip()
+ assert line.startswith(body_prefix)
+ line = line.removeprefix(body_prefix)
+ in_lines.append(line)
+
+ # consume output and checksum line, if present.
+ if self.last_dsl_name == dsl_name:
+ checksum_re = self.last_checksum_re
+ else:
+ before, _, after = self.language.checksum_line.format(dsl_name=dsl_name, arguments='{arguments}').partition('{arguments}')
+ assert _ == '{arguments}'
+ checksum_re = libclinic.create_regex(before, after, word=False)
+ self.last_dsl_name = dsl_name
+ self.last_checksum_re = checksum_re
+ assert checksum_re is not None
+
+ # scan forward for checksum line
+ out_lines = []
+ arguments = None
+ while self.input:
+ line = self._line(lookahead=True)
+ match = checksum_re.match(line.lstrip())
+ arguments = match.group(1) if match else None
+ if arguments:
+ break
+ out_lines.append(line)
+ if self.is_start_line(line):
+ break
+
+ output: str | None
+ output = "".join(out_lines)
+ if arguments:
+ d = {}
+ for field in shlex.split(arguments):
+ name, equals, value = field.partition('=')
+ if not equals:
+ fail(f"Mangled Argument Clinic marker line: {line!r}")
+ d[name.strip()] = value.strip()
+
+ if self.verify:
+ if 'input' in d:
+ checksum = d['output']
+ else:
+ checksum = d['checksum']
+
+ computed = libclinic.compute_checksum(output, len(checksum))
+ if checksum != computed:
+ fail("Checksum mismatch! "
+ f"Expected {checksum!r}, computed {computed!r}. "
+ "Suggested fix: remove all generated code including "
+ "the end marker, or use the '-f' option.")
+ else:
+ # put back output
+ output_lines = output.splitlines(keepends=True)
+ self.line_number -= len(output_lines)
+ self.input.extend(reversed(output_lines))
+ output = None
+
+ return Block("".join(in_lines), dsl_name, output=output)
diff --git a/Tools/clinic/libclinic/language.py b/Tools/clinic/libclinic/language.py
new file mode 100644
index 0000000..a90a9bb
--- /dev/null
+++ b/Tools/clinic/libclinic/language.py
@@ -0,0 +1,103 @@
+from __future__ import annotations
+import abc
+import typing
+from collections.abc import (
+ Iterable,
+)
+
+import libclinic
+from libclinic import fail
+from libclinic.function import (
+ Module, Class, Function)
+
+if typing.TYPE_CHECKING:
+ from clinic import Clinic
+
+
+class Language(metaclass=abc.ABCMeta):
+
+ start_line = ""
+ body_prefix = ""
+ stop_line = ""
+ checksum_line = ""
+
+ def __init__(self, filename: str) -> None:
+ self.filename = filename
+
+ @abc.abstractmethod
+ def render(
+ self,
+ clinic: Clinic,
+ signatures: Iterable[Module | Class | Function]
+ ) -> str:
+ ...
+
+ def parse_line(self, line: str) -> None:
+ ...
+
+ def validate(self) -> None:
+ def assert_only_one(
+ attr: str,
+ *additional_fields: str
+ ) -> None:
+ """
+ Ensures that the string found at getattr(self, attr)
+ contains exactly one formatter replacement string for
+ each valid field. The list of valid fields is
+ ['dsl_name'] extended by additional_fields.
+
+ e.g.
+ self.fmt = "{dsl_name} {a} {b}"
+
+ # this passes
+ self.assert_only_one('fmt', 'a', 'b')
+
+ # this fails, the format string has a {b} in it
+ self.assert_only_one('fmt', 'a')
+
+ # this fails, the format string doesn't have a {c} in it
+ self.assert_only_one('fmt', 'a', 'b', 'c')
+
+ # this fails, the format string has two {a}s in it,
+ # it must contain exactly one
+ self.fmt2 = '{dsl_name} {a} {a}'
+ self.assert_only_one('fmt2', 'a')
+
+ """
+ fields = ['dsl_name']
+ fields.extend(additional_fields)
+ line: str = getattr(self, attr)
+ fcf = libclinic.FormatCounterFormatter()
+ fcf.format(line)
+ def local_fail(should_be_there_but_isnt: bool) -> None:
+ if should_be_there_but_isnt:
+ fail("{} {} must contain {{{}}} exactly once!".format(
+ self.__class__.__name__, attr, name))
+ else:
+ fail("{} {} must not contain {{{}}}!".format(
+ self.__class__.__name__, attr, name))
+
+ for name, count in fcf.counts.items():
+ if name in fields:
+ if count > 1:
+ local_fail(True)
+ else:
+ local_fail(False)
+ for name in fields:
+ if fcf.counts.get(name) != 1:
+ local_fail(True)
+
+ assert_only_one('start_line')
+ assert_only_one('stop_line')
+
+ field = "arguments" if "{arguments}" in self.checksum_line else "checksum"
+ assert_only_one('checksum_line', field)
+
+
+class PythonLanguage(Language):
+
+ language = 'Python'
+ start_line = "#/*[{dsl_name} input]"
+ body_prefix = "#"
+ stop_line = "#[{dsl_name} start generated code]*/"
+ checksum_line = "#/*[{dsl_name} end generated code: {arguments}]*/"