diff options
author | Pablo Galindo Salgado <Pablogsal@gmail.com> | 2021-09-05 13:58:52 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-09-05 13:58:52 (GMT) |
commit | b01fd533fef78b088674bad73267b89bea98e904 (patch) | |
tree | de68ee97cc624f412d3e357db0d45eb98227207d /Tools | |
parent | 28264269de9ff88d9ee7110fc56ac2d2db275bec (diff) | |
download | cpython-b01fd533fef78b088674bad73267b89bea98e904.zip cpython-b01fd533fef78b088674bad73267b89bea98e904.tar.gz cpython-b01fd533fef78b088674bad73267b89bea98e904.tar.bz2 |
Extract visitors from the grammar nodes and call makers in the peg generator (GH-28172)
Simplify the peg generator logic by extracting as much visitors as possible to disentangle the flow and separate concerns.
Diffstat (limited to 'Tools')
-rwxr-xr-x | Tools/peg_generator/pegen/__main__.py | 3 | ||||
-rw-r--r-- | Tools/peg_generator/pegen/build.py | 11 | ||||
-rw-r--r-- | Tools/peg_generator/pegen/c_generator.py | 55 | ||||
-rwxr-xr-x | Tools/peg_generator/pegen/first_sets.py | 12 | ||||
-rw-r--r-- | Tools/peg_generator/pegen/grammar.py | 141 | ||||
-rw-r--r-- | Tools/peg_generator/pegen/grammar_visualizer.py | 3 | ||||
-rw-r--r-- | Tools/peg_generator/pegen/keywordgen.py | 6 | ||||
-rw-r--r-- | Tools/peg_generator/pegen/parser_generator.py | 208 | ||||
-rw-r--r-- | Tools/peg_generator/pegen/python_generator.py | 33 | ||||
-rw-r--r-- | Tools/peg_generator/pegen/testutil.py | 5 | ||||
-rw-r--r-- | Tools/peg_generator/pegen/validator.py | 7 |
11 files changed, 244 insertions, 240 deletions
diff --git a/Tools/peg_generator/pegen/__main__.py b/Tools/peg_generator/pegen/__main__.py index a12fe78..2910d6c 100755 --- a/Tools/peg_generator/pegen/__main__.py +++ b/Tools/peg_generator/pegen/__main__.py @@ -10,10 +10,9 @@ import sys import time import token import traceback - from typing import Tuple -from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator +from pegen.build import Grammar, Parser, ParserGenerator, Tokenizer from pegen.validator import validate_grammar diff --git a/Tools/peg_generator/pegen/build.py b/Tools/peg_generator/pegen/build.py index 6f0a091..bf01078 100644 --- a/Tools/peg_generator/pegen/build.py +++ b/Tools/peg_generator/pegen/build.py @@ -1,11 +1,10 @@ +import itertools import pathlib import shutil -import tokenize import sysconfig import tempfile -import itertools - -from typing import Optional, Tuple, List, IO, Set, Dict +import tokenize +from typing import IO, Dict, List, Optional, Set, Tuple from pegen.c_generator import CParserGenerator from pegen.grammar import Grammar @@ -45,9 +44,9 @@ def compile_c_extension( of distutils (this is useful in case you want to use a temporary directory). """ import distutils.log - from distutils.core import Distribution, Extension - from distutils.command.clean import clean # type: ignore from distutils.command.build_ext import build_ext # type: ignore + from distutils.command.clean import clean # type: ignore + from distutils.core import Distribution, Extension from distutils.tests.support import fixup_build_ext # type: ignore if verbose: diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py index e928fd3..d15e910 100644 --- a/Tools/peg_generator/pegen/c_generator.py +++ b/Tools/peg_generator/pegen/c_generator.py @@ -1,8 +1,8 @@ import ast -from dataclasses import field, dataclass import re -from typing import Any, Dict, IO, Optional, List, Text, Tuple, Set +from dataclasses import dataclass, field from enum import Enum +from typing import IO, Any, Dict, List, Optional, Set, Text, Tuple from pegen import grammar from pegen.grammar import ( @@ -27,7 +27,6 @@ from pegen.grammar import ( ) from pegen.parser_generator import ParserGenerator - EXTENSION_PREFIX = """\ #include "pegen.h" @@ -120,23 +119,18 @@ class CCallMakerVisitor(GrammarVisitor): self.exact_tokens = exact_tokens self.non_exact_tokens = non_exact_tokens self.cache: Dict[Any, FunctionCall] = {} - self.keyword_cache: Dict[str, int] = {} - self.soft_keywords: Set[str] = set() def keyword_helper(self, keyword: str) -> FunctionCall: - if keyword not in self.keyword_cache: - self.keyword_cache[keyword] = self.gen.keyword_type() return FunctionCall( assigned_variable="_keyword", function="_PyPegen_expect_token", - arguments=["p", self.keyword_cache[keyword]], + arguments=["p", self.gen.keywords[keyword]], return_type="Token *", nodetype=NodeTypes.KEYWORD, comment=f"token='{keyword}'", ) def soft_keyword_helper(self, value: str) -> FunctionCall: - self.soft_keywords.add(value.replace('"', "")) return FunctionCall( assigned_variable="_keyword", function="_PyPegen_expect_soft_keyword", @@ -200,20 +194,12 @@ class CCallMakerVisitor(GrammarVisitor): ) def visit_Rhs(self, node: Rhs) -> FunctionCall: - def can_we_inline(node: Rhs) -> int: - if len(node.alts) != 1 or len(node.alts[0].items) != 1: - return False - # If the alternative has an action we cannot inline - if getattr(node.alts[0], "action", None) is not None: - return False - return True - if node in self.cache: return self.cache[node] - if can_we_inline(node): + if node.can_be_inlined: self.cache[node] = self.generate_call(node.alts[0].items[0]) else: - name = self.gen.name_node(node) + name = self.gen.artifical_rule_from_rhs(node) self.cache[node] = FunctionCall( assigned_variable=f"{name}_var", function=f"{name}_rule", @@ -306,7 +292,7 @@ class CCallMakerVisitor(GrammarVisitor): def visit_Repeat0(self, node: Repeat0) -> FunctionCall: if node in self.cache: return self.cache[node] - name = self.gen.name_loop(node.node, False) + name = self.gen.artificial_rule_from_repeat(node.node, False) self.cache[node] = FunctionCall( assigned_variable=f"{name}_var", function=f"{name}_rule", @@ -319,7 +305,7 @@ class CCallMakerVisitor(GrammarVisitor): def visit_Repeat1(self, node: Repeat1) -> FunctionCall: if node in self.cache: return self.cache[node] - name = self.gen.name_loop(node.node, True) + name = self.gen.artificial_rule_from_repeat(node.node, True) self.cache[node] = FunctionCall( assigned_variable=f"{name}_var", function=f"{name}_rule", @@ -332,7 +318,7 @@ class CCallMakerVisitor(GrammarVisitor): def visit_Gather(self, node: Gather) -> FunctionCall: if node in self.cache: return self.cache[node] - name = self.gen.name_gather(node) + name = self.gen.artifical_rule_from_gather(node) self.cache[node] = FunctionCall( assigned_variable=f"{name}_var", function=f"{name}_rule", @@ -429,7 +415,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): self.print(f"}}") def generate(self, filename: str) -> None: - self.collect_todo() + self.collect_rules() self.print(f"// @generated by pegen from {filename}") header = self.grammar.metas.get("header", EXTENSION_PREFIX) if header: @@ -439,11 +425,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): self.print(subheader) self._setup_keywords() self._setup_soft_keywords() - for i, (rulename, rule) in enumerate(self.todo.items(), 1000): + for i, (rulename, rule) in enumerate(self.all_rules.items(), 1000): comment = " // Left-recursive" if rule.left_recursive else "" self.print(f"#define {rulename}_type {i}{comment}") self.print() - for rulename, rule in self.todo.items(): + for rulename, rule in self.all_rules.items(): if rule.is_loop() or rule.is_gather(): type = "asdl_seq *" elif rule.type: @@ -452,13 +438,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): type = "void *" self.print(f"static {type}{rulename}_rule(Parser *p);") self.print() - while self.todo: - for rulename, rule in list(self.todo.items()): - del self.todo[rulename] - self.print() - if rule.left_recursive: - self.print("// Left-recursive") - self.visit(rule) + for rulename, rule in list(self.all_rules.items()): + self.print() + if rule.left_recursive: + self.print("// Left-recursive") + self.visit(rule) if self.skip_actions: mode = 0 else: @@ -472,7 +456,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]: groups: Dict[int, List[Tuple[str, int]]] = {} - for keyword_str, keyword_type in self.callmakervisitor.keyword_cache.items(): + for keyword_str, keyword_type in self.keywords.items(): length = len(keyword_str) if length in groups: groups[length].append((keyword_str, keyword_type)) @@ -481,9 +465,8 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): return groups def _setup_keywords(self) -> None: - keyword_cache = self.callmakervisitor.keyword_cache n_keyword_lists = ( - len(max(keyword_cache.keys(), key=len)) + 1 if len(keyword_cache) > 0 else 0 + len(max(self.keywords.keys(), key=len)) + 1 if len(self.keywords) > 0 else 0 ) self.print(f"static const int n_keyword_lists = {n_keyword_lists};") groups = self._group_keywords_by_length() @@ -503,7 +486,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): self.print("};") def _setup_soft_keywords(self) -> None: - soft_keywords = sorted(self.callmakervisitor.soft_keywords) + soft_keywords = sorted(self.soft_keywords) self.print("static char *soft_keywords[] = {") with self.indent(): for keyword in soft_keywords: diff --git a/Tools/peg_generator/pegen/first_sets.py b/Tools/peg_generator/pegen/first_sets.py index 50ced22..611ef51 100755 --- a/Tools/peg_generator/pegen/first_sets.py +++ b/Tools/peg_generator/pegen/first_sets.py @@ -3,30 +3,27 @@ import argparse import pprint import sys -from typing import Set, Dict +from typing import Dict, Set from pegen.build import build_parser from pegen.grammar import ( Alt, Cut, Gather, - Grammar, GrammarVisitor, Group, - Leaf, Lookahead, NamedItem, NameLeaf, NegativeLookahead, Opt, - Repeat, Repeat0, Repeat1, Rhs, Rule, StringLeaf, - PositiveLookahead, ) +from pegen.parser_generator import compute_nullables argparser = argparse.ArgumentParser( prog="calculate_first_sets", @@ -38,8 +35,7 @@ argparser.add_argument("grammar_file", help="The grammar file") class FirstSetCalculator(GrammarVisitor): def __init__(self, rules: Dict[str, Rule]) -> None: self.rules = rules - for rule in rules.values(): - rule.nullable_visit(rules) + self.nullables = compute_nullables(rules) self.first_sets: Dict[str, Set[str]] = dict() self.in_process: Set[str] = set() @@ -129,7 +125,7 @@ class FirstSetCalculator(GrammarVisitor): elif item.name not in self.first_sets: self.in_process.add(item.name) terminals = self.visit(item.rhs) - if item.nullable: + if item in self.nullables: terminals.add("") self.first_sets[item.name] = terminals self.in_process.remove(item.name) diff --git a/Tools/peg_generator/pegen/grammar.py b/Tools/peg_generator/pegen/grammar.py index 66fd5b3..fa47b98 100644 --- a/Tools/peg_generator/pegen/grammar.py +++ b/Tools/peg_generator/pegen/grammar.py @@ -2,6 +2,7 @@ from __future__ import annotations from abc import abstractmethod from typing import ( + TYPE_CHECKING, AbstractSet, Any, Dict, @@ -11,11 +12,9 @@ from typing import ( Optional, Set, Tuple, - TYPE_CHECKING, Union, ) - if TYPE_CHECKING: from pegen.parser_generator import ParserGenerator @@ -31,7 +30,7 @@ class GrammarVisitor: visitor = getattr(self, method, self.generic_visit) return visitor(node, *args, **kwargs) - def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> None: + def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Any: """Called if no explicit visitor function exists for a node.""" for value in node: if isinstance(value, list): @@ -73,8 +72,6 @@ class Rule: self.type = type self.rhs = rhs self.memo = bool(memo) - self.visited = False - self.nullable = False self.left_recursive = False self.leader = False @@ -101,17 +98,6 @@ class Rule: def __iter__(self) -> Iterator[Rhs]: yield self.rhs - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - if self.visited: - # A left-recursive rule is considered non-nullable. - return False - self.visited = True - self.nullable = self.rhs.nullable_visit(rules) - return self.nullable - - def initial_names(self) -> AbstractSet[str]: - return self.rhs.initial_names() - def flatten(self) -> Rhs: # If it's a single parenthesized group, flatten it. rhs = self.rhs @@ -124,10 +110,6 @@ class Rule: rhs = rhs.alts[0].items[0].item.rhs return rhs - def collect_todo(self, gen: ParserGenerator) -> None: - rhs = self.flatten() - rhs.collect_todo(gen) - class Leaf: def __init__(self, value: str): @@ -140,14 +122,6 @@ class Leaf: if False: yield - @abstractmethod - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - raise NotImplementedError - - @abstractmethod - def initial_names(self) -> AbstractSet[str]: - raise NotImplementedError - class NameLeaf(Leaf): """The value is the name.""" @@ -160,15 +134,6 @@ class NameLeaf(Leaf): def __repr__(self) -> str: return f"NameLeaf({self.value!r})" - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - if self.value in rules: - return rules[self.value].nullable_visit(rules) - # Token or unknown; never empty. - return False - - def initial_names(self) -> AbstractSet[str]: - return {self.value} - class StringLeaf(Leaf): """The value is a string literal, including quotes.""" @@ -176,13 +141,6 @@ class StringLeaf(Leaf): def __repr__(self) -> str: return f"StringLeaf({self.value!r})" - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - # The string token '' is considered empty. - return not self.value - - def initial_names(self) -> AbstractSet[str]: - return set() - class Rhs: def __init__(self, alts: List[Alt]): @@ -198,21 +156,14 @@ class Rhs: def __iter__(self) -> Iterator[List[Alt]]: yield self.alts - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - for alt in self.alts: - if alt.nullable_visit(rules): - return True - return False - - def initial_names(self) -> AbstractSet[str]: - names: Set[str] = set() - for alt in self.alts: - names |= alt.initial_names() - return names - - def collect_todo(self, gen: ParserGenerator) -> None: - for alt in self.alts: - alt.collect_todo(gen) + @property + def can_be_inlined(self) -> bool: + if len(self.alts) != 1 or len(self.alts[0].items) != 1: + return False + # If the alternative has an action we cannot inline + if getattr(self.alts[0], "action", None) is not None: + return False + return True class Alt: @@ -239,31 +190,12 @@ class Alt: def __iter__(self) -> Iterator[List[NamedItem]]: yield self.items - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - for item in self.items: - if not item.nullable_visit(rules): - return False - return True - - def initial_names(self) -> AbstractSet[str]: - names: Set[str] = set() - for item in self.items: - names |= item.initial_names() - if not item.nullable: - break - return names - - def collect_todo(self, gen: ParserGenerator) -> None: - for item in self.items: - item.collect_todo(gen) - class NamedItem: def __init__(self, name: Optional[str], item: Item, type: Optional[str] = None): self.name = name self.item = item self.type = type - self.nullable = False def __str__(self) -> str: if not SIMPLE_STR and self.name: @@ -277,16 +209,6 @@ class NamedItem: def __iter__(self) -> Iterator[Item]: yield self.item - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - self.nullable = self.item.nullable_visit(rules) - return self.nullable - - def initial_names(self) -> AbstractSet[str]: - return self.item.initial_names() - - def collect_todo(self, gen: ParserGenerator) -> None: - gen.callmakervisitor.visit(self.item) - class Forced: def __init__(self, node: Plain): @@ -298,12 +220,6 @@ class Forced: def __iter__(self) -> Iterator[Plain]: yield self.node - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - return True - - def initial_names(self) -> AbstractSet[str]: - return set() - class Lookahead: def __init__(self, node: Plain, sign: str): @@ -316,12 +232,6 @@ class Lookahead: def __iter__(self) -> Iterator[Plain]: yield self.node - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - return True - - def initial_names(self) -> AbstractSet[str]: - return set() - class PositiveLookahead(Lookahead): def __init__(self, node: Plain): @@ -357,12 +267,6 @@ class Opt: def __iter__(self) -> Iterator[Item]: yield self.node - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - return True - - def initial_names(self) -> AbstractSet[str]: - return self.node.initial_names() - class Repeat: """Shared base class for x* and x+.""" @@ -371,16 +275,9 @@ class Repeat: self.node = node self.memo: Optional[Tuple[Optional[str], str]] = None - @abstractmethod - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - raise NotImplementedError - def __iter__(self) -> Iterator[Plain]: yield self.node - def initial_names(self) -> AbstractSet[str]: - return self.node.initial_names() - class Repeat0(Repeat): def __str__(self) -> str: @@ -394,9 +291,6 @@ class Repeat0(Repeat): def __repr__(self) -> str: return f"Repeat0({self.node!r})" - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - return True - class Repeat1(Repeat): def __str__(self) -> str: @@ -410,9 +304,6 @@ class Repeat1(Repeat): def __repr__(self) -> str: return f"Repeat1({self.node!r})" - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - return False - class Gather(Repeat): def __init__(self, separator: Plain, node: Plain): @@ -425,9 +316,6 @@ class Gather(Repeat): def __repr__(self) -> str: return f"Gather({self.separator!r}, {self.node!r})" - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - return False - class Group: def __init__(self, rhs: Rhs): @@ -442,12 +330,6 @@ class Group: def __iter__(self) -> Iterator[Rhs]: yield self.rhs - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - return self.rhs.nullable_visit(rules) - - def initial_names(self) -> AbstractSet[str]: - return self.rhs.initial_names() - class Cut: def __init__(self) -> None: @@ -468,9 +350,6 @@ class Cut: return NotImplemented return True - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - return True - def initial_names(self) -> AbstractSet[str]: return set() diff --git a/Tools/peg_generator/pegen/grammar_visualizer.py b/Tools/peg_generator/pegen/grammar_visualizer.py index 7362ec5..ab5c636 100644 --- a/Tools/peg_generator/pegen/grammar_visualizer.py +++ b/Tools/peg_generator/pegen/grammar_visualizer.py @@ -1,7 +1,6 @@ import argparse import sys - -from typing import Any, Iterator, Callable +from typing import Any, Callable, Iterator from pegen.build import build_parser from pegen.grammar import Grammar, Rule diff --git a/Tools/peg_generator/pegen/keywordgen.py b/Tools/peg_generator/pegen/keywordgen.py index 6a07f6e..35a5e1a 100644 --- a/Tools/peg_generator/pegen/keywordgen.py +++ b/Tools/peg_generator/pegen/keywordgen.py @@ -59,11 +59,11 @@ def main() -> None: with args.tokens_file as tok_file: all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file) gen = CParserGenerator(grammar, all_tokens, exact_tok, non_exact_tok, file=None) - gen.collect_todo() + gen.collect_rules() with args.keyword_file as thefile: - all_keywords = sorted(list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS) - all_soft_keywords = sorted(gen.callmakervisitor.soft_keywords) + all_keywords = sorted(list(gen.keywords.keys()) + EXTRA_KEYWORDS) + all_soft_keywords = sorted(gen.soft_keywords) keywords = "" if not all_keywords else " " + ",\n ".join(map(repr, all_keywords)) soft_keywords = ( diff --git a/Tools/peg_generator/pegen/parser_generator.py b/Tools/peg_generator/pegen/parser_generator.py index 33ecee1..f2105d8 100644 --- a/Tools/peg_generator/pegen/parser_generator.py +++ b/Tools/peg_generator/pegen/parser_generator.py @@ -1,22 +1,76 @@ +import ast import contextlib +import re from abc import abstractmethod -from typing import IO, AbstractSet, Dict, Iterator, List, Optional, Set, Text, Tuple +from typing import ( + IO, + AbstractSet, + Any, + Dict, + Iterable, + Iterator, + List, + Optional, + Set, + Text, + Tuple, + Union, +) from pegen import sccutils from pegen.grammar import ( Alt, + Cut, + Forced, Gather, Grammar, GrammarError, GrammarVisitor, + Group, + Lookahead, NamedItem, NameLeaf, + Opt, Plain, + Repeat0, + Repeat1, Rhs, Rule, + StringLeaf, ) +class RuleCollectorVisitor(GrammarVisitor): + """Visitor that invokes a provieded callmaker visitor with just the NamedItem nodes""" + + def __init__(self, rules: Dict[str, Rule], callmakervisitor: GrammarVisitor) -> None: + self.rulses = rules + self.callmaker = callmakervisitor + + def visit_Rule(self, rule: Rule) -> None: + self.visit(rule.flatten()) + + def visit_NamedItem(self, item: NamedItem) -> None: + self.callmaker.visit(item) + + +class KeywordCollectorVisitor(GrammarVisitor): + """Visitor that collects all the keywods and soft keywords in the Grammar""" + + def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]): + self.generator = gen + self.keywords = keywords + self.soft_keywords = soft_keywords + + def visit_StringLeaf(self, node: StringLeaf) -> None: + val = ast.literal_eval(node.value) + if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword + if node.value.endswith("'") and node.value not in self.keywords: + self.keywords[val] = self.generator.keyword_type() + else: + return self.soft_keywords.add(node.value.replace('"', "")) + + class RuleCheckingVisitor(GrammarVisitor): def __init__(self, rules: Dict[str, Rule], tokens: Set[str]): self.rules = rules @@ -39,6 +93,8 @@ class ParserGenerator: def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]): self.grammar = grammar self.tokens = tokens + self.keywords: Dict[str, int] = {} + self.soft_keywords: Set[str] = set() self.rules = grammar.rules self.validate_rule_names() if "trailer" not in grammar.metas and "start" not in self.rules: @@ -48,12 +104,10 @@ class ParserGenerator: checker.visit(rule) self.file = file self.level = 0 - compute_nullables(self.rules) self.first_graph, self.first_sccs = compute_left_recursives(self.rules) - self.todo = self.rules.copy() # Rules to generate self.counter = 0 # For name_rule()/name_loop() self.keyword_counter = 499 # For keyword_type() - self.all_rules: Dict[str, Rule] = {} # Rules + temporal rules + self.all_rules: Dict[str, Rule] = self.rules.copy() # Rules + temporal rules self._local_variable_stack: List[List[str]] = [] def validate_rule_names(self) -> None: @@ -94,39 +148,43 @@ class ParserGenerator: for line in lines.splitlines(): self.print(line) - def collect_todo(self) -> None: + def collect_rules(self) -> None: + keyword_collector = KeywordCollectorVisitor(self, self.keywords, self.soft_keywords) + for rule in self.all_rules.values(): + keyword_collector.visit(rule) + + rule_collector = RuleCollectorVisitor(self.rules, self.callmakervisitor) done: Set[str] = set() while True: - alltodo = list(self.todo) - self.all_rules.update(self.todo) - todo = [i for i in alltodo if i not in done] + computed_rules = list(self.all_rules) + todo = [i for i in computed_rules if i not in done] if not todo: break + done = set(self.all_rules) for rulename in todo: - self.todo[rulename].collect_todo(self) - done = set(alltodo) + rule_collector.visit(self.all_rules[rulename]) def keyword_type(self) -> int: self.keyword_counter += 1 return self.keyword_counter - def name_node(self, rhs: Rhs) -> str: + def artifical_rule_from_rhs(self, rhs: Rhs) -> str: self.counter += 1 name = f"_tmp_{self.counter}" # TODO: Pick a nicer name. - self.todo[name] = Rule(name, None, rhs) + self.all_rules[name] = Rule(name, None, rhs) return name - def name_loop(self, node: Plain, is_repeat1: bool) -> str: + def artificial_rule_from_repeat(self, node: Plain, is_repeat1: bool) -> str: self.counter += 1 if is_repeat1: prefix = "_loop1_" else: prefix = "_loop0_" - name = f"{prefix}{self.counter}" # TODO: It's ugly to signal via the name. - self.todo[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])])) + name = f"{prefix}{self.counter}" + self.all_rules[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])])) return name - def name_gather(self, node: Gather) -> str: + def artifical_rule_from_gather(self, node: Gather) -> str: self.counter += 1 name = f"_gather_{self.counter}" self.counter += 1 @@ -135,7 +193,7 @@ class ParserGenerator: [NamedItem(None, node.separator), NamedItem("elem", node.node)], action="elem", ) - self.todo[extra_function_name] = Rule( + self.all_rules[extra_function_name] = Rule( extra_function_name, None, Rhs([extra_function_alt]), @@ -143,7 +201,7 @@ class ParserGenerator: alt = Alt( [NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))], ) - self.todo[name] = Rule( + self.all_rules[name] = Rule( name, None, Rhs([alt]), @@ -160,13 +218,120 @@ class ParserGenerator: return name -def compute_nullables(rules: Dict[str, Rule]) -> None: +class NullableVisitor(GrammarVisitor): + def __init__(self, rules: Dict[str, Rule]) -> None: + self.rules = rules + self.visited: Set[Any] = set() + self.nullables: Set[Union[Rule, NamedItem]] = set() + + def visit_Rule(self, rule: Rule) -> bool: + if rule in self.visited: + return False + self.visited.add(rule) + if self.visit(rule.rhs): + self.nullables.add(rule) + return rule in self.nullables + + def visit_Rhs(self, rhs: Rhs) -> bool: + for alt in rhs.alts: + if self.visit(alt): + return True + return False + + def visit_Alt(self, alt: Alt) -> bool: + for item in alt.items: + if not self.visit(item): + return False + return True + + def visit_Forced(self, force: Forced) -> bool: + return True + + def visit_LookAhead(self, lookahead: Lookahead) -> bool: + return True + + def visit_Opt(self, opt: Opt) -> bool: + return True + + def visit_Repeat0(self, repeat: Repeat0) -> bool: + return True + + def visit_Repeat1(self, repeat: Repeat1) -> bool: + return False + + def visit_Gather(self, gather: Gather) -> bool: + return False + + def visit_Cut(self, cut: Cut) -> bool: + return False + + def visit_Group(self, group: Group) -> bool: + return self.visit(group.rhs) + + def visit_NamedItem(self, item: NamedItem) -> bool: + if self.visit(item.item): + self.nullables.add(item) + return item in self.nullables + + def visit_NameLeaf(self, node: NameLeaf) -> bool: + if node.value in self.rules: + return self.visit(self.rules[node.value]) + # Token or unknown; never empty. + return False + + def visit_StringLeaf(self, node: StringLeaf) -> bool: + # The string token '' is considered empty. + return not node.value + + +def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]: """Compute which rules in a grammar are nullable. Thanks to TatSu (tatsu/leftrec.py) for inspiration. """ + nullable_visitor = NullableVisitor(rules) for rule in rules.values(): - rule.nullable_visit(rules) + nullable_visitor.visit(rule) + return nullable_visitor.nullables + + +class InitialNamesVisitor(GrammarVisitor): + def __init__(self, rules: Dict[str, Rule]) -> None: + self.rules = rules + self.nullables = compute_nullables(rules) + + def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]: + names: Set[str] = set() + for value in node: + if isinstance(value, list): + for item in value: + names |= self.visit(item, *args, **kwargs) + else: + names |= self.visit(value, *args, **kwargs) + return names + + def visit_Alt(self, alt: Alt) -> Set[Any]: + names: Set[str] = set() + for item in alt.items: + names |= self.visit(item) + if item not in self.nullables: + break + return names + + def visit_Forced(self, force: Forced) -> Set[Any]: + return set() + + def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]: + return set() + + def visit_Cut(self, cut: Cut) -> Set[Any]: + return set() + + def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]: + return {node.value} + + def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]: + return set() def compute_left_recursives( @@ -207,10 +372,11 @@ def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]: Note that this requires the nullable flags to have been computed. """ + initial_name_visitor = InitialNamesVisitor(rules) graph = {} vertices: Set[str] = set() for rulename, rhs in rules.items(): - graph[rulename] = names = rhs.initial_names() + graph[rulename] = names = initial_name_visitor.visit(rhs) vertices |= names for vertex in vertices: graph.setdefault(vertex, set()) diff --git a/Tools/peg_generator/pegen/python_generator.py b/Tools/peg_generator/pegen/python_generator.py index 201bf2ba..7aa730a 100644 --- a/Tools/peg_generator/pegen/python_generator.py +++ b/Tools/peg_generator/pegen/python_generator.py @@ -95,8 +95,6 @@ class PythonCallMakerVisitor(GrammarVisitor): def __init__(self, parser_generator: ParserGenerator): self.gen = parser_generator self.cache: Dict[Any, Any] = {} - self.keywords: Set[str] = set() - self.soft_keywords: Set[str] = set() def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]: name = node.value @@ -111,12 +109,6 @@ class PythonCallMakerVisitor(GrammarVisitor): return name, f"self.{name}()" def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]: - val = ast.literal_eval(node.value) - if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword - if node.value.endswith("'"): - self.keywords.add(val) - else: - self.soft_keywords.add(val) return "literal", f"self.expect({node.value})" def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]: @@ -125,7 +117,7 @@ class PythonCallMakerVisitor(GrammarVisitor): if len(node.alts) == 1 and len(node.alts[0].items) == 1: self.cache[node] = self.visit(node.alts[0].items[0]) else: - name = self.gen.name_node(node) + name = self.gen.artifical_rule_from_rhs(node) self.cache[node] = name, f"self.{name}()" return self.cache[node] @@ -163,21 +155,21 @@ class PythonCallMakerVisitor(GrammarVisitor): def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]: if node in self.cache: return self.cache[node] - name = self.gen.name_loop(node.node, False) + name = self.gen.artificial_rule_from_repeat(node.node, False) self.cache[node] = name, f"self.{name}()," # Also a trailing comma! return self.cache[node] def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]: if node in self.cache: return self.cache[node] - name = self.gen.name_loop(node.node, True) + name = self.gen.artificial_rule_from_repeat(node.node, True) self.cache[node] = name, f"self.{name}()" # But no trailing comma here! return self.cache[node] def visit_Gather(self, node: Gather) -> Tuple[str, str]: if node in self.cache: return self.cache[node] - name = self.gen.name_gather(node) + name = self.gen.artifical_rule_from_gather(node) self.cache[node] = name, f"self.{name}()" # No trailing comma here either! return self.cache[node] @@ -219,6 +211,7 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor): ) def generate(self, filename: str) -> None: + self.collect_rules() header = self.grammar.metas.get("header", MODULE_PREFIX) if header is not None: self.print(header.rstrip("\n").format(filename=filename)) @@ -228,17 +221,15 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor): cls_name = self.grammar.metas.get("class", "GeneratedParser") self.print("# Keywords and soft keywords are listed at the end of the parser definition.") self.print(f"class {cls_name}(Parser):") - while self.todo: - for rulename, rule in list(self.todo.items()): - del self.todo[rulename] - self.print() - with self.indent(): - self.visit(rule) + for rule in self.all_rules.values(): + self.print() + with self.indent(): + self.visit(rule) self.print() with self.indent(): - self.print(f"KEYWORDS = {tuple(self.callmakervisitor.keywords)}") - self.print(f"SOFT_KEYWORDS = {tuple(self.callmakervisitor.soft_keywords)}") + self.print(f"KEYWORDS = {tuple(self.keywords)}") + self.print(f"SOFT_KEYWORDS = {tuple(self.soft_keywords)}") trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX.format(class_name=cls_name)) if trailer is not None: @@ -270,8 +261,6 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor): self.print(f"def {node.name}(self) -> Optional[{node_type}]:") with self.indent(): self.print(f"# {node.name}: {rhs}") - if node.nullable: - self.print(f"# nullable={node.nullable}") self.print("mark = self._mark()") if self.alts_uses_locations(node.rhs.alts): self.print("tok = self._tokenizer.peek()") diff --git a/Tools/peg_generator/pegen/testutil.py b/Tools/peg_generator/pegen/testutil.py index e0928a4..8e5dbc5 100644 --- a/Tools/peg_generator/pegen/testutil.py +++ b/Tools/peg_generator/pegen/testutil.py @@ -4,10 +4,9 @@ import os import pathlib import sys import textwrap -import tokenize import token - -from typing import Any, cast, Dict, IO, Type, Final +import tokenize +from typing import IO, Any, Dict, Final, Type, cast from pegen.build import compile_c_extension from pegen.c_generator import CParserGenerator diff --git a/Tools/peg_generator/pegen/validator.py b/Tools/peg_generator/pegen/validator.py index e7d6980..c48a01e 100644 --- a/Tools/peg_generator/pegen/validator.py +++ b/Tools/peg_generator/pegen/validator.py @@ -1,12 +1,7 @@ from typing import Optional from pegen import grammar -from pegen.grammar import ( - Alt, - GrammarVisitor, - Rule, - Rhs, -) +from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule class ValidationError(Exception): |