summaryrefslogtreecommitdiffstats
path: root/Tools
diff options
context:
space:
mode:
authorPablo Galindo Salgado <Pablogsal@gmail.com>2021-09-05 13:58:52 (GMT)
committerGitHub <noreply@github.com>2021-09-05 13:58:52 (GMT)
commitb01fd533fef78b088674bad73267b89bea98e904 (patch)
treede68ee97cc624f412d3e357db0d45eb98227207d /Tools
parent28264269de9ff88d9ee7110fc56ac2d2db275bec (diff)
downloadcpython-b01fd533fef78b088674bad73267b89bea98e904.zip
cpython-b01fd533fef78b088674bad73267b89bea98e904.tar.gz
cpython-b01fd533fef78b088674bad73267b89bea98e904.tar.bz2
Extract visitors from the grammar nodes and call makers in the peg generator (GH-28172)
Simplify the peg generator logic by extracting as much visitors as possible to disentangle the flow and separate concerns.
Diffstat (limited to 'Tools')
-rwxr-xr-xTools/peg_generator/pegen/__main__.py3
-rw-r--r--Tools/peg_generator/pegen/build.py11
-rw-r--r--Tools/peg_generator/pegen/c_generator.py55
-rwxr-xr-xTools/peg_generator/pegen/first_sets.py12
-rw-r--r--Tools/peg_generator/pegen/grammar.py141
-rw-r--r--Tools/peg_generator/pegen/grammar_visualizer.py3
-rw-r--r--Tools/peg_generator/pegen/keywordgen.py6
-rw-r--r--Tools/peg_generator/pegen/parser_generator.py208
-rw-r--r--Tools/peg_generator/pegen/python_generator.py33
-rw-r--r--Tools/peg_generator/pegen/testutil.py5
-rw-r--r--Tools/peg_generator/pegen/validator.py7
11 files changed, 244 insertions, 240 deletions
diff --git a/Tools/peg_generator/pegen/__main__.py b/Tools/peg_generator/pegen/__main__.py
index a12fe78..2910d6c 100755
--- a/Tools/peg_generator/pegen/__main__.py
+++ b/Tools/peg_generator/pegen/__main__.py
@@ -10,10 +10,9 @@ import sys
import time
import token
import traceback
-
from typing import Tuple
-from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator
+from pegen.build import Grammar, Parser, ParserGenerator, Tokenizer
from pegen.validator import validate_grammar
diff --git a/Tools/peg_generator/pegen/build.py b/Tools/peg_generator/pegen/build.py
index 6f0a091..bf01078 100644
--- a/Tools/peg_generator/pegen/build.py
+++ b/Tools/peg_generator/pegen/build.py
@@ -1,11 +1,10 @@
+import itertools
import pathlib
import shutil
-import tokenize
import sysconfig
import tempfile
-import itertools
-
-from typing import Optional, Tuple, List, IO, Set, Dict
+import tokenize
+from typing import IO, Dict, List, Optional, Set, Tuple
from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
@@ -45,9 +44,9 @@ def compile_c_extension(
of distutils (this is useful in case you want to use a temporary directory).
"""
import distutils.log
- from distutils.core import Distribution, Extension
- from distutils.command.clean import clean # type: ignore
from distutils.command.build_ext import build_ext # type: ignore
+ from distutils.command.clean import clean # type: ignore
+ from distutils.core import Distribution, Extension
from distutils.tests.support import fixup_build_ext # type: ignore
if verbose:
diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py
index e928fd3..d15e910 100644
--- a/Tools/peg_generator/pegen/c_generator.py
+++ b/Tools/peg_generator/pegen/c_generator.py
@@ -1,8 +1,8 @@
import ast
-from dataclasses import field, dataclass
import re
-from typing import Any, Dict, IO, Optional, List, Text, Tuple, Set
+from dataclasses import dataclass, field
from enum import Enum
+from typing import IO, Any, Dict, List, Optional, Set, Text, Tuple
from pegen import grammar
from pegen.grammar import (
@@ -27,7 +27,6 @@ from pegen.grammar import (
)
from pegen.parser_generator import ParserGenerator
-
EXTENSION_PREFIX = """\
#include "pegen.h"
@@ -120,23 +119,18 @@ class CCallMakerVisitor(GrammarVisitor):
self.exact_tokens = exact_tokens
self.non_exact_tokens = non_exact_tokens
self.cache: Dict[Any, FunctionCall] = {}
- self.keyword_cache: Dict[str, int] = {}
- self.soft_keywords: Set[str] = set()
def keyword_helper(self, keyword: str) -> FunctionCall:
- if keyword not in self.keyword_cache:
- self.keyword_cache[keyword] = self.gen.keyword_type()
return FunctionCall(
assigned_variable="_keyword",
function="_PyPegen_expect_token",
- arguments=["p", self.keyword_cache[keyword]],
+ arguments=["p", self.gen.keywords[keyword]],
return_type="Token *",
nodetype=NodeTypes.KEYWORD,
comment=f"token='{keyword}'",
)
def soft_keyword_helper(self, value: str) -> FunctionCall:
- self.soft_keywords.add(value.replace('"', ""))
return FunctionCall(
assigned_variable="_keyword",
function="_PyPegen_expect_soft_keyword",
@@ -200,20 +194,12 @@ class CCallMakerVisitor(GrammarVisitor):
)
def visit_Rhs(self, node: Rhs) -> FunctionCall:
- def can_we_inline(node: Rhs) -> int:
- if len(node.alts) != 1 or len(node.alts[0].items) != 1:
- return False
- # If the alternative has an action we cannot inline
- if getattr(node.alts[0], "action", None) is not None:
- return False
- return True
-
if node in self.cache:
return self.cache[node]
- if can_we_inline(node):
+ if node.can_be_inlined:
self.cache[node] = self.generate_call(node.alts[0].items[0])
else:
- name = self.gen.name_node(node)
+ name = self.gen.artifical_rule_from_rhs(node)
self.cache[node] = FunctionCall(
assigned_variable=f"{name}_var",
function=f"{name}_rule",
@@ -306,7 +292,7 @@ class CCallMakerVisitor(GrammarVisitor):
def visit_Repeat0(self, node: Repeat0) -> FunctionCall:
if node in self.cache:
return self.cache[node]
- name = self.gen.name_loop(node.node, False)
+ name = self.gen.artificial_rule_from_repeat(node.node, False)
self.cache[node] = FunctionCall(
assigned_variable=f"{name}_var",
function=f"{name}_rule",
@@ -319,7 +305,7 @@ class CCallMakerVisitor(GrammarVisitor):
def visit_Repeat1(self, node: Repeat1) -> FunctionCall:
if node in self.cache:
return self.cache[node]
- name = self.gen.name_loop(node.node, True)
+ name = self.gen.artificial_rule_from_repeat(node.node, True)
self.cache[node] = FunctionCall(
assigned_variable=f"{name}_var",
function=f"{name}_rule",
@@ -332,7 +318,7 @@ class CCallMakerVisitor(GrammarVisitor):
def visit_Gather(self, node: Gather) -> FunctionCall:
if node in self.cache:
return self.cache[node]
- name = self.gen.name_gather(node)
+ name = self.gen.artifical_rule_from_gather(node)
self.cache[node] = FunctionCall(
assigned_variable=f"{name}_var",
function=f"{name}_rule",
@@ -429,7 +415,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print(f"}}")
def generate(self, filename: str) -> None:
- self.collect_todo()
+ self.collect_rules()
self.print(f"// @generated by pegen from {filename}")
header = self.grammar.metas.get("header", EXTENSION_PREFIX)
if header:
@@ -439,11 +425,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print(subheader)
self._setup_keywords()
self._setup_soft_keywords()
- for i, (rulename, rule) in enumerate(self.todo.items(), 1000):
+ for i, (rulename, rule) in enumerate(self.all_rules.items(), 1000):
comment = " // Left-recursive" if rule.left_recursive else ""
self.print(f"#define {rulename}_type {i}{comment}")
self.print()
- for rulename, rule in self.todo.items():
+ for rulename, rule in self.all_rules.items():
if rule.is_loop() or rule.is_gather():
type = "asdl_seq *"
elif rule.type:
@@ -452,13 +438,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
type = "void *"
self.print(f"static {type}{rulename}_rule(Parser *p);")
self.print()
- while self.todo:
- for rulename, rule in list(self.todo.items()):
- del self.todo[rulename]
- self.print()
- if rule.left_recursive:
- self.print("// Left-recursive")
- self.visit(rule)
+ for rulename, rule in list(self.all_rules.items()):
+ self.print()
+ if rule.left_recursive:
+ self.print("// Left-recursive")
+ self.visit(rule)
if self.skip_actions:
mode = 0
else:
@@ -472,7 +456,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]:
groups: Dict[int, List[Tuple[str, int]]] = {}
- for keyword_str, keyword_type in self.callmakervisitor.keyword_cache.items():
+ for keyword_str, keyword_type in self.keywords.items():
length = len(keyword_str)
if length in groups:
groups[length].append((keyword_str, keyword_type))
@@ -481,9 +465,8 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
return groups
def _setup_keywords(self) -> None:
- keyword_cache = self.callmakervisitor.keyword_cache
n_keyword_lists = (
- len(max(keyword_cache.keys(), key=len)) + 1 if len(keyword_cache) > 0 else 0
+ len(max(self.keywords.keys(), key=len)) + 1 if len(self.keywords) > 0 else 0
)
self.print(f"static const int n_keyword_lists = {n_keyword_lists};")
groups = self._group_keywords_by_length()
@@ -503,7 +486,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print("};")
def _setup_soft_keywords(self) -> None:
- soft_keywords = sorted(self.callmakervisitor.soft_keywords)
+ soft_keywords = sorted(self.soft_keywords)
self.print("static char *soft_keywords[] = {")
with self.indent():
for keyword in soft_keywords:
diff --git a/Tools/peg_generator/pegen/first_sets.py b/Tools/peg_generator/pegen/first_sets.py
index 50ced22..611ef51 100755
--- a/Tools/peg_generator/pegen/first_sets.py
+++ b/Tools/peg_generator/pegen/first_sets.py
@@ -3,30 +3,27 @@
import argparse
import pprint
import sys
-from typing import Set, Dict
+from typing import Dict, Set
from pegen.build import build_parser
from pegen.grammar import (
Alt,
Cut,
Gather,
- Grammar,
GrammarVisitor,
Group,
- Leaf,
Lookahead,
NamedItem,
NameLeaf,
NegativeLookahead,
Opt,
- Repeat,
Repeat0,
Repeat1,
Rhs,
Rule,
StringLeaf,
- PositiveLookahead,
)
+from pegen.parser_generator import compute_nullables
argparser = argparse.ArgumentParser(
prog="calculate_first_sets",
@@ -38,8 +35,7 @@ argparser.add_argument("grammar_file", help="The grammar file")
class FirstSetCalculator(GrammarVisitor):
def __init__(self, rules: Dict[str, Rule]) -> None:
self.rules = rules
- for rule in rules.values():
- rule.nullable_visit(rules)
+ self.nullables = compute_nullables(rules)
self.first_sets: Dict[str, Set[str]] = dict()
self.in_process: Set[str] = set()
@@ -129,7 +125,7 @@ class FirstSetCalculator(GrammarVisitor):
elif item.name not in self.first_sets:
self.in_process.add(item.name)
terminals = self.visit(item.rhs)
- if item.nullable:
+ if item in self.nullables:
terminals.add("")
self.first_sets[item.name] = terminals
self.in_process.remove(item.name)
diff --git a/Tools/peg_generator/pegen/grammar.py b/Tools/peg_generator/pegen/grammar.py
index 66fd5b3..fa47b98 100644
--- a/Tools/peg_generator/pegen/grammar.py
+++ b/Tools/peg_generator/pegen/grammar.py
@@ -2,6 +2,7 @@ from __future__ import annotations
from abc import abstractmethod
from typing import (
+ TYPE_CHECKING,
AbstractSet,
Any,
Dict,
@@ -11,11 +12,9 @@ from typing import (
Optional,
Set,
Tuple,
- TYPE_CHECKING,
Union,
)
-
if TYPE_CHECKING:
from pegen.parser_generator import ParserGenerator
@@ -31,7 +30,7 @@ class GrammarVisitor:
visitor = getattr(self, method, self.generic_visit)
return visitor(node, *args, **kwargs)
- def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> None:
+ def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Any:
"""Called if no explicit visitor function exists for a node."""
for value in node:
if isinstance(value, list):
@@ -73,8 +72,6 @@ class Rule:
self.type = type
self.rhs = rhs
self.memo = bool(memo)
- self.visited = False
- self.nullable = False
self.left_recursive = False
self.leader = False
@@ -101,17 +98,6 @@ class Rule:
def __iter__(self) -> Iterator[Rhs]:
yield self.rhs
- def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
- if self.visited:
- # A left-recursive rule is considered non-nullable.
- return False
- self.visited = True
- self.nullable = self.rhs.nullable_visit(rules)
- return self.nullable
-
- def initial_names(self) -> AbstractSet[str]:
- return self.rhs.initial_names()
-
def flatten(self) -> Rhs:
# If it's a single parenthesized group, flatten it.
rhs = self.rhs
@@ -124,10 +110,6 @@ class Rule:
rhs = rhs.alts[0].items[0].item.rhs
return rhs
- def collect_todo(self, gen: ParserGenerator) -> None:
- rhs = self.flatten()
- rhs.collect_todo(gen)
-
class Leaf:
def __init__(self, value: str):
@@ -140,14 +122,6 @@ class Leaf:
if False:
yield
- @abstractmethod
- def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
- raise NotImplementedError
-
- @abstractmethod
- def initial_names(self) -> AbstractSet[str]:
- raise NotImplementedError
-
class NameLeaf(Leaf):
"""The value is the name."""
@@ -160,15 +134,6 @@ class NameLeaf(Leaf):
def __repr__(self) -> str:
return f"NameLeaf({self.value!r})"
- def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
- if self.value in rules:
- return rules[self.value].nullable_visit(rules)
- # Token or unknown; never empty.
- return False
-
- def initial_names(self) -> AbstractSet[str]:
- return {self.value}
-
class StringLeaf(Leaf):
"""The value is a string literal, including quotes."""
@@ -176,13 +141,6 @@ class StringLeaf(Leaf):
def __repr__(self) -> str:
return f"StringLeaf({self.value!r})"
- def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
- # The string token '' is considered empty.
- return not self.value
-
- def initial_names(self) -> AbstractSet[str]:
- return set()
-
class Rhs:
def __init__(self, alts: List[Alt]):
@@ -198,21 +156,14 @@ class Rhs:
def __iter__(self) -> Iterator[List[Alt]]:
yield self.alts
- def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
- for alt in self.alts:
- if alt.nullable_visit(rules):
- return True
- return False
-
- def initial_names(self) -> AbstractSet[str]:
- names: Set[str] = set()
- for alt in self.alts:
- names |= alt.initial_names()
- return names
-
- def collect_todo(self, gen: ParserGenerator) -> None:
- for alt in self.alts:
- alt.collect_todo(gen)
+ @property
+ def can_be_inlined(self) -> bool:
+ if len(self.alts) != 1 or len(self.alts[0].items) != 1:
+ return False
+ # If the alternative has an action we cannot inline
+ if getattr(self.alts[0], "action", None) is not None:
+ return False
+ return True
class Alt:
@@ -239,31 +190,12 @@ class Alt:
def __iter__(self) -> Iterator[List[NamedItem]]:
yield self.items
- def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
- for item in self.items:
- if not item.nullable_visit(rules):
- return False
- return True
-
- def initial_names(self) -> AbstractSet[str]:
- names: Set[str] = set()
- for item in self.items:
- names |= item.initial_names()
- if not item.nullable:
- break
- return names
-
- def collect_todo(self, gen: ParserGenerator) -> None:
- for item in self.items:
- item.collect_todo(gen)
-
class NamedItem:
def __init__(self, name: Optional[str], item: Item, type: Optional[str] = None):
self.name = name
self.item = item
self.type = type
- self.nullable = False
def __str__(self) -> str:
if not SIMPLE_STR and self.name:
@@ -277,16 +209,6 @@ class NamedItem:
def __iter__(self) -> Iterator[Item]:
yield self.item
- def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
- self.nullable = self.item.nullable_visit(rules)
- return self.nullable
-
- def initial_names(self) -> AbstractSet[str]:
- return self.item.initial_names()
-
- def collect_todo(self, gen: ParserGenerator) -> None:
- gen.callmakervisitor.visit(self.item)
-
class Forced:
def __init__(self, node: Plain):
@@ -298,12 +220,6 @@ class Forced:
def __iter__(self) -> Iterator[Plain]:
yield self.node
- def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
- return True
-
- def initial_names(self) -> AbstractSet[str]:
- return set()
-
class Lookahead:
def __init__(self, node: Plain, sign: str):
@@ -316,12 +232,6 @@ class Lookahead:
def __iter__(self) -> Iterator[Plain]:
yield self.node
- def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
- return True
-
- def initial_names(self) -> AbstractSet[str]:
- return set()
-
class PositiveLookahead(Lookahead):
def __init__(self, node: Plain):
@@ -357,12 +267,6 @@ class Opt:
def __iter__(self) -> Iterator[Item]:
yield self.node
- def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
- return True
-
- def initial_names(self) -> AbstractSet[str]:
- return self.node.initial_names()
-
class Repeat:
"""Shared base class for x* and x+."""
@@ -371,16 +275,9 @@ class Repeat:
self.node = node
self.memo: Optional[Tuple[Optional[str], str]] = None
- @abstractmethod
- def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
- raise NotImplementedError
-
def __iter__(self) -> Iterator[Plain]:
yield self.node
- def initial_names(self) -> AbstractSet[str]:
- return self.node.initial_names()
-
class Repeat0(Repeat):
def __str__(self) -> str:
@@ -394,9 +291,6 @@ class Repeat0(Repeat):
def __repr__(self) -> str:
return f"Repeat0({self.node!r})"
- def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
- return True
-
class Repeat1(Repeat):
def __str__(self) -> str:
@@ -410,9 +304,6 @@ class Repeat1(Repeat):
def __repr__(self) -> str:
return f"Repeat1({self.node!r})"
- def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
- return False
-
class Gather(Repeat):
def __init__(self, separator: Plain, node: Plain):
@@ -425,9 +316,6 @@ class Gather(Repeat):
def __repr__(self) -> str:
return f"Gather({self.separator!r}, {self.node!r})"
- def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
- return False
-
class Group:
def __init__(self, rhs: Rhs):
@@ -442,12 +330,6 @@ class Group:
def __iter__(self) -> Iterator[Rhs]:
yield self.rhs
- def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
- return self.rhs.nullable_visit(rules)
-
- def initial_names(self) -> AbstractSet[str]:
- return self.rhs.initial_names()
-
class Cut:
def __init__(self) -> None:
@@ -468,9 +350,6 @@ class Cut:
return NotImplemented
return True
- def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
- return True
-
def initial_names(self) -> AbstractSet[str]:
return set()
diff --git a/Tools/peg_generator/pegen/grammar_visualizer.py b/Tools/peg_generator/pegen/grammar_visualizer.py
index 7362ec5..ab5c636 100644
--- a/Tools/peg_generator/pegen/grammar_visualizer.py
+++ b/Tools/peg_generator/pegen/grammar_visualizer.py
@@ -1,7 +1,6 @@
import argparse
import sys
-
-from typing import Any, Iterator, Callable
+from typing import Any, Callable, Iterator
from pegen.build import build_parser
from pegen.grammar import Grammar, Rule
diff --git a/Tools/peg_generator/pegen/keywordgen.py b/Tools/peg_generator/pegen/keywordgen.py
index 6a07f6e..35a5e1a 100644
--- a/Tools/peg_generator/pegen/keywordgen.py
+++ b/Tools/peg_generator/pegen/keywordgen.py
@@ -59,11 +59,11 @@ def main() -> None:
with args.tokens_file as tok_file:
all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
gen = CParserGenerator(grammar, all_tokens, exact_tok, non_exact_tok, file=None)
- gen.collect_todo()
+ gen.collect_rules()
with args.keyword_file as thefile:
- all_keywords = sorted(list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS)
- all_soft_keywords = sorted(gen.callmakervisitor.soft_keywords)
+ all_keywords = sorted(list(gen.keywords.keys()) + EXTRA_KEYWORDS)
+ all_soft_keywords = sorted(gen.soft_keywords)
keywords = "" if not all_keywords else " " + ",\n ".join(map(repr, all_keywords))
soft_keywords = (
diff --git a/Tools/peg_generator/pegen/parser_generator.py b/Tools/peg_generator/pegen/parser_generator.py
index 33ecee1..f2105d8 100644
--- a/Tools/peg_generator/pegen/parser_generator.py
+++ b/Tools/peg_generator/pegen/parser_generator.py
@@ -1,22 +1,76 @@
+import ast
import contextlib
+import re
from abc import abstractmethod
-from typing import IO, AbstractSet, Dict, Iterator, List, Optional, Set, Text, Tuple
+from typing import (
+ IO,
+ AbstractSet,
+ Any,
+ Dict,
+ Iterable,
+ Iterator,
+ List,
+ Optional,
+ Set,
+ Text,
+ Tuple,
+ Union,
+)
from pegen import sccutils
from pegen.grammar import (
Alt,
+ Cut,
+ Forced,
Gather,
Grammar,
GrammarError,
GrammarVisitor,
+ Group,
+ Lookahead,
NamedItem,
NameLeaf,
+ Opt,
Plain,
+ Repeat0,
+ Repeat1,
Rhs,
Rule,
+ StringLeaf,
)
+class RuleCollectorVisitor(GrammarVisitor):
+ """Visitor that invokes a provieded callmaker visitor with just the NamedItem nodes"""
+
+ def __init__(self, rules: Dict[str, Rule], callmakervisitor: GrammarVisitor) -> None:
+ self.rulses = rules
+ self.callmaker = callmakervisitor
+
+ def visit_Rule(self, rule: Rule) -> None:
+ self.visit(rule.flatten())
+
+ def visit_NamedItem(self, item: NamedItem) -> None:
+ self.callmaker.visit(item)
+
+
+class KeywordCollectorVisitor(GrammarVisitor):
+ """Visitor that collects all the keywods and soft keywords in the Grammar"""
+
+ def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]):
+ self.generator = gen
+ self.keywords = keywords
+ self.soft_keywords = soft_keywords
+
+ def visit_StringLeaf(self, node: StringLeaf) -> None:
+ val = ast.literal_eval(node.value)
+ if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
+ if node.value.endswith("'") and node.value not in self.keywords:
+ self.keywords[val] = self.generator.keyword_type()
+ else:
+ return self.soft_keywords.add(node.value.replace('"', ""))
+
+
class RuleCheckingVisitor(GrammarVisitor):
def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):
self.rules = rules
@@ -39,6 +93,8 @@ class ParserGenerator:
def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):
self.grammar = grammar
self.tokens = tokens
+ self.keywords: Dict[str, int] = {}
+ self.soft_keywords: Set[str] = set()
self.rules = grammar.rules
self.validate_rule_names()
if "trailer" not in grammar.metas and "start" not in self.rules:
@@ -48,12 +104,10 @@ class ParserGenerator:
checker.visit(rule)
self.file = file
self.level = 0
- compute_nullables(self.rules)
self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
- self.todo = self.rules.copy() # Rules to generate
self.counter = 0 # For name_rule()/name_loop()
self.keyword_counter = 499 # For keyword_type()
- self.all_rules: Dict[str, Rule] = {} # Rules + temporal rules
+ self.all_rules: Dict[str, Rule] = self.rules.copy() # Rules + temporal rules
self._local_variable_stack: List[List[str]] = []
def validate_rule_names(self) -> None:
@@ -94,39 +148,43 @@ class ParserGenerator:
for line in lines.splitlines():
self.print(line)
- def collect_todo(self) -> None:
+ def collect_rules(self) -> None:
+ keyword_collector = KeywordCollectorVisitor(self, self.keywords, self.soft_keywords)
+ for rule in self.all_rules.values():
+ keyword_collector.visit(rule)
+
+ rule_collector = RuleCollectorVisitor(self.rules, self.callmakervisitor)
done: Set[str] = set()
while True:
- alltodo = list(self.todo)
- self.all_rules.update(self.todo)
- todo = [i for i in alltodo if i not in done]
+ computed_rules = list(self.all_rules)
+ todo = [i for i in computed_rules if i not in done]
if not todo:
break
+ done = set(self.all_rules)
for rulename in todo:
- self.todo[rulename].collect_todo(self)
- done = set(alltodo)
+ rule_collector.visit(self.all_rules[rulename])
def keyword_type(self) -> int:
self.keyword_counter += 1
return self.keyword_counter
- def name_node(self, rhs: Rhs) -> str:
+ def artifical_rule_from_rhs(self, rhs: Rhs) -> str:
self.counter += 1
name = f"_tmp_{self.counter}" # TODO: Pick a nicer name.
- self.todo[name] = Rule(name, None, rhs)
+ self.all_rules[name] = Rule(name, None, rhs)
return name
- def name_loop(self, node: Plain, is_repeat1: bool) -> str:
+ def artificial_rule_from_repeat(self, node: Plain, is_repeat1: bool) -> str:
self.counter += 1
if is_repeat1:
prefix = "_loop1_"
else:
prefix = "_loop0_"
- name = f"{prefix}{self.counter}" # TODO: It's ugly to signal via the name.
- self.todo[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
+ name = f"{prefix}{self.counter}"
+ self.all_rules[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
return name
- def name_gather(self, node: Gather) -> str:
+ def artifical_rule_from_gather(self, node: Gather) -> str:
self.counter += 1
name = f"_gather_{self.counter}"
self.counter += 1
@@ -135,7 +193,7 @@ class ParserGenerator:
[NamedItem(None, node.separator), NamedItem("elem", node.node)],
action="elem",
)
- self.todo[extra_function_name] = Rule(
+ self.all_rules[extra_function_name] = Rule(
extra_function_name,
None,
Rhs([extra_function_alt]),
@@ -143,7 +201,7 @@ class ParserGenerator:
alt = Alt(
[NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],
)
- self.todo[name] = Rule(
+ self.all_rules[name] = Rule(
name,
None,
Rhs([alt]),
@@ -160,13 +218,120 @@ class ParserGenerator:
return name
-def compute_nullables(rules: Dict[str, Rule]) -> None:
+class NullableVisitor(GrammarVisitor):
+ def __init__(self, rules: Dict[str, Rule]) -> None:
+ self.rules = rules
+ self.visited: Set[Any] = set()
+ self.nullables: Set[Union[Rule, NamedItem]] = set()
+
+ def visit_Rule(self, rule: Rule) -> bool:
+ if rule in self.visited:
+ return False
+ self.visited.add(rule)
+ if self.visit(rule.rhs):
+ self.nullables.add(rule)
+ return rule in self.nullables
+
+ def visit_Rhs(self, rhs: Rhs) -> bool:
+ for alt in rhs.alts:
+ if self.visit(alt):
+ return True
+ return False
+
+ def visit_Alt(self, alt: Alt) -> bool:
+ for item in alt.items:
+ if not self.visit(item):
+ return False
+ return True
+
+ def visit_Forced(self, force: Forced) -> bool:
+ return True
+
+ def visit_LookAhead(self, lookahead: Lookahead) -> bool:
+ return True
+
+ def visit_Opt(self, opt: Opt) -> bool:
+ return True
+
+ def visit_Repeat0(self, repeat: Repeat0) -> bool:
+ return True
+
+ def visit_Repeat1(self, repeat: Repeat1) -> bool:
+ return False
+
+ def visit_Gather(self, gather: Gather) -> bool:
+ return False
+
+ def visit_Cut(self, cut: Cut) -> bool:
+ return False
+
+ def visit_Group(self, group: Group) -> bool:
+ return self.visit(group.rhs)
+
+ def visit_NamedItem(self, item: NamedItem) -> bool:
+ if self.visit(item.item):
+ self.nullables.add(item)
+ return item in self.nullables
+
+ def visit_NameLeaf(self, node: NameLeaf) -> bool:
+ if node.value in self.rules:
+ return self.visit(self.rules[node.value])
+ # Token or unknown; never empty.
+ return False
+
+ def visit_StringLeaf(self, node: StringLeaf) -> bool:
+ # The string token '' is considered empty.
+ return not node.value
+
+
+def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]:
"""Compute which rules in a grammar are nullable.
Thanks to TatSu (tatsu/leftrec.py) for inspiration.
"""
+ nullable_visitor = NullableVisitor(rules)
for rule in rules.values():
- rule.nullable_visit(rules)
+ nullable_visitor.visit(rule)
+ return nullable_visitor.nullables
+
+
+class InitialNamesVisitor(GrammarVisitor):
+ def __init__(self, rules: Dict[str, Rule]) -> None:
+ self.rules = rules
+ self.nullables = compute_nullables(rules)
+
+ def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]:
+ names: Set[str] = set()
+ for value in node:
+ if isinstance(value, list):
+ for item in value:
+ names |= self.visit(item, *args, **kwargs)
+ else:
+ names |= self.visit(value, *args, **kwargs)
+ return names
+
+ def visit_Alt(self, alt: Alt) -> Set[Any]:
+ names: Set[str] = set()
+ for item in alt.items:
+ names |= self.visit(item)
+ if item not in self.nullables:
+ break
+ return names
+
+ def visit_Forced(self, force: Forced) -> Set[Any]:
+ return set()
+
+ def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]:
+ return set()
+
+ def visit_Cut(self, cut: Cut) -> Set[Any]:
+ return set()
+
+ def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]:
+ return {node.value}
+
+ def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]:
+ return set()
def compute_left_recursives(
@@ -207,10 +372,11 @@ def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
Note that this requires the nullable flags to have been computed.
"""
+ initial_name_visitor = InitialNamesVisitor(rules)
graph = {}
vertices: Set[str] = set()
for rulename, rhs in rules.items():
- graph[rulename] = names = rhs.initial_names()
+ graph[rulename] = names = initial_name_visitor.visit(rhs)
vertices |= names
for vertex in vertices:
graph.setdefault(vertex, set())
diff --git a/Tools/peg_generator/pegen/python_generator.py b/Tools/peg_generator/pegen/python_generator.py
index 201bf2ba..7aa730a 100644
--- a/Tools/peg_generator/pegen/python_generator.py
+++ b/Tools/peg_generator/pegen/python_generator.py
@@ -95,8 +95,6 @@ class PythonCallMakerVisitor(GrammarVisitor):
def __init__(self, parser_generator: ParserGenerator):
self.gen = parser_generator
self.cache: Dict[Any, Any] = {}
- self.keywords: Set[str] = set()
- self.soft_keywords: Set[str] = set()
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
name = node.value
@@ -111,12 +109,6 @@ class PythonCallMakerVisitor(GrammarVisitor):
return name, f"self.{name}()"
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
- val = ast.literal_eval(node.value)
- if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
- if node.value.endswith("'"):
- self.keywords.add(val)
- else:
- self.soft_keywords.add(val)
return "literal", f"self.expect({node.value})"
def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
@@ -125,7 +117,7 @@ class PythonCallMakerVisitor(GrammarVisitor):
if len(node.alts) == 1 and len(node.alts[0].items) == 1:
self.cache[node] = self.visit(node.alts[0].items[0])
else:
- name = self.gen.name_node(node)
+ name = self.gen.artifical_rule_from_rhs(node)
self.cache[node] = name, f"self.{name}()"
return self.cache[node]
@@ -163,21 +155,21 @@ class PythonCallMakerVisitor(GrammarVisitor):
def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
if node in self.cache:
return self.cache[node]
- name = self.gen.name_loop(node.node, False)
+ name = self.gen.artificial_rule_from_repeat(node.node, False)
self.cache[node] = name, f"self.{name}()," # Also a trailing comma!
return self.cache[node]
def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
if node in self.cache:
return self.cache[node]
- name = self.gen.name_loop(node.node, True)
+ name = self.gen.artificial_rule_from_repeat(node.node, True)
self.cache[node] = name, f"self.{name}()" # But no trailing comma here!
return self.cache[node]
def visit_Gather(self, node: Gather) -> Tuple[str, str]:
if node in self.cache:
return self.cache[node]
- name = self.gen.name_gather(node)
+ name = self.gen.artifical_rule_from_gather(node)
self.cache[node] = name, f"self.{name}()" # No trailing comma here either!
return self.cache[node]
@@ -219,6 +211,7 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
)
def generate(self, filename: str) -> None:
+ self.collect_rules()
header = self.grammar.metas.get("header", MODULE_PREFIX)
if header is not None:
self.print(header.rstrip("\n").format(filename=filename))
@@ -228,17 +221,15 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
cls_name = self.grammar.metas.get("class", "GeneratedParser")
self.print("# Keywords and soft keywords are listed at the end of the parser definition.")
self.print(f"class {cls_name}(Parser):")
- while self.todo:
- for rulename, rule in list(self.todo.items()):
- del self.todo[rulename]
- self.print()
- with self.indent():
- self.visit(rule)
+ for rule in self.all_rules.values():
+ self.print()
+ with self.indent():
+ self.visit(rule)
self.print()
with self.indent():
- self.print(f"KEYWORDS = {tuple(self.callmakervisitor.keywords)}")
- self.print(f"SOFT_KEYWORDS = {tuple(self.callmakervisitor.soft_keywords)}")
+ self.print(f"KEYWORDS = {tuple(self.keywords)}")
+ self.print(f"SOFT_KEYWORDS = {tuple(self.soft_keywords)}")
trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX.format(class_name=cls_name))
if trailer is not None:
@@ -270,8 +261,6 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
self.print(f"def {node.name}(self) -> Optional[{node_type}]:")
with self.indent():
self.print(f"# {node.name}: {rhs}")
- if node.nullable:
- self.print(f"# nullable={node.nullable}")
self.print("mark = self._mark()")
if self.alts_uses_locations(node.rhs.alts):
self.print("tok = self._tokenizer.peek()")
diff --git a/Tools/peg_generator/pegen/testutil.py b/Tools/peg_generator/pegen/testutil.py
index e0928a4..8e5dbc5 100644
--- a/Tools/peg_generator/pegen/testutil.py
+++ b/Tools/peg_generator/pegen/testutil.py
@@ -4,10 +4,9 @@ import os
import pathlib
import sys
import textwrap
-import tokenize
import token
-
-from typing import Any, cast, Dict, IO, Type, Final
+import tokenize
+from typing import IO, Any, Dict, Final, Type, cast
from pegen.build import compile_c_extension
from pegen.c_generator import CParserGenerator
diff --git a/Tools/peg_generator/pegen/validator.py b/Tools/peg_generator/pegen/validator.py
index e7d6980..c48a01e 100644
--- a/Tools/peg_generator/pegen/validator.py
+++ b/Tools/peg_generator/pegen/validator.py
@@ -1,12 +1,7 @@
from typing import Optional
from pegen import grammar
-from pegen.grammar import (
- Alt,
- GrammarVisitor,
- Rule,
- Rhs,
-)
+from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule
class ValidationError(Exception):