diff options
author | Pablo Galindo <Pablogsal@gmail.com> | 2020-04-29 09:42:21 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-04-29 09:42:21 (GMT) |
commit | 4db245ee9ddbe6c53d375de59a35ff59dea2a8e0 (patch) | |
tree | 2a70d3590bb1c1f68ddfe681328061a3aa48c69f /Tools/peg_generator/pegen/c_generator.py | |
parent | 9b64ef3ac7b434065dbff0048b9103999e4b491a (diff) | |
download | cpython-4db245ee9ddbe6c53d375de59a35ff59dea2a8e0.zip cpython-4db245ee9ddbe6c53d375de59a35ff59dea2a8e0.tar.gz cpython-4db245ee9ddbe6c53d375de59a35ff59dea2a8e0.tar.bz2 |
bpo-40334: refactor and cleanup for the PEG generators (GH-19775)
Diffstat (limited to 'Tools/peg_generator/pegen/c_generator.py')
-rw-r--r-- | Tools/peg_generator/pegen/c_generator.py | 318 |
1 files changed, 208 insertions, 110 deletions
diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py index a01c309..a59da2f 100644 --- a/Tools/peg_generator/pegen/c_generator.py +++ b/Tools/peg_generator/pegen/c_generator.py @@ -1,33 +1,36 @@ import ast +from dataclasses import dataclass, field import re -from typing import Any, cast, Dict, IO, Optional, List, Text, Tuple, Set +from typing import IO, Any, Dict, List, Optional, Set, Text, Tuple +from enum import Enum +from pegen import grammar from pegen.grammar import ( + Alt, Cut, + Gather, GrammarVisitor, - Rhs, - Alt, + Group, + Lookahead, NamedItem, NameLeaf, - StringLeaf, - Lookahead, - PositiveLookahead, NegativeLookahead, Opt, + PositiveLookahead, Repeat0, Repeat1, - Gather, - Group, + Rhs, Rule, + StringLeaf, ) -from pegen import grammar -from pegen.parser_generator import dedupe, ParserGenerator +from pegen.parser_generator import ParserGenerator EXTENSION_PREFIX = """\ #include "pegen.h" """ + EXTENSION_SUFFIX = """ void * _PyPegen_parse(Parser *p) @@ -41,6 +44,43 @@ _PyPegen_parse(Parser *p) """ +class NodeTypes(Enum): + NAME_TOKEN = 0 + NUMBER_TOKEN = 1 + STRING_TOKEN = 2 + GENERIC_TOKEN = 3 + KEYWORD = 4 + CUT_OPERATOR = 5 + + +BASE_NODETYPES = { + "NAME": NodeTypes.NAME_TOKEN, + "NUMBER": NodeTypes.NUMBER_TOKEN, + "STRING": NodeTypes.STRING_TOKEN, +} + + +@dataclass +class FunctionCall: + function: str + arguments: Optional[List[Any]] = None + assigned_variable: Optional[str] = None + nodetype: Optional[NodeTypes] = None + force_true: bool = False + metadata: Dict[str, Any] = field(default_factory=dict) + + def __str__(self) -> str: + parts = [] + parts.append(self.function) + if self.arguments: + parts.append(f"({', '.join(map(str, self.arguments))})") + if self.force_true: + parts.append(", 1") + if self.assigned_variable: + parts = ["(", self.assigned_variable, " = ", *parts, ")"] + return "".join(parts) + + class CCallMakerVisitor(GrammarVisitor): def __init__( self, @@ -54,28 +94,57 @@ class CCallMakerVisitor(GrammarVisitor): self.cache: Dict[Any, Any] = {} self.keyword_cache: Dict[str, int] = {} - def keyword_helper(self, keyword: str) -> Tuple[str, str]: + def keyword_helper(self, keyword: str) -> FunctionCall: if keyword not in self.keyword_cache: self.keyword_cache[keyword] = self.gen.keyword_type() - return "keyword", f"_PyPegen_expect_token(p, {self.keyword_cache[keyword]})" + return FunctionCall( + assigned_variable="keyword", + function="_PyPegen_expect_token", + arguments=["p", self.keyword_cache[keyword]], + nodetype=NodeTypes.KEYWORD, + ) - def visit_NameLeaf(self, node: NameLeaf) -> Tuple[str, str]: + def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall: name = node.value if name in self.non_exact_tokens: - name = name.lower() - return f"{name}_var", f"_PyPegen_{name}_token(p)" - return f"{name}_var", f"{name}_rule(p)" + if name in BASE_NODETYPES: + return FunctionCall( + assigned_variable=f"{name.lower()}_var", + function=f"_PyPegen_{name.lower()}_token", + arguments=["p"], + nodetype=BASE_NODETYPES[name], + metadata={"rulename": name.lower()}, + ) + return FunctionCall( + assigned_variable=f"{name.lower()}_var", + function=f"_PyPegen_expect_token", + arguments=["p", name], + nodetype=NodeTypes.GENERIC_TOKEN, + metadata={"rulename": name.lower()}, + ) + + return FunctionCall( + assigned_variable=f"{name}_var", + function=f"{name}_rule", + arguments=["p"], + metadata={"rulename": name.lower()}, + ) - def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]: + def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall: val = ast.literal_eval(node.value) if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword return self.keyword_helper(val) else: assert val in self.exact_tokens, f"{node.value} is not a known literal" type = self.exact_tokens[val] - return "literal", f"_PyPegen_expect_token(p, {type})" + return FunctionCall( + assigned_variable="literal", + function=f"_PyPegen_expect_token", + arguments=["p", type], + nodetype=NodeTypes.GENERIC_TOKEN, + ) - def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]: + def visit_Rhs(self, node: Rhs) -> FunctionCall: def can_we_inline(node: Rhs) -> int: if len(node.alts) != 1 or len(node.alts[0].items) != 1: return False @@ -90,65 +159,96 @@ class CCallMakerVisitor(GrammarVisitor): self.cache[node] = self.visit(node.alts[0].items[0]) else: name = self.gen.name_node(node) - self.cache[node] = f"{name}_var", f"{name}_rule(p)" + self.cache[node] = FunctionCall( + assigned_variable=f"{name}_var", + function=f"{name}_rule", + arguments=["p"], + metadata={"rulename": name}, + ) return self.cache[node] - def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]: - name, call = self.visit(node.item) + def visit_NamedItem(self, node: NamedItem) -> FunctionCall: + call = self.visit(node.item) if node.name: - name = node.name - return name, call - - def lookahead_call_helper(self, node: Lookahead, positive: int) -> Tuple[None, str]: - name, call = self.visit(node.node) - func, args = call.split("(", 1) - assert args[-1] == ")" - args = args[:-1] - if "name_token" in call: - return None, f"_PyPegen_lookahead_with_name({positive}, {func}, {args})" - elif not args.startswith("p,"): - return None, f"_PyPegen_lookahead({positive}, {func}, {args})" - elif args[2:].strip().isalnum(): - return None, f"_PyPegen_lookahead_with_int({positive}, {func}, {args})" + call.assigned_variable = node.name + return call + + def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall: + call = self.visit(node.node) + if call.nodetype == NodeTypes.NAME_TOKEN: + return FunctionCall( + function=f"_PyPegen_lookahead_with_name", + arguments=[positive, call.function, *call.arguments], + ) + elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}: + return FunctionCall( + function=f"_PyPegen_lookahead_with_int", + arguments=[positive, call.function, *call.arguments], + ) else: - return None, f"_PyPegen_lookahead_with_string({positive}, {func}, {args})" + return FunctionCall( + function=f"_PyPegen_lookahead", + arguments=[positive, call.function, *call.arguments], + ) - def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]: + def visit_PositiveLookahead(self, node: PositiveLookahead) -> FunctionCall: return self.lookahead_call_helper(node, 1) - def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]: + def visit_NegativeLookahead(self, node: NegativeLookahead) -> FunctionCall: return self.lookahead_call_helper(node, 0) - def visit_Opt(self, node: Opt) -> Tuple[str, str]: - name, call = self.visit(node.node) - return "opt_var", f"{call}, 1" # Using comma operator! + def visit_Opt(self, node: Opt) -> FunctionCall: + call = self.visit(node.node) + return FunctionCall( + assigned_variable="opt_var", + function=call.function, + arguments=call.arguments, + force_true=True, + ) - def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]: + def visit_Repeat0(self, node: Repeat0) -> FunctionCall: if node in self.cache: return self.cache[node] name = self.gen.name_loop(node.node, False) - self.cache[node] = f"{name}_var", f"{name}_rule(p)" + self.cache[node] = FunctionCall( + assigned_variable=f"{name}_var", + function=f"{name}_rule", + arguments=["p"], + metadata={"rulename": name}, + ) return self.cache[node] - def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]: + def visit_Repeat1(self, node: Repeat1) -> FunctionCall: if node in self.cache: return self.cache[node] name = self.gen.name_loop(node.node, True) - self.cache[node] = f"{name}_var", f"{name}_rule(p)" + self.cache[node] = FunctionCall( + assigned_variable=f"{name}_var", + function=f"{name}_rule", + arguments=["p"], + metadata={"rulename": name}, + ) return self.cache[node] - def visit_Gather(self, node: Gather) -> Tuple[str, str]: + def visit_Gather(self, node: Gather) -> FunctionCall: if node in self.cache: return self.cache[node] name = self.gen.name_gather(node) - self.cache[node] = f"{name}_var", f"{name}_rule(p)" + self.cache[node] = FunctionCall( + assigned_variable=f"{name}_var", + function=f"{name}_rule", + arguments=["p"], + metadata={"rulename": name}, + ) return self.cache[node] - def visit_Group(self, node: Group) -> Tuple[Optional[str], str]: + def visit_Group(self, node: Group) -> FunctionCall: return self.visit(node.rhs) - def visit_Cut(self, node: Cut) -> Tuple[str, str]: - return "cut_var", "1" + def visit_Cut(self, node: Cut) -> FunctionCall: + return FunctionCall( + assigned_variable="cut_var", function="1", nodetype=NodeTypes.CUT_OPERATOR + ) class CParserGenerator(ParserGenerator, GrammarVisitor): @@ -252,7 +352,6 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): mode += 1 modulename = self.grammar.metas.get("modulename", "parse") trailer = self.grammar.metas.get("trailer", EXTENSION_SUFFIX) - keyword_cache = self.callmakervisitor.keyword_cache if trailer: self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename)) @@ -448,13 +547,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): self._handle_default_rule_body(node, rhs, result_type) self.print("}") - def visit_NamedItem(self, node: NamedItem, names: List[str]) -> None: - name, call = self.callmakervisitor.visit(node) - if not name: - self.print(call) - else: - name = dedupe(name, names) - self.print(f"({name} = {call})") + def visit_NamedItem(self, node: NamedItem) -> None: + call = self.callmakervisitor.visit(node) + if call.assigned_variable: + call.assigned_variable = self.dedupe(call.assigned_variable) + self.print(call) def visit_Rhs( self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str] @@ -464,7 +561,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): for alt in node.alts: self.visit(alt, is_loop=is_loop, is_gather=is_gather, rulename=rulename) - def join_conditions(self, keyword: str, node: Any, names: List[str]) -> None: + def join_conditions(self, keyword: str, node: Any) -> None: self.print(f"{keyword} (") with self.indent(): first = True @@ -473,7 +570,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): first = False else: self.print("&&") - self.visit(item, names=names) + self.visit(item) self.print(")") def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None: @@ -492,29 +589,34 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): f'fprintf(stderr, "Hit with action [%d-%d]: %s\\n", mark, p->mark, "{node}");' ) - def emit_default_action(self, is_gather: bool, names: List[str], node: Alt) -> None: - if len(names) > 1: + def emit_default_action(self, is_gather: bool, node: Alt) -> None: + if len(self.local_variable_names) > 1: if is_gather: - assert len(names) == 2 - self.print(f"res = _PyPegen_seq_insert_in_front(p, {names[0]}, {names[1]});") + assert len(self.local_variable_names) == 2 + self.print( + f"res = _PyPegen_seq_insert_in_front(p, " + f"{self.local_variable_names[0]}, {self.local_variable_names[1]});" + ) else: if self.debug: self.print( f'fprintf(stderr, "Hit without action [%d:%d]: %s\\n", mark, p->mark, "{node}");' ) - self.print(f"res = _PyPegen_dummy_name(p, {', '.join(names)});") + self.print( + f"res = _PyPegen_dummy_name(p, {', '.join(self.local_variable_names)});" + ) else: if self.debug: self.print( f'fprintf(stderr, "Hit with default action [%d:%d]: %s\\n", mark, p->mark, "{node}");' ) - self.print(f"res = {names[0]};") + self.print(f"res = {self.local_variable_names[0]};") def emit_dummy_action(self) -> None: self.print(f"res = _PyPegen_dummy_name(p);") - def handle_alt_normal(self, node: Alt, is_gather: bool, names: List[str]) -> None: - self.join_conditions(keyword="if", node=node, names=names) + def handle_alt_normal(self, node: Alt, is_gather: bool) -> None: + self.join_conditions(keyword="if", node=node) self.print("{") # We have parsed successfully all the conditions for the option. with self.indent(): @@ -526,17 +628,15 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): elif node.action: self.emit_action(node) else: - self.emit_default_action(is_gather, names, node) + self.emit_default_action(is_gather, node) # As the current option has parsed correctly, do not continue with the rest. self.print(f"goto done;") self.print("}") - def handle_alt_loop( - self, node: Alt, is_gather: bool, rulename: Optional[str], names: List[str] - ) -> None: + def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None: # Condition of the main body of the alternative - self.join_conditions(keyword="while", node=node, names=names) + self.join_conditions(keyword="while", node=node) self.print("{") # We have parsed successfully one item! with self.indent(): @@ -548,7 +648,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): elif node.action: self.emit_action(node, cleanup_code="PyMem_Free(children);") else: - self.emit_default_action(is_gather, names, node) + self.emit_default_action(is_gather, node) # Add the result of rule to the temporary buffer of children. This buffer # will populate later an asdl_seq with all elements to return. @@ -580,47 +680,45 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): if v == "opt_var": self.print("UNUSED(opt_var); // Silence compiler warnings") - names: List[str] = [] - if is_loop: - self.handle_alt_loop(node, is_gather, rulename, names) - else: - self.handle_alt_normal(node, is_gather, names) + with self.local_variable_context(): + if is_loop: + self.handle_alt_loop(node, is_gather, rulename) + else: + self.handle_alt_normal(node, is_gather) self.print("p->mark = mark;") - if "cut_var" in names: + if "cut_var" in vars: self.print("if (cut_var) return NULL;") self.print("}") - def collect_vars(self, node: Alt) -> Dict[str, Optional[str]]: - names: List[str] = [] + def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]: types = {} - for item in node.items: - name, type = self.add_var(item, names) - types[name] = type + with self.local_variable_context(): + for item in node.items: + name, type = self.add_var(item) + types[name] = type return types - def add_var(self, node: NamedItem, names: List[str]) -> Tuple[str, Optional[str]]: - name: str - call: str - name, call = self.callmakervisitor.visit(node.item) - type = None - if not name: - return name, type - if name.startswith("cut"): - return name, "int" - if name.endswith("_var"): - rulename = name[:-4] - rule = self.rules.get(rulename) - if rule is not None: - if rule.is_loop() or rule.is_gather(): - type = "asdl_seq *" - else: - type = rule.type - elif name.startswith("_loop") or name.startswith("_gather"): + def add_var(self, node: NamedItem) -> Tuple[Optional[str], Optional[str]]: + call = self.callmakervisitor.visit(node.item) + if not call.assigned_variable: + return None, None + if call.nodetype == NodeTypes.CUT_OPERATOR: + return call.assigned_variable, "int" + + name = call.assigned_variable + rulename = call.metadata.get("rulename") + + type: Optional[str] = None + + assert self.all_rules is not None + if rulename and rulename in self.all_rules: + rule = self.all_rules.get(rulename) + if rule.is_loop() or rule.is_gather(): type = "asdl_seq *" - elif name in ("name_var", "string_var", "number_var"): - type = "expr_ty" - if node.name: - name = node.name - name = dedupe(name, names) - return name, type + else: + type = rule.type + elif call.nodetype in BASE_NODETYPES.values(): + type = "expr_ty" + + return self.dedupe(node.name if node.name else call.assigned_variable), type |