summaryrefslogtreecommitdiffstats
path: root/Tools/peg_generator/pegen/c_generator.py
diff options
context:
space:
mode:
authorPablo Galindo <Pablogsal@gmail.com>2020-04-29 09:42:21 (GMT)
committerGitHub <noreply@github.com>2020-04-29 09:42:21 (GMT)
commit4db245ee9ddbe6c53d375de59a35ff59dea2a8e0 (patch)
tree2a70d3590bb1c1f68ddfe681328061a3aa48c69f /Tools/peg_generator/pegen/c_generator.py
parent9b64ef3ac7b434065dbff0048b9103999e4b491a (diff)
downloadcpython-4db245ee9ddbe6c53d375de59a35ff59dea2a8e0.zip
cpython-4db245ee9ddbe6c53d375de59a35ff59dea2a8e0.tar.gz
cpython-4db245ee9ddbe6c53d375de59a35ff59dea2a8e0.tar.bz2
bpo-40334: refactor and cleanup for the PEG generators (GH-19775)
Diffstat (limited to 'Tools/peg_generator/pegen/c_generator.py')
-rw-r--r--Tools/peg_generator/pegen/c_generator.py318
1 files changed, 208 insertions, 110 deletions
diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py
index a01c309..a59da2f 100644
--- a/Tools/peg_generator/pegen/c_generator.py
+++ b/Tools/peg_generator/pegen/c_generator.py
@@ -1,33 +1,36 @@
import ast
+from dataclasses import dataclass, field
import re
-from typing import Any, cast, Dict, IO, Optional, List, Text, Tuple, Set
+from typing import IO, Any, Dict, List, Optional, Set, Text, Tuple
+from enum import Enum
+from pegen import grammar
from pegen.grammar import (
+ Alt,
Cut,
+ Gather,
GrammarVisitor,
- Rhs,
- Alt,
+ Group,
+ Lookahead,
NamedItem,
NameLeaf,
- StringLeaf,
- Lookahead,
- PositiveLookahead,
NegativeLookahead,
Opt,
+ PositiveLookahead,
Repeat0,
Repeat1,
- Gather,
- Group,
+ Rhs,
Rule,
+ StringLeaf,
)
-from pegen import grammar
-from pegen.parser_generator import dedupe, ParserGenerator
+from pegen.parser_generator import ParserGenerator
EXTENSION_PREFIX = """\
#include "pegen.h"
"""
+
EXTENSION_SUFFIX = """
void *
_PyPegen_parse(Parser *p)
@@ -41,6 +44,43 @@ _PyPegen_parse(Parser *p)
"""
+class NodeTypes(Enum):
+ NAME_TOKEN = 0
+ NUMBER_TOKEN = 1
+ STRING_TOKEN = 2
+ GENERIC_TOKEN = 3
+ KEYWORD = 4
+ CUT_OPERATOR = 5
+
+
+BASE_NODETYPES = {
+ "NAME": NodeTypes.NAME_TOKEN,
+ "NUMBER": NodeTypes.NUMBER_TOKEN,
+ "STRING": NodeTypes.STRING_TOKEN,
+}
+
+
+@dataclass
+class FunctionCall:
+ function: str
+ arguments: Optional[List[Any]] = None
+ assigned_variable: Optional[str] = None
+ nodetype: Optional[NodeTypes] = None
+ force_true: bool = False
+ metadata: Dict[str, Any] = field(default_factory=dict)
+
+ def __str__(self) -> str:
+ parts = []
+ parts.append(self.function)
+ if self.arguments:
+ parts.append(f"({', '.join(map(str, self.arguments))})")
+ if self.force_true:
+ parts.append(", 1")
+ if self.assigned_variable:
+ parts = ["(", self.assigned_variable, " = ", *parts, ")"]
+ return "".join(parts)
+
+
class CCallMakerVisitor(GrammarVisitor):
def __init__(
self,
@@ -54,28 +94,57 @@ class CCallMakerVisitor(GrammarVisitor):
self.cache: Dict[Any, Any] = {}
self.keyword_cache: Dict[str, int] = {}
- def keyword_helper(self, keyword: str) -> Tuple[str, str]:
+ def keyword_helper(self, keyword: str) -> FunctionCall:
if keyword not in self.keyword_cache:
self.keyword_cache[keyword] = self.gen.keyword_type()
- return "keyword", f"_PyPegen_expect_token(p, {self.keyword_cache[keyword]})"
+ return FunctionCall(
+ assigned_variable="keyword",
+ function="_PyPegen_expect_token",
+ arguments=["p", self.keyword_cache[keyword]],
+ nodetype=NodeTypes.KEYWORD,
+ )
- def visit_NameLeaf(self, node: NameLeaf) -> Tuple[str, str]:
+ def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
name = node.value
if name in self.non_exact_tokens:
- name = name.lower()
- return f"{name}_var", f"_PyPegen_{name}_token(p)"
- return f"{name}_var", f"{name}_rule(p)"
+ if name in BASE_NODETYPES:
+ return FunctionCall(
+ assigned_variable=f"{name.lower()}_var",
+ function=f"_PyPegen_{name.lower()}_token",
+ arguments=["p"],
+ nodetype=BASE_NODETYPES[name],
+ metadata={"rulename": name.lower()},
+ )
+ return FunctionCall(
+ assigned_variable=f"{name.lower()}_var",
+ function=f"_PyPegen_expect_token",
+ arguments=["p", name],
+ nodetype=NodeTypes.GENERIC_TOKEN,
+ metadata={"rulename": name.lower()},
+ )
+
+ return FunctionCall(
+ assigned_variable=f"{name}_var",
+ function=f"{name}_rule",
+ arguments=["p"],
+ metadata={"rulename": name.lower()},
+ )
- def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
+ def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall:
val = ast.literal_eval(node.value)
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
return self.keyword_helper(val)
else:
assert val in self.exact_tokens, f"{node.value} is not a known literal"
type = self.exact_tokens[val]
- return "literal", f"_PyPegen_expect_token(p, {type})"
+ return FunctionCall(
+ assigned_variable="literal",
+ function=f"_PyPegen_expect_token",
+ arguments=["p", type],
+ nodetype=NodeTypes.GENERIC_TOKEN,
+ )
- def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
+ def visit_Rhs(self, node: Rhs) -> FunctionCall:
def can_we_inline(node: Rhs) -> int:
if len(node.alts) != 1 or len(node.alts[0].items) != 1:
return False
@@ -90,65 +159,96 @@ class CCallMakerVisitor(GrammarVisitor):
self.cache[node] = self.visit(node.alts[0].items[0])
else:
name = self.gen.name_node(node)
- self.cache[node] = f"{name}_var", f"{name}_rule(p)"
+ self.cache[node] = FunctionCall(
+ assigned_variable=f"{name}_var",
+ function=f"{name}_rule",
+ arguments=["p"],
+ metadata={"rulename": name},
+ )
return self.cache[node]
- def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]:
- name, call = self.visit(node.item)
+ def visit_NamedItem(self, node: NamedItem) -> FunctionCall:
+ call = self.visit(node.item)
if node.name:
- name = node.name
- return name, call
-
- def lookahead_call_helper(self, node: Lookahead, positive: int) -> Tuple[None, str]:
- name, call = self.visit(node.node)
- func, args = call.split("(", 1)
- assert args[-1] == ")"
- args = args[:-1]
- if "name_token" in call:
- return None, f"_PyPegen_lookahead_with_name({positive}, {func}, {args})"
- elif not args.startswith("p,"):
- return None, f"_PyPegen_lookahead({positive}, {func}, {args})"
- elif args[2:].strip().isalnum():
- return None, f"_PyPegen_lookahead_with_int({positive}, {func}, {args})"
+ call.assigned_variable = node.name
+ return call
+
+ def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall:
+ call = self.visit(node.node)
+ if call.nodetype == NodeTypes.NAME_TOKEN:
+ return FunctionCall(
+ function=f"_PyPegen_lookahead_with_name",
+ arguments=[positive, call.function, *call.arguments],
+ )
+ elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}:
+ return FunctionCall(
+ function=f"_PyPegen_lookahead_with_int",
+ arguments=[positive, call.function, *call.arguments],
+ )
else:
- return None, f"_PyPegen_lookahead_with_string({positive}, {func}, {args})"
+ return FunctionCall(
+ function=f"_PyPegen_lookahead",
+ arguments=[positive, call.function, *call.arguments],
+ )
- def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]:
+ def visit_PositiveLookahead(self, node: PositiveLookahead) -> FunctionCall:
return self.lookahead_call_helper(node, 1)
- def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]:
+ def visit_NegativeLookahead(self, node: NegativeLookahead) -> FunctionCall:
return self.lookahead_call_helper(node, 0)
- def visit_Opt(self, node: Opt) -> Tuple[str, str]:
- name, call = self.visit(node.node)
- return "opt_var", f"{call}, 1" # Using comma operator!
+ def visit_Opt(self, node: Opt) -> FunctionCall:
+ call = self.visit(node.node)
+ return FunctionCall(
+ assigned_variable="opt_var",
+ function=call.function,
+ arguments=call.arguments,
+ force_true=True,
+ )
- def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
+ def visit_Repeat0(self, node: Repeat0) -> FunctionCall:
if node in self.cache:
return self.cache[node]
name = self.gen.name_loop(node.node, False)
- self.cache[node] = f"{name}_var", f"{name}_rule(p)"
+ self.cache[node] = FunctionCall(
+ assigned_variable=f"{name}_var",
+ function=f"{name}_rule",
+ arguments=["p"],
+ metadata={"rulename": name},
+ )
return self.cache[node]
- def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
+ def visit_Repeat1(self, node: Repeat1) -> FunctionCall:
if node in self.cache:
return self.cache[node]
name = self.gen.name_loop(node.node, True)
- self.cache[node] = f"{name}_var", f"{name}_rule(p)"
+ self.cache[node] = FunctionCall(
+ assigned_variable=f"{name}_var",
+ function=f"{name}_rule",
+ arguments=["p"],
+ metadata={"rulename": name},
+ )
return self.cache[node]
- def visit_Gather(self, node: Gather) -> Tuple[str, str]:
+ def visit_Gather(self, node: Gather) -> FunctionCall:
if node in self.cache:
return self.cache[node]
name = self.gen.name_gather(node)
- self.cache[node] = f"{name}_var", f"{name}_rule(p)"
+ self.cache[node] = FunctionCall(
+ assigned_variable=f"{name}_var",
+ function=f"{name}_rule",
+ arguments=["p"],
+ metadata={"rulename": name},
+ )
return self.cache[node]
- def visit_Group(self, node: Group) -> Tuple[Optional[str], str]:
+ def visit_Group(self, node: Group) -> FunctionCall:
return self.visit(node.rhs)
- def visit_Cut(self, node: Cut) -> Tuple[str, str]:
- return "cut_var", "1"
+ def visit_Cut(self, node: Cut) -> FunctionCall:
+ return FunctionCall(
+ assigned_variable="cut_var", function="1", nodetype=NodeTypes.CUT_OPERATOR
+ )
class CParserGenerator(ParserGenerator, GrammarVisitor):
@@ -252,7 +352,6 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
mode += 1
modulename = self.grammar.metas.get("modulename", "parse")
trailer = self.grammar.metas.get("trailer", EXTENSION_SUFFIX)
- keyword_cache = self.callmakervisitor.keyword_cache
if trailer:
self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename))
@@ -448,13 +547,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self._handle_default_rule_body(node, rhs, result_type)
self.print("}")
- def visit_NamedItem(self, node: NamedItem, names: List[str]) -> None:
- name, call = self.callmakervisitor.visit(node)
- if not name:
- self.print(call)
- else:
- name = dedupe(name, names)
- self.print(f"({name} = {call})")
+ def visit_NamedItem(self, node: NamedItem) -> None:
+ call = self.callmakervisitor.visit(node)
+ if call.assigned_variable:
+ call.assigned_variable = self.dedupe(call.assigned_variable)
+ self.print(call)
def visit_Rhs(
self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str]
@@ -464,7 +561,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
for alt in node.alts:
self.visit(alt, is_loop=is_loop, is_gather=is_gather, rulename=rulename)
- def join_conditions(self, keyword: str, node: Any, names: List[str]) -> None:
+ def join_conditions(self, keyword: str, node: Any) -> None:
self.print(f"{keyword} (")
with self.indent():
first = True
@@ -473,7 +570,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
first = False
else:
self.print("&&")
- self.visit(item, names=names)
+ self.visit(item)
self.print(")")
def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
@@ -492,29 +589,34 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
f'fprintf(stderr, "Hit with action [%d-%d]: %s\\n", mark, p->mark, "{node}");'
)
- def emit_default_action(self, is_gather: bool, names: List[str], node: Alt) -> None:
- if len(names) > 1:
+ def emit_default_action(self, is_gather: bool, node: Alt) -> None:
+ if len(self.local_variable_names) > 1:
if is_gather:
- assert len(names) == 2
- self.print(f"res = _PyPegen_seq_insert_in_front(p, {names[0]}, {names[1]});")
+ assert len(self.local_variable_names) == 2
+ self.print(
+ f"res = _PyPegen_seq_insert_in_front(p, "
+ f"{self.local_variable_names[0]}, {self.local_variable_names[1]});"
+ )
else:
if self.debug:
self.print(
f'fprintf(stderr, "Hit without action [%d:%d]: %s\\n", mark, p->mark, "{node}");'
)
- self.print(f"res = _PyPegen_dummy_name(p, {', '.join(names)});")
+ self.print(
+ f"res = _PyPegen_dummy_name(p, {', '.join(self.local_variable_names)});"
+ )
else:
if self.debug:
self.print(
f'fprintf(stderr, "Hit with default action [%d:%d]: %s\\n", mark, p->mark, "{node}");'
)
- self.print(f"res = {names[0]};")
+ self.print(f"res = {self.local_variable_names[0]};")
def emit_dummy_action(self) -> None:
self.print(f"res = _PyPegen_dummy_name(p);")
- def handle_alt_normal(self, node: Alt, is_gather: bool, names: List[str]) -> None:
- self.join_conditions(keyword="if", node=node, names=names)
+ def handle_alt_normal(self, node: Alt, is_gather: bool) -> None:
+ self.join_conditions(keyword="if", node=node)
self.print("{")
# We have parsed successfully all the conditions for the option.
with self.indent():
@@ -526,17 +628,15 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
elif node.action:
self.emit_action(node)
else:
- self.emit_default_action(is_gather, names, node)
+ self.emit_default_action(is_gather, node)
# As the current option has parsed correctly, do not continue with the rest.
self.print(f"goto done;")
self.print("}")
- def handle_alt_loop(
- self, node: Alt, is_gather: bool, rulename: Optional[str], names: List[str]
- ) -> None:
+ def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
# Condition of the main body of the alternative
- self.join_conditions(keyword="while", node=node, names=names)
+ self.join_conditions(keyword="while", node=node)
self.print("{")
# We have parsed successfully one item!
with self.indent():
@@ -548,7 +648,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
elif node.action:
self.emit_action(node, cleanup_code="PyMem_Free(children);")
else:
- self.emit_default_action(is_gather, names, node)
+ self.emit_default_action(is_gather, node)
# Add the result of rule to the temporary buffer of children. This buffer
# will populate later an asdl_seq with all elements to return.
@@ -580,47 +680,45 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
if v == "opt_var":
self.print("UNUSED(opt_var); // Silence compiler warnings")
- names: List[str] = []
- if is_loop:
- self.handle_alt_loop(node, is_gather, rulename, names)
- else:
- self.handle_alt_normal(node, is_gather, names)
+ with self.local_variable_context():
+ if is_loop:
+ self.handle_alt_loop(node, is_gather, rulename)
+ else:
+ self.handle_alt_normal(node, is_gather)
self.print("p->mark = mark;")
- if "cut_var" in names:
+ if "cut_var" in vars:
self.print("if (cut_var) return NULL;")
self.print("}")
- def collect_vars(self, node: Alt) -> Dict[str, Optional[str]]:
- names: List[str] = []
+ def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]:
types = {}
- for item in node.items:
- name, type = self.add_var(item, names)
- types[name] = type
+ with self.local_variable_context():
+ for item in node.items:
+ name, type = self.add_var(item)
+ types[name] = type
return types
- def add_var(self, node: NamedItem, names: List[str]) -> Tuple[str, Optional[str]]:
- name: str
- call: str
- name, call = self.callmakervisitor.visit(node.item)
- type = None
- if not name:
- return name, type
- if name.startswith("cut"):
- return name, "int"
- if name.endswith("_var"):
- rulename = name[:-4]
- rule = self.rules.get(rulename)
- if rule is not None:
- if rule.is_loop() or rule.is_gather():
- type = "asdl_seq *"
- else:
- type = rule.type
- elif name.startswith("_loop") or name.startswith("_gather"):
+ def add_var(self, node: NamedItem) -> Tuple[Optional[str], Optional[str]]:
+ call = self.callmakervisitor.visit(node.item)
+ if not call.assigned_variable:
+ return None, None
+ if call.nodetype == NodeTypes.CUT_OPERATOR:
+ return call.assigned_variable, "int"
+
+ name = call.assigned_variable
+ rulename = call.metadata.get("rulename")
+
+ type: Optional[str] = None
+
+ assert self.all_rules is not None
+ if rulename and rulename in self.all_rules:
+ rule = self.all_rules.get(rulename)
+ if rule.is_loop() or rule.is_gather():
type = "asdl_seq *"
- elif name in ("name_var", "string_var", "number_var"):
- type = "expr_ty"
- if node.name:
- name = node.name
- name = dedupe(name, names)
- return name, type
+ else:
+ type = rule.type
+ elif call.nodetype in BASE_NODETYPES.values():
+ type = "expr_ty"
+
+ return self.dedupe(node.name if node.name else call.assigned_variable), type