summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Lib/test/test_peg_generator/test_c_parser.py30
-rw-r--r--Parser/pegen/pegen.c24
-rw-r--r--Parser/pegen/pegen.h1
-rw-r--r--Tools/peg_generator/pegen/c_generator.py24
4 files changed, 75 insertions, 4 deletions
diff --git a/Lib/test/test_peg_generator/test_c_parser.py b/Lib/test/test_peg_generator/test_c_parser.py
index f66b92d..72383d5 100644
--- a/Lib/test/test_peg_generator/test_c_parser.py
+++ b/Lib/test/test_peg_generator/test_c_parser.py
@@ -402,3 +402,33 @@ class TestCParser(TempdirManager, unittest.TestCase):
parse.parse_string("a", mode=0)
"""
self.run_test(grammar_source, test_source)
+
+ def test_no_soft_keywords(self) -> None:
+ grammar_source = """
+ start: expr+ NEWLINE? ENDMARKER
+ expr: 'foo'
+ """
+ grammar = parse_string(grammar_source, GrammarParser)
+ parser_source = generate_c_parser_source(grammar)
+ assert "expect_soft_keyword" not in parser_source
+
+ def test_soft_keywords(self) -> None:
+ grammar_source = """
+ start: expr+ NEWLINE? ENDMARKER
+ expr: "foo"
+ """
+ grammar = parse_string(grammar_source, GrammarParser)
+ parser_source = generate_c_parser_source(grammar)
+ assert "expect_soft_keyword" in parser_source
+
+ def test_soft_keywords_parse(self) -> None:
+ grammar_source = """
+ start: "if" expr '+' expr NEWLINE
+ expr: NAME
+ """
+ test_source = """
+ valid_cases = ["if if + if"]
+ invalid_cases = ["if if"]
+ self.check_input_strings_for_grammar(valid_cases, invalid_cases)
+ """
+ self.run_test(grammar_source, test_source)
diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c
index cd87a9f..ee30c2c 100644
--- a/Parser/pegen/pegen.c
+++ b/Parser/pegen/pegen.c
@@ -753,6 +753,30 @@ _PyPegen_expect_token(Parser *p, int type)
return t;
}
+// Try to match the token at p->mark against the soft keyword `keyword`.
+//
+// Returns the result of _PyPegen_name_token(p) when that token has type NAME
+// and its bytes compare equal to `keyword`. Returns NULL otherwise.
+// p->error_indicator is set only when fetching the token or reading its bytes
+// failed, so callers can tell a plain mismatch from a hard error.
+expr_ty
+_PyPegen_expect_soft_keyword(Parser *p, const char *keyword)
+{
+ // Nothing buffered at the current position yet: read one more token.
+ if (p->mark == p->fill) {
+ if (_PyPegen_fill_token(p) < 0) {
+ p->error_indicator = 1;
+ return NULL;
+ }
+ }
+ Token *t = p->tokens[p->mark];
+ if (t->type != NAME) {
+ return NULL;
+ }
+ // The buffer is only read here, so keep it const.
+ const char *s = PyBytes_AsString(t->bytes);
+ if (!s) {
+ // PyBytes_AsString() failed and raised an exception; flag it so the
+ // NULL return is not mistaken for a simple non-match.
+ p->error_indicator = 1;
+ return NULL;
+ }
+ if (strcmp(s, keyword) != 0) {
+ return NULL;
+ }
+ return _PyPegen_name_token(p);
+}
+
Token *
_PyPegen_get_last_nonnwhitespace_token(Parser *p)
{
diff --git a/Parser/pegen/pegen.h b/Parser/pegen/pegen.h
index bd3056e..9507d99 100644
--- a/Parser/pegen/pegen.h
+++ b/Parser/pegen/pegen.h
@@ -122,6 +122,7 @@ int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
Token *_PyPegen_expect_token(Parser *p, int type);
+// Match the current NAME token against `keyword`; returns NULL when it does not match.
+expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
int _PyPegen_fill_token(Parser *p);
expr_ty _PyPegen_name_token(Parser *p);
diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py
index 8bc2391..885ff05 100644
--- a/Tools/peg_generator/pegen/c_generator.py
+++ b/Tools/peg_generator/pegen/c_generator.py
@@ -117,6 +117,16 @@ class CCallMakerVisitor(GrammarVisitor):
comment=f"token='{keyword}'",
)
+ def soft_keyword_helper(self, value: str) -> FunctionCall:
+ return FunctionCall(
+ assigned_variable="_keyword",
+ function="_PyPegen_expect_soft_keyword",
+ arguments=["p", value],
+ return_type="expr_ty",
+ nodetype=NodeTypes.NAME_TOKEN,
+ comment=f"soft_keyword='{value}'",
+ )
+
def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
name = node.value
if name in self.non_exact_tokens:
@@ -154,7 +164,10 @@ class CCallMakerVisitor(GrammarVisitor):
def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall:
val = ast.literal_eval(node.value)
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
- return self.keyword_helper(val)
+ if node.value.endswith("'"):
+ return self.keyword_helper(val)
+ else:
+ return self.soft_keyword_helper(node.value)
else:
assert val in self.exact_tokens, f"{node.value} is not a known literal"
type = self.exact_tokens[val]
@@ -656,8 +669,9 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print("{")
# We have parsed successfully all the conditions for the option.
with self.indent():
+ node_str = str(node).replace('"', '\\"')
self.print(
- f'D(fprintf(stderr, "%*c+ {rulename}[%d-%d]: %s succeeded!\\n", p->level, \' \', _mark, p->mark, "{node}"));'
+ f'D(fprintf(stderr, "%*c+ {rulename}[%d-%d]: %s succeeded!\\n", p->level, \' \', _mark, p->mark, "{node_str}"));'
)
# Prepare to emit the rule action and do so
if node.action and "EXTRA" in node.action:
@@ -710,8 +724,9 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print(f"{{ // {node}")
with self.indent():
self._check_for_errors()
+ node_str = str(node).replace('"', '\\"')
self.print(
- f'D(fprintf(stderr, "%*c> {rulename}[%d-%d]: %s\\n", p->level, \' \', _mark, p->mark, "{node}"));'
+ f'D(fprintf(stderr, "%*c> {rulename}[%d-%d]: %s\\n", p->level, \' \', _mark, p->mark, "{node_str}"));'
)
# Prepare variable declarations for the alternative
vars = self.collect_vars(node)
@@ -733,9 +748,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.handle_alt_normal(node, is_gather, rulename)
self.print("p->mark = _mark;")
+ node_str = str(node).replace('"', '\\"')
self.print(
f"D(fprintf(stderr, \"%*c%s {rulename}[%d-%d]: %s failed!\\n\", p->level, ' ',\n"
- f' p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "{node}"));'
+ f' p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "{node_str}"));'
)
if "_cut_var" in vars:
self.print("if (_cut_var) {")