summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Pablo Galindo <Pablogsal@gmail.com> 2020-05-26 23:15:52 (GMT)
committer: GitHub <noreply@github.com> 2020-05-26 23:15:52 (GMT)
commit: 404b23b85b17c84e022779f31fc89cb0ed0d37e8 (patch)
tree: fea9aca0072a7f5f06a5af49b36b434185d7e76c
parent: 21fda91f8da96406e6a912f7c312424209c19bef (diff)
download: cpython-404b23b85b17c84e022779f31fc89cb0ed0d37e8.zip
cpython-404b23b85b17c84e022779f31fc89cb0ed0d37e8.tar.gz
cpython-404b23b85b17c84e022779f31fc89cb0ed0d37e8.tar.bz2
Fix lookahead of soft keywords in the PEG parser (GH-20436)
Automerge-Triggered-By: @gvanrossum
-rw-r--r-- Lib/test/test_peg_generator/test_c_parser.py 12
-rw-r--r-- Parser/pegen/pegen.c 9
-rw-r--r-- Parser/pegen/pegen.h 1
-rw-r--r-- Tools/peg_generator/pegen/c_generator.py 11
4 files changed, 31 insertions, 2 deletions
diff --git a/Lib/test/test_peg_generator/test_c_parser.py b/Lib/test/test_peg_generator/test_c_parser.py
index 72383d5..a5d8850 100644
--- a/Lib/test/test_peg_generator/test_c_parser.py
+++ b/Lib/test/test_peg_generator/test_c_parser.py
@@ -432,3 +432,15 @@ class TestCParser(TempdirManager, unittest.TestCase):
self.check_input_strings_for_grammar(valid_cases, invalid_cases)
"""
self.run_test(grammar_source, test_source)
+
+ def test_soft_keywords_lookahead(self) -> None:
+ grammar_source = """
+ start: &"if" "if" expr '+' expr NEWLINE
+ expr: NAME
+ """
+ test_source = """
+ valid_cases = ["if if + if"]
+ invalid_cases = ["if if"]
+ self.check_input_strings_for_grammar(valid_cases, invalid_cases)
+ """
+ self.run_test(grammar_source, test_source)
diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c
index ee30c2c..a0285bc 100644
--- a/Parser/pegen/pegen.c
+++ b/Parser/pegen/pegen.c
@@ -719,6 +719,15 @@ _PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
}
int
+_PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char*), Parser *p, const char* arg)
+{
+ int mark = p->mark;
+ void *res = func(p, arg);
+ p->mark = mark;
+ return (res != NULL) == positive;
+}
+
+int
_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
{
int mark = p->mark;
diff --git a/Parser/pegen/pegen.h b/Parser/pegen/pegen.h
index 9507d99..64cf0ec 100644
--- a/Parser/pegen/pegen.h
+++ b/Parser/pegen/pegen.h
@@ -119,6 +119,7 @@ int _PyPegen_is_memoized(Parser *p, int type, void *pres);
int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
+int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*);
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
Token *_PyPegen_expect_token(Parser *p, int type);
diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py
index 885ff05..ce1d6bb 100644
--- a/Tools/peg_generator/pegen/c_generator.py
+++ b/Tools/peg_generator/pegen/c_generator.py
@@ -58,7 +58,8 @@ class NodeTypes(Enum):
STRING_TOKEN = 2
GENERIC_TOKEN = 3
KEYWORD = 4
- CUT_OPERATOR = 5
+ SOFT_KEYWORD = 5
+ CUT_OPERATOR = 6
BASE_NODETYPES = {
@@ -123,7 +124,7 @@ class CCallMakerVisitor(GrammarVisitor):
function="_PyPegen_expect_soft_keyword",
arguments=["p", value],
return_type="expr_ty",
- nodetype=NodeTypes.NAME_TOKEN,
+ nodetype=NodeTypes.SOFT_KEYWORD,
comment=f"soft_keyword='{value}'",
)
@@ -217,6 +218,12 @@ class CCallMakerVisitor(GrammarVisitor):
arguments=[positive, call.function, *call.arguments],
return_type="int",
)
+ elif call.nodetype == NodeTypes.SOFT_KEYWORD:
+ return FunctionCall(
+ function=f"_PyPegen_lookahead_with_string",
+ arguments=[positive, call.function, *call.arguments],
+ return_type="int",
+ )
elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}:
return FunctionCall(
function=f"_PyPegen_lookahead_with_int",