summary | refs | log | tree | commit | diff | stats
path: root/Lib/test/test_peg_generator
diff options
context:
space:
mode:
author: Pablo Galindo Salgado <Pablogsal@gmail.com>, 2021-08-12 16:37:30 (GMT)
committer: GitHub <noreply@github.com>, 2021-08-12 16:37:30 (GMT)
commit: 953d27261e455066b17a106d9e07bab3bf12983b (patch)
tree: 98739da681414ff6117897a56eb90120d3dad069 /Lib/test/test_peg_generator
parent: 8e832fb2a2cb54d7262148b6ec15563dffb48d63 (diff)
download: cpython-953d27261e455066b17a106d9e07bab3bf12983b.zip
download: cpython-953d27261e455066b17a106d9e07bab3bf12983b.tar.gz
download: cpython-953d27261e455066b17a106d9e07bab3bf12983b.tar.bz2
Update pegen to use the latest upstream developments (GH-27586)
Diffstat (limited to 'Lib/test/test_peg_generator')
-rw-r--r-- Lib/test/test_peg_generator/test_c_parser.py | 29
-rw-r--r-- Lib/test/test_peg_generator/test_first_sets.py | 197
-rw-r--r-- Lib/test/test_peg_generator/test_grammar_validator.py | 4
-rw-r--r-- Lib/test/test_peg_generator/test_pegen.py | 638
4 files changed, 636 insertions, 232 deletions
diff --git a/Lib/test/test_peg_generator/test_c_parser.py b/Lib/test/test_peg_generator/test_c_parser.py
index 013b3af..b761bd4 100644
--- a/Lib/test/test_peg_generator/test_c_parser.py
+++ b/Lib/test/test_peg_generator/test_c_parser.py
@@ -11,8 +11,8 @@ from test import support
from test.support import os_helper
from test.support.script_helper import assert_python_ok
-_py_cflags_nodist = sysconfig.get_config_var('PY_CFLAGS_NODIST')
-_pgo_flag = sysconfig.get_config_var('PGO_PROF_USE_FLAG')
+_py_cflags_nodist = sysconfig.get_config_var("PY_CFLAGS_NODIST")
+_pgo_flag = sysconfig.get_config_var("PGO_PROF_USE_FLAG")
if _pgo_flag and _py_cflags_nodist and _pgo_flag in _py_cflags_nodist:
raise unittest.SkipTest("peg_generator test disabled under PGO build")
@@ -458,3 +458,28 @@ class TestCParser(unittest.TestCase):
self.check_input_strings_for_grammar(valid_cases, invalid_cases)
"""
self.run_test(grammar_source, test_source)
+
+ def test_forced(self) -> None:
+ grammar_source = """
+ start: NAME &&':' | NAME
+ """
+ test_source = """
+ self.assertEqual(parse.parse_string("number :", mode=0), None)
+ with self.assertRaises(SyntaxError) as e:
+ parse.parse_string("a", mode=0)
+ self.assertIn("expected ':'", str(e.exception))
+ """
+ self.run_test(grammar_source, test_source)
+
+ def test_forced_with_group(self) -> None:
+ grammar_source = """
+ start: NAME &&(':' | ';') | NAME
+ """
+ test_source = """
+ self.assertEqual(parse.parse_string("number :", mode=0), None)
+ self.assertEqual(parse.parse_string("number ;", mode=0), None)
+ with self.assertRaises(SyntaxError) as e:
+ parse.parse_string("a", mode=0)
+ self.assertIn("expected (':' | ';')", e.exception.args[0])
+ """
+ self.run_test(grammar_source, test_source)
diff --git a/Lib/test/test_peg_generator/test_first_sets.py b/Lib/test/test_peg_generator/test_first_sets.py
index 425ee23..d6f8322 100644
--- a/Lib/test/test_peg_generator/test_first_sets.py
+++ b/Lib/test/test_peg_generator/test_first_sets.py
@@ -3,8 +3,8 @@ import unittest
from test import test_tools
from typing import Dict, Set
-test_tools.skip_if_missing('peg_generator')
-with test_tools.imports_under_tool('peg_generator'):
+test_tools.skip_if_missing("peg_generator")
+with test_tools.imports_under_tool("peg_generator"):
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.testutil import parse_string
from pegen.first_sets import FirstSetCalculator
@@ -23,29 +23,38 @@ class TestFirstSets(unittest.TestCase):
A: 'a' | '-'
B: 'b' | '+'
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "A": {"'a'", "'-'"},
- "B": {"'+'", "'b'"},
- "expr": {"'+'", "'a'", "'b'", "'-'"},
- "start": {"'+'", "'a'", "'b'", "'-'"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "A": {"'a'", "'-'"},
+ "B": {"'+'", "'b'"},
+ "expr": {"'+'", "'a'", "'b'", "'-'"},
+ "start": {"'+'", "'a'", "'b'", "'-'"},
+ },
+ )
def test_optionals(self) -> None:
grammar = """
start: expr NEWLINE
expr: ['a'] ['b'] 'c'
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "expr": {"'c'", "'a'", "'b'"},
- "start": {"'c'", "'a'", "'b'"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "expr": {"'c'", "'a'", "'b'"},
+ "start": {"'c'", "'a'", "'b'"},
+ },
+ )
def test_repeat_with_separator(self) -> None:
grammar = """
start: ','.thing+ NEWLINE
thing: NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {"thing": {"NUMBER"}, "start": {"NUMBER"}},
+ )
def test_optional_operator(self) -> None:
grammar = """
@@ -53,11 +62,14 @@ class TestFirstSets(unittest.TestCase):
sum: (term)? 'b'
term: NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "term": {"NUMBER"},
- "sum": {"NUMBER", "'b'"},
- "start": {"'b'", "NUMBER"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "term": {"NUMBER"},
+ "sum": {"NUMBER", "'b'"},
+ "start": {"'b'", "NUMBER"},
+ },
+ )
def test_optional_literal(self) -> None:
grammar = """
@@ -65,60 +77,83 @@ class TestFirstSets(unittest.TestCase):
sum: '+' ? term
term: NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "term": {"NUMBER"},
- "sum": {"'+'", "NUMBER"},
- "start": {"'+'", "NUMBER"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "term": {"NUMBER"},
+ "sum": {"'+'", "NUMBER"},
+ "start": {"'+'", "NUMBER"},
+ },
+ )
def test_optional_after(self) -> None:
grammar = """
start: term NEWLINE
term: NUMBER ['+']
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"NUMBER"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {"term": {"NUMBER"}, "start": {"NUMBER"}},
+ )
def test_optional_before(self) -> None:
grammar = """
start: term NEWLINE
term: ['+'] NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}},
+ )
def test_repeat_0(self) -> None:
grammar = """
start: thing* "+" NEWLINE
thing: NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}},
+ )
def test_repeat_0_with_group(self) -> None:
grammar = """
start: ('+' '-')* term NEWLINE
term: NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}},
+ )
def test_repeat_1(self) -> None:
grammar = """
start: thing+ '-' NEWLINE
thing: NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {"thing": {"NUMBER"}, "start": {"NUMBER"}},
+ )
def test_repeat_1_with_group(self) -> None:
grammar = """
start: ('+' term)+ term NEWLINE
term: NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}}
+ )
def test_gather(self) -> None:
grammar = """
start: ','.thing+ NEWLINE
thing: NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {"thing": {"NUMBER"}, "start": {"NUMBER"}},
+ )
def test_positive_lookahead(self) -> None:
grammar = """
@@ -126,11 +161,14 @@ class TestFirstSets(unittest.TestCase):
expr: &'a' opt
opt: 'a' | 'b' | 'c'
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "expr": {"'a'"},
- "start": {"'a'"},
- "opt": {"'b'", "'c'", "'a'"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "expr": {"'a'"},
+ "start": {"'a'"},
+ "opt": {"'b'", "'c'", "'a'"},
+ },
+ )
def test_negative_lookahead(self) -> None:
grammar = """
@@ -138,11 +176,14 @@ class TestFirstSets(unittest.TestCase):
expr: !'a' opt
opt: 'a' | 'b' | 'c'
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "opt": {"'b'", "'a'", "'c'"},
- "expr": {"'b'", "'c'"},
- "start": {"'b'", "'c'"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "opt": {"'b'", "'a'", "'c'"},
+ "expr": {"'b'", "'c'"},
+ "start": {"'b'", "'c'"},
+ },
+ )
def test_left_recursion(self) -> None:
grammar = """
@@ -153,21 +194,27 @@ class TestFirstSets(unittest.TestCase):
bar: 'bar'
baz: 'baz'
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "expr": {"NUMBER", "'-'"},
- "term": {"NUMBER"},
- "start": {"NUMBER", "'-'"},
- "foo": {"'foo'"},
- "bar": {"'bar'"},
- "baz": {"'baz'"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "expr": {"NUMBER", "'-'"},
+ "term": {"NUMBER"},
+ "start": {"NUMBER", "'-'"},
+ "foo": {"'foo'"},
+ "bar": {"'bar'"},
+ "baz": {"'baz'"},
+ },
+ )
def test_advance_left_recursion(self) -> None:
grammar = """
start: NUMBER | sign start
sign: ['-']
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}},
+ )
def test_mutual_left_recursion(self) -> None:
grammar = """
@@ -175,11 +222,14 @@ class TestFirstSets(unittest.TestCase):
foo: bar 'A' | 'B'
bar: foo 'C' | 'D'
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "foo": {"'D'", "'B'"},
- "bar": {"'D'"},
- "start": {"'D'", "'B'"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "foo": {"'D'", "'B'"},
+ "bar": {"'D'"},
+ "start": {"'D'", "'B'"},
+ },
+ )
def test_nasty_left_recursion(self) -> None:
# TODO: Validate this
@@ -188,7 +238,10 @@ class TestFirstSets(unittest.TestCase):
target: maybe '+' | NAME
maybe: maybe '-' | target
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}},
+ )
def test_nullable_rule(self) -> None:
grammar = """
@@ -196,17 +249,22 @@ class TestFirstSets(unittest.TestCase):
sign: ['-']
thing: NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "sign": {"", "'-'"},
- "thing": {"NUMBER"},
- "start": {"NUMBER", "'-'"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "sign": {"", "'-'"},
+ "thing": {"NUMBER"},
+ "start": {"NUMBER", "'-'"},
+ },
+ )
def test_epsilon_production_in_start_rule(self) -> None:
grammar = """
start: ['-'] $
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}}
+ )
def test_multiple_nullable_rules(self) -> None:
grammar = """
@@ -216,10 +274,13 @@ class TestFirstSets(unittest.TestCase):
other: '*'
another: '/'
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "sign": {"", "'-'"},
- "thing": {"'+'", ""},
- "start": {"'+'", "'-'", "'*'"},
- "other": {"'*'"},
- "another": {"'/'"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "sign": {"", "'-'"},
+ "thing": {"'+'", ""},
+ "start": {"'+'", "'-'", "'*'"},
+ "other": {"'*'"},
+ "another": {"'/'"},
+ },
+ )
diff --git a/Lib/test/test_peg_generator/test_grammar_validator.py b/Lib/test/test_peg_generator/test_grammar_validator.py
index 2e72ff8..72c3d20 100644
--- a/Lib/test/test_peg_generator/test_grammar_validator.py
+++ b/Lib/test/test_peg_generator/test_grammar_validator.py
@@ -1,8 +1,8 @@
import unittest
from test import test_tools
-test_tools.skip_if_missing('peg_generator')
-with test_tools.imports_under_tool('peg_generator'):
+test_tools.skip_if_missing("peg_generator")
+with test_tools.imports_under_tool("peg_generator"):
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.validator import SubRuleValidator, ValidationError
from pegen.testutil import parse_string
diff --git a/Lib/test/test_peg_generator/test_pegen.py b/Lib/test/test_peg_generator/test_pegen.py
index bcfee3f..71b0fdc 100644
--- a/Lib/test/test_peg_generator/test_pegen.py
+++ b/Lib/test/test_peg_generator/test_pegen.py
@@ -1,3 +1,5 @@
+import ast
+import difflib
import io
import textwrap
import unittest
@@ -6,14 +8,10 @@ from test import test_tools
from typing import Dict, Any
from tokenize import TokenInfo, NAME, NEWLINE, NUMBER, OP
-test_tools.skip_if_missing('peg_generator')
-with test_tools.imports_under_tool('peg_generator'):
+test_tools.skip_if_missing("peg_generator")
+with test_tools.imports_under_tool("peg_generator"):
from pegen.grammar_parser import GeneratedParser as GrammarParser
- from pegen.testutil import (
- parse_string,
- generate_parser,
- make_parser
- )
+ from pegen.testutil import parse_string, generate_parser, make_parser
from pegen.grammar import GrammarVisitor, GrammarError, Grammar
from pegen.grammar_visualizer import ASTGrammarPrinter
from pegen.parser import Parser
@@ -38,7 +36,9 @@ class TestPegen(unittest.TestCase):
# Check the str() and repr() of a few rules; AST nodes don't support ==.
self.assertEqual(str(rules["start"]), "start: sum NEWLINE")
self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
- expected_repr = "Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
+ expected_repr = (
+ "Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
+ )
self.assertEqual(repr(rules["term"]), expected_repr)
def test_long_rule_str(self) -> None:
@@ -71,7 +71,7 @@ class TestPegen(unittest.TestCase):
self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
self.assertEqual(
repr(rules["term"]),
- "Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
+ "Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))",
)
def test_gather(self) -> None:
@@ -81,24 +81,31 @@ class TestPegen(unittest.TestCase):
"""
rules = parse_string(grammar, GrammarParser).rules
self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE")
- self.assertTrue(repr(rules["start"]).startswith(
- "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
- ))
+ self.assertTrue(
+ repr(rules["start"]).startswith(
+ "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
+ )
+ )
self.assertEqual(str(rules["thing"]), "thing: NUMBER")
parser_class = make_parser(grammar)
node = parse_string("42\n", parser_class)
- assert node == [
- [[TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n")]],
- TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
- ]
node = parse_string("1, 2\n", parser_class)
- assert node == [
+ self.assertEqual(
+ node,
[
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n")],
- [TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n")],
+ [
+ TokenInfo(
+ NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n"
+ ),
+ TokenInfo(
+ NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n"
+ ),
+ ],
+ TokenInfo(
+ NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n"
+ ),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n"),
- ]
+ )
def test_expr_grammar(self) -> None:
grammar = """
@@ -108,10 +115,13 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("42\n", parser_class)
- self.assertEqual(node, [
- [[TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n")]],
- TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
- ])
+ self.assertEqual(
+ node,
+ [
+ TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n"),
+ TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
+ ],
+ )
def test_optional_operator(self) -> None:
grammar = """
@@ -120,22 +130,39 @@ class TestPegen(unittest.TestCase):
term: NUMBER
"""
parser_class = make_parser(grammar)
- node = parse_string("1+2\n", parser_class)
- self.assertEqual(node, [
+ node = parse_string("1 + 2\n", parser_class)
+ self.assertEqual(
+ node,
[
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+2\n")],
[
- TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+2\n"),
- [TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1+2\n")],
+ TokenInfo(
+ NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"
+ ),
+ [
+ TokenInfo(
+ OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"
+ ),
+ TokenInfo(
+ NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"
+ ),
+ ],
],
+ TokenInfo(
+ NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"
+ ),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 3), end=(1, 4), line="1+2\n"),
- ])
+ )
node = parse_string("1\n", parser_class)
- self.assertEqual(node, [
- [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
- TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
- ])
+ self.assertEqual(
+ node,
+ [
+ [
+ TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
+ None,
+ ],
+ TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
+ ],
+ )
def test_optional_literal(self) -> None:
grammar = """
@@ -145,18 +172,29 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1+\n", parser_class)
- self.assertEqual(node, [
+ self.assertEqual(
+ node,
[
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n")],
- TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
+ [
+ TokenInfo(
+ NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n"
+ ),
+ TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
+ ],
+ TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
- ])
+ )
node = parse_string("1\n", parser_class)
- self.assertEqual(node, [
- [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
- TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
- ])
+ self.assertEqual(
+ node,
+ [
+ [
+ TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
+ None,
+ ],
+ TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
+ ],
+ )
def test_alt_optional_operator(self) -> None:
grammar = """
@@ -166,21 +204,38 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1 + 2\n", parser_class)
- self.assertEqual(node, [
+ self.assertEqual(
+ node,
[
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n")],
[
- TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"),
- [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n")],
+ TokenInfo(
+ NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"
+ ),
+ [
+ TokenInfo(
+ OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"
+ ),
+ TokenInfo(
+ NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"
+ ),
+ ],
],
+ TokenInfo(
+ NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"
+ ),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"),
- ])
+ )
node = parse_string("1\n", parser_class)
- self.assertEqual(node, [
- [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
- TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
- ])
+ self.assertEqual(
+ node,
+ [
+ [
+ TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
+ None,
+ ],
+ TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
+ ],
+ )
def test_repeat_0_simple(self) -> None:
grammar = """
@@ -189,20 +244,32 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1 2 3\n", parser_class)
- self.assertEqual(node, [
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")],
+ self.assertEqual(
+ node,
[
- [[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]],
- [[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]],
+ TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
+ [
+ TokenInfo(
+ NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"
+ ),
+ TokenInfo(
+ NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"
+ ),
+ ],
+ TokenInfo(
+ NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"
+ ),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"),
- ])
+ )
node = parse_string("1\n", parser_class)
- self.assertEqual(node, [
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")],
- [],
- TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
- ])
+ self.assertEqual(
+ node,
+ [
+ TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
+ [],
+ TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
+ ],
+ )
def test_repeat_0_complex(self) -> None:
grammar = """
@@ -211,24 +278,43 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1 + 2 + 3\n", parser_class)
- self.assertEqual(node, [
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")],
+ self.assertEqual(
+ node,
[
+ TokenInfo(
+ NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"
+ ),
[
[
- TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
- [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")],
- ]
- ],
- [
+ TokenInfo(
+ OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
+ ),
+ TokenInfo(
+ NUMBER,
+ string="2",
+ start=(1, 4),
+ end=(1, 5),
+ line="1 + 2 + 3\n",
+ ),
+ ],
[
- TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
- [TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")],
- ]
+ TokenInfo(
+ OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
+ ),
+ TokenInfo(
+ NUMBER,
+ string="3",
+ start=(1, 8),
+ end=(1, 9),
+ line="1 + 2 + 3\n",
+ ),
+ ],
],
+ TokenInfo(
+ NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
+ ),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
- ])
+ )
def test_repeat_1_simple(self) -> None:
grammar = """
@@ -237,14 +323,23 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1 2 3\n", parser_class)
- self.assertEqual(node, [
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")],
+ self.assertEqual(
+ node,
[
- [[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]],
- [[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]],
+ TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
+ [
+ TokenInfo(
+ NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"
+ ),
+ TokenInfo(
+ NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"
+ ),
+ ],
+ TokenInfo(
+ NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"
+ ),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"),
- ])
+ )
with self.assertRaises(SyntaxError):
parse_string("1\n", parser_class)
@@ -255,24 +350,43 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1 + 2 + 3\n", parser_class)
- self.assertEqual(node, [
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")],
+ self.assertEqual(
+ node,
[
+ TokenInfo(
+ NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"
+ ),
[
[
- TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
- [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")],
- ]
- ],
- [
+ TokenInfo(
+ OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
+ ),
+ TokenInfo(
+ NUMBER,
+ string="2",
+ start=(1, 4),
+ end=(1, 5),
+ line="1 + 2 + 3\n",
+ ),
+ ],
[
- TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
- [TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")],
- ]
+ TokenInfo(
+ OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
+ ),
+ TokenInfo(
+ NUMBER,
+ string="3",
+ start=(1, 8),
+ end=(1, 9),
+ line="1 + 2 + 3\n",
+ ),
+ ],
],
+ TokenInfo(
+ NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
+ ),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
- ])
+ )
with self.assertRaises(SyntaxError):
parse_string("1\n", parser_class)
@@ -283,14 +397,25 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1, 2, 3\n", parser_class)
- self.assertEqual(node, [
+ self.assertEqual(
+ node,
[
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n")],
- [TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n")],
- [TokenInfo(NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n")],
+ [
+ TokenInfo(
+ NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n"
+ ),
+ TokenInfo(
+ NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n"
+ ),
+ TokenInfo(
+ NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n"
+ ),
+ ],
+ TokenInfo(
+ NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"
+ ),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"),
- ])
+ )
def test_left_recursive(self) -> None:
grammar_source = """
@@ -311,18 +436,41 @@ class TestPegen(unittest.TestCase):
self.assertFalse(rules["bar"].left_recursive)
self.assertFalse(rules["baz"].left_recursive)
node = parse_string("1 + 2 + 3\n", parser_class)
- self.assertEqual(node, [
+ self.assertEqual(
+ node,
[
[
- [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")]],
- TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
- [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")],
+ [
+ TokenInfo(
+ NUMBER,
+ string="1",
+ start=(1, 0),
+ end=(1, 1),
+ line="1 + 2 + 3\n",
+ ),
+ TokenInfo(
+ OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
+ ),
+ TokenInfo(
+ NUMBER,
+ string="2",
+ start=(1, 4),
+ end=(1, 5),
+ line="1 + 2 + 3\n",
+ ),
+ ],
+ TokenInfo(
+ OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
+ ),
+ TokenInfo(
+ NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n"
+ ),
],
- TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
- [TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")],
+ TokenInfo(
+ NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
+ ),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
- ])
+ )
def test_python_expr(self) -> None:
grammar = """
@@ -392,31 +540,79 @@ class TestPegen(unittest.TestCase):
exec(out.getvalue(), ns)
parser_class: Type[Parser] = ns["GeneratedParser"]
node = parse_string("D A C A E", parser_class)
- self.assertEqual(node, [
+
+ self.assertEqual(
+ node,
[
[
[
- [TokenInfo(type=NAME, string="D", start=(1, 0), end=(1, 1), line="D A C A E")],
- TokenInfo(type=NAME, string="A", start=(1, 2), end=(1, 3), line="D A C A E"),
+ [
+ TokenInfo(
+ type=NAME,
+ string="D",
+ start=(1, 0),
+ end=(1, 1),
+ line="D A C A E",
+ ),
+ TokenInfo(
+ type=NAME,
+ string="A",
+ start=(1, 2),
+ end=(1, 3),
+ line="D A C A E",
+ ),
+ ],
+ TokenInfo(
+ type=NAME,
+ string="C",
+ start=(1, 4),
+ end=(1, 5),
+ line="D A C A E",
+ ),
],
- TokenInfo(type=NAME, string="C", start=(1, 4), end=(1, 5), line="D A C A E"),
+ TokenInfo(
+ type=NAME,
+ string="A",
+ start=(1, 6),
+ end=(1, 7),
+ line="D A C A E",
+ ),
],
- TokenInfo(type=NAME, string="A", start=(1, 6), end=(1, 7), line="D A C A E"),
+ TokenInfo(
+ type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"
+ ),
],
- TokenInfo(type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"),
- ])
+ )
node = parse_string("B C A E", parser_class)
- self.assertIsNotNone(node)
- self.assertEqual(node, [
+ self.assertEqual(
+ node,
[
[
- [TokenInfo(type=NAME, string="B", start=(1, 0), end=(1, 1), line="B C A E")],
- TokenInfo(type=NAME, string="C", start=(1, 2), end=(1, 3), line="B C A E"),
+ [
+ TokenInfo(
+ type=NAME,
+ string="B",
+ start=(1, 0),
+ end=(1, 1),
+ line="B C A E",
+ ),
+ TokenInfo(
+ type=NAME,
+ string="C",
+ start=(1, 2),
+ end=(1, 3),
+ line="B C A E",
+ ),
+ ],
+ TokenInfo(
+ type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"
+ ),
],
- TokenInfo(type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"),
+ TokenInfo(
+ type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"
+ ),
],
- TokenInfo(type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"),
- ])
+ )
def test_nasty_mutually_left_recursive(self) -> None:
# This grammar does not recognize 'x - + =', much to my chagrin.
@@ -454,43 +650,44 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("foo = 12 + 12 .", parser_class)
- self.assertEqual(node, [
+ self.assertEqual(
+ node,
[
+ TokenInfo(
+ NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 ."
+ ),
+ TokenInfo(
+ OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."
+ ),
[
- [TokenInfo(NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 .")],
- TokenInfo(OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."),
+ TokenInfo(
+ NUMBER,
+ string="12",
+ start=(1, 6),
+ end=(1, 8),
+ line="foo = 12 + 12 .",
+ ),
[
[
TokenInfo(
- NUMBER, string="12", start=(1, 6), end=(1, 8), line="foo = 12 + 12 ."
- )
- ],
- [
- [
- [
- TokenInfo(
- OP,
- string="+",
- start=(1, 9),
- end=(1, 10),
- line="foo = 12 + 12 .",
- ),
- [
- TokenInfo(
- NUMBER,
- string="12",
- start=(1, 11),
- end=(1, 13),
- line="foo = 12 + 12 .",
- )
- ],
- ]
- ]
- ],
+ OP,
+ string="+",
+ start=(1, 9),
+ end=(1, 10),
+ line="foo = 12 + 12 .",
+ ),
+ TokenInfo(
+ NUMBER,
+ string="12",
+ start=(1, 11),
+ end=(1, 13),
+ line="foo = 12 + 12 .",
+ ),
+ ]
],
- ]
- ]
- ])
+ ],
+ ],
+ )
def test_named_lookahead_error(self) -> None:
grammar = """
@@ -533,11 +730,14 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("(1)", parser_class)
- self.assertEqual(node, [
- TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"),
- [TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)")],
- TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"),
- ])
+ self.assertEqual(
+ node,
+ [
+ TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"),
+ TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)"),
+ TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"),
+ ],
+ )
def test_dangling_reference(self) -> None:
grammar = """
@@ -589,6 +789,124 @@ class TestPegen(unittest.TestCase):
with self.assertRaisesRegex(GrammarError, "cannot start with underscore: '_x'"):
parser_class = make_parser(grammar)
+ def test_soft_keyword(self) -> None:
+ grammar = """
+ start:
+ | "number" n=NUMBER { eval(n.string) }
+ | "string" n=STRING { n.string }
+ | SOFT_KEYWORD l=NAME n=(NUMBER | NAME | STRING) { f"{l.string} = {n.string}"}
+ """
+ parser_class = make_parser(grammar)
+ self.assertEqual(parse_string("number 1", parser_class, verbose=True), 1)
+ self.assertEqual(parse_string("string 'b'", parser_class, verbose=True), "'b'")
+ self.assertEqual(
+ parse_string("number test 1", parser_class, verbose=True), "test = 1"
+ )
+ assert (
+ parse_string("string test 'b'", parser_class, verbose=True) == "test = 'b'"
+ )
+ with self.assertRaises(SyntaxError):
+ parse_string("test 1", parser_class, verbose=True)
+
+ def test_forced(self) -> None:
+ grammar = """
+ start: NAME &&':' | NAME
+ """
+ parser_class = make_parser(grammar)
+ self.assertTrue(parse_string("number :", parser_class, verbose=True))
+ with self.assertRaises(SyntaxError) as e:
+ parse_string("a", parser_class, verbose=True)
+
+ self.assertIn("expected ':'", str(e.exception))
+
+ def test_forced_with_group(self) -> None:
+ grammar = """
+ start: NAME &&(':' | ';') | NAME
+ """
+ parser_class = make_parser(grammar)
+ self.assertTrue(parse_string("number :", parser_class, verbose=True))
+ self.assertTrue(parse_string("number ;", parser_class, verbose=True))
+ with self.assertRaises(SyntaxError) as e:
+ parse_string("a", parser_class, verbose=True)
+ self.assertIn("expected (':' | ';')", e.exception.args[0])
+
+ def test_unreachable_explicit(self) -> None:
+ source = """
+ start: NAME { UNREACHABLE }
+ """
+ grammar = parse_string(source, GrammarParser)
+ out = io.StringIO()
+ genr = PythonParserGenerator(
+ grammar, out, unreachable_formatting="This is a test"
+ )
+ genr.generate("<string>")
+ self.assertIn("This is a test", out.getvalue())
+
+ def test_unreachable_implicit1(self) -> None:
+ source = """
+ start: NAME | invalid_input
+ invalid_input: NUMBER { None }
+ """
+ grammar = parse_string(source, GrammarParser)
+ out = io.StringIO()
+ genr = PythonParserGenerator(
+ grammar, out, unreachable_formatting="This is a test"
+ )
+ genr.generate("<string>")
+ self.assertIn("This is a test", out.getvalue())
+
+ def test_unreachable_implicit2(self) -> None:
+ source = """
+ start: NAME | '(' invalid_input ')'
+ invalid_input: NUMBER { None }
+ """
+ grammar = parse_string(source, GrammarParser)
+ out = io.StringIO()
+ genr = PythonParserGenerator(
+ grammar, out, unreachable_formatting="This is a test"
+ )
+ genr.generate("<string>")
+ self.assertIn("This is a test", out.getvalue())
+
+ def test_unreachable_implicit3(self) -> None:
+ source = """
+ start: NAME | invalid_input { None }
+ invalid_input: NUMBER
+ """
+ grammar = parse_string(source, GrammarParser)
+ out = io.StringIO()
+ genr = PythonParserGenerator(
+ grammar, out, unreachable_formatting="This is a test"
+ )
+ genr.generate("<string>")
+ self.assertNotIn("This is a test", out.getvalue())
+
+ def test_locations_in_alt_action_and_group(self) -> None:
+ grammar = """
+ start: t=term NEWLINE? $ { ast.Expression(t, LOCATIONS) }
+ term:
+ | l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, LOCATIONS) }
+ | l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, LOCATIONS) }
+ | factor
+ factor:
+ | (
+ n=NAME { ast.Name(id=n.string, ctx=ast.Load(), LOCATIONS) } |
+ n=NUMBER { ast.Constant(value=ast.literal_eval(n.string), LOCATIONS) }
+ )
+ """
+ parser_class = make_parser(grammar)
+ source = "2*3\n"
+ o = ast.dump(parse_string(source, parser_class).body, include_attributes=True)
+ p = ast.dump(ast.parse(source).body[0].value, include_attributes=True).replace(
+ " kind=None,", ""
+ )
+ diff = "\n".join(
+ difflib.unified_diff(
+ o.split("\n"), p.split("\n"), "cpython", "python-pegen"
+ )
+ )
+ self.assertFalse(diff)
+
class TestGrammarVisitor:
class Visitor(GrammarVisitor):