diff options
author | Pablo Galindo Salgado <Pablogsal@gmail.com> | 2021-08-12 16:37:30 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-08-12 16:37:30 (GMT) |
commit | 953d27261e455066b17a106d9e07bab3bf12983b (patch) | |
tree | 98739da681414ff6117897a56eb90120d3dad069 /Lib/test/test_peg_generator | |
parent | 8e832fb2a2cb54d7262148b6ec15563dffb48d63 (diff) | |
download | cpython-953d27261e455066b17a106d9e07bab3bf12983b.zip cpython-953d27261e455066b17a106d9e07bab3bf12983b.tar.gz cpython-953d27261e455066b17a106d9e07bab3bf12983b.tar.bz2 |
Update pegen to use the latest upstream developments (GH-27586)
Diffstat (limited to 'Lib/test/test_peg_generator')
-rw-r--r-- | Lib/test/test_peg_generator/test_c_parser.py | 29 | ||||
-rw-r--r-- | Lib/test/test_peg_generator/test_first_sets.py | 197 | ||||
-rw-r--r-- | Lib/test/test_peg_generator/test_grammar_validator.py | 4 | ||||
-rw-r--r-- | Lib/test/test_peg_generator/test_pegen.py | 638 |
4 files changed, 636 insertions, 232 deletions
diff --git a/Lib/test/test_peg_generator/test_c_parser.py b/Lib/test/test_peg_generator/test_c_parser.py index 013b3af..b761bd4 100644 --- a/Lib/test/test_peg_generator/test_c_parser.py +++ b/Lib/test/test_peg_generator/test_c_parser.py @@ -11,8 +11,8 @@ from test import support from test.support import os_helper from test.support.script_helper import assert_python_ok -_py_cflags_nodist = sysconfig.get_config_var('PY_CFLAGS_NODIST') -_pgo_flag = sysconfig.get_config_var('PGO_PROF_USE_FLAG') +_py_cflags_nodist = sysconfig.get_config_var("PY_CFLAGS_NODIST") +_pgo_flag = sysconfig.get_config_var("PGO_PROF_USE_FLAG") if _pgo_flag and _py_cflags_nodist and _pgo_flag in _py_cflags_nodist: raise unittest.SkipTest("peg_generator test disabled under PGO build") @@ -458,3 +458,28 @@ class TestCParser(unittest.TestCase): self.check_input_strings_for_grammar(valid_cases, invalid_cases) """ self.run_test(grammar_source, test_source) + + def test_forced(self) -> None: + grammar_source = """ + start: NAME &&':' | NAME + """ + test_source = """ + self.assertEqual(parse.parse_string("number :", mode=0), None) + with self.assertRaises(SyntaxError) as e: + parse.parse_string("a", mode=0) + self.assertIn("expected ':'", str(e.exception)) + """ + self.run_test(grammar_source, test_source) + + def test_forced_with_group(self) -> None: + grammar_source = """ + start: NAME &&(':' | ';') | NAME + """ + test_source = """ + self.assertEqual(parse.parse_string("number :", mode=0), None) + self.assertEqual(parse.parse_string("number ;", mode=0), None) + with self.assertRaises(SyntaxError) as e: + parse.parse_string("a", mode=0) + self.assertIn("expected (':' | ';')", e.exception.args[0]) + """ + self.run_test(grammar_source, test_source) diff --git a/Lib/test/test_peg_generator/test_first_sets.py b/Lib/test/test_peg_generator/test_first_sets.py index 425ee23..d6f8322 100644 --- a/Lib/test/test_peg_generator/test_first_sets.py +++ b/Lib/test/test_peg_generator/test_first_sets.py @@ -3,8 +3,8 @@ import unittest from test import test_tools from typing import Dict, Set -test_tools.skip_if_missing('peg_generator') -with test_tools.imports_under_tool('peg_generator'): +test_tools.skip_if_missing("peg_generator") +with test_tools.imports_under_tool("peg_generator"): from pegen.grammar_parser import GeneratedParser as GrammarParser from pegen.testutil import parse_string from pegen.first_sets import FirstSetCalculator @@ -23,29 +23,38 @@ class TestFirstSets(unittest.TestCase): A: 'a' | '-' B: 'b' | '+' """ - self.assertEqual(self.calculate_first_sets(grammar), { - "A": {"'a'", "'-'"}, - "B": {"'+'", "'b'"}, - "expr": {"'+'", "'a'", "'b'", "'-'"}, - "start": {"'+'", "'a'", "'b'", "'-'"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "A": {"'a'", "'-'"}, + "B": {"'+'", "'b'"}, + "expr": {"'+'", "'a'", "'b'", "'-'"}, + "start": {"'+'", "'a'", "'b'", "'-'"}, + }, + ) def test_optionals(self) -> None: grammar = """ start: expr NEWLINE expr: ['a'] ['b'] 'c' """ - self.assertEqual(self.calculate_first_sets(grammar), { - "expr": {"'c'", "'a'", "'b'"}, - "start": {"'c'", "'a'", "'b'"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "expr": {"'c'", "'a'", "'b'"}, + "start": {"'c'", "'a'", "'b'"}, + }, + ) def test_repeat_with_separator(self) -> None: grammar = """ start: ','.thing+ NEWLINE thing: NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}}) + self.assertEqual( + self.calculate_first_sets(grammar), + {"thing": {"NUMBER"}, "start": {"NUMBER"}}, + ) def test_optional_operator(self) -> None: grammar = """ @@ -53,11 +62,14 @@ class TestFirstSets(unittest.TestCase): sum: (term)? 'b' term: NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), { - "term": {"NUMBER"}, - "sum": {"NUMBER", "'b'"}, - "start": {"'b'", "NUMBER"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "term": {"NUMBER"}, + "sum": {"NUMBER", "'b'"}, + "start": {"'b'", "NUMBER"}, + }, + ) def test_optional_literal(self) -> None: grammar = """ @@ -65,60 +77,83 @@ class TestFirstSets(unittest.TestCase): sum: '+' ? term term: NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), { - "term": {"NUMBER"}, - "sum": {"'+'", "NUMBER"}, - "start": {"'+'", "NUMBER"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "term": {"NUMBER"}, + "sum": {"'+'", "NUMBER"}, + "start": {"'+'", "NUMBER"}, + }, + ) def test_optional_after(self) -> None: grammar = """ start: term NEWLINE term: NUMBER ['+'] """ - self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"NUMBER"}}) + self.assertEqual( + self.calculate_first_sets(grammar), + {"term": {"NUMBER"}, "start": {"NUMBER"}}, + ) def test_optional_before(self) -> None: grammar = """ start: term NEWLINE term: ['+'] NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}}) + self.assertEqual( + self.calculate_first_sets(grammar), + {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}}, + ) def test_repeat_0(self) -> None: grammar = """ start: thing* "+" NEWLINE thing: NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}}) + self.assertEqual( + self.calculate_first_sets(grammar), + {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}}, + ) def test_repeat_0_with_group(self) -> None: grammar = """ start: ('+' '-')* term NEWLINE term: NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}}) + self.assertEqual( + self.calculate_first_sets(grammar), + {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}}, + ) def test_repeat_1(self) -> None: grammar = """ start: thing+ '-' NEWLINE thing: NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}}) + self.assertEqual( + self.calculate_first_sets(grammar), + {"thing": {"NUMBER"}, "start": {"NUMBER"}}, + ) def test_repeat_1_with_group(self) -> None: grammar = """ start: ('+' term)+ term NEWLINE term: NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}}) + self.assertEqual( + self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}} + ) def test_gather(self) -> None: grammar = """ start: ','.thing+ NEWLINE thing: NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}}) + self.assertEqual( + self.calculate_first_sets(grammar), + {"thing": {"NUMBER"}, "start": {"NUMBER"}}, + ) def test_positive_lookahead(self) -> None: grammar = """ @@ -126,11 +161,14 @@ class TestFirstSets(unittest.TestCase): expr: &'a' opt opt: 'a' | 'b' | 'c' """ - self.assertEqual(self.calculate_first_sets(grammar), { - "expr": {"'a'"}, - "start": {"'a'"}, - "opt": {"'b'", "'c'", "'a'"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "expr": {"'a'"}, + "start": {"'a'"}, + "opt": {"'b'", "'c'", "'a'"}, + }, + ) def test_negative_lookahead(self) -> None: grammar = """ @@ -138,11 +176,14 @@ class TestFirstSets(unittest.TestCase): expr: !'a' opt opt: 'a' | 'b' | 'c' """ - self.assertEqual(self.calculate_first_sets(grammar), { - "opt": {"'b'", "'a'", "'c'"}, - "expr": {"'b'", "'c'"}, - "start": {"'b'", "'c'"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "opt": {"'b'", "'a'", "'c'"}, + "expr": {"'b'", "'c'"}, + "start": {"'b'", "'c'"}, + }, + ) def test_left_recursion(self) -> None: grammar = """ @@ -153,21 +194,27 @@ class TestFirstSets(unittest.TestCase): bar: 'bar' baz: 'baz' """ - self.assertEqual(self.calculate_first_sets(grammar), { - "expr": {"NUMBER", "'-'"}, - "term": {"NUMBER"}, - "start": {"NUMBER", "'-'"}, - "foo": {"'foo'"}, - "bar": {"'bar'"}, - "baz": {"'baz'"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "expr": {"NUMBER", "'-'"}, + "term": {"NUMBER"}, + "start": {"NUMBER", "'-'"}, + "foo": {"'foo'"}, + "bar": {"'bar'"}, + "baz": {"'baz'"}, + }, + ) def test_advance_left_recursion(self) -> None: grammar = """ start: NUMBER | sign start sign: ['-'] """ - self.assertEqual(self.calculate_first_sets(grammar), {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}}) + self.assertEqual( + self.calculate_first_sets(grammar), + {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}}, + ) def test_mutual_left_recursion(self) -> None: grammar = """ @@ -175,11 +222,14 @@ class TestFirstSets(unittest.TestCase): foo: bar 'A' | 'B' bar: foo 'C' | 'D' """ - self.assertEqual(self.calculate_first_sets(grammar), { - "foo": {"'D'", "'B'"}, - "bar": {"'D'"}, - "start": {"'D'", "'B'"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "foo": {"'D'", "'B'"}, + "bar": {"'D'"}, + "start": {"'D'", "'B'"}, + }, + ) def test_nasty_left_recursion(self) -> None: # TODO: Validate this @@ -188,7 +238,10 @@ class TestFirstSets(unittest.TestCase): target: maybe '+' | NAME maybe: maybe '-' | target """ - self.assertEqual(self.calculate_first_sets(grammar), {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}}) + self.assertEqual( + self.calculate_first_sets(grammar), + {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}}, + ) def test_nullable_rule(self) -> None: grammar = """ @@ -196,17 +249,22 @@ class TestFirstSets(unittest.TestCase): sign: ['-'] thing: NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), { - "sign": {"", "'-'"}, - "thing": {"NUMBER"}, - "start": {"NUMBER", "'-'"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "sign": {"", "'-'"}, + "thing": {"NUMBER"}, + "start": {"NUMBER", "'-'"}, + }, + ) def test_epsilon_production_in_start_rule(self) -> None: grammar = """ start: ['-'] $ """ - self.assertEqual(self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}}) + self.assertEqual( + self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}} + ) def test_multiple_nullable_rules(self) -> None: grammar = """ @@ -216,10 +274,13 @@ class TestFirstSets(unittest.TestCase): other: '*' another: '/' """ - self.assertEqual(self.calculate_first_sets(grammar), { - "sign": {"", "'-'"}, - "thing": {"'+'", ""}, - "start": {"'+'", "'-'", "'*'"}, - "other": {"'*'"}, - "another": {"'/'"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "sign": {"", "'-'"}, + "thing": {"'+'", ""}, + "start": {"'+'", "'-'", "'*'"}, + "other": {"'*'"}, + "another": {"'/'"}, + }, + ) diff --git a/Lib/test/test_peg_generator/test_grammar_validator.py b/Lib/test/test_peg_generator/test_grammar_validator.py index 2e72ff8..72c3d20 100644 --- a/Lib/test/test_peg_generator/test_grammar_validator.py +++ b/Lib/test/test_peg_generator/test_grammar_validator.py @@ -1,8 +1,8 @@ import unittest from test import test_tools -test_tools.skip_if_missing('peg_generator') -with test_tools.imports_under_tool('peg_generator'): +test_tools.skip_if_missing("peg_generator") +with test_tools.imports_under_tool("peg_generator"): from pegen.grammar_parser import GeneratedParser as GrammarParser from pegen.validator import SubRuleValidator, ValidationError from pegen.testutil import parse_string diff --git a/Lib/test/test_peg_generator/test_pegen.py b/Lib/test/test_peg_generator/test_pegen.py index bcfee3f..71b0fdc 100644 --- a/Lib/test/test_peg_generator/test_pegen.py +++ b/Lib/test/test_peg_generator/test_pegen.py @@ -1,3 +1,5 @@ +import ast +import difflib import io import textwrap import unittest @@ -6,14 +8,10 @@ from test import test_tools from typing import Dict, Any from tokenize import TokenInfo, NAME, NEWLINE, NUMBER, OP -test_tools.skip_if_missing('peg_generator') -with test_tools.imports_under_tool('peg_generator'): +test_tools.skip_if_missing("peg_generator") +with test_tools.imports_under_tool("peg_generator"): from pegen.grammar_parser import GeneratedParser as GrammarParser - from pegen.testutil import ( - parse_string, - generate_parser, - make_parser - ) + from pegen.testutil import parse_string, generate_parser, make_parser from pegen.grammar import GrammarVisitor, GrammarError, Grammar from pegen.grammar_visualizer import ASTGrammarPrinter from pegen.parser import Parser @@ -38,7 +36,9 @@ class TestPegen(unittest.TestCase): # Check the str() and repr() of a few rules; AST nodes don't support ==. self.assertEqual(str(rules["start"]), "start: sum NEWLINE") self.assertEqual(str(rules["sum"]), "sum: term '+' term | term") - expected_repr = "Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))" + expected_repr = ( + "Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))" + ) self.assertEqual(repr(rules["term"]), expected_repr) def test_long_rule_str(self) -> None: @@ -71,7 +71,7 @@ class TestPegen(unittest.TestCase): self.assertEqual(str(rules["sum"]), "sum: term '+' term | term") self.assertEqual( repr(rules["term"]), - "Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))" + "Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))", ) def test_gather(self) -> None: @@ -81,24 +81,31 @@ class TestPegen(unittest.TestCase): """ rules = parse_string(grammar, GrammarParser).rules self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE") - self.assertTrue(repr(rules["start"]).startswith( - "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'" - )) + self.assertTrue( + repr(rules["start"]).startswith( + "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'" + ) + ) self.assertEqual(str(rules["thing"]), "thing: NUMBER") parser_class = make_parser(grammar) node = parse_string("42\n", parser_class) - assert node == [ - [[TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n")]], - TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"), - ] node = parse_string("1, 2\n", parser_class) - assert node == [ + self.assertEqual( + node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n")], - [TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n")], + [ + TokenInfo( + NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n" + ), + TokenInfo( + NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n" + ), + ], + TokenInfo( + NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n" + ), ], - TokenInfo(NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n"), - ] + ) def test_expr_grammar(self) -> None: grammar = """ @@ -108,10 +115,13 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("42\n", parser_class) - self.assertEqual(node, [ - [[TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n")]], - TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"), - ]) + self.assertEqual( + node, + [ + TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n"), + TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"), + ], + ) def test_optional_operator(self) -> None: grammar = """ @@ -120,22 +130,39 @@ class TestPegen(unittest.TestCase): term: NUMBER """ parser_class = make_parser(grammar) - node = parse_string("1+2\n", parser_class) - self.assertEqual(node, [ + node = parse_string("1 + 2\n", parser_class) + self.assertEqual( + node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+2\n")], [ - TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+2\n"), - [TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1+2\n")], + TokenInfo( + NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n" + ), + [ + TokenInfo( + OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n" + ), + TokenInfo( + NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n" + ), + ], ], + TokenInfo( + NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n" + ), ], - TokenInfo(NEWLINE, string="\n", start=(1, 3), end=(1, 4), line="1+2\n"), - ]) + ) node = parse_string("1\n", parser_class) - self.assertEqual(node, [ - [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None], - TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), - ]) + self.assertEqual( + node, + [ + [ + TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"), + None, + ], + TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), + ], + ) def test_optional_literal(self) -> None: grammar = """ @@ -145,18 +172,29 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("1+\n", parser_class) - self.assertEqual(node, [ + self.assertEqual( + node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n")], - TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"), + [ + TokenInfo( + NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n" + ), + TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"), + ], + TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"), ], - TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"), - ]) + ) node = parse_string("1\n", parser_class) - self.assertEqual(node, [ - [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None], - TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), - ]) + self.assertEqual( + node, + [ + [ + TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"), + None, + ], + TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), + ], + ) def test_alt_optional_operator(self) -> None: grammar = """ @@ -166,21 +204,38 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("1 + 2\n", parser_class) - self.assertEqual(node, [ + self.assertEqual( + node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n")], [ - TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"), - [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n")], + TokenInfo( + NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n" + ), + [ + TokenInfo( + OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n" + ), + TokenInfo( + NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n" + ), + ], ], + TokenInfo( + NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n" + ), ], - TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"), - ]) + ) node = parse_string("1\n", parser_class) - self.assertEqual(node, [ - [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None], - TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), - ]) + self.assertEqual( + node, + [ + [ + TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"), + None, + ], + TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), + ], + ) def test_repeat_0_simple(self) -> None: grammar = """ @@ -189,20 +244,32 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("1 2 3\n", parser_class) - self.assertEqual(node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")], + self.assertEqual( + node, [ - [[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]], - [[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]], + TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"), + [ + TokenInfo( + NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n" + ), + TokenInfo( + NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n" + ), + ], + TokenInfo( + NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n" + ), ], - TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"), - ]) + ) node = parse_string("1\n", parser_class) - self.assertEqual(node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], - [], - TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), - ]) + self.assertEqual( + node, + [ + TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"), + [], + TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), + ], + ) def test_repeat_0_complex(self) -> None: grammar = """ @@ -211,24 +278,43 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("1 + 2 + 3\n", parser_class) - self.assertEqual(node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")], + self.assertEqual( + node, [ + TokenInfo( + NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n" + ), [ [ - TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"), - [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")], - ] - ], - [ + TokenInfo( + OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n" + ), + TokenInfo( + NUMBER, + string="2", + start=(1, 4), + end=(1, 5), + line="1 + 2 + 3\n", + ), + ], [ - TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"), - [TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")], - ] + TokenInfo( + OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n" + ), + TokenInfo( + NUMBER, + string="3", + start=(1, 8), + end=(1, 9), + line="1 + 2 + 3\n", + ), + ], ], + TokenInfo( + NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n" + ), ], - TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"), - ]) + ) def test_repeat_1_simple(self) -> None: grammar = """ @@ -237,14 +323,23 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("1 2 3\n", parser_class) - self.assertEqual(node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")], + self.assertEqual( + node, [ - [[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]], - [[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]], + TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"), + [ + TokenInfo( + NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n" + ), + TokenInfo( + NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n" + ), + ], + TokenInfo( + NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n" + ), ], - TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"), - ]) + ) with self.assertRaises(SyntaxError): parse_string("1\n", parser_class) @@ -255,24 +350,43 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("1 + 2 + 3\n", parser_class) - self.assertEqual(node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")], + self.assertEqual( + node, [ + TokenInfo( + NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n" + ), [ [ - TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"), - [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")], - ] - ], - [ + TokenInfo( + OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n" + ), + TokenInfo( + NUMBER, + string="2", + start=(1, 4), + end=(1, 5), + line="1 + 2 + 3\n", + ), + ], [ - TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"), - [TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")], - ] + TokenInfo( + OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n" + ), + TokenInfo( + NUMBER, + string="3", + start=(1, 8), + end=(1, 9), + line="1 + 2 + 3\n", + ), + ], ], + TokenInfo( + NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n" + ), ], - TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"), - ]) + ) with self.assertRaises(SyntaxError): parse_string("1\n", parser_class) @@ -283,14 +397,25 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("1, 2, 3\n", parser_class) - self.assertEqual(node, [ + self.assertEqual( + node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n")], - [TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n")], - [TokenInfo(NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n")], + [ + TokenInfo( + NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n" + ), + TokenInfo( + NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n" + ), + TokenInfo( + NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n" + ), + ], + TokenInfo( + NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n" + ), ], - TokenInfo(NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"), - ]) + ) def test_left_recursive(self) -> None: grammar_source = """ @@ -311,18 +436,41 @@ class TestPegen(unittest.TestCase): self.assertFalse(rules["bar"].left_recursive) self.assertFalse(rules["baz"].left_recursive) node = parse_string("1 + 2 + 3\n", parser_class) - self.assertEqual(node, [ + self.assertEqual( + node, [ [ - [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")]], - TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"), - [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")], + [ + TokenInfo( + NUMBER, + string="1", + start=(1, 0), + end=(1, 1), + line="1 + 2 + 3\n", + ), + TokenInfo( + OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n" + ), + TokenInfo( + NUMBER, + string="2", + start=(1, 4), + end=(1, 5), + line="1 + 2 + 3\n", + ), + ], + TokenInfo( + OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n" + ), + TokenInfo( + NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n" + ), ], - TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"), - [TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")], + TokenInfo( + NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n" + ), ], - TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"), - ]) + ) def test_python_expr(self) -> None: grammar = """ @@ -392,31 +540,79 @@ class TestPegen(unittest.TestCase): exec(out.getvalue(), ns) parser_class: Type[Parser] = ns["GeneratedParser"] node = parse_string("D A C A E", parser_class) - self.assertEqual(node, [ + + self.assertEqual( + node, [ [ [ - [TokenInfo(type=NAME, string="D", start=(1, 0), end=(1, 1), line="D A C A E")], - TokenInfo(type=NAME, string="A", start=(1, 2), end=(1, 3), line="D A C A E"), + [ + TokenInfo( + type=NAME, + string="D", + start=(1, 0), + end=(1, 1), + line="D A C A E", + ), + TokenInfo( + type=NAME, + string="A", + start=(1, 2), + end=(1, 3), + line="D A C A E", + ), + ], + TokenInfo( + type=NAME, + string="C", + start=(1, 4), + end=(1, 5), + line="D A C A E", + ), ], - TokenInfo(type=NAME, string="C", start=(1, 4), end=(1, 5), line="D A C A E"), + TokenInfo( + type=NAME, + string="A", + start=(1, 6), + end=(1, 7), + line="D A C A E", + ), ], - TokenInfo(type=NAME, string="A", start=(1, 6), end=(1, 7), line="D A C A E"), + TokenInfo( + type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E" + ), ], - TokenInfo(type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"), - ]) + ) node = parse_string("B C A E", parser_class) - self.assertIsNotNone(node) - self.assertEqual(node, [ + self.assertEqual( + node, [ [ - [TokenInfo(type=NAME, string="B", start=(1, 0), end=(1, 1), line="B C A E")], - TokenInfo(type=NAME, string="C", start=(1, 2), end=(1, 3), line="B C A E"), + [ + TokenInfo( + type=NAME, + string="B", + start=(1, 0), + end=(1, 1), + line="B C A E", + ), + TokenInfo( + type=NAME, + string="C", + start=(1, 2), + end=(1, 3), + line="B C A E", + ), + ], + TokenInfo( + type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E" + ), ], - TokenInfo(type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"), + TokenInfo( + type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E" + ), ], - TokenInfo(type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"), - ]) + ) def test_nasty_mutually_left_recursive(self) -> None: # This grammar does not recognize 'x - + =', much to my chagrin. @@ -454,43 +650,44 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("foo = 12 + 12 .", parser_class) - self.assertEqual(node, [ + self.assertEqual( + node, [ + TokenInfo( + NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 ." + ), + TokenInfo( + OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ." + ), [ - [TokenInfo(NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 .")], - TokenInfo(OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."), + TokenInfo( + NUMBER, + string="12", + start=(1, 6), + end=(1, 8), + line="foo = 12 + 12 .", + ), [ [ TokenInfo( - NUMBER, string="12", start=(1, 6), end=(1, 8), line="foo = 12 + 12 ." - ) - ], - [ - [ - [ - TokenInfo( - OP, - string="+", - start=(1, 9), - end=(1, 10), - line="foo = 12 + 12 .", - ), - [ - TokenInfo( - NUMBER, - string="12", - start=(1, 11), - end=(1, 13), - line="foo = 12 + 12 .", - ) - ], - ] - ] - ], + OP, + string="+", + start=(1, 9), + end=(1, 10), + line="foo = 12 + 12 .", + ), + TokenInfo( + NUMBER, + string="12", + start=(1, 11), + end=(1, 13), + line="foo = 12 + 12 .", + ), + ] ], - ] - ] - ]) + ], + ], + ) def test_named_lookahead_error(self) -> None: grammar = """ @@ -533,11 +730,14 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("(1)", parser_class) - self.assertEqual(node, [ - TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"), - [TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)")], - TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"), - ]) + self.assertEqual( + node, + [ + TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"), + TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)"), + TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"), + ], + ) def test_dangling_reference(self) -> None: grammar = """ @@ -589,6 +789,124 @@ class TestPegen(unittest.TestCase): with self.assertRaisesRegex(GrammarError, "cannot start with underscore: '_x'"): parser_class = make_parser(grammar) + def test_soft_keyword(self) -> None: + grammar = """ + start: + | "number" n=NUMBER { eval(n.string) } + | "string" n=STRING { n.string } + | SOFT_KEYWORD l=NAME n=(NUMBER | NAME | STRING) { f"{l.string} = {n.string}"} + """ + parser_class = make_parser(grammar) + self.assertEqual(parse_string("number 1", parser_class, verbose=True), 1) + self.assertEqual(parse_string("string 'b'", parser_class, verbose=True), "'b'") + self.assertEqual( + parse_string("number test 1", parser_class, verbose=True), "test = 1" + ) + assert ( + parse_string("string test 'b'", parser_class, verbose=True) == "test = 'b'" + ) + with self.assertRaises(SyntaxError): + parse_string("test 1", parser_class, verbose=True) + + def test_forced(self) -> None: + grammar = """ + start: NAME &&':' | NAME + """ + parser_class = make_parser(grammar) + self.assertTrue(parse_string("number :", parser_class, verbose=True)) + with self.assertRaises(SyntaxError) as e: + parse_string("a", parser_class, verbose=True) + + self.assertIn("expected ':'", str(e.exception)) + + def test_forced_with_group(self) -> None: + grammar = """ + start: NAME &&(':' | ';') | NAME + """ + parser_class = make_parser(grammar) + self.assertTrue(parse_string("number :", parser_class, verbose=True)) + self.assertTrue(parse_string("number ;", parser_class, verbose=True)) + with self.assertRaises(SyntaxError) as e: + parse_string("a", parser_class, verbose=True) + self.assertIn("expected (':' | ';')", e.exception.args[0]) + + def test_unreachable_explicit(self) -> None: + source = """ + start: NAME { UNREACHABLE } + """ + grammar = parse_string(source, GrammarParser) + out = io.StringIO() + genr = PythonParserGenerator( + grammar, out, unreachable_formatting="This is a test" + ) + genr.generate("<string>") + self.assertIn("This is a test", out.getvalue()) + + def test_unreachable_implicit1(self) -> None: + source = """ + start: NAME | invalid_input + invalid_input: NUMBER { None } + """ + grammar = parse_string(source, GrammarParser) + out = io.StringIO() + genr = PythonParserGenerator( + grammar, out, unreachable_formatting="This is a test" + ) + genr.generate("<string>") + self.assertIn("This is a test", out.getvalue()) + + def test_unreachable_implicit2(self) -> None: + source = """ + start: NAME | '(' invalid_input ')' + invalid_input: NUMBER { None } + """ + grammar = parse_string(source, GrammarParser) + out = io.StringIO() + genr = PythonParserGenerator( + grammar, out, unreachable_formatting="This is a test" + ) + genr.generate("<string>") + self.assertIn("This is a test", out.getvalue()) + + def test_unreachable_implicit3(self) -> None: + source = """ + start: NAME | invalid_input { None } + invalid_input: NUMBER + """ + grammar = parse_string(source, GrammarParser) + out = io.StringIO() + genr = PythonParserGenerator( + grammar, out, unreachable_formatting="This is a test" + ) + genr.generate("<string>") + self.assertNotIn("This is a test", out.getvalue()) + + def test_locations_in_alt_action_and_group(self) -> None: + grammar = """ + start: t=term NEWLINE? $ { ast.Expression(t, LOCATIONS) } + term: + | l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, LOCATIONS) } + | l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, LOCATIONS) } + | factor + factor: + | ( + n=NAME { ast.Name(id=n.string, ctx=ast.Load(), LOCATIONS) } | + n=NUMBER { ast.Constant(value=ast.literal_eval(n.string), LOCATIONS) } + ) + """ + parser_class = make_parser(grammar) + source = "2*3\n" + o = ast.dump(parse_string(source, parser_class).body, include_attributes=True) + p = ast.dump(ast.parse(source).body[0].value, include_attributes=True).replace( + " kind=None,", "" + ) + diff = "\n".join( + difflib.unified_diff( + o.split("\n"), p.split("\n"), "cpython", "python-pegen" + ) + ) + self.assertFalse(diff) + class TestGrammarVisitor: class Visitor(GrammarVisitor): |