26 files changed, 1240 insertions, 670 deletions
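Beyond the black-style reformatting of the peg_generator test suite and generator, the substantive change in this diff is that the forced-token operator `&&` now also accepts a parenthesized group of alternatives, backed by a new _PyPegen_expect_forced_result() helper in Parser/pegen.c and matching support in the C and Python generators. The snippet below is a minimal, hypothetical sketch of the behavior the new tests exercise; it is not part of the diff itself, and it assumes the pegen package from Tools/peg_generator is importable, as it is in Lib/test/test_peg_generator/test_pegen.py below.

    # Sketch only: mirrors test_forced_with_group() from test_pegen.py in this diff.
    from pegen.testutil import make_parser, parse_string

    grammar = """
    start: NAME &&(':' | ';') | NAME
    """
    parser_class = make_parser(grammar)

    parse_string("number :", parser_class)   # ':' satisfies the forced group
    parse_string("number ;", parser_class)   # so does ';'
    try:
        # A bare NAME now fails loudly: the forced group raises SyntaxError
        # instead of letting the second alternative match silently.
        parse_string("a", parser_class)
    except SyntaxError as exc:
        print(exc)   # message includes "expected (':' | ';')"

In the C generator, the group form is routed through the new _PyPegen_expect_forced_result() helper, while a single forced literal keeps using the existing _PyPegen_expect_forced_token() path.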
diff --git a/Lib/test/test_peg_generator/test_c_parser.py b/Lib/test/test_peg_generator/test_c_parser.py index 013b3af..b761bd4 100644 --- a/Lib/test/test_peg_generator/test_c_parser.py +++ b/Lib/test/test_peg_generator/test_c_parser.py @@ -11,8 +11,8 @@ from test import support from test.support import os_helper from test.support.script_helper import assert_python_ok -_py_cflags_nodist = sysconfig.get_config_var('PY_CFLAGS_NODIST') -_pgo_flag = sysconfig.get_config_var('PGO_PROF_USE_FLAG') +_py_cflags_nodist = sysconfig.get_config_var("PY_CFLAGS_NODIST") +_pgo_flag = sysconfig.get_config_var("PGO_PROF_USE_FLAG") if _pgo_flag and _py_cflags_nodist and _pgo_flag in _py_cflags_nodist: raise unittest.SkipTest("peg_generator test disabled under PGO build") @@ -458,3 +458,28 @@ class TestCParser(unittest.TestCase): self.check_input_strings_for_grammar(valid_cases, invalid_cases) """ self.run_test(grammar_source, test_source) + + def test_forced(self) -> None: + grammar_source = """ + start: NAME &&':' | NAME + """ + test_source = """ + self.assertEqual(parse.parse_string("number :", mode=0), None) + with self.assertRaises(SyntaxError) as e: + parse.parse_string("a", mode=0) + self.assertIn("expected ':'", str(e.exception)) + """ + self.run_test(grammar_source, test_source) + + def test_forced_with_group(self) -> None: + grammar_source = """ + start: NAME &&(':' | ';') | NAME + """ + test_source = """ + self.assertEqual(parse.parse_string("number :", mode=0), None) + self.assertEqual(parse.parse_string("number ;", mode=0), None) + with self.assertRaises(SyntaxError) as e: + parse.parse_string("a", mode=0) + self.assertIn("expected (':' | ';')", e.exception.args[0]) + """ + self.run_test(grammar_source, test_source) diff --git a/Lib/test/test_peg_generator/test_first_sets.py b/Lib/test/test_peg_generator/test_first_sets.py index 425ee23..d6f8322 100644 --- a/Lib/test/test_peg_generator/test_first_sets.py +++ b/Lib/test/test_peg_generator/test_first_sets.py @@ -3,8 +3,8 @@ import unittest from test import test_tools from typing import Dict, Set -test_tools.skip_if_missing('peg_generator') -with test_tools.imports_under_tool('peg_generator'): +test_tools.skip_if_missing("peg_generator") +with test_tools.imports_under_tool("peg_generator"): from pegen.grammar_parser import GeneratedParser as GrammarParser from pegen.testutil import parse_string from pegen.first_sets import FirstSetCalculator @@ -23,29 +23,38 @@ class TestFirstSets(unittest.TestCase): A: 'a' | '-' B: 'b' | '+' """ - self.assertEqual(self.calculate_first_sets(grammar), { - "A": {"'a'", "'-'"}, - "B": {"'+'", "'b'"}, - "expr": {"'+'", "'a'", "'b'", "'-'"}, - "start": {"'+'", "'a'", "'b'", "'-'"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "A": {"'a'", "'-'"}, + "B": {"'+'", "'b'"}, + "expr": {"'+'", "'a'", "'b'", "'-'"}, + "start": {"'+'", "'a'", "'b'", "'-'"}, + }, + ) def test_optionals(self) -> None: grammar = """ start: expr NEWLINE expr: ['a'] ['b'] 'c' """ - self.assertEqual(self.calculate_first_sets(grammar), { - "expr": {"'c'", "'a'", "'b'"}, - "start": {"'c'", "'a'", "'b'"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "expr": {"'c'", "'a'", "'b'"}, + "start": {"'c'", "'a'", "'b'"}, + }, + ) def test_repeat_with_separator(self) -> None: grammar = """ start: ','.thing+ NEWLINE thing: NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}}) + self.assertEqual( + self.calculate_first_sets(grammar), + {"thing": {"NUMBER"}, 
"start": {"NUMBER"}}, + ) def test_optional_operator(self) -> None: grammar = """ @@ -53,11 +62,14 @@ class TestFirstSets(unittest.TestCase): sum: (term)? 'b' term: NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), { - "term": {"NUMBER"}, - "sum": {"NUMBER", "'b'"}, - "start": {"'b'", "NUMBER"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "term": {"NUMBER"}, + "sum": {"NUMBER", "'b'"}, + "start": {"'b'", "NUMBER"}, + }, + ) def test_optional_literal(self) -> None: grammar = """ @@ -65,60 +77,83 @@ class TestFirstSets(unittest.TestCase): sum: '+' ? term term: NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), { - "term": {"NUMBER"}, - "sum": {"'+'", "NUMBER"}, - "start": {"'+'", "NUMBER"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "term": {"NUMBER"}, + "sum": {"'+'", "NUMBER"}, + "start": {"'+'", "NUMBER"}, + }, + ) def test_optional_after(self) -> None: grammar = """ start: term NEWLINE term: NUMBER ['+'] """ - self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"NUMBER"}}) + self.assertEqual( + self.calculate_first_sets(grammar), + {"term": {"NUMBER"}, "start": {"NUMBER"}}, + ) def test_optional_before(self) -> None: grammar = """ start: term NEWLINE term: ['+'] NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}}) + self.assertEqual( + self.calculate_first_sets(grammar), + {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}}, + ) def test_repeat_0(self) -> None: grammar = """ start: thing* "+" NEWLINE thing: NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}}) + self.assertEqual( + self.calculate_first_sets(grammar), + {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}}, + ) def test_repeat_0_with_group(self) -> None: grammar = """ start: ('+' '-')* term NEWLINE term: NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}}) + self.assertEqual( + self.calculate_first_sets(grammar), + {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}}, + ) def test_repeat_1(self) -> None: grammar = """ start: thing+ '-' NEWLINE thing: NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}}) + self.assertEqual( + self.calculate_first_sets(grammar), + {"thing": {"NUMBER"}, "start": {"NUMBER"}}, + ) def test_repeat_1_with_group(self) -> None: grammar = """ start: ('+' term)+ term NEWLINE term: NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}}) + self.assertEqual( + self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}} + ) def test_gather(self) -> None: grammar = """ start: ','.thing+ NEWLINE thing: NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}}) + self.assertEqual( + self.calculate_first_sets(grammar), + {"thing": {"NUMBER"}, "start": {"NUMBER"}}, + ) def test_positive_lookahead(self) -> None: grammar = """ @@ -126,11 +161,14 @@ class TestFirstSets(unittest.TestCase): expr: &'a' opt opt: 'a' | 'b' | 'c' """ - self.assertEqual(self.calculate_first_sets(grammar), { - "expr": {"'a'"}, - "start": {"'a'"}, - "opt": {"'b'", "'c'", "'a'"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "expr": {"'a'"}, + "start": {"'a'"}, + "opt": {"'b'", "'c'", "'a'"}, + }, + ) def test_negative_lookahead(self) -> None: 
grammar = """ @@ -138,11 +176,14 @@ class TestFirstSets(unittest.TestCase): expr: !'a' opt opt: 'a' | 'b' | 'c' """ - self.assertEqual(self.calculate_first_sets(grammar), { - "opt": {"'b'", "'a'", "'c'"}, - "expr": {"'b'", "'c'"}, - "start": {"'b'", "'c'"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "opt": {"'b'", "'a'", "'c'"}, + "expr": {"'b'", "'c'"}, + "start": {"'b'", "'c'"}, + }, + ) def test_left_recursion(self) -> None: grammar = """ @@ -153,21 +194,27 @@ class TestFirstSets(unittest.TestCase): bar: 'bar' baz: 'baz' """ - self.assertEqual(self.calculate_first_sets(grammar), { - "expr": {"NUMBER", "'-'"}, - "term": {"NUMBER"}, - "start": {"NUMBER", "'-'"}, - "foo": {"'foo'"}, - "bar": {"'bar'"}, - "baz": {"'baz'"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "expr": {"NUMBER", "'-'"}, + "term": {"NUMBER"}, + "start": {"NUMBER", "'-'"}, + "foo": {"'foo'"}, + "bar": {"'bar'"}, + "baz": {"'baz'"}, + }, + ) def test_advance_left_recursion(self) -> None: grammar = """ start: NUMBER | sign start sign: ['-'] """ - self.assertEqual(self.calculate_first_sets(grammar), {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}}) + self.assertEqual( + self.calculate_first_sets(grammar), + {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}}, + ) def test_mutual_left_recursion(self) -> None: grammar = """ @@ -175,11 +222,14 @@ class TestFirstSets(unittest.TestCase): foo: bar 'A' | 'B' bar: foo 'C' | 'D' """ - self.assertEqual(self.calculate_first_sets(grammar), { - "foo": {"'D'", "'B'"}, - "bar": {"'D'"}, - "start": {"'D'", "'B'"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "foo": {"'D'", "'B'"}, + "bar": {"'D'"}, + "start": {"'D'", "'B'"}, + }, + ) def test_nasty_left_recursion(self) -> None: # TODO: Validate this @@ -188,7 +238,10 @@ class TestFirstSets(unittest.TestCase): target: maybe '+' | NAME maybe: maybe '-' | target """ - self.assertEqual(self.calculate_first_sets(grammar), {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}}) + self.assertEqual( + self.calculate_first_sets(grammar), + {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}}, + ) def test_nullable_rule(self) -> None: grammar = """ @@ -196,17 +249,22 @@ class TestFirstSets(unittest.TestCase): sign: ['-'] thing: NUMBER """ - self.assertEqual(self.calculate_first_sets(grammar), { - "sign": {"", "'-'"}, - "thing": {"NUMBER"}, - "start": {"NUMBER", "'-'"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "sign": {"", "'-'"}, + "thing": {"NUMBER"}, + "start": {"NUMBER", "'-'"}, + }, + ) def test_epsilon_production_in_start_rule(self) -> None: grammar = """ start: ['-'] $ """ - self.assertEqual(self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}}) + self.assertEqual( + self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}} + ) def test_multiple_nullable_rules(self) -> None: grammar = """ @@ -216,10 +274,13 @@ class TestFirstSets(unittest.TestCase): other: '*' another: '/' """ - self.assertEqual(self.calculate_first_sets(grammar), { - "sign": {"", "'-'"}, - "thing": {"'+'", ""}, - "start": {"'+'", "'-'", "'*'"}, - "other": {"'*'"}, - "another": {"'/'"}, - }) + self.assertEqual( + self.calculate_first_sets(grammar), + { + "sign": {"", "'-'"}, + "thing": {"'+'", ""}, + "start": {"'+'", "'-'", "'*'"}, + "other": {"'*'"}, + "another": {"'/'"}, + }, + ) diff --git a/Lib/test/test_peg_generator/test_grammar_validator.py b/Lib/test/test_peg_generator/test_grammar_validator.py index 2e72ff8..72c3d20 100644 --- 
a/Lib/test/test_peg_generator/test_grammar_validator.py +++ b/Lib/test/test_peg_generator/test_grammar_validator.py @@ -1,8 +1,8 @@ import unittest from test import test_tools -test_tools.skip_if_missing('peg_generator') -with test_tools.imports_under_tool('peg_generator'): +test_tools.skip_if_missing("peg_generator") +with test_tools.imports_under_tool("peg_generator"): from pegen.grammar_parser import GeneratedParser as GrammarParser from pegen.validator import SubRuleValidator, ValidationError from pegen.testutil import parse_string diff --git a/Lib/test/test_peg_generator/test_pegen.py b/Lib/test/test_peg_generator/test_pegen.py index bcfee3f..71b0fdc 100644 --- a/Lib/test/test_peg_generator/test_pegen.py +++ b/Lib/test/test_peg_generator/test_pegen.py @@ -1,3 +1,5 @@ +import ast +import difflib import io import textwrap import unittest @@ -6,14 +8,10 @@ from test import test_tools from typing import Dict, Any from tokenize import TokenInfo, NAME, NEWLINE, NUMBER, OP -test_tools.skip_if_missing('peg_generator') -with test_tools.imports_under_tool('peg_generator'): +test_tools.skip_if_missing("peg_generator") +with test_tools.imports_under_tool("peg_generator"): from pegen.grammar_parser import GeneratedParser as GrammarParser - from pegen.testutil import ( - parse_string, - generate_parser, - make_parser - ) + from pegen.testutil import parse_string, generate_parser, make_parser from pegen.grammar import GrammarVisitor, GrammarError, Grammar from pegen.grammar_visualizer import ASTGrammarPrinter from pegen.parser import Parser @@ -38,7 +36,9 @@ class TestPegen(unittest.TestCase): # Check the str() and repr() of a few rules; AST nodes don't support ==. self.assertEqual(str(rules["start"]), "start: sum NEWLINE") self.assertEqual(str(rules["sum"]), "sum: term '+' term | term") - expected_repr = "Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))" + expected_repr = ( + "Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))" + ) self.assertEqual(repr(rules["term"]), expected_repr) def test_long_rule_str(self) -> None: @@ -71,7 +71,7 @@ class TestPegen(unittest.TestCase): self.assertEqual(str(rules["sum"]), "sum: term '+' term | term") self.assertEqual( repr(rules["term"]), - "Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))" + "Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))", ) def test_gather(self) -> None: @@ -81,24 +81,31 @@ class TestPegen(unittest.TestCase): """ rules = parse_string(grammar, GrammarParser).rules self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE") - self.assertTrue(repr(rules["start"]).startswith( - "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'" - )) + self.assertTrue( + repr(rules["start"]).startswith( + "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'" + ) + ) self.assertEqual(str(rules["thing"]), "thing: NUMBER") parser_class = make_parser(grammar) node = parse_string("42\n", parser_class) - assert node == [ - [[TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n")]], - TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"), - ] node = parse_string("1, 2\n", parser_class) - assert node == [ + self.assertEqual( + node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n")], - [TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n")], + [ + TokenInfo( + NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n" + ), + 
TokenInfo( + NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n" + ), + ], + TokenInfo( + NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n" + ), ], - TokenInfo(NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n"), - ] + ) def test_expr_grammar(self) -> None: grammar = """ @@ -108,10 +115,13 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("42\n", parser_class) - self.assertEqual(node, [ - [[TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n")]], - TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"), - ]) + self.assertEqual( + node, + [ + TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n"), + TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"), + ], + ) def test_optional_operator(self) -> None: grammar = """ @@ -120,22 +130,39 @@ class TestPegen(unittest.TestCase): term: NUMBER """ parser_class = make_parser(grammar) - node = parse_string("1+2\n", parser_class) - self.assertEqual(node, [ + node = parse_string("1 + 2\n", parser_class) + self.assertEqual( + node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+2\n")], [ - TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+2\n"), - [TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1+2\n")], + TokenInfo( + NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n" + ), + [ + TokenInfo( + OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n" + ), + TokenInfo( + NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n" + ), + ], ], + TokenInfo( + NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n" + ), ], - TokenInfo(NEWLINE, string="\n", start=(1, 3), end=(1, 4), line="1+2\n"), - ]) + ) node = parse_string("1\n", parser_class) - self.assertEqual(node, [ - [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None], - TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), - ]) + self.assertEqual( + node, + [ + [ + TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"), + None, + ], + TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), + ], + ) def test_optional_literal(self) -> None: grammar = """ @@ -145,18 +172,29 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("1+\n", parser_class) - self.assertEqual(node, [ + self.assertEqual( + node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n")], - TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"), + [ + TokenInfo( + NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n" + ), + TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"), + ], + TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"), ], - TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"), - ]) + ) node = parse_string("1\n", parser_class) - self.assertEqual(node, [ - [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None], - TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), - ]) + self.assertEqual( + node, + [ + [ + TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"), + None, + ], + TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), + ], + ) def test_alt_optional_operator(self) -> None: grammar = """ @@ -166,21 +204,38 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("1 + 2\n", parser_class) - 
self.assertEqual(node, [ + self.assertEqual( + node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n")], [ - TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"), - [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n")], + TokenInfo( + NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n" + ), + [ + TokenInfo( + OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n" + ), + TokenInfo( + NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n" + ), + ], ], + TokenInfo( + NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n" + ), ], - TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"), - ]) + ) node = parse_string("1\n", parser_class) - self.assertEqual(node, [ - [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None], - TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), - ]) + self.assertEqual( + node, + [ + [ + TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"), + None, + ], + TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), + ], + ) def test_repeat_0_simple(self) -> None: grammar = """ @@ -189,20 +244,32 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("1 2 3\n", parser_class) - self.assertEqual(node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")], + self.assertEqual( + node, [ - [[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]], - [[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]], + TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"), + [ + TokenInfo( + NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n" + ), + TokenInfo( + NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n" + ), + ], + TokenInfo( + NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n" + ), ], - TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"), - ]) + ) node = parse_string("1\n", parser_class) - self.assertEqual(node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], - [], - TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), - ]) + self.assertEqual( + node, + [ + TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"), + [], + TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), + ], + ) def test_repeat_0_complex(self) -> None: grammar = """ @@ -211,24 +278,43 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("1 + 2 + 3\n", parser_class) - self.assertEqual(node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")], + self.assertEqual( + node, [ + TokenInfo( + NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n" + ), [ [ - TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"), - [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")], - ] - ], - [ + TokenInfo( + OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n" + ), + TokenInfo( + NUMBER, + string="2", + start=(1, 4), + end=(1, 5), + line="1 + 2 + 3\n", + ), + ], [ - TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"), - [TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")], - ] + TokenInfo( + OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n" + ), + TokenInfo( + NUMBER, + string="3", + start=(1, 8), + end=(1, 9), + line="1 
+ 2 + 3\n", + ), + ], ], + TokenInfo( + NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n" + ), ], - TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"), - ]) + ) def test_repeat_1_simple(self) -> None: grammar = """ @@ -237,14 +323,23 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("1 2 3\n", parser_class) - self.assertEqual(node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")], + self.assertEqual( + node, [ - [[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]], - [[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]], + TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"), + [ + TokenInfo( + NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n" + ), + TokenInfo( + NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n" + ), + ], + TokenInfo( + NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n" + ), ], - TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"), - ]) + ) with self.assertRaises(SyntaxError): parse_string("1\n", parser_class) @@ -255,24 +350,43 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("1 + 2 + 3\n", parser_class) - self.assertEqual(node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")], + self.assertEqual( + node, [ + TokenInfo( + NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n" + ), [ [ - TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"), - [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")], - ] - ], - [ + TokenInfo( + OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n" + ), + TokenInfo( + NUMBER, + string="2", + start=(1, 4), + end=(1, 5), + line="1 + 2 + 3\n", + ), + ], [ - TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"), - [TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")], - ] + TokenInfo( + OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n" + ), + TokenInfo( + NUMBER, + string="3", + start=(1, 8), + end=(1, 9), + line="1 + 2 + 3\n", + ), + ], ], + TokenInfo( + NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n" + ), ], - TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"), - ]) + ) with self.assertRaises(SyntaxError): parse_string("1\n", parser_class) @@ -283,14 +397,25 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("1, 2, 3\n", parser_class) - self.assertEqual(node, [ + self.assertEqual( + node, [ - [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n")], - [TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n")], - [TokenInfo(NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n")], + [ + TokenInfo( + NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n" + ), + TokenInfo( + NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n" + ), + TokenInfo( + NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n" + ), + ], + TokenInfo( + NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n" + ), ], - TokenInfo(NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"), - ]) + ) def test_left_recursive(self) -> None: grammar_source = """ @@ -311,18 +436,41 @@ class TestPegen(unittest.TestCase): self.assertFalse(rules["bar"].left_recursive) 
self.assertFalse(rules["baz"].left_recursive) node = parse_string("1 + 2 + 3\n", parser_class) - self.assertEqual(node, [ + self.assertEqual( + node, [ [ - [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")]], - TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"), - [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")], + [ + TokenInfo( + NUMBER, + string="1", + start=(1, 0), + end=(1, 1), + line="1 + 2 + 3\n", + ), + TokenInfo( + OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n" + ), + TokenInfo( + NUMBER, + string="2", + start=(1, 4), + end=(1, 5), + line="1 + 2 + 3\n", + ), + ], + TokenInfo( + OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n" + ), + TokenInfo( + NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n" + ), ], - TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"), - [TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")], + TokenInfo( + NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n" + ), ], - TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"), - ]) + ) def test_python_expr(self) -> None: grammar = """ @@ -392,31 +540,79 @@ class TestPegen(unittest.TestCase): exec(out.getvalue(), ns) parser_class: Type[Parser] = ns["GeneratedParser"] node = parse_string("D A C A E", parser_class) - self.assertEqual(node, [ + + self.assertEqual( + node, [ [ [ - [TokenInfo(type=NAME, string="D", start=(1, 0), end=(1, 1), line="D A C A E")], - TokenInfo(type=NAME, string="A", start=(1, 2), end=(1, 3), line="D A C A E"), + [ + TokenInfo( + type=NAME, + string="D", + start=(1, 0), + end=(1, 1), + line="D A C A E", + ), + TokenInfo( + type=NAME, + string="A", + start=(1, 2), + end=(1, 3), + line="D A C A E", + ), + ], + TokenInfo( + type=NAME, + string="C", + start=(1, 4), + end=(1, 5), + line="D A C A E", + ), ], - TokenInfo(type=NAME, string="C", start=(1, 4), end=(1, 5), line="D A C A E"), + TokenInfo( + type=NAME, + string="A", + start=(1, 6), + end=(1, 7), + line="D A C A E", + ), ], - TokenInfo(type=NAME, string="A", start=(1, 6), end=(1, 7), line="D A C A E"), + TokenInfo( + type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E" + ), ], - TokenInfo(type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"), - ]) + ) node = parse_string("B C A E", parser_class) - self.assertIsNotNone(node) - self.assertEqual(node, [ + self.assertEqual( + node, [ [ - [TokenInfo(type=NAME, string="B", start=(1, 0), end=(1, 1), line="B C A E")], - TokenInfo(type=NAME, string="C", start=(1, 2), end=(1, 3), line="B C A E"), + [ + TokenInfo( + type=NAME, + string="B", + start=(1, 0), + end=(1, 1), + line="B C A E", + ), + TokenInfo( + type=NAME, + string="C", + start=(1, 2), + end=(1, 3), + line="B C A E", + ), + ], + TokenInfo( + type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E" + ), ], - TokenInfo(type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"), + TokenInfo( + type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E" + ), ], - TokenInfo(type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"), - ]) + ) def test_nasty_mutually_left_recursive(self) -> None: # This grammar does not recognize 'x - + =', much to my chagrin. 
@@ -454,43 +650,44 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("foo = 12 + 12 .", parser_class) - self.assertEqual(node, [ + self.assertEqual( + node, [ + TokenInfo( + NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 ." + ), + TokenInfo( + OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ." + ), [ - [TokenInfo(NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 .")], - TokenInfo(OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."), + TokenInfo( + NUMBER, + string="12", + start=(1, 6), + end=(1, 8), + line="foo = 12 + 12 .", + ), [ [ TokenInfo( - NUMBER, string="12", start=(1, 6), end=(1, 8), line="foo = 12 + 12 ." - ) - ], - [ - [ - [ - TokenInfo( - OP, - string="+", - start=(1, 9), - end=(1, 10), - line="foo = 12 + 12 .", - ), - [ - TokenInfo( - NUMBER, - string="12", - start=(1, 11), - end=(1, 13), - line="foo = 12 + 12 .", - ) - ], - ] - ] - ], + OP, + string="+", + start=(1, 9), + end=(1, 10), + line="foo = 12 + 12 .", + ), + TokenInfo( + NUMBER, + string="12", + start=(1, 11), + end=(1, 13), + line="foo = 12 + 12 .", + ), + ] ], - ] - ] - ]) + ], + ], + ) def test_named_lookahead_error(self) -> None: grammar = """ @@ -533,11 +730,14 @@ class TestPegen(unittest.TestCase): """ parser_class = make_parser(grammar) node = parse_string("(1)", parser_class) - self.assertEqual(node, [ - TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"), - [TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)")], - TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"), - ]) + self.assertEqual( + node, + [ + TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"), + TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)"), + TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"), + ], + ) def test_dangling_reference(self) -> None: grammar = """ @@ -589,6 +789,124 @@ class TestPegen(unittest.TestCase): with self.assertRaisesRegex(GrammarError, "cannot start with underscore: '_x'"): parser_class = make_parser(grammar) + def test_soft_keyword(self) -> None: + grammar = """ + start: + | "number" n=NUMBER { eval(n.string) } + | "string" n=STRING { n.string } + | SOFT_KEYWORD l=NAME n=(NUMBER | NAME | STRING) { f"{l.string} = {n.string}"} + """ + parser_class = make_parser(grammar) + self.assertEqual(parse_string("number 1", parser_class, verbose=True), 1) + self.assertEqual(parse_string("string 'b'", parser_class, verbose=True), "'b'") + self.assertEqual( + parse_string("number test 1", parser_class, verbose=True), "test = 1" + ) + assert ( + parse_string("string test 'b'", parser_class, verbose=True) == "test = 'b'" + ) + with self.assertRaises(SyntaxError): + parse_string("test 1", parser_class, verbose=True) + + def test_forced(self) -> None: + grammar = """ + start: NAME &&':' | NAME + """ + parser_class = make_parser(grammar) + self.assertTrue(parse_string("number :", parser_class, verbose=True)) + with self.assertRaises(SyntaxError) as e: + parse_string("a", parser_class, verbose=True) + + self.assertIn("expected ':'", str(e.exception)) + + def test_forced_with_group(self) -> None: + grammar = """ + start: NAME &&(':' | ';') | NAME + """ + parser_class = make_parser(grammar) + self.assertTrue(parse_string("number :", parser_class, verbose=True)) + self.assertTrue(parse_string("number ;", parser_class, verbose=True)) + with self.assertRaises(SyntaxError) as e: + parse_string("a", parser_class, verbose=True) + self.assertIn("expected (':' 
| ';')", e.exception.args[0]) + + def test_unreachable_explicit(self) -> None: + source = """ + start: NAME { UNREACHABLE } + """ + grammar = parse_string(source, GrammarParser) + out = io.StringIO() + genr = PythonParserGenerator( + grammar, out, unreachable_formatting="This is a test" + ) + genr.generate("<string>") + self.assertIn("This is a test", out.getvalue()) + + def test_unreachable_implicit1(self) -> None: + source = """ + start: NAME | invalid_input + invalid_input: NUMBER { None } + """ + grammar = parse_string(source, GrammarParser) + out = io.StringIO() + genr = PythonParserGenerator( + grammar, out, unreachable_formatting="This is a test" + ) + genr.generate("<string>") + self.assertIn("This is a test", out.getvalue()) + + def test_unreachable_implicit2(self) -> None: + source = """ + start: NAME | '(' invalid_input ')' + invalid_input: NUMBER { None } + """ + grammar = parse_string(source, GrammarParser) + out = io.StringIO() + genr = PythonParserGenerator( + grammar, out, unreachable_formatting="This is a test" + ) + genr.generate("<string>") + self.assertIn("This is a test", out.getvalue()) + + def test_unreachable_implicit3(self) -> None: + source = """ + start: NAME | invalid_input { None } + invalid_input: NUMBER + """ + grammar = parse_string(source, GrammarParser) + out = io.StringIO() + genr = PythonParserGenerator( + grammar, out, unreachable_formatting="This is a test" + ) + genr.generate("<string>") + self.assertNotIn("This is a test", out.getvalue()) + + def test_locations_in_alt_action_and_group(self) -> None: + grammar = """ + start: t=term NEWLINE? $ { ast.Expression(t, LOCATIONS) } + term: + | l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, LOCATIONS) } + | l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, LOCATIONS) } + | factor + factor: + | ( + n=NAME { ast.Name(id=n.string, ctx=ast.Load(), LOCATIONS) } | + n=NUMBER { ast.Constant(value=ast.literal_eval(n.string), LOCATIONS) } + ) + """ + parser_class = make_parser(grammar) + source = "2*3\n" + o = ast.dump(parse_string(source, parser_class).body, include_attributes=True) + p = ast.dump(ast.parse(source).body[0].value, include_attributes=True).replace( + " kind=None,", "" + ) + diff = "\n".join( + difflib.unified_diff( + o.split("\n"), p.split("\n"), "cpython", "python-pegen" + ) + ) + self.assertFalse(diff) + class TestGrammarVisitor: class Visitor(GrammarVisitor): diff --git a/Parser/parser.c b/Parser/parser.c index 543827a..01082fa 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -1176,7 +1176,7 @@ statements_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ statements[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "statement+")); - _res = ( asdl_stmt_seq * ) _PyPegen_seq_flatten ( p , a ); + _res = ( asdl_stmt_seq* ) _PyPegen_seq_flatten ( p , a ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -1217,7 +1217,7 @@ statement_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ statement[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "compound_stmt")); - _res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , a ); + _res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , a ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -1294,7 +1294,7 @@ statement_newline_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ statement_newline[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "compound_stmt NEWLINE")); - _res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , a ); + _res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , a ); 
if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -1346,7 +1346,7 @@ statement_newline_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , CHECK ( stmt_ty , _PyAST_Pass ( EXTRA ) ) ); + _res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , CHECK ( stmt_ty , _PyAST_Pass ( EXTRA ) ) ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -1416,7 +1416,7 @@ simple_stmts_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ simple_stmts[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "simple_stmt !';' NEWLINE")); - _res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , a ); + _res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , a ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -2403,7 +2403,7 @@ augassign_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ augassign[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'@='")); - _res = CHECK_VERSION ( AugOperator * , 5 , "The '@' operator is" , _PyPegen_augoperator ( p , MatMult ) ); + _res = CHECK_VERSION ( AugOperator* , 5 , "The '@' operator is" , _PyPegen_augoperator ( p , MatMult ) ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -2841,7 +2841,7 @@ global_stmt_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_Global ( CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA ); + _res = _PyAST_Global ( CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -2903,7 +2903,7 @@ nonlocal_stmt_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_Nonlocal ( CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA ); + _res = _PyAST_Nonlocal ( CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -3460,7 +3460,7 @@ import_from_targets_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = ( asdl_alias_seq * ) _PyPegen_singleton_seq ( p , CHECK ( alias_ty , _PyPegen_alias_for_star ( p , EXTRA ) ) ); + _res = ( asdl_alias_seq* ) _PyPegen_singleton_seq ( p , CHECK ( alias_ty , _PyPegen_alias_for_star ( p , EXTRA ) ) ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -4649,7 +4649,7 @@ slash_with_default_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "param_no_default* param_with_default+ '/' ','")); - _res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b ); + _res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -4681,7 +4681,7 @@ slash_with_default_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "param_no_default* param_with_default+ '/' &')'")); - 
_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b ); + _res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -5340,7 +5340,7 @@ if_stmt_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq * , _PyPegen_singleton_seq ( p , c ) ) , EXTRA ); + _res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq* , _PyPegen_singleton_seq ( p , c ) ) , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -5478,7 +5478,7 @@ elif_stmt_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq * , _PyPegen_singleton_seq ( p , c ) ) , EXTRA ); + _res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq* , _PyPegen_singleton_seq ( p , c ) ) , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -6756,7 +6756,7 @@ subject_expr_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , value , values ) ) , Load , EXTRA ); + _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , value , values ) ) , Load , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -9049,7 +9049,7 @@ mapping_pattern_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , items ) ) , rest -> v . Name . id , EXTRA ); + _res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , items ) ) , rest -> v . Name . 
id , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -9092,7 +9092,7 @@ mapping_pattern_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , items ) ) , NULL , EXTRA ); + _res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , items ) ) , NULL , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -9381,7 +9381,7 @@ class_pattern_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_MatchClass ( cls , NULL , CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA ); + _res = _PyAST_MatchClass ( cls , NULL , CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -9433,7 +9433,7 @@ class_pattern_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_MatchClass ( cls , patterns , CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA ); + _res = _PyAST_MatchClass ( cls , patterns , CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -9642,7 +9642,7 @@ expressions_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA ); + _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -9678,7 +9678,7 @@ expressions_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA ); + _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -10004,7 +10004,7 @@ star_expressions_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - 
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA ); + _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -10040,7 +10040,7 @@ star_expressions_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA ); + _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -10485,7 +10485,7 @@ disjunction_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_BoolOp ( Or , CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA ); + _res = _PyAST_BoolOp ( Or , CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -10571,7 +10571,7 @@ conjunction_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_BoolOp ( And , CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA ); + _res = _PyAST_BoolOp ( And , CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -10739,7 +10739,7 @@ comparison_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_Compare ( a , CHECK ( asdl_int_seq * , _PyPegen_get_cmpops ( p , b ) ) , CHECK ( asdl_expr_seq * , _PyPegen_get_exprs ( p , b ) ) , EXTRA ); + _res = _PyAST_Compare ( a , CHECK ( asdl_int_seq* , _PyPegen_get_cmpops ( p , b ) ) , CHECK ( asdl_expr_seq* , _PyPegen_get_exprs ( p , b ) ) , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -12837,7 +12837,7 @@ primary_raw(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_Call ( a , CHECK ( asdl_expr_seq * , ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA ); + _res = _PyAST_Call ( a , CHECK ( asdl_expr_seq* , ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -13896,7 +13896,7 @@ lambda_slash_with_default_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ lambda_slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "lambda_param_no_default* lambda_param_with_default+ '/' ','")); - _res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b ); + _res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -13928,7 +13928,7 @@ lambda_slash_with_default_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ lambda_slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, 
"lambda_param_no_default* lambda_param_with_default+ '/' &':'")); - _res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b ); + _res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -14689,7 +14689,7 @@ dict_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_Dict ( CHECK ( asdl_expr_seq * , _PyPegen_get_keys ( p , a ) ) , CHECK ( asdl_expr_seq * , _PyPegen_get_values ( p , a ) ) , EXTRA ); + _res = _PyAST_Dict ( CHECK ( asdl_expr_seq* , _PyPegen_get_keys ( p , a ) ) , CHECK ( asdl_expr_seq* , _PyPegen_get_values ( p , a ) ) , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -15556,7 +15556,7 @@ args_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_Call ( _PyPegen_dummy_name ( p ) , CHECK_NULL_ALLOWED ( asdl_expr_seq * , _PyPegen_seq_extract_starred_exprs ( p , a ) ) , CHECK_NULL_ALLOWED ( asdl_keyword_seq * , _PyPegen_seq_delete_starred_exprs ( p , a ) ) , EXTRA ); + _res = _PyAST_Call ( _PyPegen_dummy_name ( p ) , CHECK_NULL_ALLOWED ( asdl_expr_seq* , _PyPegen_seq_extract_starred_exprs ( p , a ) ) , CHECK_NULL_ALLOWED ( asdl_keyword_seq* , _PyPegen_seq_delete_starred_exprs ( p , a ) ) , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -16026,7 +16026,7 @@ star_targets_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Store , EXTRA ); + _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Store , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -16119,7 +16119,7 @@ star_targets_tuple_seq_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ star_targets_tuple_seq[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "star_target ((',' star_target))+ ','?")); - _res = ( asdl_expr_seq * ) _PyPegen_seq_insert_in_front ( p , a , b ); + _res = ( asdl_expr_seq* ) _PyPegen_seq_insert_in_front ( p , a , b ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -16146,7 +16146,7 @@ star_targets_tuple_seq_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ star_targets_tuple_seq[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "star_target ','")); - _res = ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , a ); + _res = ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , a ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -16923,7 +16923,7 @@ t_primary_raw(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = _PyAST_Call ( a , CHECK ( asdl_expr_seq * , ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA ); + _res = _PyAST_Call ( a , CHECK ( asdl_expr_seq* , ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -17474,7 +17474,7 @@ type_expressions_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ 
type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.expression+ ',' '*' expression ',' '**' expression")); - _res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq * , _PyPegen_seq_append_to_end ( p , a , b ) ) , c ); + _res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq* , _PyPegen_seq_append_to_end ( p , a , b ) ) , c ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -17507,7 +17507,7 @@ type_expressions_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.expression+ ',' '*' expression")); - _res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , a , b ); + _res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , a , b ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -17540,7 +17540,7 @@ type_expressions_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.expression+ ',' '**' expression")); - _res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , a , b ); + _res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , a , b ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -17576,7 +17576,7 @@ type_expressions_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*' expression ',' '**' expression")); - _res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq * , _PyPegen_singleton_seq ( p , a ) ) , b ); + _res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq* , _PyPegen_singleton_seq ( p , a ) ) , b ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -17603,7 +17603,7 @@ type_expressions_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*' expression")); - _res = ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , a ); + _res = ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , a ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -17630,7 +17630,7 @@ type_expressions_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'**' expression")); - _res = ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , a ); + _res = ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , a ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -20149,7 +20149,7 @@ invalid_match_stmt_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ invalid_match_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "\"match\" subject_expr !':'")); - _res = CHECK_VERSION ( void * , 10 , "Pattern matching is" , RAISE_SYNTAX_ERROR ( "expected ':'" ) ); + _res = CHECK_VERSION ( void* , 10 , "Pattern matching is" , RAISE_SYNTAX_ERROR ( "expected ':'" ) ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); diff --git a/Parser/pegen.c b/Parser/pegen.c index f697f00..c77c534 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -897,6 +897,19 @@ _PyPegen_expect_token(Parser *p, int type) return t; } +void* +_PyPegen_expect_forced_result(Parser *p, void* result, const char* expected) { + + if (p->error_indicator == 1) { + return NULL; + } + if (result == NULL) { + RAISE_SYNTAX_ERROR("expected (%s)", expected); + return NULL; + } + return result; +} + Token * 
_PyPegen_expect_forced_token(Parser *p, int type, const char* expected) { diff --git a/Parser/pegen.h b/Parser/pegen.h index f4b6876..57d1177 100644 --- a/Parser/pegen.h +++ b/Parser/pegen.h @@ -130,6 +130,7 @@ int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *); Token *_PyPegen_expect_token(Parser *p, int type); +void* _PyPegen_expect_forced_result(Parser *p, void* result, const char* expected); Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected); expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword); expr_ty _PyPegen_soft_keyword_token(Parser *p); diff --git a/Tools/peg_generator/mypy.ini b/Tools/peg_generator/mypy.ini index 80d5c05..1732372 100644 --- a/Tools/peg_generator/mypy.ini +++ b/Tools/peg_generator/mypy.ini @@ -1,5 +1,5 @@ [mypy] -files = pegen, scripts +files = pegen follow_imports = error no_implicit_optional = True diff --git a/Tools/peg_generator/pegen/__main__.py b/Tools/peg_generator/pegen/__main__.py index c0f3b68..a12fe78 100755 --- a/Tools/peg_generator/pegen/__main__.py +++ b/Tools/peg_generator/pegen/__main__.py @@ -100,7 +100,9 @@ c_parser.add_argument( "--optimized", action="store_true", help="Compile the extension in optimized mode" ) c_parser.add_argument( - "--skip-actions", action="store_true", help="Suppress code emission for rule actions", + "--skip-actions", + action="store_true", + help="Suppress code emission for rule actions", ) python_parser = subparsers.add_parser("python", help="Generate Python code") @@ -114,7 +116,9 @@ python_parser.add_argument( help="Where to write the generated parser", ) python_parser.add_argument( - "--skip-actions", action="store_true", help="Suppress code emission for rule actions", + "--skip-actions", + action="store_true", + help="Suppress code emission for rule actions", ) diff --git a/Tools/peg_generator/pegen/ast_dump.py b/Tools/peg_generator/pegen/ast_dump.py index 93dfbfd..2c57d09 100644 --- a/Tools/peg_generator/pegen/ast_dump.py +++ b/Tools/peg_generator/pegen/ast_dump.py @@ -6,9 +6,17 @@ always fail. We rely on string comparison of the base classes instead. TODO: Remove the above-described hack. 
""" +from typing import Any, Optional, Tuple -def ast_dump(node, annotate_fields=True, include_attributes=False, *, indent=None): - def _format(node, level=0): + +def ast_dump( + node: Any, + annotate_fields: bool = True, + include_attributes: bool = False, + *, + indent: Optional[str] = None, +) -> str: + def _format(node: Any, level: int = 0) -> Tuple[str, bool]: if indent is not None: level += 1 prefix = "\n" + indent * level diff --git a/Tools/peg_generator/pegen/build.py b/Tools/peg_generator/pegen/build.py index b80fc85..6f0a091 100644 --- a/Tools/peg_generator/pegen/build.py +++ b/Tools/peg_generator/pegen/build.py @@ -58,7 +58,7 @@ def compile_c_extension( extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST") extra_compile_args.append("-DPy_BUILD_CORE_MODULE") # Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c - extra_compile_args.append('-D_Py_TEST_PEGEN') + extra_compile_args.append("-D_Py_TEST_PEGEN") extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST") if keep_asserts: extra_compile_args.append("-UNDEBUG") @@ -175,7 +175,10 @@ def build_c_generator( def build_python_generator( - grammar: Grammar, grammar_file: str, output_file: str, skip_actions: bool = False, + grammar: Grammar, + grammar_file: str, + output_file: str, + skip_actions: bool = False, ) -> ParserGenerator: with open(output_file, "w") as file: gen: ParserGenerator = PythonParserGenerator(grammar, file) # TODO: skip_actions @@ -246,5 +249,10 @@ def build_python_parser_and_generator( skip_actions (bool, optional): Whether to pretend no rule has any actions. """ grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser) - gen = build_python_generator(grammar, grammar_file, output_file, skip_actions=skip_actions,) + gen = build_python_generator( + grammar, + grammar_file, + output_file, + skip_actions=skip_actions, + ) return grammar, parser, tokenizer, gen diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py index 7a2edbb..e928fd3 100644 --- a/Tools/peg_generator/pegen/c_generator.py +++ b/Tools/peg_generator/pegen/c_generator.py @@ -12,6 +12,7 @@ from pegen.grammar import ( Gather, GrammarVisitor, Group, + Leaf, Lookahead, NamedItem, NameLeaf, @@ -91,7 +92,16 @@ class FunctionCall: parts.append(", 1") if self.assigned_variable: if self.assigned_variable_type: - parts = ["(", self.assigned_variable, " = ", '(', self.assigned_variable_type, ')', *parts, ")"] + parts = [ + "(", + self.assigned_variable, + " = ", + "(", + self.assigned_variable_type, + ")", + *parts, + ")", + ] else: parts = ["(", self.assigned_variable, " = ", *parts, ")"] if self.comment: @@ -256,9 +266,10 @@ class CCallMakerVisitor(GrammarVisitor): def visit_Forced(self, node: Forced) -> FunctionCall: call = self.generate_call(node.node) - if call.nodetype == NodeTypes.GENERIC_TOKEN: + if isinstance(node.node, Leaf): + assert isinstance(node.node, Leaf) val = ast.literal_eval(node.node.value) - assert val in self.exact_tokens, f"{node.value} is not a known literal" + assert val in self.exact_tokens, f"{node.node.value} is not a known literal" type = self.exact_tokens[val] return FunctionCall( assigned_variable="_literal", @@ -268,9 +279,19 @@ class CCallMakerVisitor(GrammarVisitor): return_type="Token *", comment=f"forced_token='{val}'", ) + if isinstance(node.node, Group): + call = self.visit(node.node.rhs) + call.assigned_variable = None + call.comment = None + return FunctionCall( + assigned_variable="_literal", + 
function=f"_PyPegen_expect_forced_result", + arguments=["p", str(call), f'"{node.node.rhs!s}"'], + return_type="void *", + comment=f"forced_token=({node.node.rhs!s})", + ) else: - raise NotImplementedError( - f"Forced tokens don't work with {call.nodetype} tokens") + raise NotImplementedError(f"Forced tokens don't work with {node.node} nodes") def visit_Opt(self, node: Opt) -> FunctionCall: call = self.generate_call(node.node) @@ -347,7 +368,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): debug: bool = False, skip_actions: bool = False, ): - super().__init__(grammar, tokens, file) + super().__init__(grammar, set(tokens.values()), file) self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor( self, exact_tokens, non_exact_tokens ) @@ -386,7 +407,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): self.print(f"goto {goto_target};") self.print(f"}}") - def out_of_memory_return(self, expr: str, cleanup_code: Optional[str] = None,) -> None: + def out_of_memory_return( + self, + expr: str, + cleanup_code: Optional[str] = None, + ) -> None: self.print(f"if ({expr}) {{") with self.indent(): if cleanup_code is not None: @@ -568,7 +593,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts): self._set_up_token_start_metadata_extraction() self.visit( - rhs, is_loop=False, is_gather=node.is_gather(), rulename=node.name, + rhs, + is_loop=False, + is_gather=node.is_gather(), + rulename=node.name, ) if self.debug: self.print(f'D(fprintf(stderr, "Fail at %d: {node.name}\\n", p->mark));') @@ -601,7 +629,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts): self._set_up_token_start_metadata_extraction() self.visit( - rhs, is_loop=True, is_gather=node.is_gather(), rulename=node.name, + rhs, + is_loop=True, + is_gather=node.is_gather(), + rulename=node.name, ) if is_repeat1: self.print("if (_n == 0 || p->error_indicator) {") @@ -771,7 +802,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): def visit_Alt( self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str] ) -> None: - if len(node.items) == 1 and str(node.items[0]).startswith('invalid_'): + if len(node.items) == 1 and str(node.items[0]).startswith("invalid_"): self.print(f"if (p->call_invalid_rules) {{ // {node}") else: self.print(f"{{ // {node}") @@ -791,7 +822,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): if v == "_cut_var": v += " = 0" # cut_var must be initialized self.print(f"{var_type}{v};") - if v.startswith("_opt_var"): + if v and v.startswith("_opt_var"): self.print(f"UNUSED({v}); // Silence compiler warnings") with self.local_variable_context(): diff --git a/Tools/peg_generator/pegen/first_sets.py b/Tools/peg_generator/pegen/first_sets.py index 71be5a2..50ced22 100755 --- a/Tools/peg_generator/pegen/first_sets.py +++ b/Tools/peg_generator/pegen/first_sets.py @@ -29,7 +29,8 @@ from pegen.grammar import ( ) argparser = argparse.ArgumentParser( - prog="calculate_first_sets", description="Calculate the first sets of a grammar", + prog="calculate_first_sets", + description="Calculate the first sets of a grammar", ) argparser.add_argument("grammar_file", help="The grammar file") diff --git a/Tools/peg_generator/pegen/grammar_parser.py b/Tools/peg_generator/pegen/grammar_parser.py index 70fa5b0..6e9f7d3 100644 --- a/Tools/peg_generator/pegen/grammar_parser.py +++ b/Tools/peg_generator/pegen/grammar_parser.py @@ -2,7 +2,10 @@ # 
@generated by pegen from ./Tools/peg_generator/pegen/metagrammar.gram import ast -from typing import Optional, Any +import sys +import tokenize + +from typing import Any, Optional from pegen.parser import memoize, memoize_left_rec, logger, Parser from ast import literal_eval @@ -35,83 +38,71 @@ from pegen.grammar import ( StringLeaf, ) +# Keywords and soft keywords are listed at the end of the parser definition. class GeneratedParser(Parser): @memoize def start(self) -> Optional[Grammar]: # start: grammar $ - mark = self.mark() - cut = False + mark = self._mark() if ( (grammar := self.grammar()) and - (endmarker := self.expect('ENDMARKER')) + (_endmarker := self.expect('ENDMARKER')) ): return grammar - self.reset(mark) - if cut: return None + self._reset(mark) return None @memoize def grammar(self) -> Optional[Grammar]: # grammar: metas rules | rules - mark = self.mark() - cut = False + mark = self._mark() if ( (metas := self.metas()) and (rules := self.rules()) ): return Grammar ( rules , metas ) - self.reset(mark) - if cut: return None - cut = False + self._reset(mark) if ( (rules := self.rules()) ): - return Grammar ( rules , [ ] ) - self.reset(mark) - if cut: return None + return Grammar ( rules , [] ) + self._reset(mark) return None @memoize def metas(self) -> Optional[MetaList]: # metas: meta metas | meta - mark = self.mark() - cut = False + mark = self._mark() if ( (meta := self.meta()) and (metas := self.metas()) ): - return [ meta ] + metas - self.reset(mark) - if cut: return None - cut = False + return [meta] + metas + self._reset(mark) if ( (meta := self.meta()) ): - return [ meta ] - self.reset(mark) - if cut: return None + return [meta] + self._reset(mark) return None @memoize def meta(self) -> Optional[MetaTuple]: # meta: "@" NAME NEWLINE | "@" NAME NAME NEWLINE | "@" NAME STRING NEWLINE - mark = self.mark() - cut = False + mark = self._mark() if ( (literal := self.expect("@")) and (name := self.name()) and - (newline := self.expect('NEWLINE')) + (_newline := self.expect('NEWLINE')) ): return ( name . string , None ) - self.reset(mark) - if cut: return None - cut = False + self._reset(mark) if ( (literal := self.expect("@")) and @@ -119,12 +110,10 @@ class GeneratedParser(Parser): and (b := self.name()) and - (newline := self.expect('NEWLINE')) + (_newline := self.expect('NEWLINE')) ): return ( a . string , b . string ) - self.reset(mark) - if cut: return None - cut = False + self._reset(mark) if ( (literal := self.expect("@")) and @@ -132,40 +121,34 @@ class GeneratedParser(Parser): and (string := self.string()) and - (newline := self.expect('NEWLINE')) + (_newline := self.expect('NEWLINE')) ): return ( name . string , literal_eval ( string . string ) ) - self.reset(mark) - if cut: return None + self._reset(mark) return None @memoize def rules(self) -> Optional[RuleList]: # rules: rule rules | rule - mark = self.mark() - cut = False + mark = self._mark() if ( (rule := self.rule()) and (rules := self.rules()) ): - return [ rule ] + rules - self.reset(mark) - if cut: return None - cut = False + return [rule] + rules + self._reset(mark) if ( (rule := self.rule()) ): - return [ rule ] - self.reset(mark) - if cut: return None + return [rule] + self._reset(mark) return None @memoize def rule(self) -> Optional[Rule]: # rule: rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT | rulename memoflag? 
":" alts NEWLINE - mark = self.mark() - cut = False + mark = self._mark() if ( (rulename := self.rulename()) and @@ -175,18 +158,16 @@ class GeneratedParser(Parser): and (alts := self.alts()) and - (newline := self.expect('NEWLINE')) + (_newline := self.expect('NEWLINE')) and - (indent := self.expect('INDENT')) + (_indent := self.expect('INDENT')) and (more_alts := self.more_alts()) and - (dedent := self.expect('DEDENT')) + (_dedent := self.expect('DEDENT')) ): - return Rule ( rulename [ 0 ] , rulename [ 1 ] , Rhs ( alts . alts + more_alts . alts ) , memo = opt ) - self.reset(mark) - if cut: return None - cut = False + return Rule ( rulename [0] , rulename [1] , Rhs ( alts . alts + more_alts . alts ) , memo = opt ) + self._reset(mark) if ( (rulename := self.rulename()) and @@ -194,18 +175,16 @@ class GeneratedParser(Parser): and (literal := self.expect(":")) and - (newline := self.expect('NEWLINE')) + (_newline := self.expect('NEWLINE')) and - (indent := self.expect('INDENT')) + (_indent := self.expect('INDENT')) and (more_alts := self.more_alts()) and - (dedent := self.expect('DEDENT')) + (_dedent := self.expect('DEDENT')) ): - return Rule ( rulename [ 0 ] , rulename [ 1 ] , more_alts , memo = opt ) - self.reset(mark) - if cut: return None - cut = False + return Rule ( rulename [0] , rulename [1] , more_alts , memo = opt ) + self._reset(mark) if ( (rulename := self.rulename()) and @@ -215,76 +194,49 @@ class GeneratedParser(Parser): and (alts := self.alts()) and - (newline := self.expect('NEWLINE')) + (_newline := self.expect('NEWLINE')) ): - return Rule ( rulename [ 0 ] , rulename [ 1 ] , alts , memo = opt ) - self.reset(mark) - if cut: return None + return Rule ( rulename [0] , rulename [1] , alts , memo = opt ) + self._reset(mark) return None @memoize def rulename(self) -> Optional[RuleName]: - # rulename: NAME '[' NAME '*' ']' | NAME '[' NAME ']' | NAME - mark = self.mark() - cut = False - if ( - (name := self.name()) - and - (literal := self.expect('[')) - and - (type := self.name()) - and - (literal_1 := self.expect('*')) - and - (literal_2 := self.expect(']')) - ): - return ( name . string , type . string + "*" ) - self.reset(mark) - if cut: return None - cut = False + # rulename: NAME annotation | NAME + mark = self._mark() if ( (name := self.name()) and - (literal := self.expect('[')) - and - (type := self.name()) - and - (literal_1 := self.expect(']')) + (annotation := self.annotation()) ): - return ( name . string , type . string ) - self.reset(mark) - if cut: return None - cut = False + return ( name . string , annotation ) + self._reset(mark) if ( (name := self.name()) ): return ( name . string , None ) - self.reset(mark) - if cut: return None + self._reset(mark) return None @memoize def memoflag(self) -> Optional[str]: - # memoflag: '(' 'memo' ')' - mark = self.mark() - cut = False + # memoflag: '(' "memo" ')' + mark = self._mark() if ( (literal := self.expect('(')) and - (literal_1 := self.expect('memo')) + (literal_1 := self.expect("memo")) and (literal_2 := self.expect(')')) ): return "memo" - self.reset(mark) - if cut: return None + self._reset(mark) return None @memoize def alts(self) -> Optional[Rhs]: # alts: alt "|" alts | alt - mark = self.mark() - cut = False + mark = self._mark() if ( (alt := self.alt()) and @@ -292,53 +244,45 @@ class GeneratedParser(Parser): and (alts := self.alts()) ): - return Rhs ( [ alt ] + alts . alts ) - self.reset(mark) - if cut: return None - cut = False + return Rhs ( [alt] + alts . 
alts ) + self._reset(mark) if ( (alt := self.alt()) ): - return Rhs ( [ alt ] ) - self.reset(mark) - if cut: return None + return Rhs ( [alt] ) + self._reset(mark) return None @memoize def more_alts(self) -> Optional[Rhs]: # more_alts: "|" alts NEWLINE more_alts | "|" alts NEWLINE - mark = self.mark() - cut = False + mark = self._mark() if ( (literal := self.expect("|")) and (alts := self.alts()) and - (newline := self.expect('NEWLINE')) + (_newline := self.expect('NEWLINE')) and (more_alts := self.more_alts()) ): return Rhs ( alts . alts + more_alts . alts ) - self.reset(mark) - if cut: return None - cut = False + self._reset(mark) if ( (literal := self.expect("|")) and (alts := self.alts()) and - (newline := self.expect('NEWLINE')) + (_newline := self.expect('NEWLINE')) ): return Rhs ( alts . alts ) - self.reset(mark) - if cut: return None + self._reset(mark) return None @memoize def alt(self) -> Optional[Alt]: # alt: items '$' action | items '$' | items action | items - mark = self.mark() - cut = False + mark = self._mark() if ( (items := self.items()) and @@ -346,101 +290,65 @@ class GeneratedParser(Parser): and (action := self.action()) ): - return Alt ( items + [ NamedItem ( None , NameLeaf ( 'ENDMARKER' ) ) ] , action = action ) - self.reset(mark) - if cut: return None - cut = False + return Alt ( items + [NamedItem ( None , NameLeaf ( 'ENDMARKER' ) )] , action = action ) + self._reset(mark) if ( (items := self.items()) and (literal := self.expect('$')) ): - return Alt ( items + [ NamedItem ( None , NameLeaf ( 'ENDMARKER' ) ) ] , action = None ) - self.reset(mark) - if cut: return None - cut = False + return Alt ( items + [NamedItem ( None , NameLeaf ( 'ENDMARKER' ) )] , action = None ) + self._reset(mark) if ( (items := self.items()) and (action := self.action()) ): return Alt ( items , action = action ) - self.reset(mark) - if cut: return None - cut = False + self._reset(mark) if ( (items := self.items()) ): return Alt ( items , action = None ) - self.reset(mark) - if cut: return None + self._reset(mark) return None @memoize def items(self) -> Optional[NamedItemList]: # items: named_item items | named_item - mark = self.mark() - cut = False + mark = self._mark() if ( (named_item := self.named_item()) and (items := self.items()) ): - return [ named_item ] + items - self.reset(mark) - if cut: return None - cut = False + return [named_item] + items + self._reset(mark) if ( (named_item := self.named_item()) ): - return [ named_item ] - self.reset(mark) - if cut: return None + return [named_item] + self._reset(mark) return None @memoize def named_item(self) -> Optional[NamedItem]: - # named_item: NAME '[' NAME '*' ']' '=' ~ item | NAME '[' NAME ']' '=' ~ item | NAME '=' ~ item | item | forced_atom | lookahead - mark = self.mark() - cut = False - if ( - (name := self.name()) - and - (literal := self.expect('[')) - and - (type := self.name()) - and - (literal_1 := self.expect('*')) - and - (literal_2 := self.expect(']')) - and - (literal_3 := self.expect('=')) - and - (cut := True) - and - (item := self.item()) - ): - return NamedItem ( name . 
string , item , f"{type.string}*" ) - self.reset(mark) - if cut: return None + # named_item: NAME annotation '=' ~ item | NAME '=' ~ item | item | forced_atom | lookahead + mark = self._mark() cut = False if ( (name := self.name()) and - (literal := self.expect('[')) - and - (type := self.name()) - and - (literal_1 := self.expect(']')) + (annotation := self.annotation()) and - (literal_2 := self.expect('=')) + (literal := self.expect('=')) and (cut := True) and (item := self.item()) ): - return NamedItem ( name . string , item , type . string ) - self.reset(mark) + return NamedItem ( name . string , item , annotation ) + self._reset(mark) if cut: return None cut = False if ( @@ -453,35 +361,29 @@ class GeneratedParser(Parser): (item := self.item()) ): return NamedItem ( name . string , item ) - self.reset(mark) + self._reset(mark) if cut: return None - cut = False if ( (item := self.item()) ): return NamedItem ( None , item ) - self.reset(mark) - if cut: return None - cut = False + self._reset(mark) if ( - (it := self.forced_atom()) + (forced := self.forced_atom()) ): - return NamedItem ( None , it ) - self.reset(mark) - if cut: return None - cut = False + return NamedItem ( None , forced ) + self._reset(mark) if ( (it := self.lookahead()) ): return NamedItem ( None , it ) - self.reset(mark) - if cut: return None + self._reset(mark) return None @memoize - def forced_atom(self) -> Optional[NamedItem]: + def forced_atom(self) -> Optional[Forced]: # forced_atom: '&' '&' ~ atom - mark = self.mark() + mark = self._mark() cut = False if ( (literal := self.expect('&')) @@ -493,14 +395,14 @@ class GeneratedParser(Parser): (atom := self.atom()) ): return Forced ( atom ) - self.reset(mark) + self._reset(mark) if cut: return None return None @memoize def lookahead(self) -> Optional[LookaheadOrCut]: # lookahead: '&' ~ atom | '!' ~ atom | '~' - mark = self.mark() + mark = self._mark() cut = False if ( (literal := self.expect('&')) @@ -510,7 +412,7 @@ class GeneratedParser(Parser): (atom := self.atom()) ): return PositiveLookahead ( atom ) - self.reset(mark) + self._reset(mark) if cut: return None cut = False if ( @@ -521,21 +423,19 @@ class GeneratedParser(Parser): (atom := self.atom()) ): return NegativeLookahead ( atom ) - self.reset(mark) + self._reset(mark) if cut: return None - cut = False if ( (literal := self.expect('~')) ): return Cut ( ) - self.reset(mark) - if cut: return None + self._reset(mark) return None @memoize def item(self) -> Optional[Item]: # item: '[' ~ alts ']' | atom '?' | atom '*' | atom '+' | atom '.' 
atom '+' | atom - mark = self.mark() + mark = self._mark() cut = False if ( (literal := self.expect('[')) @@ -547,36 +447,29 @@ class GeneratedParser(Parser): (literal_1 := self.expect(']')) ): return Opt ( alts ) - self.reset(mark) + self._reset(mark) if cut: return None - cut = False if ( (atom := self.atom()) and (literal := self.expect('?')) ): return Opt ( atom ) - self.reset(mark) - if cut: return None - cut = False + self._reset(mark) if ( (atom := self.atom()) and (literal := self.expect('*')) ): return Repeat0 ( atom ) - self.reset(mark) - if cut: return None - cut = False + self._reset(mark) if ( (atom := self.atom()) and (literal := self.expect('+')) ): return Repeat1 ( atom ) - self.reset(mark) - if cut: return None - cut = False + self._reset(mark) if ( (sep := self.atom()) and @@ -587,21 +480,18 @@ class GeneratedParser(Parser): (literal_1 := self.expect('+')) ): return Gather ( sep , node ) - self.reset(mark) - if cut: return None - cut = False + self._reset(mark) if ( (atom := self.atom()) ): return atom - self.reset(mark) - if cut: return None + self._reset(mark) return None @memoize def atom(self) -> Optional[Plain]: # atom: '(' ~ alts ')' | NAME | STRING - mark = self.mark() + mark = self._mark() cut = False if ( (literal := self.expect('(')) @@ -613,28 +503,24 @@ class GeneratedParser(Parser): (literal_1 := self.expect(')')) ): return Group ( alts ) - self.reset(mark) + self._reset(mark) if cut: return None - cut = False if ( (name := self.name()) ): return NameLeaf ( name . string ) - self.reset(mark) - if cut: return None - cut = False + self._reset(mark) if ( (string := self.string()) ): return StringLeaf ( string . string ) - self.reset(mark) - if cut: return None + self._reset(mark) return None @memoize def action(self) -> Optional[str]: # action: "{" ~ target_atoms "}" - mark = self.mark() + mark = self._mark() cut = False if ( (literal := self.expect("{")) @@ -646,95 +532,123 @@ class GeneratedParser(Parser): (literal_1 := self.expect("}")) ): return target_atoms - self.reset(mark) + self._reset(mark) + if cut: return None + return None + + @memoize + def annotation(self) -> Optional[str]: + # annotation: "[" ~ target_atoms "]" + mark = self._mark() + cut = False + if ( + (literal := self.expect("[")) + and + (cut := True) + and + (target_atoms := self.target_atoms()) + and + (literal_1 := self.expect("]")) + ): + return target_atoms + self._reset(mark) if cut: return None return None @memoize def target_atoms(self) -> Optional[str]: # target_atoms: target_atom target_atoms | target_atom - mark = self.mark() - cut = False + mark = self._mark() if ( (target_atom := self.target_atom()) and (target_atoms := self.target_atoms()) ): return target_atom + " " + target_atoms - self.reset(mark) - if cut: return None - cut = False + self._reset(mark) if ( (target_atom := self.target_atom()) ): return target_atom - self.reset(mark) - if cut: return None + self._reset(mark) return None @memoize def target_atom(self) -> Optional[str]: - # target_atom: "{" ~ target_atoms "}" | NAME | NUMBER | STRING | "?" | ":" | !"}" OP - mark = self.mark() + # target_atom: "{" ~ target_atoms? "}" | "[" ~ target_atoms? "]" | NAME "*" | NAME | NUMBER | STRING | "?" 
| ":" | !"}" !"]" OP + mark = self._mark() cut = False if ( (literal := self.expect("{")) and (cut := True) and - (target_atoms := self.target_atoms()) + (atoms := self.target_atoms(),) and (literal_1 := self.expect("}")) ): - return "{" + target_atoms + "}" - self.reset(mark) + return "{" + ( atoms or "" ) + "}" + self._reset(mark) if cut: return None cut = False if ( + (literal := self.expect("[")) + and + (cut := True) + and + (atoms := self.target_atoms(),) + and + (literal_1 := self.expect("]")) + ): + return "[" + ( atoms or "" ) + "]" + self._reset(mark) + if cut: return None + if ( + (name := self.name()) + and + (literal := self.expect("*")) + ): + return name . string + "*" + self._reset(mark) + if ( (name := self.name()) ): return name . string - self.reset(mark) - if cut: return None - cut = False + self._reset(mark) if ( (number := self.number()) ): return number . string - self.reset(mark) - if cut: return None - cut = False + self._reset(mark) if ( (string := self.string()) ): return string . string - self.reset(mark) - if cut: return None - cut = False + self._reset(mark) if ( (literal := self.expect("?")) ): return "?" - self.reset(mark) - if cut: return None - cut = False + self._reset(mark) if ( (literal := self.expect(":")) ): return ":" - self.reset(mark) - if cut: return None - cut = False + self._reset(mark) if ( self.negative_lookahead(self.expect, "}") and + self.negative_lookahead(self.expect, "]") + and (op := self.op()) ): return op . string - self.reset(mark) - if cut: return None + self._reset(mark) return None + KEYWORDS = () + SOFT_KEYWORDS = ('memo',) + if __name__ == '__main__': from pegen.parser import simple_parser_main diff --git a/Tools/peg_generator/pegen/keywordgen.py b/Tools/peg_generator/pegen/keywordgen.py index 2937ddb..6a07f6e 100644 --- a/Tools/peg_generator/pegen/keywordgen.py +++ b/Tools/peg_generator/pegen/keywordgen.py @@ -38,7 +38,7 @@ issoftkeyword = frozenset(softkwlist).__contains__ EXTRA_KEYWORDS = ["async", "await"] -def main(): +def main() -> None: parser = argparse.ArgumentParser( description="Generate the Lib/keywords.py file from the grammar." ) @@ -58,9 +58,7 @@ def main(): grammar, _, _ = build_parser(args.grammar) with args.tokens_file as tok_file: all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file) - gen: ParserGenerator = CParserGenerator( - grammar, all_tokens, exact_tok, non_exact_tok, file=None - ) + gen = CParserGenerator(grammar, all_tokens, exact_tok, non_exact_tok, file=None) gen.collect_todo() with args.keyword_file as thefile: @@ -68,7 +66,9 @@ def main(): all_soft_keywords = sorted(gen.callmakervisitor.soft_keywords) keywords = "" if not all_keywords else " " + ",\n ".join(map(repr, all_keywords)) - soft_keywords = "" if not all_soft_keywords else " " + ",\n ".join(map(repr, all_soft_keywords)) + soft_keywords = ( + "" if not all_soft_keywords else " " + ",\n ".join(map(repr, all_soft_keywords)) + ) thefile.write(TEMPLATE.format(keywords=keywords, soft_keywords=soft_keywords)) diff --git a/Tools/peg_generator/pegen/metagrammar.gram b/Tools/peg_generator/pegen/metagrammar.gram index bb4355f..f22c334 100644 --- a/Tools/peg_generator/pegen/metagrammar.gram +++ b/Tools/peg_generator/pegen/metagrammar.gram @@ -57,13 +57,12 @@ rule[Rule]: | rulename memoflag? 
":" alts NEWLINE { Rule(rulename[0], rulename[1], alts, memo=opt) } rulename[RuleName]: - | NAME '[' type=NAME '*' ']' { (name.string, type.string+"*") } - | NAME '[' type=NAME ']' { (name.string, type.string) } + | NAME annotation { (name.string, annotation) } | NAME { (name.string, None) } # In the future this may return something more complicated memoflag[str]: - | '(' 'memo' ')' { "memo" } + | '(' "memo" ')' { "memo" } alts[Rhs]: | alt "|" alts { Rhs([alt] + alts.alts)} @@ -84,14 +83,13 @@ items[NamedItemList]: | named_item { [named_item] } named_item[NamedItem]: - | NAME '[' type=NAME '*' ']' '=' ~ item {NamedItem(name.string, item, f"{type.string}*")} - | NAME '[' type=NAME ']' '=' ~ item {NamedItem(name.string, item, type.string)} + | NAME annotation '=' ~ item {NamedItem(name.string, item, annotation)} | NAME '=' ~ item {NamedItem(name.string, item)} | item {NamedItem(None, item)} - | it=forced_atom {NamedItem(None, it)} + | forced=forced_atom {NamedItem(None, forced)} | it=lookahead {NamedItem(None, it)} -forced_atom[NamedItem]: +forced_atom[Forced]: | '&''&' ~ atom {Forced(atom)} lookahead[LookaheadOrCut]: @@ -112,19 +110,22 @@ atom[Plain]: | NAME {NameLeaf(name.string) } | STRING {StringLeaf(string.string)} -# Mini-grammar for the actions +# Mini-grammar for the actions and annotations action[str]: "{" ~ target_atoms "}" { target_atoms } +annotation[str]: "[" ~ target_atoms "]" { target_atoms } target_atoms[str]: | target_atom target_atoms { target_atom + " " + target_atoms } | target_atom { target_atom } target_atom[str]: - | "{" ~ target_atoms "}" { "{" + target_atoms + "}" } + | "{" ~ atoms=target_atoms? "}" { "{" + (atoms or "") + "}" } + | "[" ~ atoms=target_atoms? "]" { "[" + (atoms or "") + "]" } + | NAME "*" { name.string + "*" } | NAME { name.string } | NUMBER { number.string } | STRING { string.string } | "?" { "?" } | ":" { ":" } - | !"}" OP { op.string } + | !"}" !"]" OP { op.string } diff --git a/Tools/peg_generator/pegen/parser.py b/Tools/peg_generator/pegen/parser.py index 16d954d..4ce60e3 100644 --- a/Tools/peg_generator/pegen/parser.py +++ b/Tools/peg_generator/pegen/parser.py @@ -4,13 +4,10 @@ import time import token import tokenize import traceback - from abc import abstractmethod -from typing import Any, Callable, cast, Dict, Optional, Tuple, Type, TypeVar +from typing import Any, Callable, ClassVar, Dict, Optional, Tuple, Type, TypeVar, cast -from pegen.tokenizer import exact_token_types -from pegen.tokenizer import Mark -from pegen.tokenizer import Tokenizer +from pegen.tokenizer import Mark, Tokenizer, exact_token_types T = TypeVar("T") P = TypeVar("P", bound="Parser") @@ -45,12 +42,12 @@ def memoize(method: F) -> F: method_name = method.__name__ def memoize_wrapper(self: P, *args: object) -> T: - mark = self.mark() + mark = self._mark() key = mark, method_name, args # Fast path: cache hit, and not verbose. if key in self._cache and not self._verbose: tree, endmark = self._cache[key] - self.reset(endmark) + self._reset(endmark) return tree # Slow path: no cache hit, or verbose. verbose = self._verbose @@ -64,13 +61,13 @@ def memoize(method: F) -> F: self._level -= 1 if verbose: print(f"{fill}... 
{method_name}({argsr}) -> {tree!s:.200}") - endmark = self.mark() + endmark = self._mark() self._cache[key] = tree, endmark else: tree, endmark = self._cache[key] if verbose: print(f"{fill}{method_name}({argsr}) -> {tree!s:.200}") - self.reset(endmark) + self._reset(endmark) return tree memoize_wrapper.__wrapped__ = method # type: ignore @@ -82,12 +79,12 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option method_name = method.__name__ def memoize_left_rec_wrapper(self: P) -> Optional[T]: - mark = self.mark() + mark = self._mark() key = mark, method_name, () # Fast path: cache hit, and not verbose. if key in self._cache and not self._verbose: tree, endmark = self._cache[key] - self.reset(endmark) + self._reset(endmark) return tree # Slow path: no cache hit, or verbose. verbose = self._verbose @@ -113,9 +110,13 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option print(f"{fill}Recursive {method_name} at {mark} depth {depth}") while True: - self.reset(mark) - result = method(self) - endmark = self.mark() + self._reset(mark) + self.in_recursive_rule += 1 + try: + result = method(self) + finally: + self.in_recursive_rule -= 1 + endmark = self._mark() depth += 1 if verbose: print( @@ -131,24 +132,24 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option break self._cache[key] = lastresult, lastmark = result, endmark - self.reset(lastmark) + self._reset(lastmark) tree = lastresult self._level -= 1 if verbose: print(f"{fill}{method_name}() -> {tree!s:.200} [cached]") if tree: - endmark = self.mark() + endmark = self._mark() else: endmark = mark - self.reset(endmark) + self._reset(endmark) self._cache[key] = tree, endmark else: tree, endmark = self._cache[key] if verbose: print(f"{fill}{method_name}() -> {tree!s:.200} [fresh]") if tree: - self.reset(endmark) + self._reset(endmark) return tree memoize_left_rec_wrapper.__wrapped__ = method # type: ignore @@ -158,15 +159,21 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option class Parser: """Parsing base class.""" + KEYWORDS: ClassVar[Tuple[str, ...]] + + SOFT_KEYWORDS: ClassVar[Tuple[str, ...]] + def __init__(self, tokenizer: Tokenizer, *, verbose: bool = False): self._tokenizer = tokenizer self._verbose = verbose self._level = 0 self._cache: Dict[Tuple[Mark, str, Tuple[Any, ...]], Tuple[Any, Mark]] = {} + # Integer tracking wether we are in a left recursive rule or not. Can be useful + # for error reporting. + self.in_recursive_rule = 0 # Pass through common tokenizer methods. - # TODO: Rename to _mark and _reset. 
- self.mark = self._tokenizer.mark - self.reset = self._tokenizer.reset + self._mark = self._tokenizer.mark + self._reset = self._tokenizer.reset @abstractmethod def start(self) -> Any: @@ -179,7 +186,7 @@ class Parser: @memoize def name(self) -> Optional[tokenize.TokenInfo]: tok = self._tokenizer.peek() - if tok.type == token.NAME: + if tok.type == token.NAME and tok.string not in self.KEYWORDS: return self._tokenizer.getnext() return None @@ -205,6 +212,20 @@ class Parser: return None @memoize + def type_comment(self) -> Optional[tokenize.TokenInfo]: + tok = self._tokenizer.peek() + if tok.type == token.TYPE_COMMENT: + return self._tokenizer.getnext() + return None + + @memoize + def soft_keyword(self) -> Optional[tokenize.TokenInfo]: + tok = self._tokenizer.peek() + if tok.type == token.NAME and tok.string in self.SOFT_KEYWORDS: + return self._tokenizer.getnext() + return None + + @memoize def expect(self, type: str) -> Optional[tokenize.TokenInfo]: tok = self._tokenizer.peek() if tok.string == type: @@ -219,23 +240,26 @@ class Parser: return self._tokenizer.getnext() return None + def expect_forced(self, res: Any, expectation: str) -> Optional[tokenize.TokenInfo]: + if res is None: + raise self.make_syntax_error(f"expected {expectation}") + return res + def positive_lookahead(self, func: Callable[..., T], *args: object) -> T: - mark = self.mark() + mark = self._mark() ok = func(*args) - self.reset(mark) + self._reset(mark) return ok def negative_lookahead(self, func: Callable[..., object], *args: object) -> bool: - mark = self.mark() + mark = self._mark() ok = func(*args) - self.reset(mark) + self._reset(mark) return not ok - def make_syntax_error(self, filename: str = "<unknown>") -> SyntaxError: + def make_syntax_error(self, message: str, filename: str = "<unknown>") -> SyntaxError: tok = self._tokenizer.diagnose() - return SyntaxError( - "pegen parse failure", (filename, tok.start[0], 1 + tok.start[1], tok.line) - ) + return SyntaxError(message, (filename, tok.start[0], 1 + tok.start[1], tok.line)) def simple_parser_main(parser_class: Type[Parser]) -> None: diff --git a/Tools/peg_generator/pegen/parser_generator.py b/Tools/peg_generator/pegen/parser_generator.py index 364eccb..33ecee1 100644 --- a/Tools/peg_generator/pegen/parser_generator.py +++ b/Tools/peg_generator/pegen/parser_generator.py @@ -1,30 +1,29 @@ import contextlib from abc import abstractmethod - -from typing import AbstractSet, Dict, IO, Iterator, List, Optional, Set, Text, Tuple +from typing import IO, AbstractSet, Dict, Iterator, List, Optional, Set, Text, Tuple from pegen import sccutils from pegen.grammar import ( - Grammar, - Rule, - Rhs, Alt, + Gather, + Grammar, + GrammarError, + GrammarVisitor, NamedItem, - Plain, NameLeaf, - Gather, + Plain, + Rhs, + Rule, ) -from pegen.grammar import GrammarError, GrammarVisitor class RuleCheckingVisitor(GrammarVisitor): - def __init__(self, rules: Dict[str, Rule], tokens: Dict[int, str]): + def __init__(self, rules: Dict[str, Rule], tokens: Set[str]): self.rules = rules self.tokens = tokens def visit_NameLeaf(self, node: NameLeaf) -> None: - if node.value not in self.rules and node.value not in self.tokens.values(): - # TODO: Add line/col info to (leaf) nodes + if node.value not in self.rules and node.value not in self.tokens: raise GrammarError(f"Dangling reference to rule {node.value!r}") def visit_NamedItem(self, node: NamedItem) -> None: @@ -37,7 +36,7 @@ class ParserGenerator: callmakervisitor: GrammarVisitor - def __init__(self, grammar: Grammar, tokens: Dict[int, 
str], file: Optional[IO[Text]]): + def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]): self.grammar = grammar self.tokens = tokens self.rules = grammar.rules @@ -133,13 +132,22 @@ class ParserGenerator: self.counter += 1 extra_function_name = f"_loop0_{self.counter}" extra_function_alt = Alt( - [NamedItem(None, node.separator), NamedItem("elem", node.node)], action="elem", + [NamedItem(None, node.separator), NamedItem("elem", node.node)], + action="elem", ) self.todo[extra_function_name] = Rule( - extra_function_name, None, Rhs([extra_function_alt]), + extra_function_name, + None, + Rhs([extra_function_alt]), + ) + alt = Alt( + [NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))], + ) + self.todo[name] = Rule( + name, + None, + Rhs([alt]), ) - alt = Alt([NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],) - self.todo[name] = Rule(name, None, Rhs([alt]),) return name def dedupe(self, name: str) -> str: diff --git a/Tools/peg_generator/pegen/python_generator.py b/Tools/peg_generator/pegen/python_generator.py index b500e3e..201bf2ba 100644 --- a/Tools/peg_generator/pegen/python_generator.py +++ b/Tools/peg_generator/pegen/python_generator.py @@ -1,25 +1,28 @@ +import ast +import re import token -from typing import Any, Dict, Optional, IO, Text, Tuple +from typing import IO, Any, Dict, Optional, Sequence, Set, Text, Tuple +from pegen import grammar from pegen.grammar import ( + Alt, Cut, + Forced, + Gather, GrammarVisitor, - NameLeaf, - StringLeaf, - Rhs, - NamedItem, + Group, Lookahead, - PositiveLookahead, + NamedItem, + NameLeaf, NegativeLookahead, Opt, + PositiveLookahead, Repeat0, Repeat1, - Gather, - Group, + Rhs, Rule, - Alt, + StringLeaf, ) -from pegen import grammar from pegen.parser_generator import ParserGenerator MODULE_PREFIX = """\ @@ -27,7 +30,10 @@ MODULE_PREFIX = """\ # @generated by pegen from {filename} import ast -from typing import Optional, Any +import sys +import tokenize + +from typing import Any, Optional from pegen.parser import memoize, memoize_left_rec, logger, Parser @@ -36,25 +42,81 @@ MODULE_SUFFIX = """ if __name__ == '__main__': from pegen.parser import simple_parser_main - simple_parser_main(GeneratedParser) + simple_parser_main({class_name}) """ +class InvalidNodeVisitor(GrammarVisitor): + def visit_NameLeaf(self, node: NameLeaf) -> bool: + name = node.value + return name.startswith("invalid") + + def visit_StringLeaf(self, node: StringLeaf) -> bool: + return False + + def visit_NamedItem(self, node: NamedItem) -> bool: + return self.visit(node.item) + + def visit_Rhs(self, node: Rhs) -> bool: + return any(self.visit(alt) for alt in node.alts) + + def visit_Alt(self, node: Alt) -> bool: + return any(self.visit(item) for item in node.items) + + def lookahead_call_helper(self, node: Lookahead) -> bool: + return self.visit(node.node) + + def visit_PositiveLookahead(self, node: PositiveLookahead) -> bool: + return self.lookahead_call_helper(node) + + def visit_NegativeLookahead(self, node: NegativeLookahead) -> bool: + return self.lookahead_call_helper(node) + + def visit_Opt(self, node: Opt) -> bool: + return self.visit(node.node) + + def visit_Repeat(self, node: Repeat0) -> Tuple[str, str]: + return self.visit(node.node) + + def visit_Gather(self, node: Gather) -> Tuple[str, str]: + return self.visit(node.node) + + def visit_Group(self, node: Group) -> bool: + return self.visit(node.rhs) + + def visit_Cut(self, node: Cut) -> bool: + return False + + def visit_Forced(self, 
node: Forced) -> bool: + return self.visit(node.node) + + class PythonCallMakerVisitor(GrammarVisitor): def __init__(self, parser_generator: ParserGenerator): self.gen = parser_generator self.cache: Dict[Any, Any] = {} + self.keywords: Set[str] = set() + self.soft_keywords: Set[str] = set() def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]: name = node.value - if name in ("NAME", "NUMBER", "STRING", "OP"): + if name == "SOFT_KEYWORD": + return "soft_keyword", "self.soft_keyword()" + if name in ("NAME", "NUMBER", "STRING", "OP", "TYPE_COMMENT"): name = name.lower() return name, f"self.{name}()" if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER", "ASYNC", "AWAIT"): - return name.lower(), f"self.expect({name!r})" + # Avoid using names that can be Python keywords + return "_" + name.lower(), f"self.expect({name!r})" return name, f"self.{name}()" def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]: + val = ast.literal_eval(node.value) + if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword + if node.value.endswith("'"): + self.keywords.add(val) + else: + self.soft_keywords.add(val) return "literal", f"self.expect({node.value})" def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]: @@ -125,16 +187,36 @@ class PythonCallMakerVisitor(GrammarVisitor): def visit_Cut(self, node: Cut) -> Tuple[str, str]: return "cut", "True" + def visit_Forced(self, node: Forced) -> Tuple[str, str]: + if isinstance(node.node, Group): + _, val = self.visit(node.node.rhs) + return "forced", f"self.expect_forced({val}, '''({node.node.rhs!s})''')" + else: + return ( + "forced", + f"self.expect_forced(self.expect({node.node.value}), {node.node.value!r})", + ) + class PythonParserGenerator(ParserGenerator, GrammarVisitor): def __init__( self, grammar: grammar.Grammar, file: Optional[IO[Text]], - tokens: Dict[int, str] = token.tok_name, + tokens: Set[str] = set(token.tok_name.values()), + location_formatting: Optional[str] = None, + unreachable_formatting: Optional[str] = None, ): + tokens.add("SOFT_KEYWORD") super().__init__(grammar, tokens, file) - self.callmakervisitor = PythonCallMakerVisitor(self) + self.callmakervisitor: PythonCallMakerVisitor = PythonCallMakerVisitor(self) + self.invalidvisitor: InvalidNodeVisitor = InvalidNodeVisitor() + self.unreachable_formatting = unreachable_formatting or "None # pragma: no cover" + self.location_formatting = ( + location_formatting + or "lineno=start_lineno, col_offset=start_col_offset, " + "end_lineno=end_lineno, end_col_offset=end_col_offset" + ) def generate(self, filename: str) -> None: header = self.grammar.metas.get("header", MODULE_PREFIX) @@ -142,18 +224,35 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor): self.print(header.rstrip("\n").format(filename=filename)) subheader = self.grammar.metas.get("subheader", "") if subheader: - self.print(subheader.format(filename=filename)) - self.print("class GeneratedParser(Parser):") + self.print(subheader) + cls_name = self.grammar.metas.get("class", "GeneratedParser") + self.print("# Keywords and soft keywords are listed at the end of the parser definition.") + self.print(f"class {cls_name}(Parser):") while self.todo: for rulename, rule in list(self.todo.items()): del self.todo[rulename] self.print() with self.indent(): self.visit(rule) - trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX) + + self.print() + with self.indent(): + self.print(f"KEYWORDS = {tuple(self.callmakervisitor.keywords)}") + self.print(f"SOFT_KEYWORDS = 
{tuple(self.callmakervisitor.soft_keywords)}") + + trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX.format(class_name=cls_name)) if trailer is not None: self.print(trailer.rstrip("\n")) + def alts_uses_locations(self, alts: Sequence[Alt]) -> bool: + for alt in alts: + if alt.action and "LOCATIONS" in alt.action: + return True + for n in alt.items: + if isinstance(n.item, Group) and self.alts_uses_locations(n.item.rhs.alts): + return True + return False + def visit_Rule(self, node: Rule) -> None: is_loop = node.is_loop() is_gather = node.is_gather() @@ -173,7 +272,10 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor): self.print(f"# {node.name}: {rhs}") if node.nullable: self.print(f"# nullable={node.nullable}") - self.print("mark = self.mark()") + self.print("mark = self._mark()") + if self.alts_uses_locations(node.rhs.alts): + self.print("tok = self._tokenizer.peek()") + self.print("start_lineno, start_col_offset = tok.start") if is_loop: self.print("children = []") self.visit(rhs, is_loop=is_loop, is_gather=is_gather) @@ -200,8 +302,10 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor): self.visit(alt, is_loop=is_loop, is_gather=is_gather) def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None: + has_cut = any(isinstance(item.item, Cut) for item in node.items) with self.local_variable_context(): - self.print("cut = False") # TODO: Only if needed. + if has_cut: + self.print("cut = False") if is_loop: self.print("while (") else: @@ -227,12 +331,26 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor): f"[{self.local_variable_names[0]}] + {self.local_variable_names[1]}" ) else: - action = f"[{', '.join(self.local_variable_names)}]" + if self.invalidvisitor.visit(node): + action = "UNREACHABLE" + elif len(self.local_variable_names) == 1: + action = f"{self.local_variable_names[0]}" + else: + action = f"[{', '.join(self.local_variable_names)}]" + elif "LOCATIONS" in action: + self.print("tok = self._tokenizer.get_last_non_whitespace_token()") + self.print("end_lineno, end_col_offset = tok.end") + action = action.replace("LOCATIONS", self.location_formatting) + if is_loop: self.print(f"children.append({action})") - self.print(f"mark = self.mark()") + self.print(f"mark = self._mark()") else: + if "UNREACHABLE" in action: + action = action.replace("UNREACHABLE", self.unreachable_formatting) self.print(f"return {action}") - self.print("self.reset(mark)") + + self.print("self._reset(mark)") # Skip remaining alternatives if a cut was reached. - self.print("if cut: return None") # TODO: Only if needed. 
+ if has_cut: + self.print("if cut: return None") diff --git a/Tools/peg_generator/pegen/testutil.py b/Tools/peg_generator/pegen/testutil.py index 920d246..e0928a4 100644 --- a/Tools/peg_generator/pegen/testutil.py +++ b/Tools/peg_generator/pegen/testutil.py @@ -18,7 +18,7 @@ from pegen.python_generator import PythonParserGenerator from pegen.tokenizer import Tokenizer ALL_TOKENS = token.tok_name -EXACT_TOKENS = token.EXACT_TOKEN_TYPES # type: ignore +EXACT_TOKENS = token.EXACT_TOKEN_TYPES NON_EXACT_TOKENS = { name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values() } @@ -42,7 +42,7 @@ def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = F parser = parser_class(tokenizer, verbose=verbose) result = parser.start() if result is None: - raise parser.make_syntax_error() + raise parser.make_syntax_error("invalid syntax") return result @@ -66,6 +66,7 @@ def import_file(full_name: str, path: str) -> Any: """Import a python module from a path""" spec = importlib.util.spec_from_file_location(full_name, path) + assert spec is not None mod = importlib.util.module_from_spec(spec) # We assume this is not None and has an exec_module() method. diff --git a/Tools/peg_generator/pegen/tokenizer.py b/Tools/peg_generator/pegen/tokenizer.py index 61a28ef..7ee49e1 100644 --- a/Tools/peg_generator/pegen/tokenizer.py +++ b/Tools/peg_generator/pegen/tokenizer.py @@ -1,10 +1,10 @@ import token import tokenize -from typing import List, Iterator +from typing import Dict, Iterator, List Mark = int # NewType('Mark', int) -exact_token_types = token.EXACT_TOKEN_TYPES # type: ignore +exact_token_types = token.EXACT_TOKEN_TYPES def shorttok(tok: tokenize.TokenInfo) -> str: @@ -19,26 +19,22 @@ class Tokenizer: _tokens: List[tokenize.TokenInfo] - def __init__(self, tokengen: Iterator[tokenize.TokenInfo], *, verbose: bool = False): + def __init__( + self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False + ): self._tokengen = tokengen self._tokens = [] self._index = 0 self._verbose = verbose + self._lines: Dict[int, str] = {} + self._path = path if verbose: self.report(False, False) def getnext(self) -> tokenize.TokenInfo: """Return the next token and updates the index.""" - cached = True - while self._index == len(self._tokens): - tok = next(self._tokengen) - if tok.type in (tokenize.NL, tokenize.COMMENT): - continue - if tok.type == token.ERRORTOKEN and tok.string.isspace(): - continue - self._tokens.append(tok) - cached = False - tok = self._tokens[self._index] + cached = not self._index == len(self._tokens) + tok = self.peek() self._index += 1 if self._verbose: self.report(cached, False) @@ -52,7 +48,15 @@ class Tokenizer: continue if tok.type == token.ERRORTOKEN and tok.string.isspace(): continue + if ( + tok.type == token.NEWLINE + and self._tokens + and self._tokens[-1].type == token.NEWLINE + ): + continue self._tokens.append(tok) + if not self._path: + self._lines[tok.start[0]] = tok.line return self._tokens[self._index] def diagnose(self) -> tokenize.TokenInfo: @@ -60,6 +64,34 @@ class Tokenizer: self.getnext() return self._tokens[-1] + def get_last_non_whitespace_token(self) -> tokenize.TokenInfo: + for tok in reversed(self._tokens[: self._index]): + if tok.type != tokenize.ENDMARKER and ( + tok.type < tokenize.NEWLINE or tok.type > tokenize.DEDENT + ): + break + return tok + + def get_lines(self, line_numbers: List[int]) -> List[str]: + """Retrieve source lines corresponding to line numbers.""" + if self._lines: + lines = 
self._lines + else: + n = len(line_numbers) + lines = {} + count = 0 + seen = 0 + with open(self._path) as f: + for l in f: + count += 1 + if count in line_numbers: + seen += 1 + lines[count] = l + if seen == n: + break + + return [lines[n] for n in line_numbers] + def mark(self) -> Mark: return self._index diff --git a/Tools/peg_generator/pegen/validator.py b/Tools/peg_generator/pegen/validator.py index 0e3dd41..e7d6980 100644 --- a/Tools/peg_generator/pegen/validator.py +++ b/Tools/peg_generator/pegen/validator.py @@ -1,51 +1,45 @@ +from typing import Optional + from pegen import grammar from pegen.grammar import ( Alt, - Cut, - Gather, GrammarVisitor, - Group, - Lookahead, - NamedItem, - NameLeaf, - NegativeLookahead, - Opt, - PositiveLookahead, - Repeat0, - Repeat1, - Rhs, Rule, - StringLeaf, + Rhs, ) + class ValidationError(Exception): pass + class GrammarValidator(GrammarVisitor): - def __init__(self, grammar: grammar.Grammar): + def __init__(self, grammar: grammar.Grammar) -> None: self.grammar = grammar - self.rulename = None + self.rulename: Optional[str] = None - def validate_rule(self, rulename: str, node: Rule): + def validate_rule(self, rulename: str, node: Rule) -> None: self.rulename = rulename self.visit(node) self.rulename = None class SubRuleValidator(GrammarValidator): - def visit_Rhs(self, node: Rule): + def visit_Rhs(self, node: Rhs) -> None: for index, alt in enumerate(node.alts): - alts_to_consider = node.alts[index+1:] + alts_to_consider = node.alts[index + 1 :] for other_alt in alts_to_consider: self.check_intersection(alt, other_alt) - def check_intersection(self, first_alt: Alt, second_alt: Alt) -> bool: + def check_intersection(self, first_alt: Alt, second_alt: Alt) -> None: if str(second_alt).startswith(str(first_alt)): raise ValidationError( - f"In {self.rulename} there is an alternative that will " - f"never be visited:\n{second_alt}") + f"In {self.rulename} there is an alternative that will " + f"never be visited:\n{second_alt}" + ) + -def validate_grammar(the_grammar: grammar.Grammar): +def validate_grammar(the_grammar: grammar.Grammar) -> None: for validator_cls in GrammarValidator.__subclasses__(): validator = validator_cls(the_grammar) for rule_name, rule in the_grammar.rules.items(): diff --git a/Tools/peg_generator/scripts/benchmark.py b/Tools/peg_generator/scripts/benchmark.py index 5fbedaa..4a063bf 100644 --- a/Tools/peg_generator/scripts/benchmark.py +++ b/Tools/peg_generator/scripts/benchmark.py @@ -76,7 +76,10 @@ def run_benchmark_stdlib(subcommand): parse_directory( "../../Lib", verbose=False, - excluded_files=["*/bad*", "*/lib2to3/tests/data/*",], + excluded_files=[ + "*/bad*", + "*/lib2to3/tests/data/*", + ], short=True, mode=modes[subcommand], ) diff --git a/Tools/peg_generator/scripts/download_pypi_packages.py b/Tools/peg_generator/scripts/download_pypi_packages.py index 9874202..0af876c 100755 --- a/Tools/peg_generator/scripts/download_pypi_packages.py +++ b/Tools/peg_generator/scripts/download_pypi_packages.py @@ -8,7 +8,8 @@ from typing import Dict, Any from urllib.request import urlretrieve argparser = argparse.ArgumentParser( - prog="download_pypi_packages", description="Helper program to download PyPI packages", + prog="download_pypi_packages", + description="Helper program to download PyPI packages", ) argparser.add_argument( "-n", "--number", type=int, default=100, help="Number of packages to download" diff --git a/Tools/peg_generator/scripts/grammar_grapher.py b/Tools/peg_generator/scripts/grammar_grapher.py index 4afdbce..4d77123 
100755
--- a/Tools/peg_generator/scripts/grammar_grapher.py
+++ b/Tools/peg_generator/scripts/grammar_grapher.py
@@ -41,7 +41,10 @@ from pegen.grammar import (
     Rhs,
 )
 
-argparser = argparse.ArgumentParser(prog="graph_grammar", description="Graph a grammar tree",)
+argparser = argparse.ArgumentParser(
+    prog="graph_grammar",
+    description="Graph a grammar tree",
+)
 argparser.add_argument(
     "-s",
     "--start",
diff --git a/Tools/peg_generator/scripts/test_pypi_packages.py b/Tools/peg_generator/scripts/test_pypi_packages.py
index f014753..e2eaef9 100755
--- a/Tools/peg_generator/scripts/test_pypi_packages.py
+++ b/Tools/peg_generator/scripts/test_pypi_packages.py
@@ -19,7 +19,8 @@ from scripts import test_parse_directory
 HERE = pathlib.Path(__file__).resolve().parent
 
 argparser = argparse.ArgumentParser(
-    prog="test_pypi_packages", description="Helper program to test parsing PyPI packages",
+    prog="test_pypi_packages",
+    description="Helper program to test parsing PyPI packages",
 )
 argparser.add_argument(
     "-t", "--tree", action="count", help="Compare parse tree to official AST", default=0
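
The diff above introduces two user-visible pegen features: a forced-token operator (&&) that turns a failed match into an immediate SyntaxError via the new expect_forced() / _PyPegen_expect_forced_result() helpers, and a bracketed annotation mini-grammar that replaces the old NAME '[' NAME '*' ']' special cases in the metagrammar. The two sketches below are illustrative only, not part of the commit; they assume that make_parser() and parse_string() from pegen.testutil behave as they are used in Lib/test/test_peg_generator, and the toy grammars themselves are hypothetical.

# Sketch 1 (assumed helper APIs): a forced group compiled by the pure-Python generator.
from pegen.testutil import make_parser, parse_string

toy_grammar = """
start: NAME &&(':' | ';')
"""
parser_class = make_parser(toy_grammar)   # builds a Parser subclass from the grammar text

parse_string("number :", parser_class)    # ':' satisfies the forced group
parse_string("number ;", parser_class)    # ';' does as well

try:
    parse_string("number", parser_class)  # nothing follows the NAME
except SyntaxError as err:
    # The generated expect_forced() call raises "expected (':' | ';')"
    # instead of returning None and backtracking.
    print(err)

The second sketch exercises the new annotation rule ("[" ~ target_atoms "]") together with the NAME "*" target atom in the regenerated GrammarParser: whatever appears between the brackets, including a pointer-style C type, is kept verbatim as a string on Rule.type.

# Sketch 2 (assumed helper API): parsing bracketed annotations with the regenerated metagrammar parser.
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.testutil import parse_string

meta_source = """
start[asdl_expr_seq*]: expr+ NEWLINE
expr[expr_ty]: NUMBER
"""
grammar = parse_string(meta_source, GrammarParser)
print(grammar.rules["start"].type)   # -> "asdl_expr_seq*"
print(grammar.rules["expr"].type)    # -> "expr_ty"

Annotations reuse the same target_atoms machinery as actions; the new "[" ... "]" target_atom and the added !"]" lookahead are what let nested brackets such as Optional[str] survive inside an annotation.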