summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPablo Galindo Salgado <Pablogsal@gmail.com>2021-08-12 16:37:30 (GMT)
committerGitHub <noreply@github.com>2021-08-12 16:37:30 (GMT)
commit953d27261e455066b17a106d9e07bab3bf12983b (patch)
tree98739da681414ff6117897a56eb90120d3dad069
parent8e832fb2a2cb54d7262148b6ec15563dffb48d63 (diff)
downloadcpython-953d27261e455066b17a106d9e07bab3bf12983b.zip
cpython-953d27261e455066b17a106d9e07bab3bf12983b.tar.gz
cpython-953d27261e455066b17a106d9e07bab3bf12983b.tar.bz2
Update pegen to use the latest upstream developments (GH-27586)
-rw-r--r--Lib/test/test_peg_generator/test_c_parser.py29
-rw-r--r--Lib/test/test_peg_generator/test_first_sets.py197
-rw-r--r--Lib/test/test_peg_generator/test_grammar_validator.py4
-rw-r--r--Lib/test/test_peg_generator/test_pegen.py638
-rw-r--r--Parser/parser.c82
-rw-r--r--Parser/pegen.c13
-rw-r--r--Parser/pegen.h1
-rw-r--r--Tools/peg_generator/mypy.ini2
-rwxr-xr-xTools/peg_generator/pegen/__main__.py8
-rw-r--r--Tools/peg_generator/pegen/ast_dump.py12
-rw-r--r--Tools/peg_generator/pegen/build.py14
-rw-r--r--Tools/peg_generator/pegen/c_generator.py53
-rwxr-xr-xTools/peg_generator/pegen/first_sets.py3
-rw-r--r--Tools/peg_generator/pegen/grammar_parser.py414
-rw-r--r--Tools/peg_generator/pegen/keywordgen.py10
-rw-r--r--Tools/peg_generator/pegen/metagrammar.gram21
-rw-r--r--Tools/peg_generator/pegen/parser.py84
-rw-r--r--Tools/peg_generator/pegen/parser_generator.py40
-rw-r--r--Tools/peg_generator/pegen/python_generator.py168
-rw-r--r--Tools/peg_generator/pegen/testutil.py5
-rw-r--r--Tools/peg_generator/pegen/tokenizer.py58
-rw-r--r--Tools/peg_generator/pegen/validator.py38
-rw-r--r--Tools/peg_generator/scripts/benchmark.py5
-rwxr-xr-xTools/peg_generator/scripts/download_pypi_packages.py3
-rwxr-xr-xTools/peg_generator/scripts/grammar_grapher.py5
-rwxr-xr-xTools/peg_generator/scripts/test_pypi_packages.py3
26 files changed, 1240 insertions, 670 deletions
diff --git a/Lib/test/test_peg_generator/test_c_parser.py b/Lib/test/test_peg_generator/test_c_parser.py
index 013b3af..b761bd4 100644
--- a/Lib/test/test_peg_generator/test_c_parser.py
+++ b/Lib/test/test_peg_generator/test_c_parser.py
@@ -11,8 +11,8 @@ from test import support
from test.support import os_helper
from test.support.script_helper import assert_python_ok
-_py_cflags_nodist = sysconfig.get_config_var('PY_CFLAGS_NODIST')
-_pgo_flag = sysconfig.get_config_var('PGO_PROF_USE_FLAG')
+_py_cflags_nodist = sysconfig.get_config_var("PY_CFLAGS_NODIST")
+_pgo_flag = sysconfig.get_config_var("PGO_PROF_USE_FLAG")
if _pgo_flag and _py_cflags_nodist and _pgo_flag in _py_cflags_nodist:
raise unittest.SkipTest("peg_generator test disabled under PGO build")
@@ -458,3 +458,28 @@ class TestCParser(unittest.TestCase):
self.check_input_strings_for_grammar(valid_cases, invalid_cases)
"""
self.run_test(grammar_source, test_source)
+
+ def test_forced(self) -> None:
+ grammar_source = """
+ start: NAME &&':' | NAME
+ """
+ test_source = """
+ self.assertEqual(parse.parse_string("number :", mode=0), None)
+ with self.assertRaises(SyntaxError) as e:
+ parse.parse_string("a", mode=0)
+ self.assertIn("expected ':'", str(e.exception))
+ """
+ self.run_test(grammar_source, test_source)
+
+ def test_forced_with_group(self) -> None:
+ grammar_source = """
+ start: NAME &&(':' | ';') | NAME
+ """
+ test_source = """
+ self.assertEqual(parse.parse_string("number :", mode=0), None)
+ self.assertEqual(parse.parse_string("number ;", mode=0), None)
+ with self.assertRaises(SyntaxError) as e:
+ parse.parse_string("a", mode=0)
+ self.assertIn("expected (':' | ';')", e.exception.args[0])
+ """
+ self.run_test(grammar_source, test_source)
diff --git a/Lib/test/test_peg_generator/test_first_sets.py b/Lib/test/test_peg_generator/test_first_sets.py
index 425ee23..d6f8322 100644
--- a/Lib/test/test_peg_generator/test_first_sets.py
+++ b/Lib/test/test_peg_generator/test_first_sets.py
@@ -3,8 +3,8 @@ import unittest
from test import test_tools
from typing import Dict, Set
-test_tools.skip_if_missing('peg_generator')
-with test_tools.imports_under_tool('peg_generator'):
+test_tools.skip_if_missing("peg_generator")
+with test_tools.imports_under_tool("peg_generator"):
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.testutil import parse_string
from pegen.first_sets import FirstSetCalculator
@@ -23,29 +23,38 @@ class TestFirstSets(unittest.TestCase):
A: 'a' | '-'
B: 'b' | '+'
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "A": {"'a'", "'-'"},
- "B": {"'+'", "'b'"},
- "expr": {"'+'", "'a'", "'b'", "'-'"},
- "start": {"'+'", "'a'", "'b'", "'-'"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "A": {"'a'", "'-'"},
+ "B": {"'+'", "'b'"},
+ "expr": {"'+'", "'a'", "'b'", "'-'"},
+ "start": {"'+'", "'a'", "'b'", "'-'"},
+ },
+ )
def test_optionals(self) -> None:
grammar = """
start: expr NEWLINE
expr: ['a'] ['b'] 'c'
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "expr": {"'c'", "'a'", "'b'"},
- "start": {"'c'", "'a'", "'b'"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "expr": {"'c'", "'a'", "'b'"},
+ "start": {"'c'", "'a'", "'b'"},
+ },
+ )
def test_repeat_with_separator(self) -> None:
grammar = """
start: ','.thing+ NEWLINE
thing: NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {"thing": {"NUMBER"}, "start": {"NUMBER"}},
+ )
def test_optional_operator(self) -> None:
grammar = """
@@ -53,11 +62,14 @@ class TestFirstSets(unittest.TestCase):
sum: (term)? 'b'
term: NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "term": {"NUMBER"},
- "sum": {"NUMBER", "'b'"},
- "start": {"'b'", "NUMBER"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "term": {"NUMBER"},
+ "sum": {"NUMBER", "'b'"},
+ "start": {"'b'", "NUMBER"},
+ },
+ )
def test_optional_literal(self) -> None:
grammar = """
@@ -65,60 +77,83 @@ class TestFirstSets(unittest.TestCase):
sum: '+' ? term
term: NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "term": {"NUMBER"},
- "sum": {"'+'", "NUMBER"},
- "start": {"'+'", "NUMBER"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "term": {"NUMBER"},
+ "sum": {"'+'", "NUMBER"},
+ "start": {"'+'", "NUMBER"},
+ },
+ )
def test_optional_after(self) -> None:
grammar = """
start: term NEWLINE
term: NUMBER ['+']
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"NUMBER"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {"term": {"NUMBER"}, "start": {"NUMBER"}},
+ )
def test_optional_before(self) -> None:
grammar = """
start: term NEWLINE
term: ['+'] NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}},
+ )
def test_repeat_0(self) -> None:
grammar = """
start: thing* "+" NEWLINE
thing: NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}},
+ )
def test_repeat_0_with_group(self) -> None:
grammar = """
start: ('+' '-')* term NEWLINE
term: NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}},
+ )
def test_repeat_1(self) -> None:
grammar = """
start: thing+ '-' NEWLINE
thing: NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {"thing": {"NUMBER"}, "start": {"NUMBER"}},
+ )
def test_repeat_1_with_group(self) -> None:
grammar = """
start: ('+' term)+ term NEWLINE
term: NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}}
+ )
def test_gather(self) -> None:
grammar = """
start: ','.thing+ NEWLINE
thing: NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {"thing": {"NUMBER"}, "start": {"NUMBER"}},
+ )
def test_positive_lookahead(self) -> None:
grammar = """
@@ -126,11 +161,14 @@ class TestFirstSets(unittest.TestCase):
expr: &'a' opt
opt: 'a' | 'b' | 'c'
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "expr": {"'a'"},
- "start": {"'a'"},
- "opt": {"'b'", "'c'", "'a'"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "expr": {"'a'"},
+ "start": {"'a'"},
+ "opt": {"'b'", "'c'", "'a'"},
+ },
+ )
def test_negative_lookahead(self) -> None:
grammar = """
@@ -138,11 +176,14 @@ class TestFirstSets(unittest.TestCase):
expr: !'a' opt
opt: 'a' | 'b' | 'c'
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "opt": {"'b'", "'a'", "'c'"},
- "expr": {"'b'", "'c'"},
- "start": {"'b'", "'c'"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "opt": {"'b'", "'a'", "'c'"},
+ "expr": {"'b'", "'c'"},
+ "start": {"'b'", "'c'"},
+ },
+ )
def test_left_recursion(self) -> None:
grammar = """
@@ -153,21 +194,27 @@ class TestFirstSets(unittest.TestCase):
bar: 'bar'
baz: 'baz'
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "expr": {"NUMBER", "'-'"},
- "term": {"NUMBER"},
- "start": {"NUMBER", "'-'"},
- "foo": {"'foo'"},
- "bar": {"'bar'"},
- "baz": {"'baz'"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "expr": {"NUMBER", "'-'"},
+ "term": {"NUMBER"},
+ "start": {"NUMBER", "'-'"},
+ "foo": {"'foo'"},
+ "bar": {"'bar'"},
+ "baz": {"'baz'"},
+ },
+ )
def test_advance_left_recursion(self) -> None:
grammar = """
start: NUMBER | sign start
sign: ['-']
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}},
+ )
def test_mutual_left_recursion(self) -> None:
grammar = """
@@ -175,11 +222,14 @@ class TestFirstSets(unittest.TestCase):
foo: bar 'A' | 'B'
bar: foo 'C' | 'D'
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "foo": {"'D'", "'B'"},
- "bar": {"'D'"},
- "start": {"'D'", "'B'"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "foo": {"'D'", "'B'"},
+ "bar": {"'D'"},
+ "start": {"'D'", "'B'"},
+ },
+ )
def test_nasty_left_recursion(self) -> None:
# TODO: Validate this
@@ -188,7 +238,10 @@ class TestFirstSets(unittest.TestCase):
target: maybe '+' | NAME
maybe: maybe '-' | target
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}},
+ )
def test_nullable_rule(self) -> None:
grammar = """
@@ -196,17 +249,22 @@ class TestFirstSets(unittest.TestCase):
sign: ['-']
thing: NUMBER
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "sign": {"", "'-'"},
- "thing": {"NUMBER"},
- "start": {"NUMBER", "'-'"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "sign": {"", "'-'"},
+ "thing": {"NUMBER"},
+ "start": {"NUMBER", "'-'"},
+ },
+ )
def test_epsilon_production_in_start_rule(self) -> None:
grammar = """
start: ['-'] $
"""
- self.assertEqual(self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}})
+ self.assertEqual(
+ self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}}
+ )
def test_multiple_nullable_rules(self) -> None:
grammar = """
@@ -216,10 +274,13 @@ class TestFirstSets(unittest.TestCase):
other: '*'
another: '/'
"""
- self.assertEqual(self.calculate_first_sets(grammar), {
- "sign": {"", "'-'"},
- "thing": {"'+'", ""},
- "start": {"'+'", "'-'", "'*'"},
- "other": {"'*'"},
- "another": {"'/'"},
- })
+ self.assertEqual(
+ self.calculate_first_sets(grammar),
+ {
+ "sign": {"", "'-'"},
+ "thing": {"'+'", ""},
+ "start": {"'+'", "'-'", "'*'"},
+ "other": {"'*'"},
+ "another": {"'/'"},
+ },
+ )
diff --git a/Lib/test/test_peg_generator/test_grammar_validator.py b/Lib/test/test_peg_generator/test_grammar_validator.py
index 2e72ff8..72c3d20 100644
--- a/Lib/test/test_peg_generator/test_grammar_validator.py
+++ b/Lib/test/test_peg_generator/test_grammar_validator.py
@@ -1,8 +1,8 @@
import unittest
from test import test_tools
-test_tools.skip_if_missing('peg_generator')
-with test_tools.imports_under_tool('peg_generator'):
+test_tools.skip_if_missing("peg_generator")
+with test_tools.imports_under_tool("peg_generator"):
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.validator import SubRuleValidator, ValidationError
from pegen.testutil import parse_string
diff --git a/Lib/test/test_peg_generator/test_pegen.py b/Lib/test/test_peg_generator/test_pegen.py
index bcfee3f..71b0fdc 100644
--- a/Lib/test/test_peg_generator/test_pegen.py
+++ b/Lib/test/test_peg_generator/test_pegen.py
@@ -1,3 +1,5 @@
+import ast
+import difflib
import io
import textwrap
import unittest
@@ -6,14 +8,10 @@ from test import test_tools
from typing import Dict, Any
from tokenize import TokenInfo, NAME, NEWLINE, NUMBER, OP
-test_tools.skip_if_missing('peg_generator')
-with test_tools.imports_under_tool('peg_generator'):
+test_tools.skip_if_missing("peg_generator")
+with test_tools.imports_under_tool("peg_generator"):
from pegen.grammar_parser import GeneratedParser as GrammarParser
- from pegen.testutil import (
- parse_string,
- generate_parser,
- make_parser
- )
+ from pegen.testutil import parse_string, generate_parser, make_parser
from pegen.grammar import GrammarVisitor, GrammarError, Grammar
from pegen.grammar_visualizer import ASTGrammarPrinter
from pegen.parser import Parser
@@ -38,7 +36,9 @@ class TestPegen(unittest.TestCase):
# Check the str() and repr() of a few rules; AST nodes don't support ==.
self.assertEqual(str(rules["start"]), "start: sum NEWLINE")
self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
- expected_repr = "Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
+ expected_repr = (
+ "Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
+ )
self.assertEqual(repr(rules["term"]), expected_repr)
def test_long_rule_str(self) -> None:
@@ -71,7 +71,7 @@ class TestPegen(unittest.TestCase):
self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
self.assertEqual(
repr(rules["term"]),
- "Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
+ "Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))",
)
def test_gather(self) -> None:
@@ -81,24 +81,31 @@ class TestPegen(unittest.TestCase):
"""
rules = parse_string(grammar, GrammarParser).rules
self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE")
- self.assertTrue(repr(rules["start"]).startswith(
- "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
- ))
+ self.assertTrue(
+ repr(rules["start"]).startswith(
+ "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
+ )
+ )
self.assertEqual(str(rules["thing"]), "thing: NUMBER")
parser_class = make_parser(grammar)
node = parse_string("42\n", parser_class)
- assert node == [
- [[TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n")]],
- TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
- ]
node = parse_string("1, 2\n", parser_class)
- assert node == [
+ self.assertEqual(
+ node,
[
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n")],
- [TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n")],
+ [
+ TokenInfo(
+ NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n"
+ ),
+ TokenInfo(
+ NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n"
+ ),
+ ],
+ TokenInfo(
+ NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n"
+ ),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n"),
- ]
+ )
def test_expr_grammar(self) -> None:
grammar = """
@@ -108,10 +115,13 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("42\n", parser_class)
- self.assertEqual(node, [
- [[TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n")]],
- TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
- ])
+ self.assertEqual(
+ node,
+ [
+ TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n"),
+ TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
+ ],
+ )
def test_optional_operator(self) -> None:
grammar = """
@@ -120,22 +130,39 @@ class TestPegen(unittest.TestCase):
term: NUMBER
"""
parser_class = make_parser(grammar)
- node = parse_string("1+2\n", parser_class)
- self.assertEqual(node, [
+ node = parse_string("1 + 2\n", parser_class)
+ self.assertEqual(
+ node,
[
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+2\n")],
[
- TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+2\n"),
- [TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1+2\n")],
+ TokenInfo(
+ NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"
+ ),
+ [
+ TokenInfo(
+ OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"
+ ),
+ TokenInfo(
+ NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"
+ ),
+ ],
],
+ TokenInfo(
+ NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"
+ ),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 3), end=(1, 4), line="1+2\n"),
- ])
+ )
node = parse_string("1\n", parser_class)
- self.assertEqual(node, [
- [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
- TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
- ])
+ self.assertEqual(
+ node,
+ [
+ [
+ TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
+ None,
+ ],
+ TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
+ ],
+ )
def test_optional_literal(self) -> None:
grammar = """
@@ -145,18 +172,29 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1+\n", parser_class)
- self.assertEqual(node, [
+ self.assertEqual(
+ node,
[
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n")],
- TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
+ [
+ TokenInfo(
+ NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n"
+ ),
+ TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
+ ],
+ TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
- ])
+ )
node = parse_string("1\n", parser_class)
- self.assertEqual(node, [
- [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
- TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
- ])
+ self.assertEqual(
+ node,
+ [
+ [
+ TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
+ None,
+ ],
+ TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
+ ],
+ )
def test_alt_optional_operator(self) -> None:
grammar = """
@@ -166,21 +204,38 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1 + 2\n", parser_class)
- self.assertEqual(node, [
+ self.assertEqual(
+ node,
[
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n")],
[
- TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"),
- [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n")],
+ TokenInfo(
+ NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"
+ ),
+ [
+ TokenInfo(
+ OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"
+ ),
+ TokenInfo(
+ NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"
+ ),
+ ],
],
+ TokenInfo(
+ NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"
+ ),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"),
- ])
+ )
node = parse_string("1\n", parser_class)
- self.assertEqual(node, [
- [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
- TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
- ])
+ self.assertEqual(
+ node,
+ [
+ [
+ TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
+ None,
+ ],
+ TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
+ ],
+ )
def test_repeat_0_simple(self) -> None:
grammar = """
@@ -189,20 +244,32 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1 2 3\n", parser_class)
- self.assertEqual(node, [
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")],
+ self.assertEqual(
+ node,
[
- [[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]],
- [[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]],
+ TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
+ [
+ TokenInfo(
+ NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"
+ ),
+ TokenInfo(
+ NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"
+ ),
+ ],
+ TokenInfo(
+ NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"
+ ),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"),
- ])
+ )
node = parse_string("1\n", parser_class)
- self.assertEqual(node, [
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")],
- [],
- TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
- ])
+ self.assertEqual(
+ node,
+ [
+ TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
+ [],
+ TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
+ ],
+ )
def test_repeat_0_complex(self) -> None:
grammar = """
@@ -211,24 +278,43 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1 + 2 + 3\n", parser_class)
- self.assertEqual(node, [
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")],
+ self.assertEqual(
+ node,
[
+ TokenInfo(
+ NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"
+ ),
[
[
- TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
- [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")],
- ]
- ],
- [
+ TokenInfo(
+ OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
+ ),
+ TokenInfo(
+ NUMBER,
+ string="2",
+ start=(1, 4),
+ end=(1, 5),
+ line="1 + 2 + 3\n",
+ ),
+ ],
[
- TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
- [TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")],
- ]
+ TokenInfo(
+ OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
+ ),
+ TokenInfo(
+ NUMBER,
+ string="3",
+ start=(1, 8),
+ end=(1, 9),
+ line="1 + 2 + 3\n",
+ ),
+ ],
],
+ TokenInfo(
+ NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
+ ),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
- ])
+ )
def test_repeat_1_simple(self) -> None:
grammar = """
@@ -237,14 +323,23 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1 2 3\n", parser_class)
- self.assertEqual(node, [
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")],
+ self.assertEqual(
+ node,
[
- [[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]],
- [[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]],
+ TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
+ [
+ TokenInfo(
+ NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"
+ ),
+ TokenInfo(
+ NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"
+ ),
+ ],
+ TokenInfo(
+ NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"
+ ),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"),
- ])
+ )
with self.assertRaises(SyntaxError):
parse_string("1\n", parser_class)
@@ -255,24 +350,43 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1 + 2 + 3\n", parser_class)
- self.assertEqual(node, [
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")],
+ self.assertEqual(
+ node,
[
+ TokenInfo(
+ NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"
+ ),
[
[
- TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
- [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")],
- ]
- ],
- [
+ TokenInfo(
+ OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
+ ),
+ TokenInfo(
+ NUMBER,
+ string="2",
+ start=(1, 4),
+ end=(1, 5),
+ line="1 + 2 + 3\n",
+ ),
+ ],
[
- TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
- [TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")],
- ]
+ TokenInfo(
+ OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
+ ),
+ TokenInfo(
+ NUMBER,
+ string="3",
+ start=(1, 8),
+ end=(1, 9),
+ line="1 + 2 + 3\n",
+ ),
+ ],
],
+ TokenInfo(
+ NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
+ ),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
- ])
+ )
with self.assertRaises(SyntaxError):
parse_string("1\n", parser_class)
@@ -283,14 +397,25 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1, 2, 3\n", parser_class)
- self.assertEqual(node, [
+ self.assertEqual(
+ node,
[
- [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n")],
- [TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n")],
- [TokenInfo(NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n")],
+ [
+ TokenInfo(
+ NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n"
+ ),
+ TokenInfo(
+ NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n"
+ ),
+ TokenInfo(
+ NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n"
+ ),
+ ],
+ TokenInfo(
+ NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"
+ ),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"),
- ])
+ )
def test_left_recursive(self) -> None:
grammar_source = """
@@ -311,18 +436,41 @@ class TestPegen(unittest.TestCase):
self.assertFalse(rules["bar"].left_recursive)
self.assertFalse(rules["baz"].left_recursive)
node = parse_string("1 + 2 + 3\n", parser_class)
- self.assertEqual(node, [
+ self.assertEqual(
+ node,
[
[
- [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")]],
- TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
- [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")],
+ [
+ TokenInfo(
+ NUMBER,
+ string="1",
+ start=(1, 0),
+ end=(1, 1),
+ line="1 + 2 + 3\n",
+ ),
+ TokenInfo(
+ OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
+ ),
+ TokenInfo(
+ NUMBER,
+ string="2",
+ start=(1, 4),
+ end=(1, 5),
+ line="1 + 2 + 3\n",
+ ),
+ ],
+ TokenInfo(
+ OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
+ ),
+ TokenInfo(
+ NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n"
+ ),
],
- TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
- [TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")],
+ TokenInfo(
+ NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
+ ),
],
- TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
- ])
+ )
def test_python_expr(self) -> None:
grammar = """
@@ -392,31 +540,79 @@ class TestPegen(unittest.TestCase):
exec(out.getvalue(), ns)
parser_class: Type[Parser] = ns["GeneratedParser"]
node = parse_string("D A C A E", parser_class)
- self.assertEqual(node, [
+
+ self.assertEqual(
+ node,
[
[
[
- [TokenInfo(type=NAME, string="D", start=(1, 0), end=(1, 1), line="D A C A E")],
- TokenInfo(type=NAME, string="A", start=(1, 2), end=(1, 3), line="D A C A E"),
+ [
+ TokenInfo(
+ type=NAME,
+ string="D",
+ start=(1, 0),
+ end=(1, 1),
+ line="D A C A E",
+ ),
+ TokenInfo(
+ type=NAME,
+ string="A",
+ start=(1, 2),
+ end=(1, 3),
+ line="D A C A E",
+ ),
+ ],
+ TokenInfo(
+ type=NAME,
+ string="C",
+ start=(1, 4),
+ end=(1, 5),
+ line="D A C A E",
+ ),
],
- TokenInfo(type=NAME, string="C", start=(1, 4), end=(1, 5), line="D A C A E"),
+ TokenInfo(
+ type=NAME,
+ string="A",
+ start=(1, 6),
+ end=(1, 7),
+ line="D A C A E",
+ ),
],
- TokenInfo(type=NAME, string="A", start=(1, 6), end=(1, 7), line="D A C A E"),
+ TokenInfo(
+ type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"
+ ),
],
- TokenInfo(type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"),
- ])
+ )
node = parse_string("B C A E", parser_class)
- self.assertIsNotNone(node)
- self.assertEqual(node, [
+ self.assertEqual(
+ node,
[
[
- [TokenInfo(type=NAME, string="B", start=(1, 0), end=(1, 1), line="B C A E")],
- TokenInfo(type=NAME, string="C", start=(1, 2), end=(1, 3), line="B C A E"),
+ [
+ TokenInfo(
+ type=NAME,
+ string="B",
+ start=(1, 0),
+ end=(1, 1),
+ line="B C A E",
+ ),
+ TokenInfo(
+ type=NAME,
+ string="C",
+ start=(1, 2),
+ end=(1, 3),
+ line="B C A E",
+ ),
+ ],
+ TokenInfo(
+ type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"
+ ),
],
- TokenInfo(type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"),
+ TokenInfo(
+ type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"
+ ),
],
- TokenInfo(type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"),
- ])
+ )
def test_nasty_mutually_left_recursive(self) -> None:
# This grammar does not recognize 'x - + =', much to my chagrin.
@@ -454,43 +650,44 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("foo = 12 + 12 .", parser_class)
- self.assertEqual(node, [
+ self.assertEqual(
+ node,
[
+ TokenInfo(
+ NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 ."
+ ),
+ TokenInfo(
+ OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."
+ ),
[
- [TokenInfo(NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 .")],
- TokenInfo(OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."),
+ TokenInfo(
+ NUMBER,
+ string="12",
+ start=(1, 6),
+ end=(1, 8),
+ line="foo = 12 + 12 .",
+ ),
[
[
TokenInfo(
- NUMBER, string="12", start=(1, 6), end=(1, 8), line="foo = 12 + 12 ."
- )
- ],
- [
- [
- [
- TokenInfo(
- OP,
- string="+",
- start=(1, 9),
- end=(1, 10),
- line="foo = 12 + 12 .",
- ),
- [
- TokenInfo(
- NUMBER,
- string="12",
- start=(1, 11),
- end=(1, 13),
- line="foo = 12 + 12 .",
- )
- ],
- ]
- ]
- ],
+ OP,
+ string="+",
+ start=(1, 9),
+ end=(1, 10),
+ line="foo = 12 + 12 .",
+ ),
+ TokenInfo(
+ NUMBER,
+ string="12",
+ start=(1, 11),
+ end=(1, 13),
+ line="foo = 12 + 12 .",
+ ),
+ ]
],
- ]
- ]
- ])
+ ],
+ ],
+ )
def test_named_lookahead_error(self) -> None:
grammar = """
@@ -533,11 +730,14 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("(1)", parser_class)
- self.assertEqual(node, [
- TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"),
- [TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)")],
- TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"),
- ])
+ self.assertEqual(
+ node,
+ [
+ TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"),
+ TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)"),
+ TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"),
+ ],
+ )
def test_dangling_reference(self) -> None:
grammar = """
@@ -589,6 +789,124 @@ class TestPegen(unittest.TestCase):
with self.assertRaisesRegex(GrammarError, "cannot start with underscore: '_x'"):
parser_class = make_parser(grammar)
+ def test_soft_keyword(self) -> None:
+ grammar = """
+ start:
+ | "number" n=NUMBER { eval(n.string) }
+ | "string" n=STRING { n.string }
+ | SOFT_KEYWORD l=NAME n=(NUMBER | NAME | STRING) { f"{l.string} = {n.string}"}
+ """
+ parser_class = make_parser(grammar)
+ self.assertEqual(parse_string("number 1", parser_class, verbose=True), 1)
+ self.assertEqual(parse_string("string 'b'", parser_class, verbose=True), "'b'")
+ self.assertEqual(
+ parse_string("number test 1", parser_class, verbose=True), "test = 1"
+ )
+ assert (
+ parse_string("string test 'b'", parser_class, verbose=True) == "test = 'b'"
+ )
+ with self.assertRaises(SyntaxError):
+ parse_string("test 1", parser_class, verbose=True)
+
+ def test_forced(self) -> None:
+ grammar = """
+ start: NAME &&':' | NAME
+ """
+ parser_class = make_parser(grammar)
+ self.assertTrue(parse_string("number :", parser_class, verbose=True))
+ with self.assertRaises(SyntaxError) as e:
+ parse_string("a", parser_class, verbose=True)
+
+ self.assertIn("expected ':'", str(e.exception))
+
+ def test_forced_with_group(self) -> None:
+ grammar = """
+ start: NAME &&(':' | ';') | NAME
+ """
+ parser_class = make_parser(grammar)
+ self.assertTrue(parse_string("number :", parser_class, verbose=True))
+ self.assertTrue(parse_string("number ;", parser_class, verbose=True))
+ with self.assertRaises(SyntaxError) as e:
+ parse_string("a", parser_class, verbose=True)
+ self.assertIn("expected (':' | ';')", e.exception.args[0])
+
+ def test_unreachable_explicit(self) -> None:
+ source = """
+ start: NAME { UNREACHABLE }
+ """
+ grammar = parse_string(source, GrammarParser)
+ out = io.StringIO()
+ genr = PythonParserGenerator(
+ grammar, out, unreachable_formatting="This is a test"
+ )
+ genr.generate("<string>")
+ self.assertIn("This is a test", out.getvalue())
+
+ def test_unreachable_implicit1(self) -> None:
+ source = """
+ start: NAME | invalid_input
+ invalid_input: NUMBER { None }
+ """
+ grammar = parse_string(source, GrammarParser)
+ out = io.StringIO()
+ genr = PythonParserGenerator(
+ grammar, out, unreachable_formatting="This is a test"
+ )
+ genr.generate("<string>")
+ self.assertIn("This is a test", out.getvalue())
+
+ def test_unreachable_implicit2(self) -> None:
+ source = """
+ start: NAME | '(' invalid_input ')'
+ invalid_input: NUMBER { None }
+ """
+ grammar = parse_string(source, GrammarParser)
+ out = io.StringIO()
+ genr = PythonParserGenerator(
+ grammar, out, unreachable_formatting="This is a test"
+ )
+ genr.generate("<string>")
+ self.assertIn("This is a test", out.getvalue())
+
+ def test_unreachable_implicit3(self) -> None:
+ source = """
+ start: NAME | invalid_input { None }
+ invalid_input: NUMBER
+ """
+ grammar = parse_string(source, GrammarParser)
+ out = io.StringIO()
+ genr = PythonParserGenerator(
+ grammar, out, unreachable_formatting="This is a test"
+ )
+ genr.generate("<string>")
+ self.assertNotIn("This is a test", out.getvalue())
+
+ def test_locations_in_alt_action_and_group(self) -> None:
+ grammar = """
+ start: t=term NEWLINE? $ { ast.Expression(t, LOCATIONS) }
+ term:
+ | l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, LOCATIONS) }
+ | l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, LOCATIONS) }
+ | factor
+ factor:
+ | (
+ n=NAME { ast.Name(id=n.string, ctx=ast.Load(), LOCATIONS) } |
+ n=NUMBER { ast.Constant(value=ast.literal_eval(n.string), LOCATIONS) }
+ )
+ """
+ parser_class = make_parser(grammar)
+ source = "2*3\n"
+ o = ast.dump(parse_string(source, parser_class).body, include_attributes=True)
+ p = ast.dump(ast.parse(source).body[0].value, include_attributes=True).replace(
+ " kind=None,", ""
+ )
+ diff = "\n".join(
+ difflib.unified_diff(
+ o.split("\n"), p.split("\n"), "cpython", "python-pegen"
+ )
+ )
+ self.assertFalse(diff)
+
class TestGrammarVisitor:
class Visitor(GrammarVisitor):
diff --git a/Parser/parser.c b/Parser/parser.c
index 543827a..01082fa 100644
--- a/Parser/parser.c
+++ b/Parser/parser.c
@@ -1176,7 +1176,7 @@ statements_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ statements[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "statement+"));
- _res = ( asdl_stmt_seq * ) _PyPegen_seq_flatten ( p , a );
+ _res = ( asdl_stmt_seq* ) _PyPegen_seq_flatten ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -1217,7 +1217,7 @@ statement_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ statement[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "compound_stmt"));
- _res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , a );
+ _res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -1294,7 +1294,7 @@ statement_newline_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ statement_newline[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "compound_stmt NEWLINE"));
- _res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , a );
+ _res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -1346,7 +1346,7 @@ statement_newline_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , CHECK ( stmt_ty , _PyAST_Pass ( EXTRA ) ) );
+ _res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , CHECK ( stmt_ty , _PyAST_Pass ( EXTRA ) ) );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -1416,7 +1416,7 @@ simple_stmts_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ simple_stmts[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "simple_stmt !';' NEWLINE"));
- _res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , a );
+ _res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -2403,7 +2403,7 @@ augassign_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ augassign[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'@='"));
- _res = CHECK_VERSION ( AugOperator * , 5 , "The '@' operator is" , _PyPegen_augoperator ( p , MatMult ) );
+ _res = CHECK_VERSION ( AugOperator* , 5 , "The '@' operator is" , _PyPegen_augoperator ( p , MatMult ) );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -2841,7 +2841,7 @@ global_stmt_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_Global ( CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA );
+ _res = _PyAST_Global ( CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -2903,7 +2903,7 @@ nonlocal_stmt_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_Nonlocal ( CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA );
+ _res = _PyAST_Nonlocal ( CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -3460,7 +3460,7 @@ import_from_targets_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = ( asdl_alias_seq * ) _PyPegen_singleton_seq ( p , CHECK ( alias_ty , _PyPegen_alias_for_star ( p , EXTRA ) ) );
+ _res = ( asdl_alias_seq* ) _PyPegen_singleton_seq ( p , CHECK ( alias_ty , _PyPegen_alias_for_star ( p , EXTRA ) ) );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -4649,7 +4649,7 @@ slash_with_default_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "param_no_default* param_with_default+ '/' ','"));
- _res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b );
+ _res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -4681,7 +4681,7 @@ slash_with_default_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "param_no_default* param_with_default+ '/' &')'"));
- _res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b );
+ _res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -5340,7 +5340,7 @@ if_stmt_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq * , _PyPegen_singleton_seq ( p , c ) ) , EXTRA );
+ _res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq* , _PyPegen_singleton_seq ( p , c ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -5478,7 +5478,7 @@ elif_stmt_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq * , _PyPegen_singleton_seq ( p , c ) ) , EXTRA );
+ _res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq* , _PyPegen_singleton_seq ( p , c ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -6756,7 +6756,7 @@ subject_expr_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , value , values ) ) , Load , EXTRA );
+ _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , value , values ) ) , Load , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -9049,7 +9049,7 @@ mapping_pattern_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , items ) ) , rest -> v . Name . id , EXTRA );
+ _res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , items ) ) , rest -> v . Name . id , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -9092,7 +9092,7 @@ mapping_pattern_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , items ) ) , NULL , EXTRA );
+ _res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , items ) ) , NULL , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -9381,7 +9381,7 @@ class_pattern_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_MatchClass ( cls , NULL , CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA );
+ _res = _PyAST_MatchClass ( cls , NULL , CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -9433,7 +9433,7 @@ class_pattern_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_MatchClass ( cls , patterns , CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA );
+ _res = _PyAST_MatchClass ( cls , patterns , CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -9642,7 +9642,7 @@ expressions_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA );
+ _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -9678,7 +9678,7 @@ expressions_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA );
+ _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -10004,7 +10004,7 @@ star_expressions_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA );
+ _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -10040,7 +10040,7 @@ star_expressions_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA );
+ _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -10485,7 +10485,7 @@ disjunction_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_BoolOp ( Or , CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA );
+ _res = _PyAST_BoolOp ( Or , CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -10571,7 +10571,7 @@ conjunction_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_BoolOp ( And , CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA );
+ _res = _PyAST_BoolOp ( And , CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -10739,7 +10739,7 @@ comparison_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_Compare ( a , CHECK ( asdl_int_seq * , _PyPegen_get_cmpops ( p , b ) ) , CHECK ( asdl_expr_seq * , _PyPegen_get_exprs ( p , b ) ) , EXTRA );
+ _res = _PyAST_Compare ( a , CHECK ( asdl_int_seq* , _PyPegen_get_cmpops ( p , b ) ) , CHECK ( asdl_expr_seq* , _PyPegen_get_exprs ( p , b ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -12837,7 +12837,7 @@ primary_raw(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_Call ( a , CHECK ( asdl_expr_seq * , ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA );
+ _res = _PyAST_Call ( a , CHECK ( asdl_expr_seq* , ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -13896,7 +13896,7 @@ lambda_slash_with_default_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ lambda_slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "lambda_param_no_default* lambda_param_with_default+ '/' ','"));
- _res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b );
+ _res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -13928,7 +13928,7 @@ lambda_slash_with_default_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ lambda_slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "lambda_param_no_default* lambda_param_with_default+ '/' &':'"));
- _res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b );
+ _res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -14689,7 +14689,7 @@ dict_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_Dict ( CHECK ( asdl_expr_seq * , _PyPegen_get_keys ( p , a ) ) , CHECK ( asdl_expr_seq * , _PyPegen_get_values ( p , a ) ) , EXTRA );
+ _res = _PyAST_Dict ( CHECK ( asdl_expr_seq* , _PyPegen_get_keys ( p , a ) ) , CHECK ( asdl_expr_seq* , _PyPegen_get_values ( p , a ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -15556,7 +15556,7 @@ args_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_Call ( _PyPegen_dummy_name ( p ) , CHECK_NULL_ALLOWED ( asdl_expr_seq * , _PyPegen_seq_extract_starred_exprs ( p , a ) ) , CHECK_NULL_ALLOWED ( asdl_keyword_seq * , _PyPegen_seq_delete_starred_exprs ( p , a ) ) , EXTRA );
+ _res = _PyAST_Call ( _PyPegen_dummy_name ( p ) , CHECK_NULL_ALLOWED ( asdl_expr_seq* , _PyPegen_seq_extract_starred_exprs ( p , a ) ) , CHECK_NULL_ALLOWED ( asdl_keyword_seq* , _PyPegen_seq_delete_starred_exprs ( p , a ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -16026,7 +16026,7 @@ star_targets_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Store , EXTRA );
+ _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Store , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -16119,7 +16119,7 @@ star_targets_tuple_seq_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ star_targets_tuple_seq[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "star_target ((',' star_target))+ ','?"));
- _res = ( asdl_expr_seq * ) _PyPegen_seq_insert_in_front ( p , a , b );
+ _res = ( asdl_expr_seq* ) _PyPegen_seq_insert_in_front ( p , a , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -16146,7 +16146,7 @@ star_targets_tuple_seq_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ star_targets_tuple_seq[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "star_target ','"));
- _res = ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , a );
+ _res = ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -16923,7 +16923,7 @@ t_primary_raw(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
- _res = _PyAST_Call ( a , CHECK ( asdl_expr_seq * , ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA );
+ _res = _PyAST_Call ( a , CHECK ( asdl_expr_seq* , ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -17474,7 +17474,7 @@ type_expressions_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.expression+ ',' '*' expression ',' '**' expression"));
- _res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq * , _PyPegen_seq_append_to_end ( p , a , b ) ) , c );
+ _res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq* , _PyPegen_seq_append_to_end ( p , a , b ) ) , c );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -17507,7 +17507,7 @@ type_expressions_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.expression+ ',' '*' expression"));
- _res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , a , b );
+ _res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , a , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -17540,7 +17540,7 @@ type_expressions_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.expression+ ',' '**' expression"));
- _res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , a , b );
+ _res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , a , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -17576,7 +17576,7 @@ type_expressions_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*' expression ',' '**' expression"));
- _res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq * , _PyPegen_singleton_seq ( p , a ) ) , b );
+ _res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq* , _PyPegen_singleton_seq ( p , a ) ) , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -17603,7 +17603,7 @@ type_expressions_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*' expression"));
- _res = ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , a );
+ _res = ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -17630,7 +17630,7 @@ type_expressions_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'**' expression"));
- _res = ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , a );
+ _res = ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -20149,7 +20149,7 @@ invalid_match_stmt_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ invalid_match_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "\"match\" subject_expr !':'"));
- _res = CHECK_VERSION ( void * , 10 , "Pattern matching is" , RAISE_SYNTAX_ERROR ( "expected ':'" ) );
+ _res = CHECK_VERSION ( void* , 10 , "Pattern matching is" , RAISE_SYNTAX_ERROR ( "expected ':'" ) );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
diff --git a/Parser/pegen.c b/Parser/pegen.c
index f697f00..c77c534 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -897,6 +897,19 @@ _PyPegen_expect_token(Parser *p, int type)
return t;
}
+void*
+_PyPegen_expect_forced_result(Parser *p, void* result, const char* expected) {
+
+ if (p->error_indicator == 1) {
+ return NULL;
+ }
+ if (result == NULL) {
+ RAISE_SYNTAX_ERROR("expected (%s)", expected);
+ return NULL;
+ }
+ return result;
+}
+
Token *
_PyPegen_expect_forced_token(Parser *p, int type, const char* expected) {
diff --git a/Parser/pegen.h b/Parser/pegen.h
index f4b6876..57d1177 100644
--- a/Parser/pegen.h
+++ b/Parser/pegen.h
@@ -130,6 +130,7 @@ int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*),
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
Token *_PyPegen_expect_token(Parser *p, int type);
+void* _PyPegen_expect_forced_result(Parser *p, void* result, const char* expected);
Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected);
expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
expr_ty _PyPegen_soft_keyword_token(Parser *p);
diff --git a/Tools/peg_generator/mypy.ini b/Tools/peg_generator/mypy.ini
index 80d5c05..1732372 100644
--- a/Tools/peg_generator/mypy.ini
+++ b/Tools/peg_generator/mypy.ini
@@ -1,5 +1,5 @@
[mypy]
-files = pegen, scripts
+files = pegen
follow_imports = error
no_implicit_optional = True
diff --git a/Tools/peg_generator/pegen/__main__.py b/Tools/peg_generator/pegen/__main__.py
index c0f3b68..a12fe78 100755
--- a/Tools/peg_generator/pegen/__main__.py
+++ b/Tools/peg_generator/pegen/__main__.py
@@ -100,7 +100,9 @@ c_parser.add_argument(
"--optimized", action="store_true", help="Compile the extension in optimized mode"
)
c_parser.add_argument(
- "--skip-actions", action="store_true", help="Suppress code emission for rule actions",
+ "--skip-actions",
+ action="store_true",
+ help="Suppress code emission for rule actions",
)
python_parser = subparsers.add_parser("python", help="Generate Python code")
@@ -114,7 +116,9 @@ python_parser.add_argument(
help="Where to write the generated parser",
)
python_parser.add_argument(
- "--skip-actions", action="store_true", help="Suppress code emission for rule actions",
+ "--skip-actions",
+ action="store_true",
+ help="Suppress code emission for rule actions",
)
diff --git a/Tools/peg_generator/pegen/ast_dump.py b/Tools/peg_generator/pegen/ast_dump.py
index 93dfbfd..2c57d09 100644
--- a/Tools/peg_generator/pegen/ast_dump.py
+++ b/Tools/peg_generator/pegen/ast_dump.py
@@ -6,9 +6,17 @@ always fail. We rely on string comparison of the base classes instead.
TODO: Remove the above-described hack.
"""
+from typing import Any, Optional, Tuple
-def ast_dump(node, annotate_fields=True, include_attributes=False, *, indent=None):
- def _format(node, level=0):
+
+def ast_dump(
+ node: Any,
+ annotate_fields: bool = True,
+ include_attributes: bool = False,
+ *,
+ indent: Optional[str] = None,
+) -> str:
+ def _format(node: Any, level: int = 0) -> Tuple[str, bool]:
if indent is not None:
level += 1
prefix = "\n" + indent * level
diff --git a/Tools/peg_generator/pegen/build.py b/Tools/peg_generator/pegen/build.py
index b80fc85..6f0a091 100644
--- a/Tools/peg_generator/pegen/build.py
+++ b/Tools/peg_generator/pegen/build.py
@@ -58,7 +58,7 @@ def compile_c_extension(
extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
extra_compile_args.append("-DPy_BUILD_CORE_MODULE")
# Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c
- extra_compile_args.append('-D_Py_TEST_PEGEN')
+ extra_compile_args.append("-D_Py_TEST_PEGEN")
extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
if keep_asserts:
extra_compile_args.append("-UNDEBUG")
@@ -175,7 +175,10 @@ def build_c_generator(
def build_python_generator(
- grammar: Grammar, grammar_file: str, output_file: str, skip_actions: bool = False,
+ grammar: Grammar,
+ grammar_file: str,
+ output_file: str,
+ skip_actions: bool = False,
) -> ParserGenerator:
with open(output_file, "w") as file:
gen: ParserGenerator = PythonParserGenerator(grammar, file) # TODO: skip_actions
@@ -246,5 +249,10 @@ def build_python_parser_and_generator(
skip_actions (bool, optional): Whether to pretend no rule has any actions.
"""
grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
- gen = build_python_generator(grammar, grammar_file, output_file, skip_actions=skip_actions,)
+ gen = build_python_generator(
+ grammar,
+ grammar_file,
+ output_file,
+ skip_actions=skip_actions,
+ )
return grammar, parser, tokenizer, gen
diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py
index 7a2edbb..e928fd3 100644
--- a/Tools/peg_generator/pegen/c_generator.py
+++ b/Tools/peg_generator/pegen/c_generator.py
@@ -12,6 +12,7 @@ from pegen.grammar import (
Gather,
GrammarVisitor,
Group,
+ Leaf,
Lookahead,
NamedItem,
NameLeaf,
@@ -91,7 +92,16 @@ class FunctionCall:
parts.append(", 1")
if self.assigned_variable:
if self.assigned_variable_type:
- parts = ["(", self.assigned_variable, " = ", '(', self.assigned_variable_type, ')', *parts, ")"]
+ parts = [
+ "(",
+ self.assigned_variable,
+ " = ",
+ "(",
+ self.assigned_variable_type,
+ ")",
+ *parts,
+ ")",
+ ]
else:
parts = ["(", self.assigned_variable, " = ", *parts, ")"]
if self.comment:
@@ -256,9 +266,10 @@ class CCallMakerVisitor(GrammarVisitor):
def visit_Forced(self, node: Forced) -> FunctionCall:
call = self.generate_call(node.node)
- if call.nodetype == NodeTypes.GENERIC_TOKEN:
+ if isinstance(node.node, Leaf):
+ assert isinstance(node.node, Leaf)
val = ast.literal_eval(node.node.value)
- assert val in self.exact_tokens, f"{node.value} is not a known literal"
+ assert val in self.exact_tokens, f"{node.node.value} is not a known literal"
type = self.exact_tokens[val]
return FunctionCall(
assigned_variable="_literal",
@@ -268,9 +279,19 @@ class CCallMakerVisitor(GrammarVisitor):
return_type="Token *",
comment=f"forced_token='{val}'",
)
+ if isinstance(node.node, Group):
+ call = self.visit(node.node.rhs)
+ call.assigned_variable = None
+ call.comment = None
+ return FunctionCall(
+ assigned_variable="_literal",
+ function=f"_PyPegen_expect_forced_result",
+ arguments=["p", str(call), f'"{node.node.rhs!s}"'],
+ return_type="void *",
+ comment=f"forced_token=({node.node.rhs!s})",
+ )
else:
- raise NotImplementedError(
- f"Forced tokens don't work with {call.nodetype} tokens")
+ raise NotImplementedError(f"Forced tokens don't work with {node.node} nodes")
def visit_Opt(self, node: Opt) -> FunctionCall:
call = self.generate_call(node.node)
@@ -347,7 +368,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
debug: bool = False,
skip_actions: bool = False,
):
- super().__init__(grammar, tokens, file)
+ super().__init__(grammar, set(tokens.values()), file)
self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor(
self, exact_tokens, non_exact_tokens
)
@@ -386,7 +407,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print(f"goto {goto_target};")
self.print(f"}}")
- def out_of_memory_return(self, expr: str, cleanup_code: Optional[str] = None,) -> None:
+ def out_of_memory_return(
+ self,
+ expr: str,
+ cleanup_code: Optional[str] = None,
+ ) -> None:
self.print(f"if ({expr}) {{")
with self.indent():
if cleanup_code is not None:
@@ -568,7 +593,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
self._set_up_token_start_metadata_extraction()
self.visit(
- rhs, is_loop=False, is_gather=node.is_gather(), rulename=node.name,
+ rhs,
+ is_loop=False,
+ is_gather=node.is_gather(),
+ rulename=node.name,
)
if self.debug:
self.print(f'D(fprintf(stderr, "Fail at %d: {node.name}\\n", p->mark));')
@@ -601,7 +629,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
self._set_up_token_start_metadata_extraction()
self.visit(
- rhs, is_loop=True, is_gather=node.is_gather(), rulename=node.name,
+ rhs,
+ is_loop=True,
+ is_gather=node.is_gather(),
+ rulename=node.name,
)
if is_repeat1:
self.print("if (_n == 0 || p->error_indicator) {")
@@ -771,7 +802,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
def visit_Alt(
self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str]
) -> None:
- if len(node.items) == 1 and str(node.items[0]).startswith('invalid_'):
+ if len(node.items) == 1 and str(node.items[0]).startswith("invalid_"):
self.print(f"if (p->call_invalid_rules) {{ // {node}")
else:
self.print(f"{{ // {node}")
@@ -791,7 +822,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
if v == "_cut_var":
v += " = 0" # cut_var must be initialized
self.print(f"{var_type}{v};")
- if v.startswith("_opt_var"):
+ if v and v.startswith("_opt_var"):
self.print(f"UNUSED({v}); // Silence compiler warnings")
with self.local_variable_context():
diff --git a/Tools/peg_generator/pegen/first_sets.py b/Tools/peg_generator/pegen/first_sets.py
index 71be5a2..50ced22 100755
--- a/Tools/peg_generator/pegen/first_sets.py
+++ b/Tools/peg_generator/pegen/first_sets.py
@@ -29,7 +29,8 @@ from pegen.grammar import (
)
argparser = argparse.ArgumentParser(
- prog="calculate_first_sets", description="Calculate the first sets of a grammar",
+ prog="calculate_first_sets",
+ description="Calculate the first sets of a grammar",
)
argparser.add_argument("grammar_file", help="The grammar file")
diff --git a/Tools/peg_generator/pegen/grammar_parser.py b/Tools/peg_generator/pegen/grammar_parser.py
index 70fa5b0..6e9f7d3 100644
--- a/Tools/peg_generator/pegen/grammar_parser.py
+++ b/Tools/peg_generator/pegen/grammar_parser.py
@@ -2,7 +2,10 @@
# @generated by pegen from ./Tools/peg_generator/pegen/metagrammar.gram
import ast
-from typing import Optional, Any
+import sys
+import tokenize
+
+from typing import Any, Optional
from pegen.parser import memoize, memoize_left_rec, logger, Parser
from ast import literal_eval
@@ -35,83 +38,71 @@ from pegen.grammar import (
StringLeaf,
)
+# Keywords and soft keywords are listed at the end of the parser definition.
class GeneratedParser(Parser):
@memoize
def start(self) -> Optional[Grammar]:
# start: grammar $
- mark = self.mark()
- cut = False
+ mark = self._mark()
if (
(grammar := self.grammar())
and
- (endmarker := self.expect('ENDMARKER'))
+ (_endmarker := self.expect('ENDMARKER'))
):
return grammar
- self.reset(mark)
- if cut: return None
+ self._reset(mark)
return None
@memoize
def grammar(self) -> Optional[Grammar]:
# grammar: metas rules | rules
- mark = self.mark()
- cut = False
+ mark = self._mark()
if (
(metas := self.metas())
and
(rules := self.rules())
):
return Grammar ( rules , metas )
- self.reset(mark)
- if cut: return None
- cut = False
+ self._reset(mark)
if (
(rules := self.rules())
):
- return Grammar ( rules , [ ] )
- self.reset(mark)
- if cut: return None
+ return Grammar ( rules , [] )
+ self._reset(mark)
return None
@memoize
def metas(self) -> Optional[MetaList]:
# metas: meta metas | meta
- mark = self.mark()
- cut = False
+ mark = self._mark()
if (
(meta := self.meta())
and
(metas := self.metas())
):
- return [ meta ] + metas
- self.reset(mark)
- if cut: return None
- cut = False
+ return [meta] + metas
+ self._reset(mark)
if (
(meta := self.meta())
):
- return [ meta ]
- self.reset(mark)
- if cut: return None
+ return [meta]
+ self._reset(mark)
return None
@memoize
def meta(self) -> Optional[MetaTuple]:
# meta: "@" NAME NEWLINE | "@" NAME NAME NEWLINE | "@" NAME STRING NEWLINE
- mark = self.mark()
- cut = False
+ mark = self._mark()
if (
(literal := self.expect("@"))
and
(name := self.name())
and
- (newline := self.expect('NEWLINE'))
+ (_newline := self.expect('NEWLINE'))
):
return ( name . string , None )
- self.reset(mark)
- if cut: return None
- cut = False
+ self._reset(mark)
if (
(literal := self.expect("@"))
and
@@ -119,12 +110,10 @@ class GeneratedParser(Parser):
and
(b := self.name())
and
- (newline := self.expect('NEWLINE'))
+ (_newline := self.expect('NEWLINE'))
):
return ( a . string , b . string )
- self.reset(mark)
- if cut: return None
- cut = False
+ self._reset(mark)
if (
(literal := self.expect("@"))
and
@@ -132,40 +121,34 @@ class GeneratedParser(Parser):
and
(string := self.string())
and
- (newline := self.expect('NEWLINE'))
+ (_newline := self.expect('NEWLINE'))
):
return ( name . string , literal_eval ( string . string ) )
- self.reset(mark)
- if cut: return None
+ self._reset(mark)
return None
@memoize
def rules(self) -> Optional[RuleList]:
# rules: rule rules | rule
- mark = self.mark()
- cut = False
+ mark = self._mark()
if (
(rule := self.rule())
and
(rules := self.rules())
):
- return [ rule ] + rules
- self.reset(mark)
- if cut: return None
- cut = False
+ return [rule] + rules
+ self._reset(mark)
if (
(rule := self.rule())
):
- return [ rule ]
- self.reset(mark)
- if cut: return None
+ return [rule]
+ self._reset(mark)
return None
@memoize
def rule(self) -> Optional[Rule]:
# rule: rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" alts NEWLINE
- mark = self.mark()
- cut = False
+ mark = self._mark()
if (
(rulename := self.rulename())
and
@@ -175,18 +158,16 @@ class GeneratedParser(Parser):
and
(alts := self.alts())
and
- (newline := self.expect('NEWLINE'))
+ (_newline := self.expect('NEWLINE'))
and
- (indent := self.expect('INDENT'))
+ (_indent := self.expect('INDENT'))
and
(more_alts := self.more_alts())
and
- (dedent := self.expect('DEDENT'))
+ (_dedent := self.expect('DEDENT'))
):
- return Rule ( rulename [ 0 ] , rulename [ 1 ] , Rhs ( alts . alts + more_alts . alts ) , memo = opt )
- self.reset(mark)
- if cut: return None
- cut = False
+ return Rule ( rulename [0] , rulename [1] , Rhs ( alts . alts + more_alts . alts ) , memo = opt )
+ self._reset(mark)
if (
(rulename := self.rulename())
and
@@ -194,18 +175,16 @@ class GeneratedParser(Parser):
and
(literal := self.expect(":"))
and
- (newline := self.expect('NEWLINE'))
+ (_newline := self.expect('NEWLINE'))
and
- (indent := self.expect('INDENT'))
+ (_indent := self.expect('INDENT'))
and
(more_alts := self.more_alts())
and
- (dedent := self.expect('DEDENT'))
+ (_dedent := self.expect('DEDENT'))
):
- return Rule ( rulename [ 0 ] , rulename [ 1 ] , more_alts , memo = opt )
- self.reset(mark)
- if cut: return None
- cut = False
+ return Rule ( rulename [0] , rulename [1] , more_alts , memo = opt )
+ self._reset(mark)
if (
(rulename := self.rulename())
and
@@ -215,76 +194,49 @@ class GeneratedParser(Parser):
and
(alts := self.alts())
and
- (newline := self.expect('NEWLINE'))
+ (_newline := self.expect('NEWLINE'))
):
- return Rule ( rulename [ 0 ] , rulename [ 1 ] , alts , memo = opt )
- self.reset(mark)
- if cut: return None
+ return Rule ( rulename [0] , rulename [1] , alts , memo = opt )
+ self._reset(mark)
return None
@memoize
def rulename(self) -> Optional[RuleName]:
- # rulename: NAME '[' NAME '*' ']' | NAME '[' NAME ']' | NAME
- mark = self.mark()
- cut = False
- if (
- (name := self.name())
- and
- (literal := self.expect('['))
- and
- (type := self.name())
- and
- (literal_1 := self.expect('*'))
- and
- (literal_2 := self.expect(']'))
- ):
- return ( name . string , type . string + "*" )
- self.reset(mark)
- if cut: return None
- cut = False
+ # rulename: NAME annotation | NAME
+ mark = self._mark()
if (
(name := self.name())
and
- (literal := self.expect('['))
- and
- (type := self.name())
- and
- (literal_1 := self.expect(']'))
+ (annotation := self.annotation())
):
- return ( name . string , type . string )
- self.reset(mark)
- if cut: return None
- cut = False
+ return ( name . string , annotation )
+ self._reset(mark)
if (
(name := self.name())
):
return ( name . string , None )
- self.reset(mark)
- if cut: return None
+ self._reset(mark)
return None
@memoize
def memoflag(self) -> Optional[str]:
- # memoflag: '(' 'memo' ')'
- mark = self.mark()
- cut = False
+ # memoflag: '(' "memo" ')'
+ mark = self._mark()
if (
(literal := self.expect('('))
and
- (literal_1 := self.expect('memo'))
+ (literal_1 := self.expect("memo"))
and
(literal_2 := self.expect(')'))
):
return "memo"
- self.reset(mark)
- if cut: return None
+ self._reset(mark)
return None
@memoize
def alts(self) -> Optional[Rhs]:
# alts: alt "|" alts | alt
- mark = self.mark()
- cut = False
+ mark = self._mark()
if (
(alt := self.alt())
and
@@ -292,53 +244,45 @@ class GeneratedParser(Parser):
and
(alts := self.alts())
):
- return Rhs ( [ alt ] + alts . alts )
- self.reset(mark)
- if cut: return None
- cut = False
+ return Rhs ( [alt] + alts . alts )
+ self._reset(mark)
if (
(alt := self.alt())
):
- return Rhs ( [ alt ] )
- self.reset(mark)
- if cut: return None
+ return Rhs ( [alt] )
+ self._reset(mark)
return None
@memoize
def more_alts(self) -> Optional[Rhs]:
# more_alts: "|" alts NEWLINE more_alts | "|" alts NEWLINE
- mark = self.mark()
- cut = False
+ mark = self._mark()
if (
(literal := self.expect("|"))
and
(alts := self.alts())
and
- (newline := self.expect('NEWLINE'))
+ (_newline := self.expect('NEWLINE'))
and
(more_alts := self.more_alts())
):
return Rhs ( alts . alts + more_alts . alts )
- self.reset(mark)
- if cut: return None
- cut = False
+ self._reset(mark)
if (
(literal := self.expect("|"))
and
(alts := self.alts())
and
- (newline := self.expect('NEWLINE'))
+ (_newline := self.expect('NEWLINE'))
):
return Rhs ( alts . alts )
- self.reset(mark)
- if cut: return None
+ self._reset(mark)
return None
@memoize
def alt(self) -> Optional[Alt]:
# alt: items '$' action | items '$' | items action | items
- mark = self.mark()
- cut = False
+ mark = self._mark()
if (
(items := self.items())
and
@@ -346,101 +290,65 @@ class GeneratedParser(Parser):
and
(action := self.action())
):
- return Alt ( items + [ NamedItem ( None , NameLeaf ( 'ENDMARKER' ) ) ] , action = action )
- self.reset(mark)
- if cut: return None
- cut = False
+ return Alt ( items + [NamedItem ( None , NameLeaf ( 'ENDMARKER' ) )] , action = action )
+ self._reset(mark)
if (
(items := self.items())
and
(literal := self.expect('$'))
):
- return Alt ( items + [ NamedItem ( None , NameLeaf ( 'ENDMARKER' ) ) ] , action = None )
- self.reset(mark)
- if cut: return None
- cut = False
+ return Alt ( items + [NamedItem ( None , NameLeaf ( 'ENDMARKER' ) )] , action = None )
+ self._reset(mark)
if (
(items := self.items())
and
(action := self.action())
):
return Alt ( items , action = action )
- self.reset(mark)
- if cut: return None
- cut = False
+ self._reset(mark)
if (
(items := self.items())
):
return Alt ( items , action = None )
- self.reset(mark)
- if cut: return None
+ self._reset(mark)
return None
@memoize
def items(self) -> Optional[NamedItemList]:
# items: named_item items | named_item
- mark = self.mark()
- cut = False
+ mark = self._mark()
if (
(named_item := self.named_item())
and
(items := self.items())
):
- return [ named_item ] + items
- self.reset(mark)
- if cut: return None
- cut = False
+ return [named_item] + items
+ self._reset(mark)
if (
(named_item := self.named_item())
):
- return [ named_item ]
- self.reset(mark)
- if cut: return None
+ return [named_item]
+ self._reset(mark)
return None
@memoize
def named_item(self) -> Optional[NamedItem]:
- # named_item: NAME '[' NAME '*' ']' '=' ~ item | NAME '[' NAME ']' '=' ~ item | NAME '=' ~ item | item | forced_atom | lookahead
- mark = self.mark()
- cut = False
- if (
- (name := self.name())
- and
- (literal := self.expect('['))
- and
- (type := self.name())
- and
- (literal_1 := self.expect('*'))
- and
- (literal_2 := self.expect(']'))
- and
- (literal_3 := self.expect('='))
- and
- (cut := True)
- and
- (item := self.item())
- ):
- return NamedItem ( name . string , item , f"{type.string}*" )
- self.reset(mark)
- if cut: return None
+ # named_item: NAME annotation '=' ~ item | NAME '=' ~ item | item | forced_atom | lookahead
+ mark = self._mark()
cut = False
if (
(name := self.name())
and
- (literal := self.expect('['))
- and
- (type := self.name())
- and
- (literal_1 := self.expect(']'))
+ (annotation := self.annotation())
and
- (literal_2 := self.expect('='))
+ (literal := self.expect('='))
and
(cut := True)
and
(item := self.item())
):
- return NamedItem ( name . string , item , type . string )
- self.reset(mark)
+ return NamedItem ( name . string , item , annotation )
+ self._reset(mark)
if cut: return None
cut = False
if (
@@ -453,35 +361,29 @@ class GeneratedParser(Parser):
(item := self.item())
):
return NamedItem ( name . string , item )
- self.reset(mark)
+ self._reset(mark)
if cut: return None
- cut = False
if (
(item := self.item())
):
return NamedItem ( None , item )
- self.reset(mark)
- if cut: return None
- cut = False
+ self._reset(mark)
if (
- (it := self.forced_atom())
+ (forced := self.forced_atom())
):
- return NamedItem ( None , it )
- self.reset(mark)
- if cut: return None
- cut = False
+ return NamedItem ( None , forced )
+ self._reset(mark)
if (
(it := self.lookahead())
):
return NamedItem ( None , it )
- self.reset(mark)
- if cut: return None
+ self._reset(mark)
return None
@memoize
- def forced_atom(self) -> Optional[NamedItem]:
+ def forced_atom(self) -> Optional[Forced]:
# forced_atom: '&' '&' ~ atom
- mark = self.mark()
+ mark = self._mark()
cut = False
if (
(literal := self.expect('&'))
@@ -493,14 +395,14 @@ class GeneratedParser(Parser):
(atom := self.atom())
):
return Forced ( atom )
- self.reset(mark)
+ self._reset(mark)
if cut: return None
return None
@memoize
def lookahead(self) -> Optional[LookaheadOrCut]:
# lookahead: '&' ~ atom | '!' ~ atom | '~'
- mark = self.mark()
+ mark = self._mark()
cut = False
if (
(literal := self.expect('&'))
@@ -510,7 +412,7 @@ class GeneratedParser(Parser):
(atom := self.atom())
):
return PositiveLookahead ( atom )
- self.reset(mark)
+ self._reset(mark)
if cut: return None
cut = False
if (
@@ -521,21 +423,19 @@ class GeneratedParser(Parser):
(atom := self.atom())
):
return NegativeLookahead ( atom )
- self.reset(mark)
+ self._reset(mark)
if cut: return None
- cut = False
if (
(literal := self.expect('~'))
):
return Cut ( )
- self.reset(mark)
- if cut: return None
+ self._reset(mark)
return None
@memoize
def item(self) -> Optional[Item]:
# item: '[' ~ alts ']' | atom '?' | atom '*' | atom '+' | atom '.' atom '+' | atom
- mark = self.mark()
+ mark = self._mark()
cut = False
if (
(literal := self.expect('['))
@@ -547,36 +447,29 @@ class GeneratedParser(Parser):
(literal_1 := self.expect(']'))
):
return Opt ( alts )
- self.reset(mark)
+ self._reset(mark)
if cut: return None
- cut = False
if (
(atom := self.atom())
and
(literal := self.expect('?'))
):
return Opt ( atom )
- self.reset(mark)
- if cut: return None
- cut = False
+ self._reset(mark)
if (
(atom := self.atom())
and
(literal := self.expect('*'))
):
return Repeat0 ( atom )
- self.reset(mark)
- if cut: return None
- cut = False
+ self._reset(mark)
if (
(atom := self.atom())
and
(literal := self.expect('+'))
):
return Repeat1 ( atom )
- self.reset(mark)
- if cut: return None
- cut = False
+ self._reset(mark)
if (
(sep := self.atom())
and
@@ -587,21 +480,18 @@ class GeneratedParser(Parser):
(literal_1 := self.expect('+'))
):
return Gather ( sep , node )
- self.reset(mark)
- if cut: return None
- cut = False
+ self._reset(mark)
if (
(atom := self.atom())
):
return atom
- self.reset(mark)
- if cut: return None
+ self._reset(mark)
return None
@memoize
def atom(self) -> Optional[Plain]:
# atom: '(' ~ alts ')' | NAME | STRING
- mark = self.mark()
+ mark = self._mark()
cut = False
if (
(literal := self.expect('('))
@@ -613,28 +503,24 @@ class GeneratedParser(Parser):
(literal_1 := self.expect(')'))
):
return Group ( alts )
- self.reset(mark)
+ self._reset(mark)
if cut: return None
- cut = False
if (
(name := self.name())
):
return NameLeaf ( name . string )
- self.reset(mark)
- if cut: return None
- cut = False
+ self._reset(mark)
if (
(string := self.string())
):
return StringLeaf ( string . string )
- self.reset(mark)
- if cut: return None
+ self._reset(mark)
return None
@memoize
def action(self) -> Optional[str]:
# action: "{" ~ target_atoms "}"
- mark = self.mark()
+ mark = self._mark()
cut = False
if (
(literal := self.expect("{"))
@@ -646,95 +532,123 @@ class GeneratedParser(Parser):
(literal_1 := self.expect("}"))
):
return target_atoms
- self.reset(mark)
+ self._reset(mark)
+ if cut: return None
+ return None
+
+ @memoize
+ def annotation(self) -> Optional[str]:
+ # annotation: "[" ~ target_atoms "]"
+ mark = self._mark()
+ cut = False
+ if (
+ (literal := self.expect("["))
+ and
+ (cut := True)
+ and
+ (target_atoms := self.target_atoms())
+ and
+ (literal_1 := self.expect("]"))
+ ):
+ return target_atoms
+ self._reset(mark)
if cut: return None
return None
@memoize
def target_atoms(self) -> Optional[str]:
# target_atoms: target_atom target_atoms | target_atom
- mark = self.mark()
- cut = False
+ mark = self._mark()
if (
(target_atom := self.target_atom())
and
(target_atoms := self.target_atoms())
):
return target_atom + " " + target_atoms
- self.reset(mark)
- if cut: return None
- cut = False
+ self._reset(mark)
if (
(target_atom := self.target_atom())
):
return target_atom
- self.reset(mark)
- if cut: return None
+ self._reset(mark)
return None
@memoize
def target_atom(self) -> Optional[str]:
- # target_atom: "{" ~ target_atoms "}" | NAME | NUMBER | STRING | "?" | ":" | !"}" OP
- mark = self.mark()
+ # target_atom: "{" ~ target_atoms? "}" | "[" ~ target_atoms? "]" | NAME "*" | NAME | NUMBER | STRING | "?" | ":" | !"}" !"]" OP
+ mark = self._mark()
cut = False
if (
(literal := self.expect("{"))
and
(cut := True)
and
- (target_atoms := self.target_atoms())
+ (atoms := self.target_atoms(),)
and
(literal_1 := self.expect("}"))
):
- return "{" + target_atoms + "}"
- self.reset(mark)
+ return "{" + ( atoms or "" ) + "}"
+ self._reset(mark)
if cut: return None
cut = False
if (
+ (literal := self.expect("["))
+ and
+ (cut := True)
+ and
+ (atoms := self.target_atoms(),)
+ and
+ (literal_1 := self.expect("]"))
+ ):
+ return "[" + ( atoms or "" ) + "]"
+ self._reset(mark)
+ if cut: return None
+ if (
+ (name := self.name())
+ and
+ (literal := self.expect("*"))
+ ):
+ return name . string + "*"
+ self._reset(mark)
+ if (
(name := self.name())
):
return name . string
- self.reset(mark)
- if cut: return None
- cut = False
+ self._reset(mark)
if (
(number := self.number())
):
return number . string
- self.reset(mark)
- if cut: return None
- cut = False
+ self._reset(mark)
if (
(string := self.string())
):
return string . string
- self.reset(mark)
- if cut: return None
- cut = False
+ self._reset(mark)
if (
(literal := self.expect("?"))
):
return "?"
- self.reset(mark)
- if cut: return None
- cut = False
+ self._reset(mark)
if (
(literal := self.expect(":"))
):
return ":"
- self.reset(mark)
- if cut: return None
- cut = False
+ self._reset(mark)
if (
self.negative_lookahead(self.expect, "}")
and
+ self.negative_lookahead(self.expect, "]")
+ and
(op := self.op())
):
return op . string
- self.reset(mark)
- if cut: return None
+ self._reset(mark)
return None
+ KEYWORDS = ()
+ SOFT_KEYWORDS = ('memo',)
+
if __name__ == '__main__':
from pegen.parser import simple_parser_main
diff --git a/Tools/peg_generator/pegen/keywordgen.py b/Tools/peg_generator/pegen/keywordgen.py
index 2937ddb..6a07f6e 100644
--- a/Tools/peg_generator/pegen/keywordgen.py
+++ b/Tools/peg_generator/pegen/keywordgen.py
@@ -38,7 +38,7 @@ issoftkeyword = frozenset(softkwlist).__contains__
EXTRA_KEYWORDS = ["async", "await"]
-def main():
+def main() -> None:
parser = argparse.ArgumentParser(
description="Generate the Lib/keywords.py file from the grammar."
)
@@ -58,9 +58,7 @@ def main():
grammar, _, _ = build_parser(args.grammar)
with args.tokens_file as tok_file:
all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
- gen: ParserGenerator = CParserGenerator(
- grammar, all_tokens, exact_tok, non_exact_tok, file=None
- )
+ gen = CParserGenerator(grammar, all_tokens, exact_tok, non_exact_tok, file=None)
gen.collect_todo()
with args.keyword_file as thefile:
@@ -68,7 +66,9 @@ def main():
all_soft_keywords = sorted(gen.callmakervisitor.soft_keywords)
keywords = "" if not all_keywords else " " + ",\n ".join(map(repr, all_keywords))
- soft_keywords = "" if not all_soft_keywords else " " + ",\n ".join(map(repr, all_soft_keywords))
+ soft_keywords = (
+ "" if not all_soft_keywords else " " + ",\n ".join(map(repr, all_soft_keywords))
+ )
thefile.write(TEMPLATE.format(keywords=keywords, soft_keywords=soft_keywords))
diff --git a/Tools/peg_generator/pegen/metagrammar.gram b/Tools/peg_generator/pegen/metagrammar.gram
index bb4355f..f22c334 100644
--- a/Tools/peg_generator/pegen/metagrammar.gram
+++ b/Tools/peg_generator/pegen/metagrammar.gram
@@ -57,13 +57,12 @@ rule[Rule]:
| rulename memoflag? ":" alts NEWLINE { Rule(rulename[0], rulename[1], alts, memo=opt) }
rulename[RuleName]:
- | NAME '[' type=NAME '*' ']' { (name.string, type.string+"*") }
- | NAME '[' type=NAME ']' { (name.string, type.string) }
+ | NAME annotation { (name.string, annotation) }
| NAME { (name.string, None) }
# In the future this may return something more complicated
memoflag[str]:
- | '(' 'memo' ')' { "memo" }
+ | '(' "memo" ')' { "memo" }
alts[Rhs]:
| alt "|" alts { Rhs([alt] + alts.alts)}
@@ -84,14 +83,13 @@ items[NamedItemList]:
| named_item { [named_item] }
named_item[NamedItem]:
- | NAME '[' type=NAME '*' ']' '=' ~ item {NamedItem(name.string, item, f"{type.string}*")}
- | NAME '[' type=NAME ']' '=' ~ item {NamedItem(name.string, item, type.string)}
+ | NAME annotation '=' ~ item {NamedItem(name.string, item, annotation)}
| NAME '=' ~ item {NamedItem(name.string, item)}
| item {NamedItem(None, item)}
- | it=forced_atom {NamedItem(None, it)}
+ | forced=forced_atom {NamedItem(None, forced)}
| it=lookahead {NamedItem(None, it)}
-forced_atom[NamedItem]:
+forced_atom[Forced]:
| '&''&' ~ atom {Forced(atom)}
lookahead[LookaheadOrCut]:
@@ -112,19 +110,22 @@ atom[Plain]:
| NAME {NameLeaf(name.string) }
| STRING {StringLeaf(string.string)}
-# Mini-grammar for the actions
+# Mini-grammar for the actions and annotations
action[str]: "{" ~ target_atoms "}" { target_atoms }
+annotation[str]: "[" ~ target_atoms "]" { target_atoms }
target_atoms[str]:
| target_atom target_atoms { target_atom + " " + target_atoms }
| target_atom { target_atom }
target_atom[str]:
- | "{" ~ target_atoms "}" { "{" + target_atoms + "}" }
+ | "{" ~ atoms=target_atoms? "}" { "{" + (atoms or "") + "}" }
+ | "[" ~ atoms=target_atoms? "]" { "[" + (atoms or "") + "]" }
+ | NAME "*" { name.string + "*" }
| NAME { name.string }
| NUMBER { number.string }
| STRING { string.string }
| "?" { "?" }
| ":" { ":" }
- | !"}" OP { op.string }
+ | !"}" !"]" OP { op.string }
diff --git a/Tools/peg_generator/pegen/parser.py b/Tools/peg_generator/pegen/parser.py
index 16d954d..4ce60e3 100644
--- a/Tools/peg_generator/pegen/parser.py
+++ b/Tools/peg_generator/pegen/parser.py
@@ -4,13 +4,10 @@ import time
import token
import tokenize
import traceback
-
from abc import abstractmethod
-from typing import Any, Callable, cast, Dict, Optional, Tuple, Type, TypeVar
+from typing import Any, Callable, ClassVar, Dict, Optional, Tuple, Type, TypeVar, cast
-from pegen.tokenizer import exact_token_types
-from pegen.tokenizer import Mark
-from pegen.tokenizer import Tokenizer
+from pegen.tokenizer import Mark, Tokenizer, exact_token_types
T = TypeVar("T")
P = TypeVar("P", bound="Parser")
@@ -45,12 +42,12 @@ def memoize(method: F) -> F:
method_name = method.__name__
def memoize_wrapper(self: P, *args: object) -> T:
- mark = self.mark()
+ mark = self._mark()
key = mark, method_name, args
# Fast path: cache hit, and not verbose.
if key in self._cache and not self._verbose:
tree, endmark = self._cache[key]
- self.reset(endmark)
+ self._reset(endmark)
return tree
# Slow path: no cache hit, or verbose.
verbose = self._verbose
@@ -64,13 +61,13 @@ def memoize(method: F) -> F:
self._level -= 1
if verbose:
print(f"{fill}... {method_name}({argsr}) -> {tree!s:.200}")
- endmark = self.mark()
+ endmark = self._mark()
self._cache[key] = tree, endmark
else:
tree, endmark = self._cache[key]
if verbose:
print(f"{fill}{method_name}({argsr}) -> {tree!s:.200}")
- self.reset(endmark)
+ self._reset(endmark)
return tree
memoize_wrapper.__wrapped__ = method # type: ignore
@@ -82,12 +79,12 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option
method_name = method.__name__
def memoize_left_rec_wrapper(self: P) -> Optional[T]:
- mark = self.mark()
+ mark = self._mark()
key = mark, method_name, ()
# Fast path: cache hit, and not verbose.
if key in self._cache and not self._verbose:
tree, endmark = self._cache[key]
- self.reset(endmark)
+ self._reset(endmark)
return tree
# Slow path: no cache hit, or verbose.
verbose = self._verbose
@@ -113,9 +110,13 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option
print(f"{fill}Recursive {method_name} at {mark} depth {depth}")
while True:
- self.reset(mark)
- result = method(self)
- endmark = self.mark()
+ self._reset(mark)
+ self.in_recursive_rule += 1
+ try:
+ result = method(self)
+ finally:
+ self.in_recursive_rule -= 1
+ endmark = self._mark()
depth += 1
if verbose:
print(
@@ -131,24 +132,24 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option
break
self._cache[key] = lastresult, lastmark = result, endmark
- self.reset(lastmark)
+ self._reset(lastmark)
tree = lastresult
self._level -= 1
if verbose:
print(f"{fill}{method_name}() -> {tree!s:.200} [cached]")
if tree:
- endmark = self.mark()
+ endmark = self._mark()
else:
endmark = mark
- self.reset(endmark)
+ self._reset(endmark)
self._cache[key] = tree, endmark
else:
tree, endmark = self._cache[key]
if verbose:
print(f"{fill}{method_name}() -> {tree!s:.200} [fresh]")
if tree:
- self.reset(endmark)
+ self._reset(endmark)
return tree
memoize_left_rec_wrapper.__wrapped__ = method # type: ignore
@@ -158,15 +159,21 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option
class Parser:
"""Parsing base class."""
+ KEYWORDS: ClassVar[Tuple[str, ...]]
+
+ SOFT_KEYWORDS: ClassVar[Tuple[str, ...]]
+
def __init__(self, tokenizer: Tokenizer, *, verbose: bool = False):
self._tokenizer = tokenizer
self._verbose = verbose
self._level = 0
self._cache: Dict[Tuple[Mark, str, Tuple[Any, ...]], Tuple[Any, Mark]] = {}
+ # Integer tracking whether we are in a left recursive rule or not. Can be useful
+ # for error reporting.
+ self.in_recursive_rule = 0
# Pass through common tokenizer methods.
- # TODO: Rename to _mark and _reset.
- self.mark = self._tokenizer.mark
- self.reset = self._tokenizer.reset
+ self._mark = self._tokenizer.mark
+ self._reset = self._tokenizer.reset
@abstractmethod
def start(self) -> Any:
@@ -179,7 +186,7 @@ class Parser:
@memoize
def name(self) -> Optional[tokenize.TokenInfo]:
tok = self._tokenizer.peek()
- if tok.type == token.NAME:
+ if tok.type == token.NAME and tok.string not in self.KEYWORDS:
return self._tokenizer.getnext()
return None
@@ -205,6 +212,20 @@ class Parser:
return None
@memoize
+ def type_comment(self) -> Optional[tokenize.TokenInfo]:
+ tok = self._tokenizer.peek()
+ if tok.type == token.TYPE_COMMENT:
+ return self._tokenizer.getnext()
+ return None
+
+ @memoize
+ def soft_keyword(self) -> Optional[tokenize.TokenInfo]:
+ tok = self._tokenizer.peek()
+ if tok.type == token.NAME and tok.string in self.SOFT_KEYWORDS:
+ return self._tokenizer.getnext()
+ return None
+
+ @memoize
def expect(self, type: str) -> Optional[tokenize.TokenInfo]:
tok = self._tokenizer.peek()
if tok.string == type:
@@ -219,23 +240,26 @@ class Parser:
return self._tokenizer.getnext()
return None
+ def expect_forced(self, res: Any, expectation: str) -> Optional[tokenize.TokenInfo]:
+ if res is None:
+ raise self.make_syntax_error(f"expected {expectation}")
+ return res
+
def positive_lookahead(self, func: Callable[..., T], *args: object) -> T:
- mark = self.mark()
+ mark = self._mark()
ok = func(*args)
- self.reset(mark)
+ self._reset(mark)
return ok
def negative_lookahead(self, func: Callable[..., object], *args: object) -> bool:
- mark = self.mark()
+ mark = self._mark()
ok = func(*args)
- self.reset(mark)
+ self._reset(mark)
return not ok
- def make_syntax_error(self, filename: str = "<unknown>") -> SyntaxError:
+ def make_syntax_error(self, message: str, filename: str = "<unknown>") -> SyntaxError:
tok = self._tokenizer.diagnose()
- return SyntaxError(
- "pegen parse failure", (filename, tok.start[0], 1 + tok.start[1], tok.line)
- )
+ return SyntaxError(message, (filename, tok.start[0], 1 + tok.start[1], tok.line))
def simple_parser_main(parser_class: Type[Parser]) -> None:
diff --git a/Tools/peg_generator/pegen/parser_generator.py b/Tools/peg_generator/pegen/parser_generator.py
index 364eccb..33ecee1 100644
--- a/Tools/peg_generator/pegen/parser_generator.py
+++ b/Tools/peg_generator/pegen/parser_generator.py
@@ -1,30 +1,29 @@
import contextlib
from abc import abstractmethod
-
-from typing import AbstractSet, Dict, IO, Iterator, List, Optional, Set, Text, Tuple
+from typing import IO, AbstractSet, Dict, Iterator, List, Optional, Set, Text, Tuple
from pegen import sccutils
from pegen.grammar import (
- Grammar,
- Rule,
- Rhs,
Alt,
+ Gather,
+ Grammar,
+ GrammarError,
+ GrammarVisitor,
NamedItem,
- Plain,
NameLeaf,
- Gather,
+ Plain,
+ Rhs,
+ Rule,
)
-from pegen.grammar import GrammarError, GrammarVisitor
class RuleCheckingVisitor(GrammarVisitor):
- def __init__(self, rules: Dict[str, Rule], tokens: Dict[int, str]):
+ def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):
self.rules = rules
self.tokens = tokens
def visit_NameLeaf(self, node: NameLeaf) -> None:
- if node.value not in self.rules and node.value not in self.tokens.values():
- # TODO: Add line/col info to (leaf) nodes
+ if node.value not in self.rules and node.value not in self.tokens:
raise GrammarError(f"Dangling reference to rule {node.value!r}")
def visit_NamedItem(self, node: NamedItem) -> None:
@@ -37,7 +36,7 @@ class ParserGenerator:
callmakervisitor: GrammarVisitor
- def __init__(self, grammar: Grammar, tokens: Dict[int, str], file: Optional[IO[Text]]):
+ def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):
self.grammar = grammar
self.tokens = tokens
self.rules = grammar.rules
@@ -133,13 +132,22 @@ class ParserGenerator:
self.counter += 1
extra_function_name = f"_loop0_{self.counter}"
extra_function_alt = Alt(
- [NamedItem(None, node.separator), NamedItem("elem", node.node)], action="elem",
+ [NamedItem(None, node.separator), NamedItem("elem", node.node)],
+ action="elem",
)
self.todo[extra_function_name] = Rule(
- extra_function_name, None, Rhs([extra_function_alt]),
+ extra_function_name,
+ None,
+ Rhs([extra_function_alt]),
+ )
+ alt = Alt(
+ [NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],
+ )
+ self.todo[name] = Rule(
+ name,
+ None,
+ Rhs([alt]),
)
- alt = Alt([NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],)
- self.todo[name] = Rule(name, None, Rhs([alt]),)
return name
def dedupe(self, name: str) -> str:
diff --git a/Tools/peg_generator/pegen/python_generator.py b/Tools/peg_generator/pegen/python_generator.py
index b500e3e..201bf2ba 100644
--- a/Tools/peg_generator/pegen/python_generator.py
+++ b/Tools/peg_generator/pegen/python_generator.py
@@ -1,25 +1,28 @@
+import ast
+import re
import token
-from typing import Any, Dict, Optional, IO, Text, Tuple
+from typing import IO, Any, Dict, Optional, Sequence, Set, Text, Tuple
+from pegen import grammar
from pegen.grammar import (
+ Alt,
Cut,
+ Forced,
+ Gather,
GrammarVisitor,
- NameLeaf,
- StringLeaf,
- Rhs,
- NamedItem,
+ Group,
Lookahead,
- PositiveLookahead,
+ NamedItem,
+ NameLeaf,
NegativeLookahead,
Opt,
+ PositiveLookahead,
Repeat0,
Repeat1,
- Gather,
- Group,
+ Rhs,
Rule,
- Alt,
+ StringLeaf,
)
-from pegen import grammar
from pegen.parser_generator import ParserGenerator
MODULE_PREFIX = """\
@@ -27,7 +30,10 @@ MODULE_PREFIX = """\
# @generated by pegen from {filename}
import ast
-from typing import Optional, Any
+import sys
+import tokenize
+
+from typing import Any, Optional
from pegen.parser import memoize, memoize_left_rec, logger, Parser
@@ -36,25 +42,81 @@ MODULE_SUFFIX = """
if __name__ == '__main__':
from pegen.parser import simple_parser_main
- simple_parser_main(GeneratedParser)
+ simple_parser_main({class_name})
"""
+class InvalidNodeVisitor(GrammarVisitor):
+    """Tell whether a grammar node refers to a rule named ``invalid...``.
+
+    Each ``visit_*`` method returns ``True`` when the visited node, or a node
+    nested inside it, is a ``NameLeaf`` whose value starts with ``"invalid"``.
+    String leaves and cuts never count.
+    """
+
+    def visit_NameLeaf(self, node: NameLeaf) -> bool:
+        name = node.value
+        return name.startswith("invalid")
+
+    def visit_StringLeaf(self, node: StringLeaf) -> bool:
+        return False
+
+    def visit_NamedItem(self, node: NamedItem) -> bool:
+        return self.visit(node.item)
+
+    def visit_Rhs(self, node: Rhs) -> bool:
+        return any(self.visit(alt) for alt in node.alts)
+
+    def visit_Alt(self, node: Alt) -> bool:
+        return any(self.visit(item) for item in node.items)
+
+    def lookahead_call_helper(self, node: Lookahead) -> bool:
+        """Shared implementation for positive and negative lookaheads."""
+        return self.visit(node.node)
+
+    def visit_PositiveLookahead(self, node: PositiveLookahead) -> bool:
+        return self.lookahead_call_helper(node)
+
+    def visit_NegativeLookahead(self, node: NegativeLookahead) -> bool:
+        return self.lookahead_call_helper(node)
+
+    def visit_Opt(self, node: Opt) -> bool:
+        return self.visit(node.node)
+
+    def visit_Repeat(self, node: Repeat0) -> bool:
+        return self.visit(node.node)
+
+    # NOTE(review): every other handler in this class is named after the
+    # concrete node class it receives (visit_NameLeaf, visit_Opt, ...), so
+    # Repeat0/Repeat1 nodes presumably need handlers under their own class
+    # names to be reached at all -- confirm against GrammarVisitor.visit.
+    visit_Repeat0 = visit_Repeat
+    visit_Repeat1 = visit_Repeat
+
+    def visit_Gather(self, node: Gather) -> bool:
+        return self.visit(node.node)
+
+    def visit_Group(self, node: Group) -> bool:
+        return self.visit(node.rhs)
+
+    def visit_Cut(self, node: Cut) -> bool:
+        return False
+
+    def visit_Forced(self, node: Forced) -> bool:
+        return self.visit(node.node)
+
+
class PythonCallMakerVisitor(GrammarVisitor):
def __init__(self, parser_generator: ParserGenerator):
self.gen = parser_generator
self.cache: Dict[Any, Any] = {}
+ self.keywords: Set[str] = set()
+ self.soft_keywords: Set[str] = set()
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
name = node.value
- if name in ("NAME", "NUMBER", "STRING", "OP"):
+ if name == "SOFT_KEYWORD":
+ return "soft_keyword", "self.soft_keyword()"
+ if name in ("NAME", "NUMBER", "STRING", "OP", "TYPE_COMMENT"):
name = name.lower()
return name, f"self.{name}()"
if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER", "ASYNC", "AWAIT"):
- return name.lower(), f"self.expect({name!r})"
+ # Avoid using names that can be Python keywords
+ return "_" + name.lower(), f"self.expect({name!r})"
return name, f"self.{name}()"
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
+ val = ast.literal_eval(node.value)
+ if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
+ if node.value.endswith("'"):
+ self.keywords.add(val)
+ else:
+ self.soft_keywords.add(val)
return "literal", f"self.expect({node.value})"
def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
@@ -125,16 +187,36 @@ class PythonCallMakerVisitor(GrammarVisitor):
def visit_Cut(self, node: Cut) -> Tuple[str, str]:
return "cut", "True"
+    def visit_Forced(self, node: Forced) -> Tuple[str, str]:
+        """Build the ``("forced", call)`` pair for a forced item.
+
+        When the forced node is a ``Group``, the call wraps the code produced
+        for the group's rhs and passes the parenthesised text of that rhs as
+        the description. Otherwise the call wraps ``self.expect(<value>)`` and
+        passes the repr of the node's value as the description.
+        """
+        target = node.node
+        if not isinstance(target, Group):
+            call = f"self.expect_forced(self.expect({target.value}), {target.value!r})"
+            return "forced", call
+        _, inner_call = self.visit(target.rhs)
+        return "forced", f"self.expect_forced({inner_call}, '''({target.rhs!s})''')"
+
class PythonParserGenerator(ParserGenerator, GrammarVisitor):
def __init__(
self,
grammar: grammar.Grammar,
file: Optional[IO[Text]],
- tokens: Dict[int, str] = token.tok_name,
+ tokens: Set[str] = set(token.tok_name.values()),
+ location_formatting: Optional[str] = None,
+ unreachable_formatting: Optional[str] = None,
):
+ """Set up the generator and its helper visitors.
+
+ tokens: token names the grammar may reference; "SOFT_KEYWORD" is always
+ added to the set handed to the base class.
+ location_formatting: text substituted for "LOCATIONS" in rule actions;
+ falls back to the lineno/col_offset keyword list below.
+ unreachable_formatting: text substituted for "UNREACHABLE" in rule
+ actions; falls back to a "None" expression with a no-cover pragma.
+ """
+ # Build a new set instead of calling tokens.add(): the default value is
+ # created once when the function is defined, and a caller-supplied set
+ # belongs to the caller, so neither should be modified here.
+ tokens = {*tokens, "SOFT_KEYWORD"}
super().__init__(grammar, tokens, file)
- self.callmakervisitor = PythonCallMakerVisitor(self)
+ self.callmakervisitor: PythonCallMakerVisitor = PythonCallMakerVisitor(self)
+ self.invalidvisitor: InvalidNodeVisitor = InvalidNodeVisitor()
+ self.unreachable_formatting = unreachable_formatting or "None # pragma: no cover"
+ self.location_formatting = (
+ location_formatting
+ or "lineno=start_lineno, col_offset=start_col_offset, "
+ "end_lineno=end_lineno, end_col_offset=end_col_offset"
+ )
def generate(self, filename: str) -> None:
header = self.grammar.metas.get("header", MODULE_PREFIX)
@@ -142,18 +224,35 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
self.print(header.rstrip("\n").format(filename=filename))
subheader = self.grammar.metas.get("subheader", "")
if subheader:
- self.print(subheader.format(filename=filename))
- self.print("class GeneratedParser(Parser):")
+ self.print(subheader)
+ cls_name = self.grammar.metas.get("class", "GeneratedParser")
+ self.print("# Keywords and soft keywords are listed at the end of the parser definition.")
+ self.print(f"class {cls_name}(Parser):")
while self.todo:
for rulename, rule in list(self.todo.items()):
del self.todo[rulename]
self.print()
with self.indent():
self.visit(rule)
- trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX)
+
+ self.print()
+ with self.indent():
+ self.print(f"KEYWORDS = {tuple(self.callmakervisitor.keywords)}")
+ self.print(f"SOFT_KEYWORDS = {tuple(self.callmakervisitor.soft_keywords)}")
+
+ trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX.format(class_name=cls_name))
if trailer is not None:
self.print(trailer.rstrip("\n"))
+    def alts_uses_locations(self, alts: Sequence[Alt]) -> bool:
+        """Tell whether any of ``alts`` has an action mentioning ``LOCATIONS``.
+
+        Alternatives nested inside ``Group`` items are searched recursively.
+        """
+
+        def mentions_locations(alt: Alt) -> bool:
+            # The alternative's own action is checked before its items.
+            if alt.action and "LOCATIONS" in alt.action:
+                return True
+            return any(
+                isinstance(named.item, Group)
+                and self.alts_uses_locations(named.item.rhs.alts)
+                for named in alt.items
+            )
+
+        return any(mentions_locations(alt) for alt in alts)
+
def visit_Rule(self, node: Rule) -> None:
is_loop = node.is_loop()
is_gather = node.is_gather()
@@ -173,7 +272,10 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
self.print(f"# {node.name}: {rhs}")
if node.nullable:
self.print(f"# nullable={node.nullable}")
- self.print("mark = self.mark()")
+ self.print("mark = self._mark()")
+ if self.alts_uses_locations(node.rhs.alts):
+ self.print("tok = self._tokenizer.peek()")
+ self.print("start_lineno, start_col_offset = tok.start")
if is_loop:
self.print("children = []")
self.visit(rhs, is_loop=is_loop, is_gather=is_gather)
@@ -200,8 +302,10 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
self.visit(alt, is_loop=is_loop, is_gather=is_gather)
def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
+ has_cut = any(isinstance(item.item, Cut) for item in node.items)
with self.local_variable_context():
- self.print("cut = False") # TODO: Only if needed.
+ if has_cut:
+ self.print("cut = False")
if is_loop:
self.print("while (")
else:
@@ -227,12 +331,26 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
f"[{self.local_variable_names[0]}] + {self.local_variable_names[1]}"
)
else:
- action = f"[{', '.join(self.local_variable_names)}]"
+ if self.invalidvisitor.visit(node):
+ action = "UNREACHABLE"
+ elif len(self.local_variable_names) == 1:
+ action = f"{self.local_variable_names[0]}"
+ else:
+ action = f"[{', '.join(self.local_variable_names)}]"
+ elif "LOCATIONS" in action:
+ self.print("tok = self._tokenizer.get_last_non_whitespace_token()")
+ self.print("end_lineno, end_col_offset = tok.end")
+ action = action.replace("LOCATIONS", self.location_formatting)
+
if is_loop:
self.print(f"children.append({action})")
- self.print(f"mark = self.mark()")
+ self.print(f"mark = self._mark()")
else:
+ if "UNREACHABLE" in action:
+ action = action.replace("UNREACHABLE", self.unreachable_formatting)
self.print(f"return {action}")
- self.print("self.reset(mark)")
+
+ self.print("self._reset(mark)")
# Skip remaining alternatives if a cut was reached.
- self.print("if cut: return None") # TODO: Only if needed.
+ if has_cut:
+ self.print("if cut: return None")
diff --git a/Tools/peg_generator/pegen/testutil.py b/Tools/peg_generator/pegen/testutil.py
index 920d246..e0928a4 100644
--- a/Tools/peg_generator/pegen/testutil.py
+++ b/Tools/peg_generator/pegen/testutil.py
@@ -18,7 +18,7 @@ from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer
ALL_TOKENS = token.tok_name
-EXACT_TOKENS = token.EXACT_TOKEN_TYPES # type: ignore
+EXACT_TOKENS = token.EXACT_TOKEN_TYPES
NON_EXACT_TOKENS = {
name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values()
}
@@ -42,7 +42,7 @@ def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = F
parser = parser_class(tokenizer, verbose=verbose)
result = parser.start()
if result is None:
- raise parser.make_syntax_error()
+ raise parser.make_syntax_error("invalid syntax")
return result
@@ -66,6 +66,7 @@ def import_file(full_name: str, path: str) -> Any:
"""Import a python module from a path"""
spec = importlib.util.spec_from_file_location(full_name, path)
+ assert spec is not None
mod = importlib.util.module_from_spec(spec)
# We assume this is not None and has an exec_module() method.
diff --git a/Tools/peg_generator/pegen/tokenizer.py b/Tools/peg_generator/pegen/tokenizer.py
index 61a28ef..7ee49e1 100644
--- a/Tools/peg_generator/pegen/tokenizer.py
+++ b/Tools/peg_generator/pegen/tokenizer.py
@@ -1,10 +1,10 @@
import token
import tokenize
-from typing import List, Iterator
+from typing import Dict, Iterator, List
Mark = int # NewType('Mark', int)
-exact_token_types = token.EXACT_TOKEN_TYPES # type: ignore
+exact_token_types = token.EXACT_TOKEN_TYPES
def shorttok(tok: tokenize.TokenInfo) -> str:
@@ -19,26 +19,22 @@ class Tokenizer:
_tokens: List[tokenize.TokenInfo]
- def __init__(self, tokengen: Iterator[tokenize.TokenInfo], *, verbose: bool = False):
+ def __init__(
+ self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
+ ):
self._tokengen = tokengen
self._tokens = []
self._index = 0
self._verbose = verbose
+ self._lines: Dict[int, str] = {}
+ self._path = path
if verbose:
self.report(False, False)
def getnext(self) -> tokenize.TokenInfo:
"""Return the next token and updates the index."""
- cached = True
- while self._index == len(self._tokens):
- tok = next(self._tokengen)
- if tok.type in (tokenize.NL, tokenize.COMMENT):
- continue
- if tok.type == token.ERRORTOKEN and tok.string.isspace():
- continue
- self._tokens.append(tok)
- cached = False
- tok = self._tokens[self._index]
+ cached = not self._index == len(self._tokens)
+ tok = self.peek()
self._index += 1
if self._verbose:
self.report(cached, False)
@@ -52,7 +48,15 @@ class Tokenizer:
continue
if tok.type == token.ERRORTOKEN and tok.string.isspace():
continue
+ if (
+ tok.type == token.NEWLINE
+ and self._tokens
+ and self._tokens[-1].type == token.NEWLINE
+ ):
+ continue
self._tokens.append(tok)
+ if not self._path:
+ self._lines[tok.start[0]] = tok.line
return self._tokens[self._index]
def diagnose(self) -> tokenize.TokenInfo:
@@ -60,6 +64,34 @@ class Tokenizer:
self.getnext()
return self._tokens[-1]
+    def get_last_non_whitespace_token(self) -> tokenize.TokenInfo:
+        """Return the most recently consumed token that is not layout.
+
+        Tokens before the current index are scanned from newest to oldest;
+        ENDMARKER and any token whose type lies in the numeric range
+        ``tokenize.NEWLINE`` .. ``tokenize.DEDENT`` are passed over. When every
+        consumed token is passed over, the oldest one is returned; when no
+        token has been consumed yet, ``tok`` is never bound and the ``return``
+        fails.
+        """
+        consumed = self._tokens[: self._index]
+        for tok in reversed(consumed):
+            is_layout = tokenize.NEWLINE <= tok.type <= tokenize.DEDENT
+            if tok.type != tokenize.ENDMARKER and not is_layout:
+                break
+        return tok
+
+    def get_lines(self, line_numbers: List[int]) -> List[str]:
+        """Retrieve source lines corresponding to line numbers.
+
+        Lines recorded in ``self._lines`` are used when there are any;
+        otherwise the file at ``self._path`` is read.
+
+        Args:
+            line_numbers: line numbers to fetch, counting the first line of
+                the file as 1; duplicates are allowed.
+
+        Returns:
+            The requested lines, in the order given by ``line_numbers``.
+
+        Raises:
+            KeyError: if a requested line number is not available.
+        """
+        if self._lines:
+            lines = self._lines
+        else:
+            # A set keeps the per-line membership test cheap, and comparing
+            # against the number of distinct requests lets the scan stop
+            # early even when ``line_numbers`` contains duplicates.
+            wanted = set(line_numbers)
+            lines = {}
+            with open(self._path) as f:
+                for count, line in enumerate(f, start=1):
+                    if count in wanted:
+                        lines[count] = line
+                    if len(lines) == len(wanted):
+                        break
+
+        return [lines[n] for n in line_numbers]
+
def mark(self) -> Mark:
return self._index
diff --git a/Tools/peg_generator/pegen/validator.py b/Tools/peg_generator/pegen/validator.py
index 0e3dd41..e7d6980 100644
--- a/Tools/peg_generator/pegen/validator.py
+++ b/Tools/peg_generator/pegen/validator.py
@@ -1,51 +1,45 @@
+from typing import Optional
+
from pegen import grammar
from pegen.grammar import (
Alt,
- Cut,
- Gather,
GrammarVisitor,
- Group,
- Lookahead,
- NamedItem,
- NameLeaf,
- NegativeLookahead,
- Opt,
- PositiveLookahead,
- Repeat0,
- Repeat1,
- Rhs,
Rule,
- StringLeaf,
+ Rhs,
)
+
class ValidationError(Exception):
pass
+
class GrammarValidator(GrammarVisitor):
- def __init__(self, grammar: grammar.Grammar):
+ def __init__(self, grammar: grammar.Grammar) -> None:
self.grammar = grammar
- self.rulename = None
+ self.rulename: Optional[str] = None
- def validate_rule(self, rulename: str, node: Rule):
+ def validate_rule(self, rulename: str, node: Rule) -> None:
self.rulename = rulename
self.visit(node)
self.rulename = None
class SubRuleValidator(GrammarValidator):
- def visit_Rhs(self, node: Rule):
+ def visit_Rhs(self, node: Rhs) -> None:
for index, alt in enumerate(node.alts):
- alts_to_consider = node.alts[index+1:]
+ alts_to_consider = node.alts[index + 1 :]
for other_alt in alts_to_consider:
self.check_intersection(alt, other_alt)
- def check_intersection(self, first_alt: Alt, second_alt: Alt) -> bool:
+ def check_intersection(self, first_alt: Alt, second_alt: Alt) -> None:
if str(second_alt).startswith(str(first_alt)):
raise ValidationError(
- f"In {self.rulename} there is an alternative that will "
- f"never be visited:\n{second_alt}")
+ f"In {self.rulename} there is an alternative that will "
+ f"never be visited:\n{second_alt}"
+ )
+
-def validate_grammar(the_grammar: grammar.Grammar):
+def validate_grammar(the_grammar: grammar.Grammar) -> None:
for validator_cls in GrammarValidator.__subclasses__():
validator = validator_cls(the_grammar)
for rule_name, rule in the_grammar.rules.items():
diff --git a/Tools/peg_generator/scripts/benchmark.py b/Tools/peg_generator/scripts/benchmark.py
index 5fbedaa..4a063bf 100644
--- a/Tools/peg_generator/scripts/benchmark.py
+++ b/Tools/peg_generator/scripts/benchmark.py
@@ -76,7 +76,10 @@ def run_benchmark_stdlib(subcommand):
parse_directory(
"../../Lib",
verbose=False,
- excluded_files=["*/bad*", "*/lib2to3/tests/data/*",],
+ excluded_files=[
+ "*/bad*",
+ "*/lib2to3/tests/data/*",
+ ],
short=True,
mode=modes[subcommand],
)
diff --git a/Tools/peg_generator/scripts/download_pypi_packages.py b/Tools/peg_generator/scripts/download_pypi_packages.py
index 9874202..0af876c 100755
--- a/Tools/peg_generator/scripts/download_pypi_packages.py
+++ b/Tools/peg_generator/scripts/download_pypi_packages.py
@@ -8,7 +8,8 @@ from typing import Dict, Any
from urllib.request import urlretrieve
argparser = argparse.ArgumentParser(
- prog="download_pypi_packages", description="Helper program to download PyPI packages",
+ prog="download_pypi_packages",
+ description="Helper program to download PyPI packages",
)
argparser.add_argument(
"-n", "--number", type=int, default=100, help="Number of packages to download"
diff --git a/Tools/peg_generator/scripts/grammar_grapher.py b/Tools/peg_generator/scripts/grammar_grapher.py
index 4afdbce..4d77123 100755
--- a/Tools/peg_generator/scripts/grammar_grapher.py
+++ b/Tools/peg_generator/scripts/grammar_grapher.py
@@ -41,7 +41,10 @@ from pegen.grammar import (
Rhs,
)
-argparser = argparse.ArgumentParser(prog="graph_grammar", description="Graph a grammar tree",)
+argparser = argparse.ArgumentParser(
+ prog="graph_grammar",
+ description="Graph a grammar tree",
+)
argparser.add_argument(
"-s",
"--start",
diff --git a/Tools/peg_generator/scripts/test_pypi_packages.py b/Tools/peg_generator/scripts/test_pypi_packages.py
index f014753..e2eaef9 100755
--- a/Tools/peg_generator/scripts/test_pypi_packages.py
+++ b/Tools/peg_generator/scripts/test_pypi_packages.py
@@ -19,7 +19,8 @@ from scripts import test_parse_directory
HERE = pathlib.Path(__file__).resolve().parent
argparser = argparse.ArgumentParser(
- prog="test_pypi_packages", description="Helper program to test parsing PyPI packages",
+ prog="test_pypi_packages",
+ description="Helper program to test parsing PyPI packages",
)
argparser.add_argument(
"-t", "--tree", action="count", help="Compare parse tree to official AST", default=0