author    Pablo Galindo <Pablogsal@gmail.com>      2020-04-22 22:29:27 (GMT)
committer GitHub <noreply@github.com>              2020-04-22 22:29:27 (GMT)
commit    c5fc15685202cda73f7c3f5c6f299b0945f58508 (patch)
tree      7624ae45b95f2812e801c5879bdbdcb796f570a6 /Lib/test/test_peg_generator
parent    a81849b0315277bb3937271174aaaa5059c0b445 (diff)
bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503)
Co-authored-by: Guido van Rossum <guido@python.org>
Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
Diffstat (limited to 'Lib/test/test_peg_generator')
-rw-r--r--  Lib/test/test_peg_generator/__init__.py        |    7
-rw-r--r--  Lib/test/test_peg_generator/__main__.py        |    4
-rw-r--r--  Lib/test/test_peg_generator/ast_dump.py        |   62
-rw-r--r--  Lib/test/test_peg_generator/test_c_parser.py   |  333
-rw-r--r--  Lib/test/test_peg_generator/test_first_sets.py |  225
-rw-r--r--  Lib/test/test_peg_generator/test_pegen.py      |  728
6 files changed, 1359 insertions(+), 0 deletions(-)
diff --git a/Lib/test/test_peg_generator/__init__.py b/Lib/test/test_peg_generator/__init__.py
new file mode 100644
index 0000000..fa855f2
--- /dev/null
+++ b/Lib/test/test_peg_generator/__init__.py
@@ -0,0 +1,7 @@
+import os
+
+from test.support import load_package_tests
+
+# Load all tests in package
+def load_tests(*args):
+ return load_package_tests(os.path.dirname(__file__), *args)
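For orientation: `load_tests` above follows unittest's load_tests protocol, delegating
discovery of every test module in this package to test.support.load_package_tests
(which wraps loader.discover). A minimal manual sketch of roughly the same discovery
(an illustration only; assumes a CPython checkout as the working directory so Lib/
holds the package):

    import unittest

    # Discover the package's test modules much as load_package_tests does:
    suite = unittest.defaultTestLoader.discover(
        "Lib/test/test_peg_generator", top_level_dir="Lib"
    )
    unittest.TextTestRunner(verbosity=2).run(suite)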
diff --git a/Lib/test/test_peg_generator/__main__.py b/Lib/test/test_peg_generator/__main__.py
new file mode 100644
index 0000000..1fab1fd
--- /dev/null
+++ b/Lib/test/test_peg_generator/__main__.py
@@ -0,0 +1,4 @@
+import unittest
+from . import load_tests
+
+unittest.main()
diff --git a/Lib/test/test_peg_generator/ast_dump.py b/Lib/test/test_peg_generator/ast_dump.py
new file mode 100644
index 0000000..22d2dde
--- /dev/null
+++ b/Lib/test/test_peg_generator/ast_dump.py
@@ -0,0 +1,62 @@
+"""
+A copy of ast.dump with the `isinstance` checks removed. This is needed
+because testing pegen requires generating a C extension module, which
+contains a copy of the symbols defined in Python-ast.c, so the isinstance
+checks would always fail. We rely on string comparison of the base classes
+instead.
+TODO: Remove the above-described hack.
+"""
+
+def ast_dump(node, annotate_fields=True, include_attributes=False, *, indent=None):
+ def _format(node, level=0):
+ if indent is not None:
+ level += 1
+ prefix = '\n' + indent * level
+ sep = ',\n' + indent * level
+ else:
+ prefix = ''
+ sep = ', '
+ if any(cls.__name__ == 'AST' for cls in node.__class__.__mro__):
+ cls = type(node)
+ args = []
+ allsimple = True
+ keywords = annotate_fields
+ for name in node._fields:
+ try:
+ value = getattr(node, name)
+ except AttributeError:
+ keywords = True
+ continue
+ if value is None and getattr(cls, name, ...) is None:
+ keywords = True
+ continue
+ value, simple = _format(value, level)
+ allsimple = allsimple and simple
+ if keywords:
+ args.append('%s=%s' % (name, value))
+ else:
+ args.append(value)
+ if include_attributes and node._attributes:
+ for name in node._attributes:
+ try:
+ value = getattr(node, name)
+ except AttributeError:
+ continue
+ if value is None and getattr(cls, name, ...) is None:
+ continue
+ value, simple = _format(value, level)
+ allsimple = allsimple and simple
+ args.append('%s=%s' % (name, value))
+ if allsimple and len(args) <= 3:
+ return '%s(%s)' % (node.__class__.__name__, ', '.join(args)), not args
+ return '%s(%s%s)' % (node.__class__.__name__, prefix, sep.join(args)), False
+ elif isinstance(node, list):
+ if not node:
+ return '[]', True
+ return '[%s%s]' % (prefix, sep.join(_format(x, level)[0] for x in node)), False
+ return repr(node), True
+
+ if all(cls.__name__ != 'AST' for cls in node.__class__.__mro__):
+ raise TypeError('expected AST, got %r' % node.__class__.__name__)
+ if indent is not None and not isinstance(indent, str):
+ indent = ' ' * indent
+ return _format(node)[0]
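A quick sketch of how this helper relates to the stdlib: for ordinary ast nodes the
duck-typed walk above should produce the same string as ast.dump, and the difference
only matters for the look-alike nodes coming out of the generated C extension, which
would fail ast.dump's isinstance check (hedged: behaviour inferred from the copied
body above):

    import ast
    from test.test_peg_generator.ast_dump import ast_dump

    tree = ast.parse("x = 1")
    # Same output as the stdlib for genuine ast nodes...
    assert ast_dump(tree) == ast.dump(tree)
    # ...but ast_dump also accepts extension nodes whose classes merely have
    # an 'AST' base-class *name*, which isinstance(node, ast.AST) rejects.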
diff --git a/Lib/test/test_peg_generator/test_c_parser.py b/Lib/test/test_peg_generator/test_c_parser.py
new file mode 100644
index 0000000..f2f699c
--- /dev/null
+++ b/Lib/test/test_peg_generator/test_c_parser.py
@@ -0,0 +1,333 @@
+import ast
+import contextlib
+import traceback
+import tempfile
+import shutil
+import unittest
+import sys
+
+from test import test_tools
+from test.test_peg_generator.ast_dump import ast_dump
+from pathlib import PurePath, Path
+from typing import Sequence
+
+test_tools.skip_if_missing('peg_generator')
+with test_tools.imports_under_tool('peg_generator'):
+ from pegen.grammar_parser import GeneratedParser as GrammarParser
+ from pegen.testutil import (
+ parse_string,
+ generate_parser_c_extension,
+ generate_c_parser_source,
+ )
+
+
+class TestCParser(unittest.TestCase):
+ def setUp(self):
+ self.tmp_path = tempfile.mkdtemp()
+
+ def tearDown(self):
+ with contextlib.suppress(PermissionError):
+ shutil.rmtree(self.tmp_path)
+
+ def check_input_strings_for_grammar(
+ self,
+ source: str,
+ tmp_path: PurePath,
+ valid_cases: Sequence[str] = (),
+ invalid_cases: Sequence[str] = (),
+ ) -> None:
+ grammar = parse_string(source, GrammarParser)
+ extension = generate_parser_c_extension(grammar, Path(tmp_path))
+
+ if valid_cases:
+ for case in valid_cases:
+ extension.parse_string(case, mode=0)
+
+ if invalid_cases:
+ for case in invalid_cases:
+ with self.assertRaises(SyntaxError):
+ extension.parse_string(case, mode=0)
+
+ def verify_ast_generation(self, source: str, stmt: str, tmp_path: PurePath) -> None:
+ grammar = parse_string(source, GrammarParser)
+ extension = generate_parser_c_extension(grammar, Path(tmp_path))
+
+ expected_ast = ast.parse(stmt)
+ actual_ast = extension.parse_string(stmt, mode=1)
+ self.assertEqual(ast_dump(expected_ast), ast_dump(actual_ast))
+
+ def test_c_parser(self) -> None:
+ grammar_source = """
+ start[mod_ty]: a=stmt* $ { Module(a, NULL, p->arena) }
+ stmt[stmt_ty]: a=expr_stmt { a }
+ expr_stmt[stmt_ty]: a=expression NEWLINE { _Py_Expr(a, EXTRA) }
+ expression[expr_ty]: ( l=expression '+' r=term { _Py_BinOp(l, Add, r, EXTRA) }
+ | l=expression '-' r=term { _Py_BinOp(l, Sub, r, EXTRA) }
+ | t=term { t }
+ )
+ term[expr_ty]: ( l=term '*' r=factor { _Py_BinOp(l, Mult, r, EXTRA) }
+ | l=term '/' r=factor { _Py_BinOp(l, Div, r, EXTRA) }
+ | f=factor { f }
+ )
+ factor[expr_ty]: ('(' e=expression ')' { e }
+ | a=atom { a }
+ )
+ atom[expr_ty]: ( n=NAME { n }
+ | n=NUMBER { n }
+ | s=STRING { s }
+ )
+ """
+ grammar = parse_string(grammar_source, GrammarParser)
+ extension = generate_parser_c_extension(grammar, Path(self.tmp_path))
+
+ expressions = [
+ "4+5",
+ "4-5",
+ "4*5",
+ "1+4*5",
+ "1+4/5",
+ "(1+1) + (1+1)",
+ "(1+1) - (1+1)",
+ "(1+1) * (1+1)",
+ "(1+1) / (1+1)",
+ ]
+
+ for expr in expressions:
+ the_ast = extension.parse_string(expr, mode=1)
+ expected_ast = ast.parse(expr)
+ self.assertEqual(ast_dump(the_ast), ast_dump(expected_ast))
+
+ def test_lookahead(self) -> None:
+ grammar = """
+ start: NAME &NAME expr NEWLINE? ENDMARKER
+ expr: NAME | NUMBER
+ """
+ valid_cases = ["foo bar"]
+ invalid_cases = ["foo 34"]
+ self.check_input_strings_for_grammar(grammar, self.tmp_path, valid_cases, invalid_cases)
+
+ def test_negative_lookahead(self) -> None:
+ grammar = """
+ start: NAME !NAME expr NEWLINE? ENDMARKER
+ expr: NAME | NUMBER
+ """
+ valid_cases = ["foo 34"]
+ invalid_cases = ["foo bar"]
+ self.check_input_strings_for_grammar(grammar, self.tmp_path, valid_cases, invalid_cases)
+
+ def test_cut(self) -> None:
+ grammar = """
+ start: X ~ Y Z | X Q S
+ X: 'x'
+ Y: 'y'
+ Z: 'z'
+ Q: 'q'
+ S: 's'
+ """
+ valid_cases = ["x y z"]
+ invalid_cases = ["x q s"]
+ self.check_input_strings_for_grammar(grammar, self.tmp_path, valid_cases, invalid_cases)
+
+ def test_gather(self) -> None:
+ grammar = """
+ start: ';'.pass_stmt+ NEWLINE
+ pass_stmt: 'pass'
+ """
+ valid_cases = ["pass", "pass; pass"]
+ invalid_cases = ["pass;", "pass; pass;"]
+ self.check_input_strings_for_grammar(grammar, self.tmp_path, valid_cases, invalid_cases)
+
+ def test_left_recursion(self) -> None:
+ grammar = """
+ start: expr NEWLINE
+ expr: ('-' term | expr '+' term | term)
+ term: NUMBER
+ """
+ valid_cases = ["-34", "34", "34 + 12", "1 + 1 + 2 + 3"]
+ self.check_input_strings_for_grammar(grammar, self.tmp_path, valid_cases)
+
+ def test_advanced_left_recursive(self) -> None:
+ grammar = """
+ start: NUMBER | sign start
+ sign: ['-']
+ """
+ valid_cases = ["23", "-34"]
+ self.check_input_strings_for_grammar(grammar, self.tmp_path, valid_cases)
+
+ def test_mutually_left_recursive(self) -> None:
+ grammar = """
+ start: foo 'E'
+ foo: bar 'A' | 'B'
+ bar: foo 'C' | 'D'
+ """
+ valid_cases = ["B E", "D A C A E"]
+ self.check_input_strings_for_grammar(grammar, self.tmp_path, valid_cases)
+
+ def test_nasty_mutually_left_recursive(self) -> None:
+ grammar = """
+ start: target '='
+ target: maybe '+' | NAME
+ maybe: maybe '-' | target
+ """
+ valid_cases = ["x ="]
+ invalid_cases = ["x - + ="]
+ self.check_input_strings_for_grammar(grammar, self.tmp_path, valid_cases, invalid_cases)
+
+ def test_return_stmt_noexpr_action(self) -> None:
+ grammar = """
+ start[mod_ty]: a=[statements] ENDMARKER { Module(a, NULL, p->arena) }
+ statements[asdl_seq*]: a=statement+ { a }
+ statement[stmt_ty]: simple_stmt
+ simple_stmt[stmt_ty]: small_stmt
+ small_stmt[stmt_ty]: return_stmt
+ return_stmt[stmt_ty]: a='return' NEWLINE { _Py_Return(NULL, EXTRA) }
+ """
+ stmt = "return"
+ self.verify_ast_generation(grammar, stmt, self.tmp_path)
+
+ def test_gather_action_ast(self) -> None:
+ grammar = """
+ start[mod_ty]: a=';'.pass_stmt+ NEWLINE ENDMARKER { Module(a, NULL, p->arena) }
+ pass_stmt[stmt_ty]: a='pass' { _Py_Pass(EXTRA)}
+ """
+ stmt = "pass; pass"
+ self.verify_ast_generation(grammar, stmt, self.tmp_path)
+
+ def test_pass_stmt_action(self) -> None:
+ grammar = """
+ start[mod_ty]: a=[statements] ENDMARKER { Module(a, NULL, p->arena) }
+ statements[asdl_seq*]: a=statement+ { a }
+ statement[stmt_ty]: simple_stmt
+ simple_stmt[stmt_ty]: small_stmt
+ small_stmt[stmt_ty]: pass_stmt
+ pass_stmt[stmt_ty]: a='pass' NEWLINE { _Py_Pass(EXTRA) }
+ """
+ stmt = "pass"
+ self.verify_ast_generation(grammar, stmt, self.tmp_path)
+
+ def test_if_stmt_action(self) -> None:
+ grammar = """
+ start[mod_ty]: a=[statements] ENDMARKER { Module(a, NULL, p->arena) }
+ statements[asdl_seq*]: a=statement+ { _PyPegen_seq_flatten(p, a) }
+ statement[asdl_seq*]: a=compound_stmt { _PyPegen_singleton_seq(p, a) } | simple_stmt
+
+ simple_stmt[asdl_seq*]: a=small_stmt b=further_small_stmt* [';'] NEWLINE { _PyPegen_seq_insert_in_front(p, a, b) }
+ further_small_stmt[stmt_ty]: ';' a=small_stmt { a }
+
+ block: simple_stmt | NEWLINE INDENT a=statements DEDENT { a }
+
+ compound_stmt: if_stmt
+
+ if_stmt: 'if' a=full_expression ':' b=block { _Py_If(a, b, NULL, EXTRA) }
+
+ small_stmt[stmt_ty]: pass_stmt
+
+ pass_stmt[stmt_ty]: a='pass' { _Py_Pass(EXTRA) }
+
+ full_expression: NAME
+ """
+ stmt = "pass"
+ self.verify_ast_generation(grammar, stmt, self.tmp_path)
+
+ def test_same_name_different_types(self) -> None:
+ source = """
+ start[mod_ty]: a=import_from+ NEWLINE ENDMARKER { Module(a, NULL, p->arena)}
+ import_from[stmt_ty]: ( a='from' !'import' c=simple_name 'import' d=import_as_names_from {
+ _Py_ImportFrom(c->v.Name.id, d, 0, EXTRA) }
+ | a='from' '.' 'import' c=import_as_names_from {
+ _Py_ImportFrom(NULL, c, 1, EXTRA) }
+ )
+ simple_name[expr_ty]: NAME
+ import_as_names_from[asdl_seq*]: a=','.import_as_name_from+ { a }
+ import_as_name_from[alias_ty]: a=NAME 'as' b=NAME { _Py_alias(((expr_ty) a)->v.Name.id, ((expr_ty) b)->v.Name.id, p->arena) }
+ """
+ grammar = parse_string(source, GrammarParser)
+ extension = generate_parser_c_extension(grammar, Path(self.tmp_path))
+
+ for stmt in ("from a import b as c", "from . import a as b"):
+ expected_ast = ast.parse(stmt)
+ actual_ast = extension.parse_string(stmt, mode=1)
+ self.assertEqual(ast_dump(expected_ast), ast_dump(actual_ast))
+
+ def test_with_stmt_with_paren(self) -> None:
+ grammar_source = """
+ start[mod_ty]: a=[statements] ENDMARKER { Module(a, NULL, p->arena) }
+ statements[asdl_seq*]: a=statement+ { _PyPegen_seq_flatten(p, a) }
+ statement[asdl_seq*]: a=compound_stmt { _PyPegen_singleton_seq(p, a) }
+ compound_stmt[stmt_ty]: with_stmt
+ with_stmt[stmt_ty]: (
+ a='with' '(' b=','.with_item+ ')' ':' c=block {
+ _Py_With(b, _PyPegen_singleton_seq(p, c), NULL, EXTRA) }
+ )
+ with_item[withitem_ty]: (
+ e=NAME o=['as' t=NAME { t }] { _Py_withitem(e, _PyPegen_set_expr_context(p, o, Store), p->arena) }
+ )
+ block[stmt_ty]: a=pass_stmt NEWLINE { a } | NEWLINE INDENT a=pass_stmt DEDENT { a }
+ pass_stmt[stmt_ty]: a='pass' { _Py_Pass(EXTRA) }
+ """
+ stmt = "with (\n a as b,\n c as d\n): pass"
+ grammar = parse_string(grammar_source, GrammarParser)
+ extension = generate_parser_c_extension(grammar, Path(self.tmp_path))
+ the_ast = extension.parse_string(stmt, mode=1)
+ self.assertTrue(ast_dump(the_ast).startswith(
+ "Module(body=[With(items=[withitem(context_expr=Name(id='a', ctx=Load()), optional_vars=Name(id='b', ctx=Store())), "
+ "withitem(context_expr=Name(id='c', ctx=Load()), optional_vars=Name(id='d', ctx=Store()))]"
+ ))
+
+ def test_ternary_operator(self) -> None:
+ grammar_source = """
+ start[mod_ty]: a=expr ENDMARKER { Module(a, NULL, p->arena) }
+ expr[asdl_seq*]: a=listcomp NEWLINE { _PyPegen_singleton_seq(p, _Py_Expr(a, EXTRA)) }
+ listcomp[expr_ty]: (
+ a='[' b=NAME c=for_if_clauses d=']' { _Py_ListComp(b, c, EXTRA) }
+ )
+ for_if_clauses[asdl_seq*]: (
+ a=(y=[ASYNC] 'for' a=NAME 'in' b=NAME c=('if' z=NAME { z })*
+ { _Py_comprehension(_Py_Name(((expr_ty) a)->v.Name.id, Store, EXTRA), b, c, (y == NULL) ? 0 : 1, p->arena) })+ { a }
+ )
+ """
+ stmt = "[i for i in a if b]"
+ self.verify_ast_generation(grammar_source, stmt, self.tmp_path)
+
+ def test_syntax_error_for_string(self) -> None:
+ grammar_source = """
+ start: expr+ NEWLINE? ENDMARKER
+ expr: NAME
+ """
+ grammar = parse_string(grammar_source, GrammarParser)
+ extension = generate_parser_c_extension(grammar, Path(self.tmp_path))
+ for text in ("a b 42 b a", "名 名 42 名 名"):
+ try:
+ extension.parse_string(text, mode=0)
+ except SyntaxError:
+ tb = traceback.format_exc()
+ self.assertTrue('File "<string>", line 1' in tb)
+ self.assertTrue("SyntaxError: invalid syntax" in tb)
+ else:
+ self.fail("parse_string() did not raise a SyntaxError")
+
+ def test_headers_and_trailer(self) -> None:
+ grammar_source = """
+ @header 'SOME HEADER'
+ @subheader 'SOME SUBHEADER'
+ @trailer 'SOME TRAILER'
+ start: expr+ NEWLINE? ENDMARKER
+ expr: x=NAME
+ """
+ grammar = parse_string(grammar_source, GrammarParser)
+ parser_source = generate_c_parser_source(grammar)
+
+ self.assertTrue("SOME HEADER" in parser_source)
+ self.assertTrue("SOME SUBHEADER" in parser_source)
+ self.assertTrue("SOME TRAILER" in parser_source)
+
+
+ def test_error_in_rules(self) -> None:
+ grammar_source = """
+ start: expr+ NEWLINE? ENDMARKER
+ expr: NAME {PyTuple_New(-1)}
+ """
+ grammar = parse_string(grammar_source, GrammarParser)
+ extension = generate_parser_c_extension(grammar, Path(self.tmp_path))
+ # PyTuple_New raises SystemError if an invalid argument was passed.
+ with self.assertRaises(SystemError):
+ extension.parse_string("a", mode=0)
diff --git a/Lib/test/test_peg_generator/test_first_sets.py b/Lib/test/test_peg_generator/test_first_sets.py
new file mode 100644
index 0000000..425ee23
--- /dev/null
+++ b/Lib/test/test_peg_generator/test_first_sets.py
@@ -0,0 +1,225 @@
+import unittest
+
+from test import test_tools
+from typing import Dict, Set
+
+test_tools.skip_if_missing('peg_generator')
+with test_tools.imports_under_tool('peg_generator'):
+ from pegen.grammar_parser import GeneratedParser as GrammarParser
+ from pegen.testutil import parse_string
+ from pegen.first_sets import FirstSetCalculator
+ from pegen.grammar import Grammar
+
+
+class TestFirstSets(unittest.TestCase):
+ def calculate_first_sets(self, grammar_source: str) -> Dict[str, Set[str]]:
+ grammar: Grammar = parse_string(grammar_source, GrammarParser)
+ return FirstSetCalculator(grammar.rules).calculate()
+
+ def test_alternatives(self) -> None:
+ grammar = """
+ start: expr NEWLINE? ENDMARKER
+ expr: A | B
+ A: 'a' | '-'
+ B: 'b' | '+'
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {
+ "A": {"'a'", "'-'"},
+ "B": {"'+'", "'b'"},
+ "expr": {"'+'", "'a'", "'b'", "'-'"},
+ "start": {"'+'", "'a'", "'b'", "'-'"},
+ })
+
+ def test_optionals(self) -> None:
+ grammar = """
+ start: expr NEWLINE
+ expr: ['a'] ['b'] 'c'
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {
+ "expr": {"'c'", "'a'", "'b'"},
+ "start": {"'c'", "'a'", "'b'"},
+ })
+
+ def test_repeat_with_separator(self) -> None:
+ grammar = """
+ start: ','.thing+ NEWLINE
+ thing: NUMBER
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
+
+ def test_optional_operator(self) -> None:
+ grammar = """
+ start: sum NEWLINE
+ sum: (term)? 'b'
+ term: NUMBER
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {
+ "term": {"NUMBER"},
+ "sum": {"NUMBER", "'b'"},
+ "start": {"'b'", "NUMBER"},
+ })
+
+ def test_optional_literal(self) -> None:
+ grammar = """
+ start: sum NEWLINE
+ sum: '+' ? term
+ term: NUMBER
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {
+ "term": {"NUMBER"},
+ "sum": {"'+'", "NUMBER"},
+ "start": {"'+'", "NUMBER"},
+ })
+
+ def test_optional_after(self) -> None:
+ grammar = """
+ start: term NEWLINE
+ term: NUMBER ['+']
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"NUMBER"}})
+
+ def test_optional_before(self) -> None:
+ grammar = """
+ start: term NEWLINE
+ term: ['+'] NUMBER
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}})
+
+ def test_repeat_0(self) -> None:
+ grammar = """
+ start: thing* "+" NEWLINE
+ thing: NUMBER
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}})
+
+ def test_repeat_0_with_group(self) -> None:
+ grammar = """
+ start: ('+' '-')* term NEWLINE
+ term: NUMBER
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}})
+
+ def test_repeat_1(self) -> None:
+ grammar = """
+ start: thing+ '-' NEWLINE
+ thing: NUMBER
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
+
+ def test_repeat_1_with_group(self) -> None:
+ grammar = """
+ start: ('+' term)+ term NEWLINE
+ term: NUMBER
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}})
+
+ def test_gather(self) -> None:
+ grammar = """
+ start: ','.thing+ NEWLINE
+ thing: NUMBER
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
+
+ def test_positive_lookahead(self) -> None:
+ grammar = """
+ start: expr NEWLINE
+ expr: &'a' opt
+ opt: 'a' | 'b' | 'c'
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {
+ "expr": {"'a'"},
+ "start": {"'a'"},
+ "opt": {"'b'", "'c'", "'a'"},
+ })
+
+ def test_negative_lookahead(self) -> None:
+ grammar = """
+ start: expr NEWLINE
+ expr: !'a' opt
+ opt: 'a' | 'b' | 'c'
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {
+ "opt": {"'b'", "'a'", "'c'"},
+ "expr": {"'b'", "'c'"},
+ "start": {"'b'", "'c'"},
+ })
+
+ def test_left_recursion(self) -> None:
+ grammar = """
+ start: expr NEWLINE
+ expr: ('-' term | expr '+' term | term)
+ term: NUMBER
+ foo: 'foo'
+ bar: 'bar'
+ baz: 'baz'
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {
+ "expr": {"NUMBER", "'-'"},
+ "term": {"NUMBER"},
+ "start": {"NUMBER", "'-'"},
+ "foo": {"'foo'"},
+ "bar": {"'bar'"},
+ "baz": {"'baz'"},
+ })
+
+ def test_advance_left_recursion(self) -> None:
+ grammar = """
+ start: NUMBER | sign start
+ sign: ['-']
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}})
+
+ def test_mutual_left_recursion(self) -> None:
+ grammar = """
+ start: foo 'E'
+ foo: bar 'A' | 'B'
+ bar: foo 'C' | 'D'
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {
+ "foo": {"'D'", "'B'"},
+ "bar": {"'D'"},
+ "start": {"'D'", "'B'"},
+ })
+
+ def test_nasty_left_recursion(self) -> None:
+ # TODO: Validate this
+ grammar = """
+ start: target '='
+ target: maybe '+' | NAME
+ maybe: maybe '-' | target
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}})
+
+ def test_nullable_rule(self) -> None:
+ grammar = """
+ start: sign thing $
+ sign: ['-']
+ thing: NUMBER
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {
+ "sign": {"", "'-'"},
+ "thing": {"NUMBER"},
+ "start": {"NUMBER", "'-'"},
+ })
+
+ def test_epsilon_production_in_start_rule(self) -> None:
+ grammar = """
+ start: ['-'] $
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}})
+
+ def test_multiple_nullable_rules(self) -> None:
+ grammar = """
+ start: sign thing other another $
+ sign: ['-']
+ thing: ['+']
+ other: '*'
+ another: '/'
+ """
+ self.assertEqual(self.calculate_first_sets(grammar), {
+ "sign": {"", "'-'"},
+ "thing": {"'+'", ""},
+ "start": {"'+'", "'-'", "'*'"},
+ "other": {"'*'"},
+ "another": {"'/'"},
+ })
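As a worked example of what FirstSetCalculator computes in the tests above: FIRST(rule)
is the set of tokens that can begin a match of the rule, the empty string marks a
nullable rule, and a nullable item lets the following item's tokens leak into the
enclosing FIRST set. A sketch using the imports at the top of this file (the grammar
is illustrative):

    source = """
    start: sign NUMBER
    sign: ['-']
    """
    grammar = parse_string(source, GrammarParser)
    first_sets = FirstSetCalculator(grammar.rules).calculate()
    # sign is nullable, so "" is in its FIRST set and NUMBER shows up in
    # start's FIRST set through it:
    assert first_sets == {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}}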
diff --git a/Lib/test/test_peg_generator/test_pegen.py b/Lib/test/test_peg_generator/test_pegen.py
new file mode 100644
index 0000000..581c7ac
--- /dev/null
+++ b/Lib/test/test_peg_generator/test_pegen.py
@@ -0,0 +1,728 @@
+import io
+import textwrap
+import unittest
+
+from test import test_tools
+from typing import Any, Dict, List, Type
+from tokenize import TokenInfo, NAME, NEWLINE, NUMBER, OP
+
+test_tools.skip_if_missing('peg_generator')
+with test_tools.imports_under_tool('peg_generator'):
+ from pegen.grammar_parser import GeneratedParser as GrammarParser
+ from pegen.testutil import (
+ parse_string,
+ generate_parser,
+ make_parser
+ )
+ from pegen.grammar import GrammarVisitor, GrammarError, Grammar
+ from pegen.grammar_visualizer import ASTGrammarPrinter
+ from pegen.parser import Parser
+ from pegen.python_generator import PythonParserGenerator
+
+
+class TestPegen(unittest.TestCase):
+ def test_parse_grammar(self) -> None:
+ grammar_source = """
+ start: sum NEWLINE
+ sum: t1=term '+' t2=term { action } | term
+ term: NUMBER
+ """
+ expected = """
+ start: sum NEWLINE
+ sum: term '+' term | term
+ term: NUMBER
+ """
+ grammar: Grammar = parse_string(grammar_source, GrammarParser)
+ rules = grammar.rules
+ self.assertEqual(str(grammar), textwrap.dedent(expected).strip())
+ # Check the str() and repr() of a few rules; AST nodes don't support ==.
+ self.assertEqual(str(rules["start"]), "start: sum NEWLINE")
+ self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
+ expected_repr = "Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
+ self.assertEqual(repr(rules["term"]), expected_repr)
+
+ def test_long_rule_str(self) -> None:
+ grammar_source = """
+ start: zero | one | one zero | one one | one zero zero | one zero one | one one zero | one one one
+ """
+ expected = """
+ start:
+ | zero
+ | one
+ | one zero
+ | one one
+ | one zero zero
+ | one zero one
+ | one one zero
+ | one one one
+ """
+ grammar: Grammar = parse_string(grammar_source, GrammarParser)
+ self.assertEqual(str(grammar.rules["start"]), textwrap.dedent(expected).strip())
+
+ def test_typed_rules(self) -> None:
+ grammar = """
+ start[int]: sum NEWLINE
+ sum[int]: t1=term '+' t2=term { action } | term
+ term[int]: NUMBER
+ """
+ rules = parse_string(grammar, GrammarParser).rules
+ # Check the str() and repr() of a few rules; AST nodes don't support ==.
+ self.assertEqual(str(rules["start"]), "start: sum NEWLINE")
+ self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
+ self.assertEqual(
+ repr(rules["term"]),
+ "Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
+ )
+
+ def test_repeat_with_separator_rules(self) -> None:
+ grammar = """
+ start: ','.thing+ NEWLINE
+ thing: NUMBER
+ """
+ rules = parse_string(grammar, GrammarParser).rules
+ self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE")
+ self.assertTrue(repr(rules["start"]).startswith(
+ "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
+ ))
+ self.assertEqual(str(rules["thing"]), "thing: NUMBER")
+
+ def test_expr_grammar(self) -> None:
+ grammar = """
+ start: sum NEWLINE
+ sum: term '+' term | term
+ term: NUMBER
+ """
+ parser_class = make_parser(grammar)
+ node = parse_string("42\n", parser_class)
+ self.assertEqual(node, [
+ [[TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n")]],
+ TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
+ ])
+
+ def test_optional_operator(self) -> None:
+ grammar = """
+ start: sum NEWLINE
+ sum: term ('+' term)?
+ term: NUMBER
+ """
+ parser_class = make_parser(grammar)
+ node = parse_string("1+2\n", parser_class)
+ self.assertEqual(node, [
+ [
+ [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+2\n")],
+ [
+ TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+2\n"),
+ [TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1+2\n")],
+ ],
+ ],
+ TokenInfo(NEWLINE, string="\n", start=(1, 3), end=(1, 4), line="1+2\n"),
+ ])
+ node = parse_string("1\n", parser_class)
+ self.assertEqual(node, [
+ [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
+ TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
+ ])
+
+ def test_optional_literal(self) -> None:
+ grammar = """
+ start: sum NEWLINE
+ sum: term '+' ?
+ term: NUMBER
+ """
+ parser_class = make_parser(grammar)
+ node = parse_string("1+\n", parser_class)
+ self.assertEqual(node, [
+ [
+ [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n")],
+ TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
+ ],
+ TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
+ ])
+ node = parse_string("1\n", parser_class)
+ self.assertEqual(node, [
+ [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
+ TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
+ ])
+
+ def test_alt_optional_operator(self) -> None:
+ grammar = """
+ start: sum NEWLINE
+ sum: term ['+' term]
+ term: NUMBER
+ """
+ parser_class = make_parser(grammar)
+ node = parse_string("1 + 2\n", parser_class)
+ self.assertEqual(node, [
+ [
+ [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n")],
+ [
+ TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"),
+ [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n")],
+ ],
+ ],
+ TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"),
+ ])
+ node = parse_string("1\n", parser_class)
+ self.assertEqual(node, [
+ [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
+ TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
+ ])
+
+ def test_repeat_0_simple(self) -> None:
+ grammar = """
+ start: thing thing* NEWLINE
+ thing: NUMBER
+ """
+ parser_class = make_parser(grammar)
+ node = parse_string("1 2 3\n", parser_class)
+ self.assertEqual(node, [
+ [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")],
+ [
+ [[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]],
+ [[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]],
+ ],
+ TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"),
+ ])
+ node = parse_string("1\n", parser_class)
+ self.assertEqual(node, [
+ [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")],
+ [],
+ TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
+ ])
+
+ def test_repeat_0_complex(self) -> None:
+ grammar = """
+ start: term ('+' term)* NEWLINE
+ term: NUMBER
+ """
+ parser_class = make_parser(grammar)
+ node = parse_string("1 + 2 + 3\n", parser_class)
+ self.assertEqual(node, [
+ [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")],
+ [
+ [
+ [
+ TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
+ [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")],
+ ]
+ ],
+ [
+ [
+ TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
+ [TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")],
+ ]
+ ],
+ ],
+ TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
+ ])
+
+ def test_repeat_1_simple(self) -> None:
+ grammar = """
+ start: thing thing+ NEWLINE
+ thing: NUMBER
+ """
+ parser_class = make_parser(grammar)
+ node = parse_string("1 2 3\n", parser_class)
+ self.assertEqual(node, [
+ [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")],
+ [
+ [[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]],
+ [[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]],
+ ],
+ TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"),
+ ])
+ with self.assertRaises(SyntaxError):
+ parse_string("1\n", parser_class)
+
+ def test_repeat_1_complex(self) -> None:
+ grammar = """
+ start: term ('+' term)+ NEWLINE
+ term: NUMBER
+ """
+ parser_class = make_parser(grammar)
+ node = parse_string("1 + 2 + 3\n", parser_class)
+ self.assertEqual(node, [
+ [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")],
+ [
+ [
+ [
+ TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
+ [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")],
+ ]
+ ],
+ [
+ [
+ TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
+ [TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")],
+ ]
+ ],
+ ],
+ TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
+ ])
+ with self.assertRaises(SyntaxError):
+ parse_string("1\n", parser_class)
+
+ def test_repeat_with_sep_simple(self) -> None:
+ grammar = """
+ start: ','.thing+ NEWLINE
+ thing: NUMBER
+ """
+ parser_class = make_parser(grammar)
+ node = parse_string("1, 2, 3\n", parser_class)
+ self.assertEqual(node, [
+ [
+ [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n")],
+ [TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n")],
+ [TokenInfo(NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n")],
+ ],
+ TokenInfo(NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"),
+ ])
+
+ def test_left_recursive(self) -> None:
+ grammar_source = """
+ start: expr NEWLINE
+ expr: ('-' term | expr '+' term | term)
+ term: NUMBER
+ foo: NAME+
+ bar: NAME*
+ baz: NAME?
+ """
+ grammar: Grammar = parse_string(grammar_source, GrammarParser)
+ parser_class = generate_parser(grammar)
+ rules = grammar.rules
+ self.assertFalse(rules["start"].left_recursive)
+ self.assertTrue(rules["expr"].left_recursive)
+ self.assertFalse(rules["term"].left_recursive)
+ self.assertFalse(rules["foo"].left_recursive)
+ self.assertFalse(rules["bar"].left_recursive)
+ self.assertFalse(rules["baz"].left_recursive)
+ node = parse_string("1 + 2 + 3\n", parser_class)
+ self.assertEqual(node, [
+ [
+ [
+ [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")]],
+ TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
+ [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")],
+ ],
+ TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
+ [TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")],
+ ],
+ TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
+ ])
+
+ def test_python_expr(self) -> None:
+ grammar = """
+ start: expr NEWLINE? $ { ast.Expression(expr, lineno=1, col_offset=0) }
+ expr: ( expr '+' term { ast.BinOp(expr, ast.Add(), term, lineno=expr.lineno, col_offset=expr.col_offset, end_lineno=term.end_lineno, end_col_offset=term.end_col_offset) }
+ | expr '-' term { ast.BinOp(expr, ast.Sub(), term, lineno=expr.lineno, col_offset=expr.col_offset, end_lineno=term.end_lineno, end_col_offset=term.end_col_offset) }
+ | term { term }
+ )
+ term: ( l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, lineno=l.lineno, col_offset=l.col_offset, end_lineno=r.end_lineno, end_col_offset=r.end_col_offset) }
+ | l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, lineno=l.lineno, col_offset=l.col_offset, end_lineno=r.end_lineno, end_col_offset=r.end_col_offset) }
+ | factor { factor }
+ )
+ factor: ( '(' expr ')' { expr }
+ | atom { atom }
+ )
+ atom: ( n=NAME { ast.Name(id=n.string, ctx=ast.Load(), lineno=n.start[0], col_offset=n.start[1], end_lineno=n.end[0], end_col_offset=n.end[1]) }
+ | n=NUMBER { ast.Constant(value=ast.literal_eval(n.string), lineno=n.start[0], col_offset=n.start[1], end_lineno=n.end[0], end_col_offset=n.end[1]) }
+ )
+ """
+ parser_class = make_parser(grammar)
+ node = parse_string("(1 + 2*3 + 5)/(6 - 2)\n", parser_class)
+ code = compile(node, "", "eval")
+ val = eval(code)
+ self.assertEqual(val, 3.0)
+
+ def test_nullable(self) -> None:
+ grammar_source = """
+ start: sign NUMBER
+ sign: ['-' | '+']
+ """
+ grammar: Grammar = parse_string(grammar_source, GrammarParser)
+ out = io.StringIO()
+ genr = PythonParserGenerator(grammar, out)
+ rules = grammar.rules
+ self.assertFalse(rules["start"].nullable) # Not None!
+ self.assertTrue(rules["sign"].nullable)
+
+ def test_advanced_left_recursive(self) -> None:
+ grammar_source = """
+ start: NUMBER | sign start
+ sign: ['-']
+ """
+ grammar: Grammar = parse_string(grammar_source, GrammarParser)
+ out = io.StringIO()
+ genr = PythonParserGenerator(grammar, out)
+ rules = grammar.rules
+ self.assertFalse(rules["start"].nullable) # Not None!
+ self.assertTrue(rules["sign"].nullable)
+ self.assertTrue(rules["start"].left_recursive)
+ self.assertFalse(rules["sign"].left_recursive)
+
+ def test_mutually_left_recursive(self) -> None:
+ grammar_source = """
+ start: foo 'E'
+ foo: bar 'A' | 'B'
+ bar: foo 'C' | 'D'
+ """
+ grammar: Grammar = parse_string(grammar_source, GrammarParser)
+ out = io.StringIO()
+ genr = PythonParserGenerator(grammar, out)
+ rules = grammar.rules
+ self.assertFalse(rules["start"].left_recursive)
+ self.assertTrue(rules["foo"].left_recursive)
+ self.assertTrue(rules["bar"].left_recursive)
+ genr.generate("<string>")
+ ns: Dict[str, Any] = {}
+ exec(out.getvalue(), ns)
+ parser_class: Type[Parser] = ns["GeneratedParser"]
+ node = parse_string("D A C A E", parser_class)
+ self.assertEqual(node, [
+ [
+ [
+ [
+ [TokenInfo(type=NAME, string="D", start=(1, 0), end=(1, 1), line="D A C A E")],
+ TokenInfo(type=NAME, string="A", start=(1, 2), end=(1, 3), line="D A C A E"),
+ ],
+ TokenInfo(type=NAME, string="C", start=(1, 4), end=(1, 5), line="D A C A E"),
+ ],
+ TokenInfo(type=NAME, string="A", start=(1, 6), end=(1, 7), line="D A C A E"),
+ ],
+ TokenInfo(type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"),
+ ])
+ node = parse_string("B C A E", parser_class)
+ self.assertIsNotNone(node)
+ self.assertEqual(node, [
+ [
+ [
+ [TokenInfo(type=NAME, string="B", start=(1, 0), end=(1, 1), line="B C A E")],
+ TokenInfo(type=NAME, string="C", start=(1, 2), end=(1, 3), line="B C A E"),
+ ],
+ TokenInfo(type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"),
+ ],
+ TokenInfo(type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"),
+ ])
+
+ def test_nasty_mutually_left_recursive(self) -> None:
+ # This grammar does not recognize 'x - + =', much to my chagrin.
+ # But that's the way PEG works.
+ # [Breathlessly]
+ # The problem is that the toplevel target call
+ # recurses into maybe, which recognizes 'x - +',
+ # and then the toplevel target looks for another '+',
+ # which fails, so it retreats to NAME,
+ # which succeeds, so we end up just recognizing 'x',
+ # and then start fails because there's no '=' after that.
+ grammar_source = """
+ start: target '='
+ target: maybe '+' | NAME
+ maybe: maybe '-' | target
+ """
+ grammar: Grammar = parse_string(grammar_source, GrammarParser)
+ out = io.StringIO()
+ genr = PythonParserGenerator(grammar, out)
+ genr.generate("<string>")
+ ns: Dict[str, Any] = {}
+ exec(out.getvalue(), ns)
+ parser_class = ns["GeneratedParser"]
+ with self.assertRaises(SyntaxError):
+ parse_string("x - + =", parser_class)
+
+ def test_lookahead(self) -> None:
+ grammar = """
+ start: (expr_stmt | assign_stmt) &'.'
+ expr_stmt: !(target '=') expr
+ assign_stmt: target '=' expr
+ expr: term ('+' term)*
+ target: NAME
+ term: NUMBER
+ """
+ parser_class = make_parser(grammar)
+ node = parse_string("foo = 12 + 12 .", parser_class)
+ self.assertEqual(node, [
+ [
+ [
+ [TokenInfo(NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 .")],
+ TokenInfo(OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."),
+ [
+ [
+ TokenInfo(
+ NUMBER, string="12", start=(1, 6), end=(1, 8), line="foo = 12 + 12 ."
+ )
+ ],
+ [
+ [
+ [
+ TokenInfo(
+ OP,
+ string="+",
+ start=(1, 9),
+ end=(1, 10),
+ line="foo = 12 + 12 .",
+ ),
+ [
+ TokenInfo(
+ NUMBER,
+ string="12",
+ start=(1, 11),
+ end=(1, 13),
+ line="foo = 12 + 12 .",
+ )
+ ],
+ ]
+ ]
+ ],
+ ],
+ ]
+ ]
+ ])
+
+ def test_named_lookahead_error(self) -> None:
+ grammar = """
+ start: foo=!'x' NAME
+ """
+ with self.assertRaises(SyntaxError):
+ make_parser(grammar)
+
+ def test_start_leader(self) -> None:
+ grammar = """
+ start: attr | NAME
+ attr: start '.' NAME
+ """
+ # Would assert False without a special case in compute_left_recursives().
+ make_parser(grammar)
+
+ def test_left_recursion_too_complex(self) -> None:
+ grammar = """
+ start: foo
+ foo: bar '+' | baz '+' | '+'
+ bar: baz '-' | foo '-' | '-'
+ baz: foo '*' | bar '*' | '*'
+ """
+ with self.assertRaises(ValueError) as errinfo:
+ make_parser(grammar)
+ self.assertTrue("no leader" in str(errinfo.exception.value))
+
+ def test_cut(self) -> None:
+ grammar = """
+ start: '(' ~ expr ')'
+ expr: NUMBER
+ """
+ parser_class = make_parser(grammar)
+ node = parse_string("(1)", parser_class, verbose=True)
+ self.assertEqual(node, [
+ TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"),
+ [TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)")],
+ TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"),
+ ])
+
+ def test_dangling_reference(self) -> None:
+ grammar = """
+ start: foo ENDMARKER
+ foo: bar NAME
+ """
+ with self.assertRaises(GrammarError):
+ parser_class = make_parser(grammar)
+
+ def test_bad_token_reference(self) -> None:
+ grammar = """
+ start: foo
+ foo: NAMEE
+ """
+ with self.assertRaises(GrammarError):
+ parser_class = make_parser(grammar)
+
+ def test_missing_start(self) -> None:
+ grammar = """
+ foo: NAME
+ """
+ with self.assertRaises(GrammarError):
+ parser_class = make_parser(grammar)
+
+
+class TestGrammarVisitor(unittest.TestCase):
+ class Visitor(GrammarVisitor):
+ def __init__(self) -> None:
+ self.n_nodes = 0
+
+ def visit(self, node: Any, *args: Any, **kwargs: Any) -> None:
+ self.n_nodes += 1
+ super().visit(node, *args, **kwargs)
+
+ def test_parse_trivial_grammar(self) -> None:
+ grammar = """
+ start: 'a'
+ """
+ rules = parse_string(grammar, GrammarParser)
+ visitor = self.Visitor()
+
+ visitor.visit(rules)
+
+ self.assertEqual(visitor.n_nodes, 6)
+
+ def test_parse_or_grammar(self) -> None:
+ grammar = """
+ start: rule
+ rule: 'a' | 'b'
+ """
+ rules = parse_string(grammar, GrammarParser)
+ visitor = self.Visitor()
+
+ visitor.visit(rules)
+
+ # Grammar/Rule/Rhs/Alt/NamedItem/NameLeaf -> 6
+ # Rule/Rhs -> 2
+ # Alt/NamedItem/StringLeaf -> 3
+ # Alt/NamedItem/StringLeaf -> 3
+
+ self.assertEqual(visitor.n_nodes, 14)
+
+ def test_parse_repeat1_grammar(self) -> None:
+ grammar = """
+ start: 'a'+
+ """
+ rules = parse_string(grammar, GrammarParser)
+ visitor = self.Visitor()
+
+ visitor.visit(rules)
+
+ # Grammar/Rule/Rhs/Alt/NamedItem/Repeat1/StringLeaf -> 7
+ self.assertEqual(visitor.n_nodes, 7)
+
+ def test_parse_repeat0_grammar(self) -> None:
+ grammar = """
+ start: 'a'*
+ """
+ rules = parse_string(grammar, GrammarParser)
+ visitor = self.Visitor()
+
+ visitor.visit(rules)
+
+ # Grammar/Rule/Rhs/Alt/NamedItem/Repeat0/StringLeaf -> 7
+
+ self.assertEqual(visitor.n_nodes, 7)
+
+ def test_parse_optional_grammar(self) -> None:
+ grammar = """
+ start: 'a' ['b']
+ """
+ rules = parse_string(grammar, GrammarParser)
+ visitor = self.Visitor()
+
+ visitor.visit(rules)
+
+ # Grammar/Rule/Rhs/Alt/NamedItem/StringLeaf -> 6
+ # NamedItem/Opt/Rhs/Alt/NamedItem/StringLeaf -> 6
+
+ self.assertEqual(visitor.n_nodes, 12)
+
+
+class TestGrammarVisualizer(unittest.TestCase):
+ def test_simple_rule(self) -> None:
+ grammar = """
+ start: 'a' 'b'
+ """
+ rules = parse_string(grammar, GrammarParser)
+
+ printer = ASTGrammarPrinter()
+ lines: List[str] = []
+ printer.print_grammar_ast(rules, printer=lines.append)
+
+ output = "\n".join(lines)
+ expected_output = textwrap.dedent(
+ """\
+ └──Rule
+ └──Rhs
+ └──Alt
+ ├──NamedItem
+ │ └──StringLeaf("'a'")
+ └──NamedItem
+ └──StringLeaf("'b'")
+ """
+ )
+
+ self.assertEqual(output, expected_output)
+
+ def test_multiple_rules(self) -> None:
+ grammar = """
+ start: a b
+ a: 'a'
+ b: 'b'
+ """
+ rules = parse_string(grammar, GrammarParser)
+
+ printer = ASTGrammarPrinter()
+ lines: List[str] = []
+ printer.print_grammar_ast(rules, printer=lines.append)
+
+ output = "\n".join(lines)
+ expected_output = textwrap.dedent(
+ """\
+ └──Rule
+ └──Rhs
+ └──Alt
+ ├──NamedItem
+ │ └──NameLeaf('a')
+ └──NamedItem
+ └──NameLeaf('b')
+
+ └──Rule
+ └──Rhs
+ └──Alt
+ └──NamedItem
+ └──StringLeaf("'a'")
+
+ └──Rule
+ └──Rhs
+ └──Alt
+ └──NamedItem
+ └──StringLeaf("'b'")
+ """
+ )
+
+ self.assertEqual(output, expected_output)
+
+ def test_deep_nested_rule(self) -> None:
+ grammar = """
+ start: 'a' ['b'['c'['d']]]
+ """
+ rules = parse_string(grammar, GrammarParser)
+
+ printer = ASTGrammarPrinter()
+ lines: List[str] = []
+ printer.print_grammar_ast(rules, printer=lines.append)
+
+ output = "\n".join(lines)
+ expected_output = textwrap.dedent(
+ """\
+ └──Rule
+ └──Rhs
+ └──Alt
+ ├──NamedItem
+ │ └──StringLeaf("'a'")
+ └──NamedItem
+ └──Opt
+ └──Rhs
+ └──Alt
+ ├──NamedItem
+ │ └──StringLeaf("'b'")
+ └──NamedItem
+ └──Opt
+ └──Rhs
+ └──Alt
+ ├──NamedItem
+ │ └──StringLeaf("'c'")
+ └──NamedItem
+ └──Opt
+ └──Rhs
+ └──Alt
+ └──NamedItem
+ └──StringLeaf("'d'")
+ """
+ )
+
+ self.assertEqual(output, expected_output)
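Closing sketch: outside the assertions, the printer can be pointed straight at stdout
(assumption: print_grammar_ast's printer argument defaults to print, which the tests'
printer=lines.append override suggests):

    rules = parse_string("start: 'a' 'b'", GrammarParser)
    # Prints the same tree test_simple_rule asserts on, one line per node:
    ASTGrammarPrinter().print_grammar_ast(rules)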