import sysconfig
import textwrap
import unittest
import os
import shutil
import tempfile
from pathlib import Path

from test import test_tools
from test import support
from test.support import os_helper
from test.support.script_helper import assert_python_ok

# Skip the whole module when the interpreter was built with the PGO
# "profile use" flag present in PY_CFLAGS_NODIST.
_py_cflags_nodist = sysconfig.get_config_var("PY_CFLAGS_NODIST")
_pgo_flag = sysconfig.get_config_var("PGO_PROF_USE_FLAG")
if _pgo_flag and _py_cflags_nodist and _pgo_flag in _py_cflags_nodist:
    raise unittest.SkipTest("peg_generator test disabled under PGO build")

test_tools.skip_if_missing("peg_generator")
with test_tools.imports_under_tool("peg_generator"):
    from pegen.grammar_parser import GeneratedParser as GrammarParser
    from pegen.testutil import (
        parse_string,
        generate_parser_c_extension,
        generate_c_parser_source,
    )
    from pegen.ast_dump import ast_dump


# Source of the script executed in a child interpreter by
# TestCParser.run_test().  ``{extension_path}`` is the directory holding the
# freshly built ``parse`` extension module and ``{test_source}`` is the body
# of ``Tests.test_parse`` (already indented by 8 spaces by the caller).
TEST_TEMPLATE = """
tmp_dir = {extension_path!r}

import ast
import traceback
import sys
import unittest

from test import test_tools
with test_tools.imports_under_tool("peg_generator"):
    from pegen.ast_dump import ast_dump

sys.path.insert(0, tmp_dir)
import parse

class Tests(unittest.TestCase):

    def check_input_strings_for_grammar(
        self,
        valid_cases = (),
        invalid_cases = (),
    ):
        if valid_cases:
            for case in valid_cases:
                parse.parse_string(case, mode=0)

        if invalid_cases:
            for case in invalid_cases:
                with self.assertRaises(SyntaxError):
                    parse.parse_string(case, mode=0)

    def verify_ast_generation(self, stmt):
        expected_ast = ast.parse(stmt)
        actual_ast = parse.parse_string(stmt, mode=1)
        self.assertEqual(ast_dump(expected_ast), ast_dump(actual_ast))

    def test_parse(self):
        {test_source}

unittest.main()
"""


@support.requires_subprocess()
class TestCParser(unittest.TestCase):
    """Build C parser extensions from small grammars and exercise them.

    Most tests call run_test(), which generates and builds a ``parse``
    extension module in a per-test temporary directory and then runs a
    snippet against it in a child interpreter via TEST_TEMPLATE.
    """

    @classmethod
    def setUpClass(cls):
        # When running under regtest, a separate tempdir is used
        # as the current directory and watched for left-overs.
        # Reusing that as the base for temporary directories
        # ensures everything is cleaned up properly and
        # cleans up afterwards if not (with warnings).
        cls.tmp_base = os.getcwd()
        if os.path.samefile(cls.tmp_base, os_helper.SAVEDCWD):
            cls.tmp_base = None
        # Create a directory for the reusable static library part of
        # the pegen extension build process.  This greatly reduces the
        # runtime overhead of spawning compiler processes.
        cls.library_dir = tempfile.mkdtemp(dir=cls.tmp_base)
        cls.addClassCleanup(shutil.rmtree, cls.library_dir)

    def setUp(self):
        # Snapshot the sysconfig variables so tearDown() can restore them.
        self._backup_config_vars = dict(sysconfig._CONFIG_VARS)
        cmd = support.missing_compiler_executable()
        if cmd is not None:
            self.skipTest("The %r command is not found" % cmd)
        self.old_cwd = os.getcwd()
        self.tmp_path = tempfile.mkdtemp(dir=self.tmp_base)
        # Run each test from inside its own temporary directory.
        self.enterContext(os_helper.change_cwd(self.tmp_path))

    def tearDown(self):
        # Leave the temporary directory before removing it.
        os.chdir(self.old_cwd)
        shutil.rmtree(self.tmp_path)
        # Restore the sysconfig variables captured in setUp().
        sysconfig._CONFIG_VARS.clear()
        sysconfig._CONFIG_VARS.update(self._backup_config_vars)

    def build_extension(self, grammar_source):
        # Parse the grammar text and build the extension module for it in
        # the current directory.
        grammar = parse_string(grammar_source, GrammarParser)

        # Because setUp() already changes the current directory to the
        # temporary path, use a relative path here to prevent excessive
        # path lengths when compiling.
        generate_parser_c_extension(grammar, Path('.'), library_dir=self.library_dir)

    def run_test(self, grammar_source, test_source):
        # Build the extension for ``grammar_source`` and run ``test_source``
        # as the body of ``Tests.test_parse`` in a child interpreter.
        self.build_extension(grammar_source)
        # Re-indent the snippet so it lines up with the method body slot
        # in TEST_TEMPLATE.
        test_source = textwrap.indent(textwrap.dedent(test_source), 8 * " ")
        assert_python_ok(
            "-c",
            TEST_TEMPLATE.format(extension_path=self.tmp_path, test_source=test_source),
        )

    def test_c_parser(self) -> None:
        grammar_source = """
        start[mod_ty]: a[asdl_stmt_seq*]=stmt* $ { _PyAST_Module(a, NULL, p->arena) }
        stmt[stmt_ty]: a=expr_stmt { a }
        expr_stmt[stmt_ty]: a=expression NEWLINE { _PyAST_Expr(a, EXTRA) }
        expression[expr_ty]: ( l=expression '+' r=term { _PyAST_BinOp(l, Add, r, EXTRA) }
                            | l=expression '-' r=term { _PyAST_BinOp(l, Sub, r, EXTRA) }
                            | t=term { t }
                            )
        term[expr_ty]: ( l=term '*' r=factor { _PyAST_BinOp(l, Mult, r, EXTRA) }
                    | l=term '/' r=factor { _PyAST_BinOp(l, Div, r, EXTRA) }
                    | f=factor { f }
                    )
        factor[expr_ty]: ('(' e=expression ')' { e }
                        | a=atom { a }
                        )
        atom[expr_ty]: ( n=NAME { n }
                    | n=NUMBER { n }
                    | s=STRING { s }
                    )
        """
        test_source = """
        expressions = [
            "4+5",
            "4-5",
            "4*5",
            "1+4*5",
            "1+4/5",
            "(1+1) + (1+1)",
            "(1+1) - (1+1)",
            "(1+1) * (1+1)",
            "(1+1) / (1+1)",
        ]

        for expr in expressions:
            the_ast = parse.parse_string(expr, mode=1)
            expected_ast = ast.parse(expr)
            self.assertEqual(ast_dump(the_ast), ast_dump(expected_ast))
        """
        self.run_test(grammar_source, test_source)

    def test_lookahead(self) -> None:
        grammar_source = """
        start: NAME &NAME expr NEWLINE? ENDMARKER
        expr: NAME | NUMBER
        """
        test_source = """
        valid_cases = ["foo bar"]
        invalid_cases = ["foo 34"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_negative_lookahead(self) -> None:
        grammar_source = """
        start: NAME !NAME expr NEWLINE? ENDMARKER
        expr: NAME | NUMBER
        """
        test_source = """
        valid_cases = ["foo 34"]
        invalid_cases = ["foo bar"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_cut(self) -> None:
        grammar_source = """
        start: X ~ Y Z | X Q S
        X: 'x'
        Y: 'y'
        Z: 'z'
        Q: 'q'
        S: 's'
        """
        test_source = """
        valid_cases = ["x y z"]
        invalid_cases = ["x q s"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_gather(self) -> None:
        grammar_source = """
        start: ';'.pass_stmt+ NEWLINE
        pass_stmt: 'pass'
        """
        test_source = """
        valid_cases = ["pass", "pass; pass"]
        invalid_cases = ["pass;", "pass; pass;"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_left_recursion(self) -> None:
        grammar_source = """
        start: expr NEWLINE
        expr: ('-' term | expr '+' term | term)
        term: NUMBER
        """
        test_source = """
        valid_cases = ["-34", "34", "34 + 12", "1 + 1 + 2 + 3"]
        self.check_input_strings_for_grammar(valid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_advanced_left_recursive(self) -> None:
        grammar_source = """
        start: NUMBER | sign start
        sign: ['-']
        """
        test_source = """
        valid_cases = ["23", "-34"]
        self.check_input_strings_for_grammar(valid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_mutually_left_recursive(self) -> None:
        grammar_source = """
        start: foo 'E'
        foo: bar 'A' | 'B'
        bar: foo 'C' | 'D'
        """
        test_source = """
        valid_cases = ["B E", "D A C A E"]
        self.check_input_strings_for_grammar(valid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_nasty_mutually_left_recursive(self) -> None:
        grammar_source = """
        start: target '='
        target: maybe '+' | NAME
        maybe: maybe '-' | target
        """
        test_source = """
        valid_cases = ["x ="]
        invalid_cases = ["x - + ="]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_return_stmt_noexpr_action(self) -> None:
        grammar_source = """
        start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        statements[asdl_stmt_seq*]: a[asdl_stmt_seq*]=statement+ { a }
        statement[stmt_ty]: simple_stmt
        simple_stmt[stmt_ty]: small_stmt
        small_stmt[stmt_ty]: return_stmt
        return_stmt[stmt_ty]: a='return' NEWLINE { _PyAST_Return(NULL, EXTRA) }
        """
        test_source = """
        stmt = "return"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_gather_action_ast(self) -> None:
        grammar_source = """
        start[mod_ty]: a[asdl_stmt_seq*]=';'.pass_stmt+ NEWLINE ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        pass_stmt[stmt_ty]: a='pass' { _PyAST_Pass(EXTRA)}
        """
        test_source = """
        stmt = "pass; pass"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_pass_stmt_action(self) -> None:
        grammar_source = """
        start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        statements[asdl_stmt_seq*]: a[asdl_stmt_seq*]=statement+ { a }
        statement[stmt_ty]: simple_stmt
        simple_stmt[stmt_ty]: small_stmt
        small_stmt[stmt_ty]: pass_stmt
        pass_stmt[stmt_ty]: a='pass' NEWLINE { _PyAST_Pass(EXTRA) }
        """
        test_source = """
        stmt = "pass"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_if_stmt_action(self) -> None:
        grammar_source = """
        start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        statements[asdl_stmt_seq*]: a=statement+ { (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a) }
        statement[asdl_stmt_seq*]:  a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) } | simple_stmt
        simple_stmt[asdl_stmt_seq*]: a=small_stmt b=further_small_stmt* [';'] NEWLINE {
                                            (asdl_stmt_seq*)_PyPegen_seq_insert_in_front(p, a, b) }
        further_small_stmt[stmt_ty]: ';' a=small_stmt { a }
        block: simple_stmt | NEWLINE INDENT a=statements DEDENT { a }
        compound_stmt: if_stmt
        if_stmt: 'if' a=full_expression ':' b=block { _PyAST_If(a, b, NULL, EXTRA) }
        small_stmt[stmt_ty]: pass_stmt
        pass_stmt[stmt_ty]: a='pass' { _PyAST_Pass(EXTRA) }
        full_expression: NAME
        """
        test_source = """
        stmt = "pass"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_same_name_different_types(self) -> None:
        grammar_source = """
        start[mod_ty]: a[asdl_stmt_seq*]=import_from+ NEWLINE ENDMARKER { _PyAST_Module(a, NULL, p->arena)}
        import_from[stmt_ty]: ( a='from' !'import' c=simple_name 'import' d=import_as_names_from {
                                    _PyAST_ImportFrom(c->v.Name.id, d, 0, EXTRA) }
                            | a='from' '.' 'import' c=import_as_names_from {
                                    _PyAST_ImportFrom(NULL, c, 1, EXTRA) }
                            )
        simple_name[expr_ty]: NAME
        import_as_names_from[asdl_alias_seq*]: a[asdl_alias_seq*]=','.import_as_name_from+ { a }
        import_as_name_from[alias_ty]: a=NAME 'as' b=NAME { _PyAST_alias(((expr_ty) a)->v.Name.id, ((expr_ty) b)->v.Name.id, EXTRA) }
        """
        test_source = """
        for stmt in ("from a import b as c", "from . import a as b"):
            expected_ast = ast.parse(stmt)
            actual_ast = parse.parse_string(stmt, mode=1)
            self.assertEqual(ast_dump(expected_ast), ast_dump(actual_ast))
        """
        self.run_test(grammar_source, test_source)

    def test_with_stmt_with_paren(self) -> None:
        grammar_source = """
        start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        statements[asdl_stmt_seq*]: a=statement+ { (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a) }
        statement[asdl_stmt_seq*]: a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) }
        compound_stmt[stmt_ty]: with_stmt
        with_stmt[stmt_ty]: (
            a='with' '(' b[asdl_withitem_seq*]=','.with_item+ ')' ':' c=block {
                _PyAST_With(b, (asdl_stmt_seq*) _PyPegen_singleton_seq(p, c), NULL, EXTRA) }
        )
        with_item[withitem_ty]: (
            e=NAME o=['as' t=NAME { t }] { _PyAST_withitem(e, _PyPegen_set_expr_context(p, o, Store), p->arena) }
        )
        block[stmt_ty]: a=pass_stmt NEWLINE { a } | NEWLINE INDENT a=pass_stmt DEDENT { a }
        pass_stmt[stmt_ty]: a='pass' { _PyAST_Pass(EXTRA) }
        """
        # The doubled backslashes keep "\n" as an escape sequence in the
        # child script's own string literal.
        test_source = """
        stmt = "with (\\n    a as b,\\n    c as d\\n): pass"
        the_ast = parse.parse_string(stmt, mode=1)
        self.assertTrue(ast_dump(the_ast).startswith(
            "Module(body=[With(items=[withitem(context_expr=Name(id='a', ctx=Load()), optional_vars=Name(id='b', ctx=Store())), "
            "withitem(context_expr=Name(id='c', ctx=Load()), optional_vars=Name(id='d', ctx=Store()))]"
        ))
        """
        self.run_test(grammar_source, test_source)

    def test_ternary_operator(self) -> None:
        grammar_source = """
        start[mod_ty]: a=expr ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        expr[asdl_stmt_seq*]: a=listcomp NEWLINE { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, _PyAST_Expr(a, EXTRA)) }
        listcomp[expr_ty]: (
            a='[' b=NAME c=for_if_clauses d=']' { _PyAST_ListComp(b, c, EXTRA) }
        )
        for_if_clauses[asdl_comprehension_seq*]: (
            a[asdl_comprehension_seq*]=(y=[ASYNC] 'for' a=NAME 'in' b=NAME c[asdl_expr_seq*]=('if' z=NAME { z })*
                { _PyAST_comprehension(_PyAST_Name(((expr_ty) a)->v.Name.id, Store, EXTRA), b, c, (y == NULL) ? 0 : 1, p->arena) })+ { a }
        )
        """
        test_source = """
        stmt = "[i for i in a if b]"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_syntax_error_for_string(self) -> None:
        grammar_source = """
        start: expr+ NEWLINE? ENDMARKER
        expr: NAME
        """
        # Raw string: the \u escapes must reach the child script verbatim so
        # that they are decoded by the child's own string literal.
        # The traceback is expected to name "<string>" as the file
        # (NOTE(review): assumes the generated extension reports that
        # filename -- confirm against the pegen extension source).
        test_source = r"""
        for text in ("a b 42 b a", "\u540d \u540d 42 \u540d \u540d"):
            try:
                parse.parse_string(text, mode=0)
            except SyntaxError:
                tb = traceback.format_exc()
            else:
                self.fail("SyntaxError not raised for %r" % (text,))
            self.assertIn('File "<string>", line 1', tb)
            self.assertIn("SyntaxError: invalid syntax", tb)
        """
        self.run_test(grammar_source, test_source)

    def test_headers_and_trailer(self) -> None:
        grammar_source = """
        @header 'SOME HEADER'
        @subheader 'SOME SUBHEADER'
        @trailer 'SOME TRAILER'
        start: expr+ NEWLINE? ENDMARKER
        expr: x=NAME
        """
        # Only the generated C source is inspected; nothing is compiled.
        grammar = parse_string(grammar_source, GrammarParser)
        parser_source = generate_c_parser_source(grammar)

        self.assertIn("SOME HEADER", parser_source)
        self.assertIn("SOME SUBHEADER", parser_source)
        self.assertIn("SOME TRAILER", parser_source)

    def test_error_in_rules(self) -> None:
        grammar_source = """
        start: expr+ NEWLINE? ENDMARKER
        expr: NAME {PyTuple_New(-1)}
        """
        # PyTuple_New raises SystemError if an invalid argument was passed.
        test_source = """
        with self.assertRaises(SystemError):
            parse.parse_string("a", mode=0)
        """
        self.run_test(grammar_source, test_source)

    def test_no_soft_keywords(self) -> None:
        grammar_source = """
        start: expr+ NEWLINE? ENDMARKER
        expr: 'foo'
        """
        grammar = parse_string(grammar_source, GrammarParser)
        parser_source = generate_c_parser_source(grammar)
        # Use a unittest assertion rather than a bare ``assert`` so the
        # check still runs under ``python -O``.
        self.assertNotIn("expect_soft_keyword", parser_source)

    def test_soft_keywords(self) -> None:
        grammar_source = """
        start: expr+ NEWLINE? ENDMARKER
        expr: "foo"
        """
        grammar = parse_string(grammar_source, GrammarParser)
        parser_source = generate_c_parser_source(grammar)
        # Use a unittest assertion rather than a bare ``assert`` so the
        # check still runs under ``python -O``.
        self.assertIn("expect_soft_keyword", parser_source)

    def test_soft_keywords_parse(self) -> None:
        grammar_source = """
        start: "if" expr '+' expr NEWLINE
        expr: NAME
        """
        test_source = """
        valid_cases = ["if if + if"]
        invalid_cases = ["if if"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_soft_keywords_lookahead(self) -> None:
        grammar_source = """
        start: &"if" "if" expr '+' expr NEWLINE
        expr: NAME
        """
        test_source = """
        valid_cases = ["if if + if"]
        invalid_cases = ["if if"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_forced(self) -> None:
        grammar_source = """
        start: NAME &&':' | NAME
        """
        test_source = """
        self.assertEqual(parse.parse_string("number :", mode=0), None)
        with self.assertRaises(SyntaxError) as e:
            parse.parse_string("a", mode=0)
        self.assertIn("expected ':'", str(e.exception))
        """
        self.run_test(grammar_source, test_source)

    def test_forced_with_group(self) -> None:
        grammar_source = """
        start: NAME &&(':' | ';') | NAME
        """
        test_source = """
        self.assertEqual(parse.parse_string("number :", mode=0), None)
        self.assertEqual(parse.parse_string("number ;", mode=0), None)
        with self.assertRaises(SyntaxError) as e:
            parse.parse_string("a", mode=0)
        self.assertIn("expected (':' | ';')", e.exception.args[0])
        """
        self.run_test(grammar_source, test_source)