"""Tests comparing the ``_peg_parser`` extension module against ``ast.parse``.

The module-level tables below hold small Python snippets, each paired with an
identifier.  ``ASTGenerationTest`` feeds them to ``peg_parser.parse_string``
and either compares the resulting AST with the one produced by ``ast.parse``
or checks that the expected ``SyntaxError`` is raised.
"""

import ast
import os
import sys
import _peg_parser as peg_parser
import unittest
from pathlib import PurePath
from typing import Any, Union, Iterable, Tuple
from textwrap import dedent

from test import support


# (test id, source) pairs that must produce exactly the same AST (including
# position attributes) as ``ast.parse``.  Multi-line sources are dedented by
# ``cleanup_source`` before use.
TEST_CASES = [
    ('annotated_assignment', 'x: int = 42'),
    ('annotated_assignment_with_tuple', 'x: tuple = 1, 2'),
    ('annotated_assignment_with_parens', '(paren): int = 3+2'),
    ('annotated_assignment_with_yield', 'x: int = yield 42'),
    ('annotated_no_assignment', 'x: int'),
    ('annotation_with_multiple_parens', '((parens)): int'),
    ('annotation_with_parens', '(parens): int'),
    ('annotated_assignment_with_attr', 'a.b: int'),
    ('annotated_assignment_with_subscript', 'a[b]: int'),
    ('annotated_assignment_with_attr_and_parens', '(a.b): int'),
    ('annotated_assignment_with_subscript_and_parens', '(a[b]): int'),
    ('assert', 'assert a'),
    ('assert_message', 'assert a, b'),
    ('assignment_false', 'a = False'),
    ('assignment_none', 'a = None'),
    ('assignment_true', 'a = True'),
    ('assignment_paren', '(a) = 42'),
    ('assignment_paren_multiple', '(a, b) = (0, 1)'),
    ('asyncfor',
     '''
        async for i in a:
            pass
     '''),
    ('attribute_call', 'a.b()'),
    ('attribute_multiple_names', 'abcd.efg.hij'),
    ('attribute_simple', 'a.b'),
    ('attributes_subscript', 'a.b[0]'),
    ('augmented_assignment', 'x += 42'),
    ('binop_add', '1 + 1'),
    ('binop_add_multiple', '1 + 1 + 1 + 1'),
    ('binop_all', '1 + 2 * 5 + 3 ** 2 - -3'),
    ('binop_boolop_comp', '1 + 1 == 2 or 1 + 1 == 3 and not b'),
    ('boolop_or', 'a or b'),
    ('boolop_or_multiple', 'a or b or c'),
    ('class_def_bases',
     '''
        class C(A, B):
            pass
     '''),
    ('class_def_decorators',
     '''
        @a
        class C:
            pass
     '''),
    ('class_def_decorator_with_expression',
     '''
        @lambda x: 42
        class C:
            pass
     '''),
    ('class_def_decorator_with_expression_and_walrus',
     '''
        @x:=lambda x: 42
        class C:
            pass
     '''),
    ('class_def_keywords',
     '''
        class C(keyword=a+b, **c):
            pass
     '''),
    ('class_def_mixed',
     '''
        class C(A, B, keyword=0, **a):
            pass
     '''),
    ('class_def_simple',
     '''
        class C:
            pass
     '''),
    ('class_def_starred_and_kwarg',
     '''
        class C(A, B, *x, **y):
            pass
     '''),
    ('class_def_starred_in_kwargs',
     '''
        class C(A, x=2, *[B, C], y=3):
            pass
     '''),
    ('call_attribute', 'f().b'),
    ('call_genexp', 'f(i for i in a)'),
    ('call_mixed_args', 'f(a, b, *c, **d)'),
    ('call_mixed_args_named', 'f(a, b, *c, d=4, **v)'),
    ('call_one_arg', 'f(a)'),
    ('call_posarg_genexp', 'f(a, (i for i in a))'),
    ('call_simple', 'f()'),
    ('call_subscript', 'f()[0]'),
    ('comp', 'a == b'),
    ('comp_multiple', 'a == b == c'),
    ('comp_paren_end', 'a == (b-1)'),
    ('comp_paren_start', '(a-1) == b'),
    ('decorator',
     '''
        @a
        def f():
            pass
     '''),
    ('decorator_async',
     '''
        @a
        async def d():
            pass
     '''),
    ('decorator_with_expression',
     '''
        @lambda x: 42
        def f():
            pass
     '''),
    ('decorator_with_expression_and_walrus',
     '''
        @x:=lambda x: 42
        def f():
            pass
     '''),
    ('del_attribute', 'del a.b'),
    ('del_call_attribute', 'del a().c'),
    ('del_call_genexp_attribute', 'del a(i for i in b).c'),
    ('del_empty', 'del()'),
    ('del_list', 'del a, [b, c]'),
    ('del_mixed', 'del a[0].b().c'),
    ('del_multiple', 'del a, b'),
    ('del_multiple_calls_attribute', 'del a()().b'),
    ('del_paren', 'del(a,b)'),
    ('del_paren_single_target', 'del(a)'),
    ('del_subscript_attribute', 'del a[0].b'),
    ('del_tuple', 'del a, (b, c)'),
    ('delete', 'del a'),
    ('dict',
     '''
        {
            a: 1,
            b: 2,
            c: 3
        }
     '''),
    ('dict_comp', '{x:1 for x in a}'),
    ('dict_comp_if', '{x:1+2 for x in a if b}'),
    ('dict_empty', '{}'),
    ('for',
     '''
        for i in a:
            pass
     '''),
    ('for_else',
     '''
        for i in a:
            pass
        else:
            pass
     '''),
    ('for_star_target_in_paren', 'for (a) in b: pass'),
    ('for_star_targets_attribute', 'for a.b in c: pass'),
    ('for_star_targets_call_attribute', 'for a().c in b: pass'),
    ('for_star_targets_empty', 'for () in a: pass'),
    ('for_star_targets_mixed', 'for a[0].b().c in d: pass'),
    ('for_star_targets_mixed_starred',
     '''
        for a, *b, (c, d) in e:
            pass
     '''),
    ('for_star_targets_multiple', 'for a, b in c: pass'),
    ('for_star_targets_nested_starred', 'for *[*a] in b: pass'),
    ('for_star_targets_starred', 'for *a in b: pass'),
    ('for_star_targets_subscript_attribute', 'for a[0].b in c: pass'),
    ('for_star_targets_trailing_comma',
     '''
        for a, (b, c), in d:
            pass
     '''),
    ('for_star_targets_tuple', 'for a, (b, c) in d: pass'),
    ('for_underscore',
     '''
        for _ in a:
            pass
     '''),
    ('function_return_type',
     '''
        def f() -> Any:
            pass
     '''),
    ('f-string_slice', "f'{x[2]}'"),
    ('f-string_slice_upper', "f'{x[2:3]}'"),
    ('f-string_slice_step', "f'{x[2:3:-2]}'"),
    ('f-string_constant', "f'{42}'"),
    ('f-string_boolop', "f'{x and y}'"),
    ('f-string_named_expr', "f'{(x:=42)}'"),
    ('f-string_binop', "f'{x+y}'"),
    ('f-string_unaryop', "f'{not x}'"),
    ('f-string_lambda', "f'{(lambda x, /, y, y2=42 , *z, k1, k2=34, **k3: 42)}'"),
    ('f-string_lambda_call', "f'{(lambda: 2)(2)}'"),
    ('f-string_ifexpr', "f'{x if y else z}'"),
    ('f-string_dict', "f'{ {2:34, 3:34} }'"),
    ('f-string_set', "f'{ {2,-45} }'"),
    ('f-string_list', "f'{ [2,-45] }'"),
    ('f-string_tuple', "f'{ (2,-45) }'"),
    ('f-string_listcomp', "f'{[x for x in y if z]}'"),
    ('f-string_setcomp', "f'{ {x for x in y if z} }'"),
    ('f-string_dictcomp', "f'{ {x:x for x in y if z} }'"),
    ('f-string_genexpr', "f'{ (x for x in y if z) }'"),
    ('f-string_yield', "f'{ (yield x) }'"),
    ('f-string_yieldfrom', "f'{ (yield from x) }'"),
    ('f-string_await', "f'{ await x }'"),
    ('f-string_compare', "f'{ x == y }'"),
    ('f-string_call', "f'{ f(x,y,z) }'"),
    ('f-string_attribute', "f'{ f.x.y.z }'"),
    ('f-string_starred', "f'{ *x, }'"),
    ('f-string_doublestarred', "f'{ {**x} }'"),
    ('f-string_escape_brace', "f'{{Escape'"),
    ('f-string_escape_closing_brace', "f'Escape}}'"),
    ('f-string_repr', "f'{a!r}'"),
    ('f-string_str', "f'{a!s}'"),
    ('f-string_ascii', "f'{a!a}'"),
    ('f-string_debug', "f'{a=}'"),
    ('f-string_padding', "f'{a:03d}'"),
    ('f-string_multiline',
     """
        f'''
        {hello}
        '''
     """),
    ('f-string_multiline_in_expr',
     """
        f'''
        {
        hello
        }
        '''
     """),
    ('f-string_multiline_in_call',
     """
        f'''
        {f(
            a, b, c
        )}
        '''
     """),
    ('global', 'global a, b'),
    ('group', '(yield a)'),
    ('if_elif',
     '''
        if a:
            pass
        elif b:
            pass
     '''),
    ('if_elif_elif',
     '''
        if a:
            pass
        elif b:
            pass
        elif c:
            pass
     '''),
    ('if_elif_else',
     '''
        if a:
            pass
        elif b:
            pass
        else:
            pass
     '''),
    ('if_else',
     '''
        if a:
            pass
        else:
            pass
     '''),
    ('if_simple', 'if a: pass'),
    ('import', 'import a'),
    ('import_alias', 'import a as b'),
    ('import_dotted', 'import a.b'),
    ('import_dotted_alias', 'import a.b as c'),
    ('import_dotted_multichar', 'import ab.cd'),
    ('import_from', 'from a import b'),
    ('import_from_alias', 'from a import b as c'),
    ('import_from_dotted', 'from a.b import c'),
    ('import_from_dotted_alias', 'from a.b import c as d'),
    ('import_from_multiple_aliases', 'from a import b as c, d as e'),
    ('import_from_one_dot', 'from .a import b'),
    ('import_from_one_dot_alias', 'from .a import b as c'),
    ('import_from_star', 'from a import *'),
    ('import_from_three_dots', 'from ...a import b'),
    ('import_from_trailing_comma', 'from a import (b,)'),
    ('kwarg',
     '''
        def f(**a):
            pass
     '''),
    ('kwonly_args',
     '''
        def f(*, a, b):
            pass
     '''),
    ('kwonly_args_with_default',
     '''
        def f(*, a=2, b):
            pass
     '''),
    ('lambda_kwarg', 'lambda **a: 42'),
    ('lambda_kwonly_args', 'lambda *, a, b: 42'),
    ('lambda_kwonly_args_with_default', 'lambda *, a=2, b: 42'),
    ('lambda_mixed_args', 'lambda a, /, b, *, c: 42'),
    ('lambda_mixed_args_with_default', 'lambda a, b=2, /, c=3, *e, f, **g: 42'),
    ('lambda_no_args', 'lambda: 42'),
    ('lambda_pos_args', 'lambda a,b: 42'),
    ('lambda_pos_args_with_default', 'lambda a, b=2: 42'),
    ('lambda_pos_only_args', 'lambda a, /: 42'),
    ('lambda_pos_only_args_with_default', 'lambda a=0, /: 42'),
    ('lambda_pos_posonly_args', 'lambda a, b, /, c, d: 42'),
    ('lambda_pos_posonly_args_with_default', 'lambda a, b=0, /, c=2: 42'),
    ('lambda_vararg', 'lambda *a: 42'),
    ('lambda_vararg_kwonly_args', 'lambda *a, b: 42'),
    ('list', '[1, 2, a]'),
    ('list_comp', '[i for i in a]'),
    ('list_comp_if', '[i for i in a if b]'),
    ('list_trailing_comma', '[1+2, a, 3+4,]'),
    ('mixed_args',
     '''
        def f(a, /, b, *, c):
            pass
     '''),
    ('mixed_args_with_default',
     '''
        def f(a, b=2, /, c=3, *e, f, **g):
            pass
     '''),
    ('multipart_string_bytes', 'b"Hola" b"Hello" b"Bye"'),
    ('multipart_string_triple', '"""Something here""" "and now"'),
    ('multipart_string_different_prefixes', 'u"Something" "Other thing" r"last thing"'),
    ('multiple_assignments', 'x = y = z = 42'),
    ('multiple_assignments_with_yield', 'x = y = z = yield 42'),
    ('multiple_pass',
     '''
        pass; pass
        pass
     '''),
    ('namedexpr', '(x := [1, 2, 3])'),
    ('namedexpr_false', '(x := False)'),
    ('namedexpr_none', '(x := None)'),
    ('namedexpr_true', '(x := True)'),
    ('nonlocal', 'nonlocal a, b'),
    ('number_complex', '-2.234+1j'),
    ('number_float', '-34.2333'),
    ('number_imaginary_literal', '1.1234j'),
    ('number_integer', '-234'),
    ('number_underscores', '1_234_567'),
    ('pass', 'pass'),
    ('pos_args',
     '''
        def f(a, b):
            pass
     '''),
    ('pos_args_with_default',
     '''
        def f(a, b=2):
            pass
     '''),
    ('pos_only_args',
     '''
        def f(a, /):
            pass
     '''),
    ('pos_only_args_with_default',
     '''
        def f(a=0, /):
            pass
     '''),
    ('pos_posonly_args',
     '''
        def f(a, b, /, c, d):
            pass
     '''),
    ('pos_posonly_args_with_default',
     '''
        def f(a, b=0, /, c=2):
            pass
     '''),
    ('primary_mixed', 'a.b.c().d[0]'),
    ('raise', 'raise'),
    ('raise_ellipsis', 'raise ...'),
    ('raise_expr', 'raise a'),
    ('raise_from', 'raise a from b'),
    ('return', 'return'),
    ('return_expr', 'return a'),
    ('set', '{1, 2+4, 3+5}'),
    ('set_comp', '{i for i in a}'),
    ('set_trailing_comma', '{1, 2, 3,}'),
    ('simple_assignment', 'x = 42'),
    ('simple_assignment_with_yield', 'x = yield 42'),
    ('string_bytes', 'b"hello"'),
    ('string_concatenation_bytes', 'b"hello" b"world"'),
    ('string_concatenation_simple', '"abcd" "efgh"'),
    ('string_format_simple', 'f"hello"'),
    ('string_format_with_formatted_value', 'f"hello {world}"'),
    ('string_simple', '"hello"'),
    ('string_unicode', 'u"hello"'),
    ('subscript_attribute', 'a[0].b'),
    ('subscript_call', 'a[b]()'),
    ('subscript_multiple_slices', 'a[0:a:2, 1]'),
    ('subscript_simple', 'a[0]'),
    ('subscript_single_element_tuple', 'a[0,]'),
    ('subscript_trailing_comma', 'a[0, 1, 2,]'),
    ('subscript_tuple', 'a[0, 1, 2]'),
    ('subscript_whole_slice', 'a[0+1:b:c]'),
    ('try_except',
     '''
        try:
            pass
        except:
            pass
     '''),
    ('try_except_else',
     '''
        try:
            pass
        except:
            pass
        else:
            pass
     '''),
    ('try_except_else_finally',
     '''
        try:
            pass
        except:
            pass
        else:
            pass
        finally:
            pass
     '''),
    ('try_except_expr',
     '''
        try:
            pass
        except a:
            pass
     '''),
    ('try_except_expr_target',
     '''
        try:
            pass
        except a as b:
            pass
     '''),
    ('try_except_finally',
     '''
        try:
            pass
        except:
            pass
        finally:
            pass
     '''),
    ('try_finally',
     '''
        try:
            pass
        finally:
            pass
     '''),
    ('unpacking_binop', '[*([1, 2, 3] + [3, 4, 5])]'),
    ('unpacking_call', '[*b()]'),
    ('unpacking_compare', '[*(x < y)]'),
    ('unpacking_constant', '[*3]'),
    ('unpacking_dict', '[*{1: 2, 3: 4}]'),
    ('unpacking_dict_comprehension', '[*{x:y for x,y in z}]'),
    ('unpacking_ifexpr', '[*([1, 2, 3] if x else y)]'),
    ('unpacking_list', '[*[1,2,3]]'),
    ('unpacking_list_comprehension', '[*[x for x in y]]'),
    ('unpacking_namedexpr', '[*(x:=[1, 2, 3])]'),
    ('unpacking_set', '[*{1,2,3}]'),
    ('unpacking_set_comprehension', '[*{x for x in y}]'),
    ('unpacking_string', '[*"myvalue"]'),
    ('unpacking_tuple', '[*(1,2,3)]'),
    ('unpacking_unaryop', '[*(not [1, 2, 3])]'),
    ('unpacking_yield', '[*(yield 42)]'),
    ('unpacking_yieldfrom', '[*(yield from x)]'),
    ('tuple', '(1, 2, 3)'),
    ('vararg',
     '''
        def f(*a):
            pass
     '''),
    ('vararg_kwonly_args',
     '''
        def f(*a, b):
            pass
     '''),
    ('while',
     '''
        while a:
            pass
     '''),
    ('while_else',
     '''
        while a:
            pass
        else:
            pass
     '''),
    ('with',
     '''
        with a:
            pass
     '''),
    ('with_as',
     '''
        with a as b:
            pass
     '''),
    ('with_as_paren',
     '''
        with a as (b):
            pass
     '''),
    ('with_as_empty', 'with a as (): pass'),
    ('with_list_recursive',
     '''
        with a as [x, [y, z]]:
            pass
     '''),
    ('with_tuple_recursive',
     '''
        with a as ((x, y), z):
            pass
     '''),
    ('with_tuple_target',
     '''
        with a as (x, y):
            pass
     '''),
    ('yield', 'yield'),
    ('yield_expr', 'yield a'),
    ('yield_from', 'yield from a'),
]

# (test id, source) pairs that must make the parser raise SyntaxError.
FAIL_TEST_CASES = [
    ("annotation_multiple_targets", "(a, b): int = 42"),
    ("annotation_nested_tuple", "((a, b)): int"),
    ("annotation_list", "[a]: int"),
    ("annotation_lambda", "lambda: int = 42"),
    ("annotation_tuple", "(a,): int"),
    ("annotation_tuple_without_paren", "a,: int"),
    ("assignment_keyword", "a = if"),
    ("comprehension_lambda", "(a for a in lambda: b)"),
    ("comprehension_else", "(a for a in b if c else d"),
    ("del_call", "del a()"),
    ("del_call_genexp", "del a(i for i in b)"),
    ("del_subscript_call", "del a[b]()"),
    ("del_attribute_call", "del a.b()"),
    ("del_mixed_call", "del a[0].b().c.d()"),
    ("for_star_targets_call", "for a() in b: pass"),
    ("for_star_targets_subscript_call", "for a[b]() in c: pass"),
    ("for_star_targets_attribute_call", "for a.b() in c: pass"),
    ("for_star_targets_mixed_call", "for a[0].b().c.d() in e: pass"),
    ("for_star_targets_in", "for a, in in b: pass"),
    ("f-string_assignment", "f'{x = 42}'"),
    ("f-string_empty", "f'{}'"),
    ("f-string_function_def", "f'{def f(): pass}'"),
    ("f-string_lambda", "f'{lambda x: 42}'"),
    ("f-string_singe_brace", "f'{'"),
    ("f-string_single_closing_brace", "f'}'"),
    ("from_import_invalid", "from import import a"),
    ("from_import_trailing_comma", "from a import b,"),
    # This test case checks error paths involving tokens with uninitialized
    # values of col_offset and end_col_offset.
    ("invalid indentation",
     """
     def f():
         a
             a
     """),
    ("not_terminated_string", "a = 'example"),
]

# (source, expected message fragment) pairs.  The expected exception type is
# derived from the message in the test: IndentationError when it mentions
# "indent", SyntaxError otherwise.
FAIL_SPECIALIZED_MESSAGE_CASES = [
    ("f(x, y, z=1, **b, *a", "iterable argument unpacking follows keyword argument unpacking"),
    ("f(x, y=1, *z, **a, b", "positional argument follows keyword argument unpacking"),
    ("f(x, y, z=1, a=2, b", "positional argument follows keyword argument"),
    ("True = 1", "cannot assign to True"),
    ("a() = 1", "cannot assign to function call"),
    ("(a, b): int", "only single target (not tuple) can be annotated"),
    ("[a, b]: int", "only single target (not list) can be annotated"),
    ("a(): int", "illegal target for annotation"),
    ("1 += 1", "cannot assign to literal"),
    ("pass\n pass", "unexpected indent"),
    ("def f():\npass", "expected an indented block"),
]

# Valid sources whose AST matches ``ast.parse`` only when position attributes
# are ignored; the comparison with attributes is marked as an expected failure.
GOOD_BUT_FAIL_TEST_CASES = [
    ('string_concatenation_format', 'f"{hello} world" f"again {and_again}"'),
    ('string_concatenation_multiple',
     '''
        f"hello" f"{world} again" f"and_again"
     '''),
    ('f-string_multiline_comp',
     """
        f'''
        {(i for i in a
            if b)}
        '''
     """),
]

# test id -> (invalid f-string source, expected ``SyntaxError.text``).
FSTRINGS_TRACEBACKS = {
    'multiline_fstrings_same_line_with_brace': (
        """
            f'''
            {a$b}
            '''
        """,
        '(a$b)',
    ),
    'multiline_fstring_brace_on_next_line': (
        """
            f'''
            {a$b
            }'''
        """,
        '(a$b',
    ),
    'multiline_fstring_brace_on_previous_line': (
        """
            f'''
            {
            a$b}'''
        """,
        'a$b)',
    ),
}

# (test id, source) pairs parsed with ``mode='eval'``.
EXPRESSIONS_TEST_CASES = [
    ("expression_add", "1+1"),
    ("expression_add_2", "a+b"),
    ("expression_call", "f(a, b=2, **kw)"),
    ("expression_tuple", "1, 2, 3"),
    ("expression_tuple_one_value", "1,"),
]


def cleanup_source(source: Any) -> str:
    """Normalize a test source into a single string.

    Args:
        source: Either a string, which is passed through
            ``textwrap.dedent``, or an iterable of strings (for example a
            list or tuple of lines), which is joined with newlines.

    Returns:
        The normalized source text.

    Raises:
        TypeError: If *source* is neither a string nor an iterable of
            strings.
    """
    if isinstance(source, str):
        return dedent(source)
    try:
        # ``str.join`` raises TypeError both for non-iterables and for
        # iterables containing non-string items, which is exactly the set of
        # inputs that cannot be turned into source text.
        return "\n".join(source)
    except TypeError as exc:
        raise TypeError(f"Invalid type for test source: {source}") from exc


def prepare_test_cases(
    test_cases: Iterable[Tuple[str, Union[str, Iterable[str]]]]
) -> Tuple[Iterable[str], Iterable[str]]:
    """Split ``(test id, source)`` pairs into ids and cleaned-up sources.

    Args:
        test_cases: Iterable of ``(test id, source)`` pairs; each source is
            normalized with ``cleanup_source``.

    Returns:
        A ``(test_ids, test_sources)`` pair: a tuple of ids and a list of
        normalized sources, in input order.  Both are empty when
        *test_cases* is empty.
    """
    test_ids = []
    test_sources = []
    # A single explicit pass (rather than ``zip(*test_cases)``) keeps an
    # empty table from failing at unpack time.
    for test_id, source in test_cases:
        test_ids.append(test_id)
        test_sources.append(cleanup_source(source))
    return tuple(test_ids), test_sources


TEST_IDS, TEST_SOURCES = prepare_test_cases(TEST_CASES)

GOOD_BUT_FAIL_TEST_IDS, GOOD_BUT_FAIL_SOURCES = prepare_test_cases(
    GOOD_BUT_FAIL_TEST_CASES
)

FAIL_TEST_IDS, FAIL_SOURCES = prepare_test_cases(FAIL_TEST_CASES)

EXPRESSIONS_TEST_IDS, EXPRESSIONS_TEST_SOURCES = prepare_test_cases(
    EXPRESSIONS_TEST_CASES
)


class ASTGenerationTest(unittest.TestCase):
    """Compare ``peg_parser.parse_string`` results against ``ast.parse``."""

    # Show complete AST dumps when a comparison fails.
    maxDiff = None

    def _assert_same_ast(
        self, source: str, include_attributes: bool = True, **parse_kwargs: Any
    ) -> None:
        """Assert both parsers produce the same ``ast.dump`` for *source*.

        ``parse_kwargs`` (for example ``mode='eval'``) are forwarded unchanged
        to both ``peg_parser.parse_string`` and ``ast.parse``.
        """
        actual_ast = peg_parser.parse_string(source, **parse_kwargs)
        expected_ast = ast.parse(source, **parse_kwargs)
        self.assertEqual(
            ast.dump(actual_ast, include_attributes=include_attributes),
            ast.dump(expected_ast, include_attributes=include_attributes),
            f"Wrong AST generation for source: {source}",
        )

    def test_correct_ast_generation_on_source_files(self) -> None:
        # Each case runs as its own subtest so one mismatch does not hide
        # the results of the remaining cases.
        for test_id, source in zip(TEST_IDS, TEST_SOURCES):
            with self.subTest(test_id=test_id):
                self._assert_same_ast(source)

    def test_incorrect_ast_generation_on_source_files(self) -> None:
        for test_id, source in zip(FAIL_TEST_IDS, FAIL_SOURCES):
            with self.subTest(test_id=test_id):
                with self.assertRaises(
                    SyntaxError,
                    msg=f"Parsing {source} did not raise an exception",
                ):
                    peg_parser.parse_string(source)

    def test_incorrect_ast_generation_with_specialized_errors(self) -> None:
        for source, error_text in FAIL_SPECIALIZED_MESSAGE_CASES:
            # Indentation problems are reported with the more specific
            # IndentationError subclass.
            exc = IndentationError if "indent" in error_text else SyntaxError
            with self.subTest(source=source):
                with self.assertRaises(exc) as se:
                    peg_parser.parse_string(source)
                self.assertIn(
                    error_text,
                    se.exception.msg,
                    f"Actual error message does not match expexted for {source}",
                )

    @support.skip_if_new_parser("This tests nothing for now, since compile uses pegen as well")
    @unittest.expectedFailure
    def test_correct_but_known_to_fail_ast_generation_on_source_files(self) -> None:
        for test_id, source in zip(GOOD_BUT_FAIL_TEST_IDS, GOOD_BUT_FAIL_SOURCES):
            with self.subTest(test_id=test_id):
                self._assert_same_ast(source)

    def test_correct_ast_generation_without_pos_info(self) -> None:
        # Same sources as the expected-failure test above, but compared
        # without position attributes.
        for test_id, source in zip(GOOD_BUT_FAIL_TEST_IDS, GOOD_BUT_FAIL_SOURCES):
            with self.subTest(test_id=test_id):
                self._assert_same_ast(source, include_attributes=False)

    def test_fstring_parse_error_tracebacks(self) -> None:
        for test_id, (source, error_text) in FSTRINGS_TRACEBACKS.items():
            with self.subTest(test_id=test_id):
                with self.assertRaises(SyntaxError) as se:
                    peg_parser.parse_string(dedent(source))
                self.assertEqual(error_text, se.exception.text)

    def test_correct_ast_generatrion_eval(self) -> None:
        for test_id, source in zip(EXPRESSIONS_TEST_IDS, EXPRESSIONS_TEST_SOURCES):
            with self.subTest(test_id=test_id):
                self._assert_same_ast(source, mode='eval')

    def test_tokenizer_errors_are_propagated(self) -> None:
        # Nest 201 parenthesis pairs; the test expects the tokenizer's
        # "too many nested parentheses" error to surface as a SyntaxError.
        n = 201
        with self.assertRaisesRegex(SyntaxError, "too many nested parentheses"):
            peg_parser.parse_string(n*'(' + ')'*n)