From ddcd57e3ea75ab0ad370bbaaa6b76338edbca395 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Sat, 31 Oct 2020 00:40:42 +0000 Subject: [3.9] bpo-42214: Fix check for NOTEQUAL token in the PEG parser for the barry_as_flufl rule (GH-23048) (GH-23051) (cherry picked from commit 06f8c3328dcd81c84d1ee2b3a57b5381dcb38482) Co-authored-by: Pablo Galindo --- Grammar/python.gram | 2 +- Lib/test/test_syntax.py | 17 +++++++++++++++++ .../2020-10-30-22-16-30.bpo-42214.lXskM_.rst | 2 ++ Parser/pegen/parse.c | 2 +- Parser/pegen/pegen.c | 3 +-- Parser/pegen/pegen.h | 2 +- 6 files changed, 23 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2020-10-30-22-16-30.bpo-42214.lXskM_.rst diff --git a/Grammar/python.gram b/Grammar/python.gram index b709d3d..60eeb36 100644 --- a/Grammar/python.gram +++ b/Grammar/python.gram @@ -413,7 +413,7 @@ compare_op_bitwise_or_pair[CmpopExprPair*]: | is_bitwise_or eq_bitwise_or[CmpopExprPair*]: '==' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Eq, a) } noteq_bitwise_or[CmpopExprPair*]: - | (tok='!=' {_PyPegen_check_barry_as_flufl(p) ? NULL : tok}) a=bitwise_or {_PyPegen_cmpop_expr_pair(p, NotEq, a) } + | (tok='!=' { _PyPegen_check_barry_as_flufl(p, tok) ? NULL : tok}) a=bitwise_or {_PyPegen_cmpop_expr_pair(p, NotEq, a) } lte_bitwise_or[CmpopExprPair*]: '<=' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, LtE, a) } lt_bitwise_or[CmpopExprPair*]: '<' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Lt, a) } gte_bitwise_or[CmpopExprPair*]: '>=' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, GtE, a) } diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index b0527e6..a95992d 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -947,6 +947,23 @@ pass self.fail("Empty line after a line continuation character is valid.") + def test_barry_as_flufl_with_syntax_errors(self): + # The "barry_as_flufl" rule can produce some "bugs-at-a-distance" if + # is reading the wrong token in the presence of syntax errors later + # in the file. See bpo-42214 for more information. + code = """ +def func1(): + if a != b: + raise ValueError + +def func2(): + try + return 1 + finally: + pass +""" + self._check_error(code, "invalid syntax") + def test_main(): support.run_unittest(SyntaxTestCase) from test import test_syntax diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-10-30-22-16-30.bpo-42214.lXskM_.rst b/Misc/NEWS.d/next/Core and Builtins/2020-10-30-22-16-30.bpo-42214.lXskM_.rst new file mode 100644 index 0000000..3f85bbe --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-10-30-22-16-30.bpo-42214.lXskM_.rst @@ -0,0 +1,2 @@ +Fixed a possible crash in the PEG parser when checking for the '!=' token in +the ``barry_as_flufl`` rule. Patch by Pablo Galindo. diff --git a/Parser/pegen/parse.c b/Parser/pegen/parse.c index 48a2443..bae9463 100644 --- a/Parser/pegen/parse.c +++ b/Parser/pegen/parse.c @@ -21307,7 +21307,7 @@ _tmp_93_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ _tmp_93[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!='")); - _res = _PyPegen_check_barry_as_flufl ( p ) ? NULL : tok; + _res = _PyPegen_check_barry_as_flufl ( p , tok ) ? NULL : tok; if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c index 78891af..4e742a5 100644 --- a/Parser/pegen/pegen.c +++ b/Parser/pegen/pegen.c @@ -62,8 +62,7 @@ init_normalization(Parser *p) /* Checks if the NOTEQUAL token is valid given the current parser flags 0 indicates success and nonzero indicates failure (an exception may be set) */ int -_PyPegen_check_barry_as_flufl(Parser *p) { - Token *t = p->tokens[p->fill - 1]; +_PyPegen_check_barry_as_flufl(Parser *p, Token* t) { assert(t->bytes != NULL); assert(t->type == NOTEQUAL); diff --git a/Parser/pegen/pegen.h b/Parser/pegen/pegen.h index 2fea84f..a2f524a 100644 --- a/Parser/pegen/pegen.h +++ b/Parser/pegen/pegen.h @@ -263,7 +263,7 @@ expr_ty _PyPegen_collect_call_seqs(Parser *, asdl_seq *, asdl_seq *, int end_col_offset, PyArena *arena); expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *); asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *); -int _PyPegen_check_barry_as_flufl(Parser *); +int _PyPegen_check_barry_as_flufl(Parser *, Token *); mod_ty _PyPegen_make_module(Parser *, asdl_seq *); // Error reporting helpers -- cgit v0.12