From 142fcb40b6e460fa9b4a89fe9846b1ce4176354e Mon Sep 17 00:00:00 2001
From: Pablo Galindo Salgado
Date: Sun, 14 Nov 2021 01:47:27 +0000
Subject: bpo-45738: Fix computation of error location for invalid continuation characters in the parser (GH-29550) (GH-29552)

(cherry picked from commit 25835c518aa7446f3680b62c1fb43827e0f190d9)
---
 Lib/test/test_syntax.py | 8 +++++++-
 .../2021-11-14-00-14-45.bpo-45738.e0cgKd.rst | 2 ++
 Parser/pegen/pegen.c | 12 +++---------
 Parser/tokenizer.c | 1 -
 4 files changed, 12 insertions(+), 11 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-11-14-00-14-45.bpo-45738.e0cgKd.rst

diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py
index eaa94ea..46f27d0 100644
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -953,7 +953,13 @@ def func2():
     def test_invalid_line_continuation_error_position(self):
         self._check_error(r"a = 3 \ 4",
                           "unexpected character after line continuation character",
-                          lineno=1, offset=(10 if support.use_old_parser() else 9))
+                          lineno=1, offset=8)
+        self._check_error('1,\\#\n2',
+                          "unexpected character after line continuation character",
+                          lineno=1, offset=4)
+        self._check_error('\nfgdfgf\n1,\\#\n2\n',
+                          "unexpected character after line continuation character",
+                          lineno=3, offset=4)
 
     def test_invalid_line_continuation_left_recursive(self):
         # Check bpo-42218: SyntaxErrors following left-recursive rules
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-11-14-00-14-45.bpo-45738.e0cgKd.rst b/Misc/NEWS.d/next/Core and Builtins/2021-11-14-00-14-45.bpo-45738.e0cgKd.rst
new file mode 100644
index 0000000..b238034
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-11-14-00-14-45.bpo-45738.e0cgKd.rst
@@ -0,0 +1,2 @@
+Fix computation of error location for invalid continuation characters in the
+parser. Patch by Pablo Galindo.
diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c
index 98de05c..efcf9ac 100644
--- a/Parser/pegen/pegen.c
+++ b/Parser/pegen/pegen.c
@@ -348,14 +348,7 @@ tokenizer_error(Parser *p)
             msg = "too many levels of indentation";
             break;
         case E_LINECONT: {
-            char* loc = strrchr(p->tok->buf, '\n');
-            const char* last_char = p->tok->cur - 1;
-            if (loc != NULL && loc != last_char) {
-                col_offset = p->tok->cur - loc - 1;
-                p->tok->buf = loc;
-            } else {
-                col_offset = last_char - p->tok->buf - 1;
-            }
+            col_offset = p->tok->cur - p->tok->buf - 1;
             msg = "unexpected character after line continuation character";
             break;
         }
@@ -363,7 +356,8 @@ tokenizer_error(Parser *p)
             msg = "unknown parsing error";
     }
 
-    RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno, col_offset, msg);
+    RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno,
+                               col_offset >= 0 ? col_offset : 0, msg);
     return -1;
 }
 
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 1a57db9..41bfdb8 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1752,7 +1752,6 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
         c = tok_nextc(tok);
         if (c != '\n') {
             tok->done = E_LINECONT;
-            tok->cur = tok->inp;
             return ERRORTOKEN;
         }
         c = tok_nextc(tok);
-- 
cgit v0.12