bpo-45738: Fix computation of error location for invalid continuation characters in the parser (GH-29550)

author: Pablo Galindo Salgado <Pablogsal@gmail.com> 2021-11-14 01:06:41 (GMT)
committer: GitHub <noreply@github.com> 2021-11-14 01:06:41 (GMT)
commit: 25835c518aa7446f3680b62c1fb43827e0f190d9 (patch)
tree: 42aaedbf01d1b90eeb5987c7920d52db9bed2235 /Parser
parent: f8da00ef04fdadf7cd9821e8ec4b317ecf3ed663 (diff)
download: cpython-25835c518aa7446f3680b62c1fb43827e0f190d9.zip
cpython-25835c518aa7446f3680b62c1fb43827e0f190d9.tar.gz
cpython-25835c518aa7446f3680b62c1fb43827e0f190d9.tar.bz2
2 files changed, 5 insertions, 11 deletions
diff --git a/Parser/pegen.c b/Parser/pegen.c
index b00eff3..8a3f740 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -351,14 +351,7 @@ tokenizer_error(Parser *p)
             msg = "too many levels of indentation";
             break;
         case E_LINECONT: {
-            char* loc = strrchr(p->tok->buf, '\n');
-            const char* last_char = p->tok->cur - 1;
-            if (loc != NULL && loc != last_char) {
-                col_offset = p->tok->cur - loc - 1;
-                p->tok->buf = loc;
-            } else {
-                col_offset = last_char - p->tok->buf - 1;
-            }
+            col_offset = p->tok->cur - p->tok->buf - 1;
             msg = "unexpected character after line continuation character";
             break;
         }
@@ -366,7 +359,9 @@ tokenizer_error(Parser *p)
             msg = "unknown parsing error";
     }
 
-    RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno, col_offset, p->tok->lineno, -1, msg);
+    RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno,
+                               col_offset >= 0 ? col_offset : 0,
+                               p->tok->lineno, -1, msg);
     return -1;
 }
 
@@ -497,7 +492,7 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
            does not physically exist */
         assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);
 
-        if (p->tok->lineno <= lineno) {
+        if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
             Py_ssize_t size = p->tok->inp - p->tok->buf;
             error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
         }
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 8a19458..f281c42 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1970,7 +1970,6 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
         c = tok_nextc(tok);
         if (c != '\n') {
             tok->done = E_LINECONT;
-            tok->cur = tok->inp;
             return ERRORTOKEN;
         }
         c = tok_nextc(tok);
author	Pablo Galindo Salgado <Pablogsal@gmail.com>	2021-11-14 01:06:41 (GMT)
committer	GitHub <noreply@github.com>	2021-11-14 01:06:41 (GMT)
commit	25835c518aa7446f3680b62c1fb43827e0f190d9 (patch)
tree	42aaedbf01d1b90eeb5987c7920d52db9bed2235 /Parser
parent	f8da00ef04fdadf7cd9821e8ec4b317ecf3ed663 (diff)
download	cpython-25835c518aa7446f3680b62c1fb43827e0f190d9.zip cpython-25835c518aa7446f3680b62c1fb43827e0f190d9.tar.gz cpython-25835c518aa7446f3680b62c1fb43827e0f190d9.tar.bz2