bpo-45494: Fix parser crash when reporting errors involving invalid continuation characters (GH-28993)

There are two errors that this commit fixes:

* The parser was not correctly computing the offset and the string source for E_LINECONT errors due to the incorrect usage of strtok().
* The parser was not correctly unwinding the call stack when a tokenizer exception happened in rules involving optionals ('?', [...]), because we always make them return valid results by using the comma operator. We need to check that no error has occurred before continuing.
author: Pablo Galindo Salgado <Pablogsal@gmail.com> 2021-10-19 19:24:12 (GMT)
committer: GitHub <noreply@github.com> 2021-10-19 19:24:12 (GMT)
commit: a106343f632a99c8ebb0136fa140cf189b4a6a57 (patch)
tree: ff125fecd7da936de3a41375ecdcf5e30ec09b4a /Parser/pegen.c
parent: bda69abe849b37467350d3750ae24d356230c940 (diff)
download: cpython-a106343f632a99c8ebb0136fa140cf189b4a6a57.zip
cpython-a106343f632a99c8ebb0136fa140cf189b4a6a57.tar.gz
cpython-a106343f632a99c8ebb0136fa140cf189b4a6a57.tar.bz2
1 files changed, 10 insertions, 2 deletions
diff --git a/Parser/pegen.c b/Parser/pegen.c
index e4d2692..b00eff3 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -350,10 +350,18 @@ tokenizer_error(Parser *p)
             errtype = PyExc_IndentationError;
             msg = "too many levels of indentation";
             break;
-        case E_LINECONT:
-            col_offset = strlen(strtok(p->tok->buf, "\n")) - 1;
+        case E_LINECONT: {
+            char* loc = strrchr(p->tok->buf, '\n');
+            const char* last_char = p->tok->cur - 1;
+            if (loc != NULL && loc != last_char) {
+                col_offset = p->tok->cur - loc - 1;
+                p->tok->buf = loc;
+            } else {
+                col_offset = last_char - p->tok->buf - 1;
+            }
             msg = "unexpected character after line continuation character";
             break;
+        }
         default:
             msg = "unknown parsing error";
     }
author	Pablo Galindo Salgado <Pablogsal@gmail.com>	2021-10-19 19:24:12 (GMT)
committer	GitHub <noreply@github.com>	2021-10-19 19:24:12 (GMT)
commit	a106343f632a99c8ebb0136fa140cf189b4a6a57 (patch)
tree	ff125fecd7da936de3a41375ecdcf5e30ec09b4a /Parser/pegen.c
parent	bda69abe849b37467350d3750ae24d356230c940 (diff)
download	cpython-a106343f632a99c8ebb0136fa140cf189b4a6a57.zip cpython-a106343f632a99c8ebb0136fa140cf189b4a6a57.tar.gz cpython-a106343f632a99c8ebb0136fa140cf189b4a6a57.tar.bz2