summaryrefslogtreecommitdiffstats
path: root/Parser/pegen.c
diff options
context:
space:
mode:
author: Pablo Galindo Salgado <Pablogsal@gmail.com> 2021-10-19 19:24:12 (GMT)
committer: GitHub <noreply@github.com> 2021-10-19 19:24:12 (GMT)
commit: a106343f632a99c8ebb0136fa140cf189b4a6a57 (patch)
tree: ff125fecd7da936de3a41375ecdcf5e30ec09b4a /Parser/pegen.c
parent: bda69abe849b37467350d3750ae24d356230c940 (diff)
downloadcpython-a106343f632a99c8ebb0136fa140cf189b4a6a57.zip
cpython-a106343f632a99c8ebb0136fa140cf189b4a6a57.tar.gz
cpython-a106343f632a99c8ebb0136fa140cf189b4a6a57.tar.bz2
bpo-45494: Fix parser crash when reporting errors involving invalid continuation characters (GH-28993)
This commit fixes two errors:

* The parser did not correctly compute the offset and the source string for E_LINECONT errors because of an incorrect use of strtok().
* The parser did not correctly unwind the call stack when a tokenizer exception happened in rules involving optionals ('?', [...]), because those rules always return a valid result by using the comma operator. The parser must first check that no error has occurred before continuing.
Diffstat (limited to 'Parser/pegen.c')
-rw-r--r--Parser/pegen.c12
1 files changed, 10 insertions, 2 deletions
diff --git a/Parser/pegen.c b/Parser/pegen.c
index e4d2692..b00eff3 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -350,10 +350,18 @@ tokenizer_error(Parser *p)
errtype = PyExc_IndentationError;
msg = "too many levels of indentation";
break;
- case E_LINECONT:
- col_offset = strlen(strtok(p->tok->buf, "\n")) - 1;
+ case E_LINECONT: {
+ char* loc = strrchr(p->tok->buf, '\n');
+ const char* last_char = p->tok->cur - 1;
+ if (loc != NULL && loc != last_char) {
+ col_offset = p->tok->cur - loc - 1;
+ p->tok->buf = loc;
+ } else {
+ col_offset = last_char - p->tok->buf - 1;
+ }
msg = "unexpected character after line continuation character";
break;
+ }
default:
msg = "unknown parsing error";
}