summaryrefslogtreecommitdiffstats
path: root/Parser
diff options
context:
space:
mode:
author Lysandros Nikolaou <lisandrosnik@gmail.com> 2023-10-16 14:42:49 (GMT)
committer GitHub <noreply@github.com> 2023-10-16 14:42:49 (GMT)
commit a1ac5590e0f8fe008e5562d22edab65d0c1c5507 (patch)
tree 17fb8b1657dcd80f0f6a2099a6ed4b665c72c505 /Parser
parent b3c9faf056e7d642785a8cfd53d1184b37a74a69 (diff)
downloadcpython-a1ac5590e0f8fe008e5562d22edab65d0c1c5507.zip
cpython-a1ac5590e0f8fe008e5562d22edab65d0c1c5507.tar.gz
cpython-a1ac5590e0f8fe008e5562d22edab65d0c1c5507.tar.bz2
gh-107450: Check for overflow in the tokenizer and fix overflow test (#110832)
Co-authored-by: Filipe Laíns <lains@riseup.net> Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Diffstat (limited to 'Parser')
-rw-r--r-- Parser/lexer/lexer.c | 4
-rw-r--r-- Parser/pegen_errors.c | 5
2 files changed, 9 insertions, 0 deletions
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index c7134ab..1a01bb0 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -59,6 +59,10 @@ tok_nextc(struct tok_state *tok)
int rc;
for (;;) {
if (tok->cur != tok->inp) {
+ if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
+ tok->done = E_COLUMNOVERFLOW;
+ return EOF;
+ }
tok->col_offset++;
return Py_CHARMASK(*tok->cur++); /* Fast path */
}
diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c
index 15e99e2..057bf55 100644
--- a/Parser/pegen_errors.c
+++ b/Parser/pegen_errors.c
@@ -68,6 +68,7 @@ _Pypegen_tokenizer_error(Parser *p)
const char *msg = NULL;
PyObject* errtype = PyExc_SyntaxError;
Py_ssize_t col_offset = -1;
+ p->error_indicator = 1;
switch (p->tok->done) {
case E_TOKEN:
msg = "invalid token";
@@ -103,6 +104,10 @@ _Pypegen_tokenizer_error(Parser *p)
msg = "unexpected character after line continuation character";
break;
}
+ case E_COLUMNOVERFLOW:
+ PyErr_SetString(PyExc_OverflowError,
+ "Parser column offset overflow - source line is too big");
+ return -1;
default:
msg = "unknown parsing error";
}