diff options
author | Lysandros Nikolaou <lisandrosnik@gmail.com> | 2023-10-16 14:42:49 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-16 14:42:49 (GMT) |
commit | a1ac5590e0f8fe008e5562d22edab65d0c1c5507 (patch) | |
tree | 17fb8b1657dcd80f0f6a2099a6ed4b665c72c505 /Parser | |
parent | b3c9faf056e7d642785a8cfd53d1184b37a74a69 (diff) | |
download | cpython-a1ac5590e0f8fe008e5562d22edab65d0c1c5507.zip cpython-a1ac5590e0f8fe008e5562d22edab65d0c1c5507.tar.gz cpython-a1ac5590e0f8fe008e5562d22edab65d0c1c5507.tar.bz2 |
gh-107450: Check for overflow in the tokenizer and fix overflow test (#110832)
Co-authored-by: Filipe Laíns <lains@riseup.net>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/lexer/lexer.c | 4 | ||||
-rw-r--r-- | Parser/pegen_errors.c | 5 |
2 files changed, 9 insertions, 0 deletions
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index c7134ab..1a01bb0 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -59,6 +59,10 @@ tok_nextc(struct tok_state *tok) int rc; for (;;) { if (tok->cur != tok->inp) { + if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) { + tok->done = E_COLUMNOVERFLOW; + return EOF; + } tok->col_offset++; return Py_CHARMASK(*tok->cur++); /* Fast path */ } diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c index 15e99e2..057bf55 100644 --- a/Parser/pegen_errors.c +++ b/Parser/pegen_errors.c @@ -68,6 +68,7 @@ _Pypegen_tokenizer_error(Parser *p) const char *msg = NULL; PyObject* errtype = PyExc_SyntaxError; Py_ssize_t col_offset = -1; + p->error_indicator = 1; switch (p->tok->done) { case E_TOKEN: msg = "invalid token"; @@ -103,6 +104,10 @@ _Pypegen_tokenizer_error(Parser *p) msg = "unexpected character after line continuation character"; break; } + case E_COLUMNOVERFLOW: + PyErr_SetString(PyExc_OverflowError, + "Parser column offset overflow - source line is too big"); + return -1; default: msg = "unknown parsing error"; } |