diff options
-rw-r--r-- | Lib/test/test_compile.py | 14 | ||||
-rw-r--r-- | Misc/NEWS | 2 | ||||
-rw-r--r-- | Parser/tokenizer.c | 7 |
3 files changed, 18 insertions, 5 deletions
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
index 3d33bb5..41a92ff 100644
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -1,9 +1,11 @@
 import math
+import os
 import unittest
 import sys
 import _ast
+import tempfile
 import types
-from test import support
+from test import support, script_helper
 
 class TestSpecifics(unittest.TestCase):
 
@@ -492,6 +494,16 @@ if 1:
         self.assertInvalidSingle('f()\nxy # blah\nblah()')
         self.assertInvalidSingle('x = 5 # comment\nx = 6\n')
 
+    def test_particularly_evil_undecodable(self):
+        # Issue 24022
+        src = b'0000\x00\n00000000000\n\x00\n\x9e\n'
+        with tempfile.TemporaryDirectory() as tmpd:
+            fn = os.path.join(tmpd, "bad.py")
+            with open(fn, "wb") as fp:
+                fp.write(src)
+            res = script_helper.run_python_until_end(fn)[0]
+        self.assertIn(b"Non-UTF-8", res.err)
+
     @support.cpython_only
     def test_compiler_recursion_limit(self):
         # Expected limit is sys.getrecursionlimit() * the scaling factor
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@ Release date: 2015-04-24
 Core and Builtins
 -----------------
 
+- Issue #24022: Fix tokenizer crash when processing undecodable source code.
+
 Library
 -------
 
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index ef7b19f..ac413a8 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1307,6 +1307,8 @@ verify_identifier(struct tok_state *tok)
 {
     PyObject *s;
     int result;
+    if (tok->decoding_erred)
+        return 0;
     s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
     if (s == NULL || PyUnicode_READY(s) == -1) {
         if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
@@ -1475,11 +1477,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
             c = tok_nextc(tok);
         }
         tok_backup(tok, c);
-        if (nonascii &&
-            !verify_identifier(tok)) {
-            tok->done = E_IDENTIFIER;
+        if (nonascii && !verify_identifier(tok))
             return ERRORTOKEN;
-        }
         *p_start = tok->start;
         *p_end = tok->cur;
         return NAME;