diff options
author | Benjamin Peterson <benjamin@python.org> | 2015-04-21 16:05:19 (GMT) |
---|---|---|
committer | Benjamin Peterson <benjamin@python.org> | 2015-04-21 16:05:19 (GMT) |
commit | d73aca769f1f6eebb46faa9161cbebe806db3659 (patch) | |
tree | d257c03a998c63a1a89b9e36bb820c43b1beb0db | |
parent | 6de708fd46207f6af67d4c0e8902f0d56ea4495c (diff) | |
download | cpython-d73aca769f1f6eebb46faa9161cbebe806db3659.zip cpython-d73aca769f1f6eebb46faa9161cbebe806db3659.tar.gz cpython-d73aca769f1f6eebb46faa9161cbebe806db3659.tar.bz2 |
do not call into python api if an exception is set (#24022)
-rw-r--r-- | Lib/test/test_compile.py | 14 | ||||
-rw-r--r-- | Misc/NEWS | 2 | ||||
-rw-r--r-- | Parser/tokenizer.c | 7 |
3 files changed, 18 insertions, 5 deletions
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
index 6116676..cff3c9e 100644
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -1,9 +1,11 @@
 import math
+import os
 import unittest
 import sys
 import _ast
+import tempfile
 import types
-from test import support
+from test import support, script_helper
 
 class TestSpecifics(unittest.TestCase):
 
@@ -492,6 +494,16 @@ if 1:
         self.assertInvalidSingle('f()\nxy # blah\nblah()')
         self.assertInvalidSingle('x = 5 # comment\nx = 6\n')
 
+    def test_particularly_evil_undecodable(self):
+        # Issue 24022
+        src = b'0000\x00\n00000000000\n\x00\n\x9e\n'
+        with tempfile.TemporaryDirectory() as tmpd:
+            fn = os.path.join(tmpd, "bad.py")
+            with open(fn, "wb") as fp:
+                fp.write(src)
+            res = script_helper.run_python_until_end(fn)[0]
+        self.assertIn(b"Non-UTF-8", res.err)
+
     @support.cpython_only
     def test_compiler_recursion_limit(self):
         # Expected limit is sys.getrecursionlimit() * the scaling factor
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@ Release date: tba
 Core and Builtins
 -----------------
 
+- Issue #24022: Fix tokenizer crash when processing undecodable source code.
+
 - Issue #23309: Avoid a deadlock at shutdown if a daemon thread is aborted
   while it is holding a lock to a buffered I/O object, and the main thread
   tries to use the same I/O object (typically stdout or stderr).  A fatal
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 22accd1..5e041ea 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1301,6 +1301,8 @@ verify_identifier(struct tok_state *tok)
 {
     PyObject *s;
     int result;
+    if (tok->decoding_erred)
+        return 0;
     s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
     if (s == NULL || PyUnicode_READY(s) == -1) {
         if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
@@ -1469,11 +1471,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
             c = tok_nextc(tok);
         }
         tok_backup(tok, c);
-        if (nonascii &&
-            !verify_identifier(tok)) {
-            tok->done = E_IDENTIFIER;
+        if (nonascii && !verify_identifier(tok))
             return ERRORTOKEN;
-        }
         *p_start = tok->start;
         *p_end = tok->cur;
         return NAME;