summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Benjamin Peterson <benjamin@python.org> 2015-04-21 16:05:19 (GMT)
committer: Benjamin Peterson <benjamin@python.org> 2015-04-21 16:05:19 (GMT)
commit: d73aca769f1f6eebb46faa9161cbebe806db3659 (patch)
tree: d257c03a998c63a1a89b9e36bb820c43b1beb0db
parent: 6de708fd46207f6af67d4c0e8902f0d56ea4495c (diff)
download: cpython-d73aca769f1f6eebb46faa9161cbebe806db3659.zip
cpython-d73aca769f1f6eebb46faa9161cbebe806db3659.tar.gz
cpython-d73aca769f1f6eebb46faa9161cbebe806db3659.tar.bz2
do not call into python api if an exception is set (#24022)
-rw-r--r-- Lib/test/test_compile.py | 14
-rw-r--r-- Misc/NEWS | 2
-rw-r--r-- Parser/tokenizer.c | 7
3 files changed, 18 insertions, 5 deletions
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
index 6116676..cff3c9e 100644
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -1,9 +1,11 @@
import math
+import os
import unittest
import sys
import _ast
+import tempfile
import types
-from test import support
+from test import support, script_helper
class TestSpecifics(unittest.TestCase):
@@ -492,6 +494,16 @@ if 1:
self.assertInvalidSingle('f()\nxy # blah\nblah()')
self.assertInvalidSingle('x = 5 # comment\nx = 6\n')
+ def test_particularly_evil_undecodable(self):
+ # Issue 24022
+ src = b'0000\x00\n00000000000\n\x00\n\x9e\n'
+ with tempfile.TemporaryDirectory() as tmpd:
+ fn = os.path.join(tmpd, "bad.py")
+ with open(fn, "wb") as fp:
+ fp.write(src)
+ res = script_helper.run_python_until_end(fn)[0]
+ self.assertIn(b"Non-UTF-8", res.err)
+
@support.cpython_only
def test_compiler_recursion_limit(self):
# Expected limit is sys.getrecursionlimit() * the scaling factor
diff --git a/Misc/NEWS b/Misc/NEWS
index a6a3d82..183f7d1 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@ Release date: tba
Core and Builtins
-----------------
+- Issue #24022: Fix tokenizer crash when processing undecodable source code.
+
- Issue #23309: Avoid a deadlock at shutdown if a daemon thread is aborted
while it is holding a lock to a buffered I/O object, and the main thread
tries to use the same I/O object (typically stdout or stderr). A fatal
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 22accd1..5e041ea 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1301,6 +1301,8 @@ verify_identifier(struct tok_state *tok)
{
PyObject *s;
int result;
+ if (tok->decoding_erred)
+ return 0;
s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
if (s == NULL || PyUnicode_READY(s) == -1) {
if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
@@ -1469,11 +1471,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
c = tok_nextc(tok);
}
tok_backup(tok, c);
- if (nonascii &&
- !verify_identifier(tok)) {
- tok->done = E_IDENTIFIER;
+ if (nonascii && !verify_identifier(tok))
return ERRORTOKEN;
- }
*p_start = tok->start;
*p_end = tok->cur;
return NAME;