From 5d7d26c403d86e9525820d872eb3e331dbc31750 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 14 Nov 2015 15:14:29 +0200 Subject: Issue #25388: Fixed tokenizer hang when processing undecodable source code with a null byte. --- Lib/test/test_compile.py | 16 ++++++++++++++++ Misc/NEWS | 3 +++ Parser/tokenizer.c | 9 ++++++--- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index cfc6389..c166ff1 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -3,6 +3,9 @@ import unittest import sys import _ast from test import test_support +from test import script_helper +import os +import tempfile import textwrap class TestSpecifics(unittest.TestCase): @@ -555,6 +558,19 @@ if 1: ast.body = [_ast.BoolOp()] self.assertRaises(TypeError, compile, ast, '', 'exec') + def test_yet_more_evil_still_undecodable(self): + # Issue #25388 + src = b"#\x00\n#\xfd\n" + tmpd = tempfile.mkdtemp() + try: + fn = os.path.join(tmpd, "bad.py") + with open(fn, "wb") as fp: + fp.write(src) + rc, out, err = script_helper.assert_python_failure(fn) + finally: + test_support.rmtree(tmpd) + self.assertIn(b"Non-ASCII", err) + class TestStackSize(unittest.TestCase): # These tests check that the computed stack size for a code object diff --git a/Misc/NEWS b/Misc/NEWS index 43e0418..5d30b1a 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ What's New in Python 2.7.11? Core and Builtins ----------------- +- Issue #25388: Fixed tokenizer hang when processing undecodable source code + with a null byte. + - Issue #22995: Default implementation of __reduce__ and __reduce_ex__ now rejects builtin types with not defined __new__. diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 109c0ee..7e4a300 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -169,7 +169,8 @@ error_ret(struct tok_state *tok) /* XXX */ tok->decoding_erred = 1; if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */ PyMem_FREE(tok->buf); - tok->buf = NULL; + tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL; + tok->done = E_DECODE; return NULL; /* as if it were EOF */ } @@ -921,7 +922,6 @@ tok_nextc(register struct tok_state *tok) if (tok->buf != NULL) PyMem_FREE(tok->buf); tok->buf = newtok; - tok->line_start = tok->buf; tok->cur = tok->buf; tok->line_start = tok->buf; tok->inp = strchr(tok->buf, '\0'); @@ -944,7 +944,8 @@ tok_nextc(register struct tok_state *tok) } if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf), tok) == NULL) { - tok->done = E_EOF; + if (!tok->decoding_erred) + tok->done = E_EOF; done = 1; } else { @@ -978,6 +979,8 @@ tok_nextc(register struct tok_state *tok) return EOF; } tok->buf = newbuf; + tok->cur = tok->buf + cur; + tok->line_start = tok->cur; tok->inp = tok->buf + curvalid; tok->end = tok->buf + newsize; tok->start = curstart < 0 ? NULL : -- cgit v0.12