author     Serhiy Storchaka <storchaka@gmail.com>  2015-11-14 13:10:35 (GMT)
committer  Serhiy Storchaka <storchaka@gmail.com>  2015-11-14 13:10:35 (GMT)
commit     0d441119f5eb6437f6145e89e0963f75494d8a3f (patch)
tree       d8504a540716a9399777ba8f4a65ac7c1ef25ddb
parent     806fb2540520839812887140fa1d4ba2e60ecc5e (diff)
Issue #25388: Fixed tokenizer crash when processing undecodable source code
with a null byte.
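
A minimal standalone repro sketch (not part of the commit; the script is illustrative). It writes the same byte sequence the new test uses, a null byte and an undecodable byte on separate comment lines, and runs it in a child interpreter. Before this fix the tokenizer could crash on such input; a fixed interpreter instead exits with a "Non-UTF-8" decoding error on stderr:

    import os
    import subprocess
    import sys
    import tempfile

    # Same input as the new test: a null byte on one comment line,
    # an undecodable byte (\xfd) on the next.
    src = b"#\x00\n#\xfd\n"

    with tempfile.TemporaryDirectory() as tmpd:
        fn = os.path.join(tmpd, "bad.py")
        with open(fn, "wb") as fp:
            fp.write(src)
        # Run the file in a subprocess so a tokenizer crash cannot
        # take down this script.
        proc = subprocess.run([sys.executable, fn],
                              stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        assert b"Non-UTF-8" in proc.stderr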
-rw-r--r--  Lib/test/test_compile.py  10
-rw-r--r--  Misc/NEWS                  3
-rw-r--r--  Parser/tokenizer.c        14
3 files changed, 19 insertions, 8 deletions
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
index 2affcc9..ee28ca9 100644
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -504,6 +504,16 @@ if 1:
             res = script_helper.run_python_until_end(fn)[0]
             self.assertIn(b"Non-UTF-8", res.err)

+    def test_yet_more_evil_still_undecodable(self):
+        # Issue #25388
+        src = b"#\x00\n#\xfd\n"
+        with tempfile.TemporaryDirectory() as tmpd:
+            fn = os.path.join(tmpd, "bad.py")
+            with open(fn, "wb") as fp:
+                fp.write(src)
+            res = script_helper.run_python_until_end(fn)[0]
+            self.assertIn(b"Non-UTF-8", res.err)
+
     @support.cpython_only
     def test_compiler_recursion_limit(self):
         # Expected limit is sys.getrecursionlimit() * the scaling factor
diff --git a/Misc/NEWS b/Misc/NEWS
index ec2b4af..1390642 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ Release date: tba
 Core and Builtins
 -----------------

+- Issue #25388: Fixed tokenizer crash when processing undecodable source code
+  with a null byte.
+
 - Issue #22995: Default implementation of __reduce__ and __reduce_ex__ now
   rejects builtin types with not defined __new__.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 5e041ea..1540d26 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -187,7 +187,8 @@ error_ret(struct tok_state *tok) /* XXX */
     tok->decoding_erred = 1;
     if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
         PyMem_FREE(tok->buf);
-    tok->buf = NULL;
+    tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
+    tok->done = E_DECODE;
     return NULL;                /* as if it were EOF */
 }
@@ -943,11 +944,6 @@ tok_nextc(struct tok_state *tok)
             }
             buflen = PyBytes_GET_SIZE(u);
             buf = PyBytes_AS_STRING(u);
-            if (!buf) {
-                Py_DECREF(u);
-                tok->done = E_DECODE;
-                return EOF;
-            }
             newtok = PyMem_MALLOC(buflen+1);
             strcpy(newtok, buf);
             Py_DECREF(u);
@@ -989,7 +985,6 @@ tok_nextc(struct tok_state *tok)
             if (tok->buf != NULL)
                 PyMem_FREE(tok->buf);
             tok->buf = newtok;
-            tok->line_start = tok->buf;
             tok->cur = tok->buf;
             tok->line_start = tok->buf;
             tok->inp = strchr(tok->buf, '\0');
@@ -1012,7 +1007,8 @@ tok_nextc(struct tok_state *tok)
                 }
                 if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
                           tok) == NULL) {
-                    tok->done = E_EOF;
+                    if (!tok->decoding_erred)
+                        tok->done = E_EOF;
                     done = 1;
                 }
                 else {
@@ -1046,6 +1042,8 @@ tok_nextc(struct tok_state *tok)
                     return EOF;
                 }
                 tok->buf = newbuf;
+                tok->cur = tok->buf + cur;
+                tok->line_start = tok->cur;
                 tok->inp = tok->buf + curvalid;
                 tok->end = tok->buf + newsize;
                 tok->start = curstart < 0 ? NULL :