summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPablo Galindo Salgado <Pablogsal@gmail.com>2022-11-20 22:30:15 (GMT)
committerGitHub <noreply@github.com>2022-11-20 22:30:15 (GMT)
commitad47c7d926a9f842a31247f4a15b5bb9f1566749 (patch)
tree236bd290258b5a7f65201737daf4713edb95c7e4
parent88b101ff52010f795b34e3afc04c0e934d662d82 (diff)
downloadcpython-ad47c7d926a9f842a31247f4a15b5bb9f1566749.zip
cpython-ad47c7d926a9f842a31247f4a15b5bb9f1566749.tar.gz
cpython-ad47c7d926a9f842a31247f4a15b5bb9f1566749.tar.bz2
[3.10] gh-99581: Fix a buffer overflow in the tokenizer when copying lines that fill the available buffer (GH-99605). (#99630)
-rw-r--r--Lib/test/test_tokenize.py16
-rw-r--r--Misc/NEWS.d/next/Core and Builtins/2022-11-19-22-27-52.gh-issue-99581.yKYPbf.rst3
-rw-r--r--Parser/tokenizer.c7
3 files changed, 25 insertions, 1 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 127f0a1..c55dff6 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -10,6 +10,8 @@ from textwrap import dedent
from unittest import TestCase, mock
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
INVALID_UNDERSCORE_LITERALS)
+from test.support import os_helper
+from test.support.script_helper import run_test_script, make_script
import os
import token
@@ -1654,5 +1656,19 @@ class TestRoundtrip(TestCase):
self.check_roundtrip(code)
+class CTokenizerBufferTests(unittest.TestCase):
+ def test_newline_at_the_end_of_buffer(self):
+ # See issue 99581: Make sure that if we need to add a new line at the
+ # end of the buffer, we have enough space in the buffer, especially when
+ # the current line is as long as the buffer space available.
+ test_script = f"""\
+ #coding: latin-1
+ #{"a"*10000}
+ #{"a"*10002}"""
+ with os_helper.temp_dir() as temp_dir:
+ file_name = make_script(temp_dir, 'foo', test_script)
+ run_test_script(file_name)
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-11-19-22-27-52.gh-issue-99581.yKYPbf.rst b/Misc/NEWS.d/next/Core and Builtins/2022-11-19-22-27-52.gh-issue-99581.yKYPbf.rst
new file mode 100644
index 0000000..8071fd1
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-11-19-22-27-52.gh-issue-99581.yKYPbf.rst
@@ -0,0 +1,3 @@
+Fixed a bug that was causing a buffer overflow if the tokenizer copies a
+line missing the newline character from a file that is as long as the
+available tokenizer buffer. Patch by Pablo Galindo.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 0bbf1b1..13b666c 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -419,7 +419,11 @@ tok_readline_recode(struct tok_state *tok) {
error_ret(tok);
goto error;
}
- if (!tok_reserve_buf(tok, buflen + 1)) {
+ // Make room for the null terminator *and* potentially
+ // an extra newline character that we may need to artificially
+ // add.
+ size_t buffer_size = buflen + 2;
+ if (!tok_reserve_buf(tok, buffer_size)) {
goto error;
}
memcpy(tok->inp, buf, buflen);
@@ -973,6 +977,7 @@ tok_underflow_file(struct tok_state *tok) {
return 0;
}
if (tok->inp[-1] != '\n') {
+ assert(tok->inp + 1 < tok->end);
/* Last line does not end in \n, fake one */
*tok->inp++ = '\n';
*tok->inp = '\0';