diff options
author | Pablo Galindo Salgado <Pablogsal@gmail.com> | 2022-11-20 22:30:15 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-20 22:30:15 (GMT) |
commit | ad47c7d926a9f842a31247f4a15b5bb9f1566749 (patch) | |
tree | 236bd290258b5a7f65201737daf4713edb95c7e4 | |
parent | 88b101ff52010f795b34e3afc04c0e934d662d82 (diff) | |
download | cpython-ad47c7d926a9f842a31247f4a15b5bb9f1566749.zip cpython-ad47c7d926a9f842a31247f4a15b5bb9f1566749.tar.gz cpython-ad47c7d926a9f842a31247f4a15b5bb9f1566749.tar.bz2 |
[3.10] gh-99581: Fix a buffer overflow in the tokenizer when copying lines that fill the available buffer (GH-99605). (#99630)
-rw-r--r-- | Lib/test/test_tokenize.py | 16 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2022-11-19-22-27-52.gh-issue-99581.yKYPbf.rst | 3 | ||||
-rw-r--r-- | Parser/tokenizer.c | 7 |
3 files changed, 25 insertions, 1 deletion
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 127f0a1..c55dff6 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -10,6 +10,8 @@ from textwrap import dedent from unittest import TestCase, mock from test.test_grammar import (VALID_UNDERSCORE_LITERALS, INVALID_UNDERSCORE_LITERALS) +from test.support import os_helper +from test.support.script_helper import run_test_script, make_script import os import token @@ -1654,5 +1656,19 @@ class TestRoundtrip(TestCase): self.check_roundtrip(code) +class CTokenizerBufferTests(unittest.TestCase): + def test_newline_at_the_end_of_buffer(self): + # See issue 99581: Make sure that if we need to add a new line at the + # end of the buffer, we have enough space in the buffer, especially when + # the current line is as long as the buffer space available. + test_script = f"""\ + #coding: latin-1 + #{"a"*10000} + #{"a"*10002}""" + with os_helper.temp_dir() as temp_dir: + file_name = make_script(temp_dir, 'foo', test_script) + run_test_script(file_name) + + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-11-19-22-27-52.gh-issue-99581.yKYPbf.rst b/Misc/NEWS.d/next/Core and Builtins/2022-11-19-22-27-52.gh-issue-99581.yKYPbf.rst new file mode 100644 index 0000000..8071fd1 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-11-19-22-27-52.gh-issue-99581.yKYPbf.rst @@ -0,0 +1,3 @@ +Fixed a bug that was causing a buffer overflow if the tokenizer copies a +line missing the newline character from a file that is as long as the +available tokenizer buffer. 
Patch by Pablo Galindo diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 0bbf1b1..13b666c 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -419,7 +419,11 @@ tok_readline_recode(struct tok_state *tok) { error_ret(tok); goto error; } - if (!tok_reserve_buf(tok, buflen + 1)) { + // Make room for the null terminator *and* potentially + // an extra newline character that we may need to artificially + // add. + size_t buffer_size = buflen + 2; + if (!tok_reserve_buf(tok, buffer_size)) { goto error; } memcpy(tok->inp, buf, buflen); @@ -973,6 +977,7 @@ tok_underflow_file(struct tok_state *tok) { return 0; } if (tok->inp[-1] != '\n') { + assert(tok->inp + 1 < tok->end); /* Last line does not end in \n, fake one */ *tok->inp++ = '\n'; *tok->inp = '\0'; |