From 7279fb64089abc73c03247fb8191082ee42a9671 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 7 Jun 2023 13:31:48 +0100 Subject: gh-105435: Fix spurious NEWLINE token if file ends with comment without a newline (#105442) --- Lib/test/test_tokenize.py | 31 ++++++++++++++++++---- .../2023-06-07-12-20-59.gh-issue-105435.6VllI0.rst | 2 ++ Python/Python-tokenize.c | 11 ++++++++ 3 files changed, 39 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-06-07-12-20-59.gh-issue-105435.6VllI0.rst diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index f2847b2..6747b0d 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1631,13 +1631,34 @@ class TestTokenize(TestCase): def test_comment_at_the_end_of_the_source_without_newline(self): # See http://bugs.python.org/issue44667 source = 'b = 1\n\n#test' - expected_tokens = [token.NAME, token.EQUAL, token.NUMBER, token.NEWLINE, token.NL, token.COMMENT] + expected_tokens = [ + TokenInfo(type=token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''), + TokenInfo(type=token.NAME, string='b', start=(1, 0), end=(1, 1), line='b = 1\n'), + TokenInfo(type=token.OP, string='=', start=(1, 2), end=(1, 3), line='b = 1\n'), + TokenInfo(type=token.NUMBER, string='1', start=(1, 4), end=(1, 5), line='b = 1\n'), + TokenInfo(type=token.NEWLINE, string='\n', start=(1, 5), end=(1, 6), line='b = 1\n'), + TokenInfo(type=token.NL, string='\n', start=(2, 0), end=(2, 1), line='\n'), + TokenInfo(type=token.COMMENT, string='#test', start=(3, 0), end=(3, 5), line='#test\n'), + TokenInfo(type=token.NL, string='', start=(3, 5), end=(3, 6), line='#test\n'), + TokenInfo(type=token.ENDMARKER, string='', start=(4, 0), end=(4, 0), line='') + ] + + tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline)) + self.assertEqual(tokens, expected_tokens) + + def test_newline_and_space_at_the_end_of_the_source_without_newline(self): + # See https://github.com/python/cpython/issues/105435 + source = 'a\n ' + expected_tokens = [ + TokenInfo(token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''), + TokenInfo(token.NAME, string='a', start=(1, 0), end=(1, 1), line='a\n'), + TokenInfo(token.NEWLINE, string='\n', start=(1, 1), end=(1, 2), line='a\n'), + TokenInfo(token.NL, string='', start=(2, 1), end=(2, 2), line=' \n'), + TokenInfo(token.ENDMARKER, string='', start=(3, 0), end=(3, 0), line='') + ] tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline)) - self.assertEqual(tok_name[tokens[0].exact_type], tok_name[ENCODING]) - for i in range(6): - self.assertEqual(tok_name[tokens[i + 1].exact_type], tok_name[expected_tokens[i]]) - self.assertEqual(tok_name[tokens[-1].exact_type], tok_name[token.ENDMARKER]) + self.assertEqual(tokens, expected_tokens) def test_invalid_character_in_fstring_middle(self): # See gh-103824 diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-06-07-12-20-59.gh-issue-105435.6VllI0.rst b/Misc/NEWS.d/next/Core and Builtins/2023-06-07-12-20-59.gh-issue-105435.6VllI0.rst new file mode 100644 index 0000000..9e4d7e1 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-06-07-12-20-59.gh-issue-105435.6VllI0.rst @@ -0,0 +1,2 @@ +Fix spurious newline character if file ends on a comment without a newline. +Patch by Pablo Galindo diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c index 4d21793..83a129c 100644 --- a/Python/Python-tokenize.c +++ b/Python/Python-tokenize.c @@ -247,6 +247,17 @@ tokenizeriter_next(tokenizeriterobject *it) } end_col_offset++; } + else if (type == NL) { + if (it->tok->implicit_newline) { + Py_DECREF(str); + str = PyUnicode_FromString(""); + } + } + + if (str == NULL) { + Py_DECREF(line); + goto exit; + } } result = Py_BuildValue("(iN(nn)(nn)N)", type, str, lineno, col_offset, end_lineno, end_col_offset, line); -- cgit v0.12