summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2021-08-02 09:44:01 (GMT)
committer: GitHub <noreply@github.com> 2021-08-02 09:44:01 (GMT)
commit: 2d11797c81be3ae776e418a5ba507098356d357c (patch)
tree: 00b1d2605ed6b592fc75b124344989508438bae0
parent: 2efa78180d07f108ff05464016c47989484f3efb (diff)
download: cpython-2d11797c81be3ae776e418a5ba507098356d357c.zip
cpython-2d11797c81be3ae776e418a5ba507098356d357c.tar.gz
cpython-2d11797c81be3ae776e418a5ba507098356d357c.tar.bz2
bpo-44667: Treat correctly lines ending with comments and no newlines in the Python tokenizer (GH-27499) (GH-27501)
(cherry picked from commit b6bde9fc42aecad5be0457198d17cfe7b481ad79) Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
-rw-r--r-- Lib/test/test_tokenize.py 10
-rw-r--r-- Lib/tokenize.py 2
-rw-r--r-- Misc/NEWS.d/next/Library/2021-07-30-23-27-30.bpo-44667.tu0Xrv.rst 4
3 files changed, 15 insertions, 1 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 6de7aa8..e05a07f 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1457,6 +1457,16 @@ class TestTokenize(TestCase):
# See http://bugs.python.org/issue16152
self.assertExactTypeEqual('@ ', token.AT)
+ def test_comment_at_the_end_of_the_source_without_newline(self):
+ # See http://bugs.python.org/issue44667
+ source = 'b = 1\n\n#test'
+ expected_tokens = [token.NAME, token.EQUAL, token.NUMBER, token.NEWLINE, token.NL, token.COMMENT]
+
+ tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
+ self.assertEqual(tok_name[tokens[0].exact_type], tok_name[ENCODING])
+ for i in range(6):
+ self.assertEqual(tok_name[tokens[i + 1].exact_type], tok_name[expected_tokens[i]])
+ self.assertEqual(tok_name[tokens[-1].exact_type], tok_name[token.ENDMARKER])
class UntokenizeTest(TestCase):
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 1aee21b..a782f62 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -602,7 +602,7 @@ def _tokenize(readline, encoding):
pos += 1
# Add an implicit NEWLINE if the input doesn't end in one
- if last_line and last_line[-1] not in '\r\n':
+ if last_line and last_line[-1] not in '\r\n' and not last_line.strip().startswith("#"):
yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '')
for indent in indents[1:]: # pop remaining indent levels
yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
diff --git a/Misc/NEWS.d/next/Library/2021-07-30-23-27-30.bpo-44667.tu0Xrv.rst b/Misc/NEWS.d/next/Library/2021-07-30-23-27-30.bpo-44667.tu0Xrv.rst
new file mode 100644
index 0000000..5b7e20e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-07-30-23-27-30.bpo-44667.tu0Xrv.rst
@@ -0,0 +1,4 @@
+:func:`tokenize.tokenize` no longer incorrectly generates a ``NEWLINE``
+token when the source doesn't end with a newline character and the last line
+is a comment, because the function already generates an ``NL`` token in
+that case. Patch by Pablo Galindo.