author     Ammar Askar <ammar_askar@hotmail.com>    2018-07-06 10:22:25 (GMT)
committer  Tal Einat <taleinat+github@gmail.com>    2018-07-06 10:22:25 (GMT)
commit     11c36a3e16f7fd4e937466014e8393ede4b61a25 (patch)
tree       fdd7faaf59648aed31e1986543513f31c14797d8 /Lib/tokenize.py
parent     c6671aecbcbd12ed234fdc4470363fc5ba0f4a3a (diff)
[3.6] bpo-33899: Make tokenize module mirror "end-of-file is end-of-line" behavior (GH-7891) (GH-8134)
Most of the change involves fixing up the test suite, which previously assumed that there would be no NEWLINE token if the input didn't end in a newline. Contributed by Ammar Askar. (cherry picked from commit c4ef4896eac86a6759901c8546e26de4695a1389)
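As a rough illustration of the behavior this change produces (a minimal sketch using the public tokenize API; the token sequence in the comment is an expectation, not output captured from this build): source that does not end in a newline now yields a NEWLINE token before ENDMARKER, just as it would if the trailing newline were present.

    import io
    import tokenize

    # Note: the source deliberately has no trailing b'\n'.
    source = b"x = 1"
    tokens = list(tokenize.tokenize(io.BytesIO(source).readline))
    print([tokenize.tok_name[t.type] for t in tokens])
    # Expected after this change:
    #   ['ENCODING', 'NAME', 'OP', 'NUMBER', 'NEWLINE', 'ENDMARKER']
    # Previously the implicit NEWLINE was missing, which is the
    # assumption the test suite had been relying on.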
Diffstat (limited to 'Lib/tokenize.py')
 -rw-r--r--  Lib/tokenize.py  12
 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 825aa90..e131ae6 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -507,8 +507,15 @@ def _tokenize(readline, encoding):
             # BOM will already have been stripped.
             encoding = "utf-8"
         yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
-    while True: # loop over lines in stream
+    last_line = b''
+    line = b''
+    while True: # loop over lines in stream
         try:
+            # We capture the value of the line variable here because
+            # readline uses the empty string '' to signal end of input,
+            # hence `line` itself will always be overwritten at the end
+            # of this loop.
+            last_line = line
             line = readline()
         except StopIteration:
             line = b''
@@ -719,6 +726,9 @@ def _tokenize(readline, encoding):
         yield stashed
         stashed = None
 
+    # Add an implicit NEWLINE if the input doesn't end in one
+    if last_line and last_line[-1] not in '\r\n':
+        yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '')
     for indent in indents[1:]: # pop remaining indent levels
         yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
     yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
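The comment added in the first hunk refers to the readline protocol used by _tokenize: the callable returns one line per call and an empty bytes object once the input is exhausted, so by the time end of input is detected, `line` no longer holds the final line of source. A small sketch of that protocol, using io.BytesIO as the line source:

    import io

    readline = io.BytesIO(b"x = 1").readline   # input with no trailing newline
    print(readline())  # b'x = 1'  <- the last real line; this is what last_line preserves
    print(readline())  # b''       <- end-of-input marker; this overwrites `line` in the loop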