author    | Ammar Askar <ammar_askar@hotmail.com> | 2018-07-06 10:22:25 (GMT)
committer | Tal Einat <taleinat+github@gmail.com> | 2018-07-06 10:22:25 (GMT)
commit    | 11c36a3e16f7fd4e937466014e8393ede4b61a25 (patch)
tree      | fdd7faaf59648aed31e1986543513f31c14797d8 /Lib/tokenize.py
parent    | c6671aecbcbd12ed234fdc4470363fc5ba0f4a3a (diff)
[3.6] bpo-33899: Make tokenize module mirror end-of-file is end-of-line behavior (GH-7891) (GH-8134)
Most of the change involves fixing up the test suite, which previously assumed
that no NEWLINE token would be emitted if the input didn't end in a newline.
Contributed by Ammar Askar.
(cherry picked from commit c4ef4896eac86a6759901c8546e26de4695a1389)
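The user-visible effect of the fix is that a source snippet without a trailing newline now ends with the same NEWLINE/ENDMARKER sequence as one that has it. A minimal sketch against the public `tokenize.tokenize()` API (not part of this patch, and assuming an interpreter that includes this change):

```python
import io
import tokenize

def final_tokens(source_bytes):
    # Names of the last three tokens produced for the given source.
    toks = list(tokenize.tokenize(io.BytesIO(source_bytes).readline))
    return [tokenize.tok_name[t.type] for t in toks[-3:]]

# With this fix, a source ending in a newline and the same source without
# one finish with the same token sequence.
print(final_tokens(b"x = 1\n"))  # ['NUMBER', 'NEWLINE', 'ENDMARKER']
print(final_tokens(b"x = 1"))    # ['NUMBER', 'NEWLINE', 'ENDMARKER']
```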
Diffstat (limited to 'Lib/tokenize.py')
-rw-r--r-- | Lib/tokenize.py | 12 |
1 file changed, 11 insertions(+), 1 deletion(-)
```diff
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 825aa90..e131ae6 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -507,8 +507,15 @@ def _tokenize(readline, encoding):
             # BOM will already have been stripped.
             encoding = "utf-8"
         yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
-    while True:                                # loop over lines in stream
+    last_line = b''
+    line = b''
+    while True:                                # loop over lines in stream
         try:
+            # We capture the value of the line variable here because
+            # readline uses the empty string '' to signal end of input,
+            # hence `line` itself will always be overwritten at the end
+            # of this loop.
+            last_line = line
             line = readline()
         except StopIteration:
             line = b''
@@ -719,6 +726,9 @@ def _tokenize(readline, encoding):
         yield stashed
         stashed = None
 
+    # Add an implicit NEWLINE if the input doesn't end in one
+    if last_line and last_line[-1] not in '\r\n':
+        yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '')
     for indent in indents[1:]:                 # pop remaining indent levels
         yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
     yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
```
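To see where the synthesized token lands: by the time the loop exits, `lnum` has already advanced past the last physical line, hence the `lnum - 1` in the added code, and the token spans the column just past the end of `last_line`. A small sketch against the public API (again not part of the patch, assuming an interpreter with this change):

```python
import io
import tokenize

# "b = 2" is the second physical line and has no trailing newline.
source = b"a = 1\nb = 2"
newline_toks = [t for t in tokenize.tokenize(io.BytesIO(source).readline)
                if t.type == tokenize.NEWLINE]

# The second NEWLINE is the implicit one: empty string, positioned just
# past the last character of line 2.
for t in newline_toks:
    print(t.start, t.end, repr(t.string))
# Expected output with this change:
# (1, 5) (1, 6) '\n'
# (2, 5) (2, 6) ''
```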