From a1f45ec73f0486b187633e7ebc0a4f559d29d7d9 Mon Sep 17 00:00:00 2001 From: Tal Einat Date: Wed, 24 Oct 2018 20:32:21 +0300 Subject: bpo-33899: Revert tokenize module adding an implicit final NEWLINE (GH-10072) This reverts commit 7829bba. --- Lib/test/test_tokenize.py | 46 ++++++---------------- Lib/tokenize.py | 10 ----- .../2018-06-24-01-57-14.bpo-33899.IaOcAr.rst | 3 -- 3 files changed, 12 insertions(+), 47 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2018-06-24-01-57-14.bpo-33899.IaOcAr.rst diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index a462597..fd9486b 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1,54 +1,32 @@ from test import test_support -from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP, NEWLINE, +from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP, STRING, ENDMARKER, tok_name, Untokenizer, tokenize) from StringIO import StringIO import os from unittest import TestCase -# Converts a source string into a list of textual representation -# of the tokens such as: -# ` NAME 'if' (1, 0) (1, 2)` -# to make writing tests easier. -def stringify_tokens_from_source(token_generator, source_string): - result = [] - num_lines = len(source_string.splitlines()) - missing_trailing_nl = source_string[-1] not in '\r\n' - - for type, token, start, end, line in token_generator: - if type == ENDMARKER: - break - # Ignore the new line on the last line if the input lacks one - if missing_trailing_nl and type == NEWLINE and end[0] == num_lines: - continue - type = tok_name[type] - result.append(" %(type)-10.10s %(token)-13.13r %(start)s %(end)s" % - locals()) - - return result - class TokenizeTest(TestCase): # Tests for the tokenize module. # The tests can be really simple. Given a small fragment of source - # code, print out a table with tokens. The ENDMARKER, ENCODING and - # final NEWLINE are omitted for brevity. + # code, print out a table with tokens. The ENDMARKER is omitted for + # brevity. def check_tokenize(self, s, expected): # Format the tokens in s in a table format. + # The ENDMARKER is omitted. + result = [] f = StringIO(s) - result = stringify_tokens_from_source(generate_tokens(f.readline), s) - + for type, token, start, end, line in generate_tokens(f.readline): + if type == ENDMARKER: + break + type = tok_name[type] + result.append(" %(type)-10.10s %(token)-13.13r %(start)s %(end)s" % + locals()) self.assertEqual(result, expected.rstrip().splitlines()) - def test_implicit_newline(self): - # Make sure that the tokenizer puts in an implicit NEWLINE - # when the input lacks a trailing new line. - f = StringIO("x") - tokens = list(generate_tokens(f.readline)) - self.assertEqual(tokens[-2][0], NEWLINE) - self.assertEqual(tokens[-1][0], ENDMARKER) def test_basic(self): self.check_tokenize("1 + 1", """\ @@ -638,7 +616,7 @@ class TestRoundtrip(TestCase): self.check_roundtrip("if x == 1:\n" " print x\n") self.check_roundtrip("# This is a comment\n" - "# This also\n") + "# This also") # Some people use different formatting conventions, which makes # untokenize a little trickier. Note that this test involves trailing diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 6c857f8..d426cd2 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -306,15 +306,8 @@ def generate_tokens(readline): contline = None indents = [0] - last_line = b'' - line = b'' while 1: # loop over lines in stream try: - # We capture the value of the line variable here because - # readline uses the empty string '' to signal end of input, - # hence `line` itself will always be overwritten at the end - # of this loop. - last_line = line line = readline() except StopIteration: line = '' @@ -444,9 +437,6 @@ def generate_tokens(readline): (lnum, pos), (lnum, pos+1), line) pos += 1 - # Add an implicit NEWLINE if the input doesn't end in one - if last_line and last_line[-1] not in '\r\n': - yield (NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '') for indent in indents[1:]: # pop remaining indent levels yield (DEDENT, '', (lnum, 0), (lnum, 0), '') yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '') diff --git a/Misc/NEWS.d/next/Library/2018-06-24-01-57-14.bpo-33899.IaOcAr.rst b/Misc/NEWS.d/next/Library/2018-06-24-01-57-14.bpo-33899.IaOcAr.rst deleted file mode 100644 index 21c9095..0000000 --- a/Misc/NEWS.d/next/Library/2018-06-24-01-57-14.bpo-33899.IaOcAr.rst +++ /dev/null @@ -1,3 +0,0 @@ -Tokenize module now implicitly emits a NEWLINE when provided with input that -does not have a trailing new line. This behavior now matches what the C -tokenizer does internally. Contributed by Ammar Askar. -- cgit v0.12