author    Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>  2023-05-26 22:14:45 (GMT)
committer GitHub <noreply@github.com>  2023-05-26 22:14:45 (GMT)
commit    2c02c6886739f0ed420d900b2a29933bc1c5df37
tree      47e9a870570009cdaafb46d23bc5977d3e6a682a
parent    05189f3054e3a831967a1bb53d14d97c97e31598
[3.12] gh-104976: Ensure trailing dedent tokens are emitted as in the previous tokenizer (GH-104980) (#105000)
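In practical terms, this restores the positions the pre-3.12 pure-Python tokenizer reported: trailing DEDENT tokens (and the ENDMARKER) land at column 0 of the line after the input, not at the end of the last physical line. A minimal sketch of the expected behavior, using only the public API (the exact token stream is inferred from the updated test expectations in the diff below):

    import io
    import tokenize

    # Two-line input whose indented block is closed by a synthetic trailing DEDENT.
    src = "if True:\n    pass\n"
    for tok in tokenize.generate_tokens(io.StringIO(src).readline):
        print(tokenize.tok_name[tok.type], repr(tok.string), tok.start, tok.end)

    # With this change the stream should end, as it did before 3.12, with:
    #   DEDENT    ''  (3, 0) (3, 0)
    #   ENDMARKER ''  (3, 0) (3, 0)
    # The unfixed 3.12 tokenizer instead reported the trailing DEDENT at the
    # end of the last physical line, just after the NEWLINE token.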
Diffstat (limited to 'Lib')
 Lib/test/test_tokenize.py | 15 ++++++++-------
 Lib/tokenize.py           |  5 -----
 2 files changed, 8 insertions(+), 12 deletions(-)
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 0b7c258..abb6885 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -82,7 +82,7 @@ class TokenizeTest(TestCase):
NAME 'False' (4, 11) (4, 16)
COMMENT '# NEWLINE' (4, 17) (4, 26)
NEWLINE '\\n' (4, 26) (4, 27)
- DEDENT '' (4, 27) (4, 27)
+ DEDENT '' (5, 0) (5, 0)
""")
indent_error_file = b"""\
def k(x):
@@ -755,8 +755,8 @@ def"', """\
NEWLINE '\\n' (2, 5) (2, 6)
INDENT ' \\t' (3, 0) (3, 9)
NAME 'pass' (3, 9) (3, 13)
- DEDENT '' (3, 14) (3, 14)
- DEDENT '' (3, 14) (3, 14)
+ DEDENT '' (4, 0) (4, 0)
+ DEDENT '' (4, 0) (4, 0)
""")
def test_non_ascii_identifiers(self):
@@ -968,7 +968,7 @@ async def foo():
NUMBER '1' (2, 17) (2, 18)
OP ':' (2, 18) (2, 19)
NAME 'pass' (2, 20) (2, 24)
- DEDENT '' (2, 25) (2, 25)
+ DEDENT '' (3, 0) (3, 0)
""")
self.check_tokenize('''async def foo(async): await''', """\
@@ -1016,7 +1016,7 @@ def f():
NAME 'await' (6, 2) (6, 7)
OP '=' (6, 8) (6, 9)
NUMBER '2' (6, 10) (6, 11)
- DEDENT '' (6, 12) (6, 12)
+ DEDENT '' (7, 0) (7, 0)
""")
self.check_tokenize('''\
@@ -1054,7 +1054,7 @@ async def f():
NAME 'await' (6, 2) (6, 7)
OP '=' (6, 8) (6, 9)
NUMBER '2' (6, 10) (6, 11)
- DEDENT '' (6, 12) (6, 12)
+ DEDENT '' (7, 0) (7, 0)
""")
def test_newline_after_parenthesized_block_with_comment(self):
@@ -2680,7 +2680,8 @@ async def f():
valid = generate_source(MAXINDENT - 1)
tokens = list(_generate_tokens_from_c_tokenizer(valid))
- self.assertEqual(tokens[-1].type, DEDENT)
+ self.assertEqual(tokens[-2].type, DEDENT)
+ self.assertEqual(tokens[-1].type, ENDMARKER)
compile(valid, "<string>", "exec")
invalid = generate_source(MAXINDENT)
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 911f0f1..4895e94 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -447,13 +447,8 @@ def tokenize(readline):
def _tokenize(rl_gen, encoding):
source = b"".join(rl_gen).decode(encoding)
- token = None
for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
yield token
- if token is not None:
- last_line, _ = token.start
- yield TokenInfo(ENDMARKER, '', (last_line + 1, 0), (last_line + 1, 0), '')
-
def generate_tokens(readline):
"""Tokenize a source reading Python code as unicode strings.