author     Yury Selivanov <yselivanov@sprymix.com>   2015-07-23 12:01:58 (GMT)
committer  Yury Selivanov <yselivanov@sprymix.com>   2015-07-23 12:01:58 (GMT)
commit     96ec934e755355cfc5af036db8641646b7ddb45e
tree       a6fd6a4cbef1b75ab0cc10db01fd91ecf2e99976
parent     f315c1c01676bfabb5b1c6628642668f1ef436a6
Issue #24619: Simplify async/await tokenization.
This commit simplifies async/await tokenization in tokenizer.c, tokenize.py and lib2to3/tokenize.py. The previous solution kept a stack of async-def and def blocks, whereas the new approach simply remembers the position of the outermost async-def block. This change won't bring any parsing performance improvements, but it makes the code much easier to read and validate.
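
For readers skimming the patch, here is a minimal sketch of the new bookkeeping (illustrative only, not the actual tokenizer code; the helper names enter_async_def, on_newline and on_dedent are invented for this example, while the three flags are the ones the patch introduces):

    # Sketch: flag-based tracking of the outermost 'async def' block,
    # replacing the old stack of ('sync'|'async', indent) frames.
    async_def = False      # currently inside an 'async def' block?
    async_def_indent = 0   # indentation column of that block's header
    async_def_nl = False   # seen a logical NEWLINE since the header?

    def enter_async_def(indent):
        # Called when 'def' is tokenized right after a stashed 'async'.
        global async_def, async_def_indent
        async_def = True
        async_def_indent = indent

    def on_newline():
        # Called when a NEWLINE token is yielded at paren level 0.
        global async_def_nl
        if async_def:
            async_def_nl = True

    def on_dedent(new_indent):
        # Dedenting back to (or past) the header's indentation means
        # 'async'/'await' stop being treated as keywords.
        global async_def, async_def_nl, async_def_indent
        if async_def and async_def_nl and async_def_indent >= new_indent:
            async_def = False
            async_def_nl = False
            async_def_indent = 0

    # Example: 'async def f():' at column 0, body indented to column 4.
    enter_async_def(0)
    on_newline()
    on_dedent(0)           # back at column 0: the flags are cleared
    assert not async_def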
Diffstat (limited to 'Lib/tokenize.py')
Lib/tokenize.py | 39
1 file changed, 23 insertions(+), 16 deletions(-)
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index c3efdda..65d06e5 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -498,10 +498,11 @@ def _tokenize(readline, encoding):
contline = None
indents = [0]
- # 'stashed' and 'ctx' are used for async/await parsing
+ # 'stashed' and 'async_*' are used for async/await parsing
stashed = None
- ctx = [('sync', 0)]
- in_async = 0
+ async_def = False
+ async_def_indent = 0
+ async_def_nl = False
if encoding is not None:
if encoding == "utf-8-sig":
@@ -579,15 +580,18 @@ def _tokenize(readline, encoding):
("<tokenize>", lnum, pos, line))
indents = indents[:-1]
- cur_indent = indents[-1]
- while len(ctx) > 1 and ctx[-1][1] >= cur_indent:
- if ctx[-1][0] == 'async':
- in_async -= 1
- assert in_async >= 0
- ctx.pop()
+ if async_def and async_def_indent >= indents[-1]:
+ async_def = False
+ async_def_nl = False
+ async_def_indent = 0
yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)
+ if async_def and async_def_nl and async_def_indent >= indents[-1]:
+ async_def = False
+ async_def_nl = False
+ async_def_indent = 0
+
else: # continued statement
if not line:
raise TokenError("EOF in multi-line statement", (lnum, 0))
@@ -609,8 +613,13 @@ def _tokenize(readline, encoding):
if stashed:
yield stashed
stashed = None
- yield TokenInfo(NL if parenlev > 0 else NEWLINE,
- token, spos, epos, line)
+ if parenlev > 0:
+ yield TokenInfo(NL, token, spos, epos, line)
+ else:
+ yield TokenInfo(NEWLINE, token, spos, epos, line)
+ if async_def:
+ async_def_nl = True
+
elif initial == '#':
assert not token.endswith("\n")
if stashed:
@@ -644,7 +653,7 @@ def _tokenize(readline, encoding):
yield TokenInfo(STRING, token, spos, epos, line)
elif initial.isidentifier(): # ordinary name
if token in ('async', 'await'):
- if in_async:
+ if async_def:
yield TokenInfo(
ASYNC if token == 'async' else AWAIT,
token, spos, epos, line)
@@ -660,15 +669,13 @@ def _tokenize(readline, encoding):
and stashed.type == NAME
and stashed.string == 'async'):
- ctx.append(('async', indents[-1]))
- in_async += 1
+ async_def = True
+ async_def_indent = indents[-1]
yield TokenInfo(ASYNC, stashed.string,
stashed.start, stashed.end,
stashed.line)
stashed = None
- else:
- ctx.append(('sync', indents[-1]))
if stashed:
yield stashed
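
The user-visible behavior is unchanged: inside an 'async def' body, 'async' and 'await' still tokenize as the dedicated ASYNC/AWAIT token types, while elsewhere they remain ordinary NAME tokens. A quick way to check (assumes Python 3.5/3.6, where this code shipped; on 3.7+ async/await became true keywords and this special-casing was dropped):

    import io
    import tokenize

    src = b"async def f():\n    await g()\n\nawait = 1\n"
    for tok in tokenize.tokenize(io.BytesIO(src).readline):
        # 'await' inside f() should print as AWAIT; the module-level
        # 'await' on the last line should print as a plain NAME.
        print(tokenize.tok_name[tok.exact_type], repr(tok.string))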