commit     96ec934e755355cfc5af036db8641646b7ddb45e (patch)
author     Yury Selivanov <yselivanov@sprymix.com>    2015-07-23 12:01:58 (GMT)
committer  Yury Selivanov <yselivanov@sprymix.com>    2015-07-23 12:01:58 (GMT)
tree       a6fd6a4cbef1b75ab0cc10db01fd91ecf2e99976 /Lib/lib2to3
parent     f315c1c01676bfabb5b1c6628642668f1ef436a6 (diff)
Issue #24619: Simplify async/await tokenization.
This commit simplifies async/await tokenization in tokenizer.c,
tokenize.py, and lib2to3/tokenize.py. The previous solution kept
a stack of async-def and def blocks, whereas the new approach simply
remembers the position of the outermost async-def block.
This change won't bring any parsing performance improvements, but
it makes the code much easier to read and validate.
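
Note (illustrative, not part of the commit): a minimal sketch of how the patched lib2to3 tokenizer behaves, assuming a CPython 3.5-era checkout where lib2to3.pgen2 includes this change. ASYNC/AWAIT tokens are emitted only for names inside an async-def body; elsewhere 'async' and 'await' remain plain NAME tokens.

    # Sketch only: feed a small source fragment to the lib2to3 tokenizer and
    # print the token stream.  'await' inside 'async def foo' surfaces as an
    # AWAIT token, while 'await' inside the plain 'def bar' stays a NAME.
    import io
    from lib2to3.pgen2 import token, tokenize

    source = (
        "async def foo():\n"
        "    await x\n"
        "\n"
        "def bar():\n"
        "    await = 1\n"
    )

    readline = io.StringIO(source).readline
    for tok_type, tok_str, start, end, line in tokenize.generate_tokens(readline):
        print(token.tok_name[tok_type], repr(tok_str))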
Diffstat (limited to 'Lib/lib2to3')
 Lib/lib2to3/pgen2/tokenize.py    | 33
 Lib/lib2to3/tests/test_parser.py | 22
 2 files changed, 41 insertions(+), 14 deletions(-)
diff --git a/Lib/lib2to3/pgen2/tokenize.py b/Lib/lib2to3/pgen2/tokenize.py
index 896b0fa..1ff1c61 100644
--- a/Lib/lib2to3/pgen2/tokenize.py
+++ b/Lib/lib2to3/pgen2/tokenize.py
@@ -366,10 +366,11 @@ def generate_tokens(readline):
     contline = None
     indents = [0]
 
-    # 'stashed' and 'ctx' are used for async/await parsing
+    # 'stashed' and 'async_*' are used for async/await parsing
     stashed = None
-    ctx = [('sync', 0)]
-    in_async = 0
+    async_def = False
+    async_def_indent = 0
+    async_def_nl = False
 
     while 1:                                   # loop over lines in stream
         try:
@@ -438,15 +439,18 @@ def generate_tokens(readline):
                         ("<tokenize>", lnum, pos, line))
                 indents = indents[:-1]
 
-                cur_indent = indents[-1]
-                while len(ctx) > 1 and ctx[-1][1] >= cur_indent:
-                    if ctx[-1][0] == 'async':
-                        in_async -= 1
-                        assert in_async >= 0
-                    ctx.pop()
+                if async_def and async_def_indent >= indents[-1]:
+                    async_def = False
+                    async_def_nl = False
+                    async_def_indent = 0
 
                 yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
 
+            if async_def and async_def_nl and async_def_indent >= indents[-1]:
+                async_def = False
+                async_def_nl = False
+                async_def_indent = 0
+
         else:                                  # continued statement
             if not line:
                 raise TokenError("EOF in multi-line statement", (lnum, 0))
@@ -466,10 +470,13 @@ def generate_tokens(readline):
                     newline = NEWLINE
                     if parenlev > 0:
                         newline = NL
+                    elif async_def:
+                        async_def_nl = True
                     if stashed:
                         yield stashed
                         stashed = None
                     yield (newline, token, spos, epos, line)
+
                 elif initial == '#':
                     assert not token.endswith("\n")
                     if stashed:
@@ -508,7 +515,7 @@ def generate_tokens(readline):
                     yield (STRING, token, spos, epos, line)
                 elif initial in namechars:                 # ordinary name
                     if token in ('async', 'await'):
-                        if in_async:
+                        if async_def:
                             yield (ASYNC if token == 'async' else AWAIT,
                                    token, spos, epos, line)
                             continue
@@ -523,15 +530,13 @@ def generate_tokens(readline):
                                 and stashed[0] == NAME
                                 and stashed[1] == 'async'):
 
-                            ctx.append(('async', indents[-1]))
-                            in_async += 1
+                            async_def = True
+                            async_def_indent = indents[-1]
 
                             yield (ASYNC, stashed[1],
                                    stashed[2], stashed[3],
                                    stashed[4])
                             stashed = None
-                        else:
-                            ctx.append(('sync', indents[-1]))
 
                     if stashed:
                         yield stashed
diff --git a/Lib/lib2to3/tests/test_parser.py b/Lib/lib2to3/tests/test_parser.py
index 107b5ab..b533c01 100644
--- a/Lib/lib2to3/tests/test_parser.py
+++ b/Lib/lib2to3/tests/test_parser.py
@@ -67,10 +67,32 @@ class TestAsyncAwait(GrammarTest):
             await x
         """)
 
+        self.validate("""async def foo():
+
+            def foo(): pass
+
+            def foo(): pass
+
+            await x
+        """)
+
+        self.validate("""async def foo(): return await a""")
+
+        self.validate("""def foo():
+            def foo(): pass
+            async def foo(): await x
+        """)
+
         self.invalid_syntax("await x")
         self.invalid_syntax("""def foo():
                                    await x""")
 
+        self.invalid_syntax("""def foo():
+            def foo(): pass
+            async def foo(): pass
+            await x
+        """)
+
     def test_async_var(self):
         self.validate("""async = 1""")
         self.validate("""await = 1""")
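
Note (illustrative, not part of the commit): the new test cases check that await nested under plain def blocks inside an async def still parses, and that await outside any async def is rejected. The sketch below drives the public lib2to3 driver directly, roughly what GrammarTest.validate() and invalid_syntax() do internally; it assumes a 3.5-era checkout whose Grammar.txt already contains the async_funcdef/async_stmt rules.

    # Sketch only: parse one valid and one invalid async/await fragment with
    # the lib2to3 driver and show that the invalid one raises ParseError.
    from lib2to3 import pygram, pytree
    from lib2to3.pgen2 import driver
    from lib2to3.pgen2.parse import ParseError

    d = driver.Driver(pygram.python_grammar, convert=pytree.convert)

    # Valid: 'await' inside an 'async def' body parses to a tree.
    tree = d.parse_string("async def foo():\n    await x\n")
    print(type(tree))

    # Invalid: 'await' inside a plain 'def' must be rejected.
    try:
        d.parse_string("def foo():\n    await x\n")
    except ParseError as exc:
        print("rejected:", exc)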