summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Yury Selivanov <yselivanov@sprymix.com> 2015-07-23 12:01:58 (GMT)
committer: Yury Selivanov <yselivanov@sprymix.com> 2015-07-23 12:01:58 (GMT)
commit: 96ec934e755355cfc5af036db8641646b7ddb45e (patch)
tree: a6fd6a4cbef1b75ab0cc10db01fd91ecf2e99976 /Lib
parent: f315c1c01676bfabb5b1c6628642668f1ef436a6 (diff)
download: cpython-96ec934e755355cfc5af036db8641646b7ddb45e.zip
cpython-96ec934e755355cfc5af036db8641646b7ddb45e.tar.gz
cpython-96ec934e755355cfc5af036db8641646b7ddb45e.tar.bz2
Issue #24619: Simplify async/await tokenization.
This commit simplifies async/await tokenization in tokenizer.c, tokenize.py & lib2to3/tokenize.py. The previous solution was to keep a stack of async-def & def blocks, whereas the new approach is just to remember the position of the outermost async-def block. This change won't bring any parsing performance improvements, but it makes the code much easier to read and validate.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/lib2to3/pgen2/tokenize.py 33
-rw-r--r-- Lib/lib2to3/tests/test_parser.py 22
-rw-r--r-- Lib/test/test_coroutines.py 1
-rw-r--r-- Lib/test/test_tokenize.py 73
-rw-r--r-- Lib/tokenize.py 39
5 files changed, 138 insertions, 30 deletions
diff --git a/Lib/lib2to3/pgen2/tokenize.py b/Lib/lib2to3/pgen2/tokenize.py
index 896b0fa..1ff1c61 100644
--- a/Lib/lib2to3/pgen2/tokenize.py
+++ b/Lib/lib2to3/pgen2/tokenize.py
@@ -366,10 +366,11 @@ def generate_tokens(readline):
contline = None
indents = [0]
- # 'stashed' and 'ctx' are used for async/await parsing
+ # 'stashed' and 'async_*' are used for async/await parsing
stashed = None
- ctx = [('sync', 0)]
- in_async = 0
+ async_def = False
+ async_def_indent = 0
+ async_def_nl = False
while 1: # loop over lines in stream
try:
@@ -438,15 +439,18 @@ def generate_tokens(readline):
("<tokenize>", lnum, pos, line))
indents = indents[:-1]
- cur_indent = indents[-1]
- while len(ctx) > 1 and ctx[-1][1] >= cur_indent:
- if ctx[-1][0] == 'async':
- in_async -= 1
- assert in_async >= 0
- ctx.pop()
+ if async_def and async_def_indent >= indents[-1]:
+ async_def = False
+ async_def_nl = False
+ async_def_indent = 0
yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
+ if async_def and async_def_nl and async_def_indent >= indents[-1]:
+ async_def = False
+ async_def_nl = False
+ async_def_indent = 0
+
else: # continued statement
if not line:
raise TokenError("EOF in multi-line statement", (lnum, 0))
@@ -466,10 +470,13 @@ def generate_tokens(readline):
newline = NEWLINE
if parenlev > 0:
newline = NL
+ elif async_def:
+ async_def_nl = True
if stashed:
yield stashed
stashed = None
yield (newline, token, spos, epos, line)
+
elif initial == '#':
assert not token.endswith("\n")
if stashed:
@@ -508,7 +515,7 @@ def generate_tokens(readline):
yield (STRING, token, spos, epos, line)
elif initial in namechars: # ordinary name
if token in ('async', 'await'):
- if in_async:
+ if async_def:
yield (ASYNC if token == 'async' else AWAIT,
token, spos, epos, line)
continue
@@ -523,15 +530,13 @@ def generate_tokens(readline):
and stashed[0] == NAME
and stashed[1] == 'async'):
- ctx.append(('async', indents[-1]))
- in_async += 1
+ async_def = True
+ async_def_indent = indents[-1]
yield (ASYNC, stashed[1],
stashed[2], stashed[3],
stashed[4])
stashed = None
- else:
- ctx.append(('sync', indents[-1]))
if stashed:
yield stashed
diff --git a/Lib/lib2to3/tests/test_parser.py b/Lib/lib2to3/tests/test_parser.py
index 107b5ab..b533c01 100644
--- a/Lib/lib2to3/tests/test_parser.py
+++ b/Lib/lib2to3/tests/test_parser.py
@@ -67,10 +67,32 @@ class TestAsyncAwait(GrammarTest):
await x
""")
+ self.validate("""async def foo():
+
+ def foo(): pass
+
+ def foo(): pass
+
+ await x
+ """)
+
+ self.validate("""async def foo(): return await a""")
+
+ self.validate("""def foo():
+ def foo(): pass
+ async def foo(): await x
+ """)
+
self.invalid_syntax("await x")
self.invalid_syntax("""def foo():
await x""")
+ self.invalid_syntax("""def foo():
+ def foo(): pass
+ async def foo(): pass
+ await x
+ """)
+
def test_async_var(self):
self.validate("""async = 1""")
self.validate("""await = 1""")
diff --git a/Lib/test/test_coroutines.py b/Lib/test/test_coroutines.py
index 14682ca..10de856 100644
--- a/Lib/test/test_coroutines.py
+++ b/Lib/test/test_coroutines.py
@@ -330,6 +330,7 @@ class AsyncBadSyntaxTest(unittest.TestCase):
async def f():
async def g(): pass
await z
+ await = 1
self.assertTrue(inspect.iscoroutinefunction(f))
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index e320562..b7ca089 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -840,6 +840,79 @@ Async/await extension:
OP ')' (1, 19) (1, 20)
OP ':' (1, 20) (1, 21)
AWAIT 'await' (1, 22) (1, 27)
+
+ >>> dump_tokens('''def f():
+ ...
+ ... def baz(): pass
+ ... async def bar(): pass
+ ...
+ ... await = 2''')
+ ENCODING 'utf-8' (0, 0) (0, 0)
+ NAME 'def' (1, 0) (1, 3)
+ NAME 'f' (1, 4) (1, 5)
+ OP '(' (1, 5) (1, 6)
+ OP ')' (1, 6) (1, 7)
+ OP ':' (1, 7) (1, 8)
+ NEWLINE '\\n' (1, 8) (1, 9)
+ NL '\\n' (2, 0) (2, 1)
+ INDENT ' ' (3, 0) (3, 2)
+ NAME 'def' (3, 2) (3, 5)
+ NAME 'baz' (3, 6) (3, 9)
+ OP '(' (3, 9) (3, 10)
+ OP ')' (3, 10) (3, 11)
+ OP ':' (3, 11) (3, 12)
+ NAME 'pass' (3, 13) (3, 17)
+ NEWLINE '\\n' (3, 17) (3, 18)
+ ASYNC 'async' (4, 2) (4, 7)
+ NAME 'def' (4, 8) (4, 11)
+ NAME 'bar' (4, 12) (4, 15)
+ OP '(' (4, 15) (4, 16)
+ OP ')' (4, 16) (4, 17)
+ OP ':' (4, 17) (4, 18)
+ NAME 'pass' (4, 19) (4, 23)
+ NEWLINE '\\n' (4, 23) (4, 24)
+ NL '\\n' (5, 0) (5, 1)
+ NAME 'await' (6, 2) (6, 7)
+ OP '=' (6, 8) (6, 9)
+ NUMBER '2' (6, 10) (6, 11)
+ DEDENT '' (7, 0) (7, 0)
+
+ >>> dump_tokens('''async def f():
+ ...
+ ... def baz(): pass
+ ... async def bar(): pass
+ ...
+ ... await = 2''')
+ ENCODING 'utf-8' (0, 0) (0, 0)
+ ASYNC 'async' (1, 0) (1, 5)
+ NAME 'def' (1, 6) (1, 9)
+ NAME 'f' (1, 10) (1, 11)
+ OP '(' (1, 11) (1, 12)
+ OP ')' (1, 12) (1, 13)
+ OP ':' (1, 13) (1, 14)
+ NEWLINE '\\n' (1, 14) (1, 15)
+ NL '\\n' (2, 0) (2, 1)
+ INDENT ' ' (3, 0) (3, 2)
+ NAME 'def' (3, 2) (3, 5)
+ NAME 'baz' (3, 6) (3, 9)
+ OP '(' (3, 9) (3, 10)
+ OP ')' (3, 10) (3, 11)
+ OP ':' (3, 11) (3, 12)
+ NAME 'pass' (3, 13) (3, 17)
+ NEWLINE '\\n' (3, 17) (3, 18)
+ ASYNC 'async' (4, 2) (4, 7)
+ NAME 'def' (4, 8) (4, 11)
+ NAME 'bar' (4, 12) (4, 15)
+ OP '(' (4, 15) (4, 16)
+ OP ')' (4, 16) (4, 17)
+ OP ':' (4, 17) (4, 18)
+ NAME 'pass' (4, 19) (4, 23)
+ NEWLINE '\\n' (4, 23) (4, 24)
+ NL '\\n' (5, 0) (5, 1)
+ AWAIT 'await' (6, 2) (6, 7)
+ OP '=' (6, 8) (6, 9)
+ NUMBER '2' (6, 10) (6, 11)
+ DEDENT '' (7, 0) (7, 0)
"""
from test import support
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index c3efdda..65d06e5 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -498,10 +498,11 @@ def _tokenize(readline, encoding):
contline = None
indents = [0]
- # 'stashed' and 'ctx' are used for async/await parsing
+ # 'stashed' and 'async_*' are used for async/await parsing
stashed = None
- ctx = [('sync', 0)]
- in_async = 0
+ async_def = False
+ async_def_indent = 0
+ async_def_nl = False
if encoding is not None:
if encoding == "utf-8-sig":
@@ -579,15 +580,18 @@ def _tokenize(readline, encoding):
("<tokenize>", lnum, pos, line))
indents = indents[:-1]
- cur_indent = indents[-1]
- while len(ctx) > 1 and ctx[-1][1] >= cur_indent:
- if ctx[-1][0] == 'async':
- in_async -= 1
- assert in_async >= 0
- ctx.pop()
+ if async_def and async_def_indent >= indents[-1]:
+ async_def = False
+ async_def_nl = False
+ async_def_indent = 0
yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)
+ if async_def and async_def_nl and async_def_indent >= indents[-1]:
+ async_def = False
+ async_def_nl = False
+ async_def_indent = 0
+
else: # continued statement
if not line:
raise TokenError("EOF in multi-line statement", (lnum, 0))
@@ -609,8 +613,13 @@ def _tokenize(readline, encoding):
if stashed:
yield stashed
stashed = None
- yield TokenInfo(NL if parenlev > 0 else NEWLINE,
- token, spos, epos, line)
+ if parenlev > 0:
+ yield TokenInfo(NL, token, spos, epos, line)
+ else:
+ yield TokenInfo(NEWLINE, token, spos, epos, line)
+ if async_def:
+ async_def_nl = True
+
elif initial == '#':
assert not token.endswith("\n")
if stashed:
@@ -644,7 +653,7 @@ def _tokenize(readline, encoding):
yield TokenInfo(STRING, token, spos, epos, line)
elif initial.isidentifier(): # ordinary name
if token in ('async', 'await'):
- if in_async:
+ if async_def:
yield TokenInfo(
ASYNC if token == 'async' else AWAIT,
token, spos, epos, line)
@@ -660,15 +669,13 @@ def _tokenize(readline, encoding):
and stashed.type == NAME
and stashed.string == 'async'):
- ctx.append(('async', indents[-1]))
- in_async += 1
+ async_def = True
+ async_def_indent = indents[-1]
yield TokenInfo(ASYNC, stashed.string,
stashed.start, stashed.end,
stashed.line)
stashed = None
- else:
- ctx.append(('sync', indents[-1]))
if stashed:
yield stashed