diff options
author | Yury Selivanov <yselivanov@sprymix.com> | 2015-07-23 12:01:58 (GMT) |
---|---|---|
committer | Yury Selivanov <yselivanov@sprymix.com> | 2015-07-23 12:01:58 (GMT) |
commit | 96ec934e755355cfc5af036db8641646b7ddb45e (patch) | |
tree | a6fd6a4cbef1b75ab0cc10db01fd91ecf2e99976 /Lib | |
parent | f315c1c01676bfabb5b1c6628642668f1ef436a6 (diff) | |
download | cpython-96ec934e755355cfc5af036db8641646b7ddb45e.zip cpython-96ec934e755355cfc5af036db8641646b7ddb45e.tar.gz cpython-96ec934e755355cfc5af036db8641646b7ddb45e.tar.bz2 |
Issue #24619: Simplify async/await tokenization.
This commit simplifies async/await tokenization in tokenizer.c,
tokenize.py & lib2to3/tokenize.py. Previous solution was to keep
a stack of async-def & def blocks, whereas the new approach is just
to remember position of the outermost async-def block.
This change won't bring any parsing performance improvements, but
it makes the code much easier to read and validate.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/lib2to3/pgen2/tokenize.py | 33 | ||||
-rw-r--r-- | Lib/lib2to3/tests/test_parser.py | 22 | ||||
-rw-r--r-- | Lib/test/test_coroutines.py | 1 | ||||
-rw-r--r-- | Lib/test/test_tokenize.py | 73 | ||||
-rw-r--r-- | Lib/tokenize.py | 39 |
5 files changed, 138 insertions, 30 deletions
diff --git a/Lib/lib2to3/pgen2/tokenize.py b/Lib/lib2to3/pgen2/tokenize.py index 896b0fa..1ff1c61 100644 --- a/Lib/lib2to3/pgen2/tokenize.py +++ b/Lib/lib2to3/pgen2/tokenize.py @@ -366,10 +366,11 @@ def generate_tokens(readline): contline = None indents = [0] - # 'stashed' and 'ctx' are used for async/await parsing + # 'stashed' and 'async_*' are used for async/await parsing stashed = None - ctx = [('sync', 0)] - in_async = 0 + async_def = False + async_def_indent = 0 + async_def_nl = False while 1: # loop over lines in stream try: @@ -438,15 +439,18 @@ def generate_tokens(readline): ("<tokenize>", lnum, pos, line)) indents = indents[:-1] - cur_indent = indents[-1] - while len(ctx) > 1 and ctx[-1][1] >= cur_indent: - if ctx[-1][0] == 'async': - in_async -= 1 - assert in_async >= 0 - ctx.pop() + if async_def and async_def_indent >= indents[-1]: + async_def = False + async_def_nl = False + async_def_indent = 0 yield (DEDENT, '', (lnum, pos), (lnum, pos), line) + if async_def and async_def_nl and async_def_indent >= indents[-1]: + async_def = False + async_def_nl = False + async_def_indent = 0 + else: # continued statement if not line: raise TokenError("EOF in multi-line statement", (lnum, 0)) @@ -466,10 +470,13 @@ def generate_tokens(readline): newline = NEWLINE if parenlev > 0: newline = NL + elif async_def: + async_def_nl = True if stashed: yield stashed stashed = None yield (newline, token, spos, epos, line) + elif initial == '#': assert not token.endswith("\n") if stashed: @@ -508,7 +515,7 @@ def generate_tokens(readline): yield (STRING, token, spos, epos, line) elif initial in namechars: # ordinary name if token in ('async', 'await'): - if in_async: + if async_def: yield (ASYNC if token == 'async' else AWAIT, token, spos, epos, line) continue @@ -523,15 +530,13 @@ def generate_tokens(readline): and stashed[0] == NAME and stashed[1] == 'async'): - ctx.append(('async', indents[-1])) - in_async += 1 + async_def = True + async_def_indent = indents[-1] yield (ASYNC, stashed[1], stashed[2], stashed[3], stashed[4]) stashed = None - else: - ctx.append(('sync', indents[-1])) if stashed: yield stashed diff --git a/Lib/lib2to3/tests/test_parser.py b/Lib/lib2to3/tests/test_parser.py index 107b5ab..b533c01 100644 --- a/Lib/lib2to3/tests/test_parser.py +++ b/Lib/lib2to3/tests/test_parser.py @@ -67,10 +67,32 @@ class TestAsyncAwait(GrammarTest): await x """) + self.validate("""async def foo(): + + def foo(): pass + + def foo(): pass + + await x + """) + + self.validate("""async def foo(): return await a""") + + self.validate("""def foo(): + def foo(): pass + async def foo(): await x + """) + self.invalid_syntax("await x") self.invalid_syntax("""def foo(): await x""") + self.invalid_syntax("""def foo(): + def foo(): pass + async def foo(): pass + await x + """) + def test_async_var(self): self.validate("""async = 1""") self.validate("""await = 1""") diff --git a/Lib/test/test_coroutines.py b/Lib/test/test_coroutines.py index 14682ca..10de856 100644 --- a/Lib/test/test_coroutines.py +++ b/Lib/test/test_coroutines.py @@ -330,6 +330,7 @@ class AsyncBadSyntaxTest(unittest.TestCase): async def f(): async def g(): pass await z + await = 1 self.assertTrue(inspect.iscoroutinefunction(f)) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index e320562..b7ca089 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -840,6 +840,79 @@ Async/await extension: OP ')' (1, 19) (1, 20) OP ':' (1, 20) (1, 21) AWAIT 'await' (1, 22) (1, 27) + + >>> dump_tokens('''def f(): + ... + ... def baz(): pass + ... async def bar(): pass + ... + ... await = 2''') + ENCODING 'utf-8' (0, 0) (0, 0) + NAME 'def' (1, 0) (1, 3) + NAME 'f' (1, 4) (1, 5) + OP '(' (1, 5) (1, 6) + OP ')' (1, 6) (1, 7) + OP ':' (1, 7) (1, 8) + NEWLINE '\\n' (1, 8) (1, 9) + NL '\\n' (2, 0) (2, 1) + INDENT ' ' (3, 0) (3, 2) + NAME 'def' (3, 2) (3, 5) + NAME 'baz' (3, 6) (3, 9) + OP '(' (3, 9) (3, 10) + OP ')' (3, 10) (3, 11) + OP ':' (3, 11) (3, 12) + NAME 'pass' (3, 13) (3, 17) + NEWLINE '\\n' (3, 17) (3, 18) + ASYNC 'async' (4, 2) (4, 7) + NAME 'def' (4, 8) (4, 11) + NAME 'bar' (4, 12) (4, 15) + OP '(' (4, 15) (4, 16) + OP ')' (4, 16) (4, 17) + OP ':' (4, 17) (4, 18) + NAME 'pass' (4, 19) (4, 23) + NEWLINE '\\n' (4, 23) (4, 24) + NL '\\n' (5, 0) (5, 1) + NAME 'await' (6, 2) (6, 7) + OP '=' (6, 8) (6, 9) + NUMBER '2' (6, 10) (6, 11) + DEDENT '' (7, 0) (7, 0) + + >>> dump_tokens('''async def f(): + ... + ... def baz(): pass + ... async def bar(): pass + ... + ... await = 2''') + ENCODING 'utf-8' (0, 0) (0, 0) + ASYNC 'async' (1, 0) (1, 5) + NAME 'def' (1, 6) (1, 9) + NAME 'f' (1, 10) (1, 11) + OP '(' (1, 11) (1, 12) + OP ')' (1, 12) (1, 13) + OP ':' (1, 13) (1, 14) + NEWLINE '\\n' (1, 14) (1, 15) + NL '\\n' (2, 0) (2, 1) + INDENT ' ' (3, 0) (3, 2) + NAME 'def' (3, 2) (3, 5) + NAME 'baz' (3, 6) (3, 9) + OP '(' (3, 9) (3, 10) + OP ')' (3, 10) (3, 11) + OP ':' (3, 11) (3, 12) + NAME 'pass' (3, 13) (3, 17) + NEWLINE '\\n' (3, 17) (3, 18) + ASYNC 'async' (4, 2) (4, 7) + NAME 'def' (4, 8) (4, 11) + NAME 'bar' (4, 12) (4, 15) + OP '(' (4, 15) (4, 16) + OP ')' (4, 16) (4, 17) + OP ':' (4, 17) (4, 18) + NAME 'pass' (4, 19) (4, 23) + NEWLINE '\\n' (4, 23) (4, 24) + NL '\\n' (5, 0) (5, 1) + AWAIT 'await' (6, 2) (6, 7) + OP '=' (6, 8) (6, 9) + NUMBER '2' (6, 10) (6, 11) + DEDENT '' (7, 0) (7, 0) """ from test import support diff --git a/Lib/tokenize.py b/Lib/tokenize.py index c3efdda..65d06e5 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -498,10 +498,11 @@ def _tokenize(readline, encoding): contline = None indents = [0] - # 'stashed' and 'ctx' are used for async/await parsing + # 'stashed' and 'async_*' are used for async/await parsing stashed = None - ctx = [('sync', 0)] - in_async = 0 + async_def = False + async_def_indent = 0 + async_def_nl = False if encoding is not None: if encoding == "utf-8-sig": @@ -579,15 +580,18 @@ def _tokenize(readline, encoding): ("<tokenize>", lnum, pos, line)) indents = indents[:-1] - cur_indent = indents[-1] - while len(ctx) > 1 and ctx[-1][1] >= cur_indent: - if ctx[-1][0] == 'async': - in_async -= 1 - assert in_async >= 0 - ctx.pop() + if async_def and async_def_indent >= indents[-1]: + async_def = False + async_def_nl = False + async_def_indent = 0 yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line) + if async_def and async_def_nl and async_def_indent >= indents[-1]: + async_def = False + async_def_nl = False + async_def_indent = 0 + else: # continued statement if not line: raise TokenError("EOF in multi-line statement", (lnum, 0)) @@ -609,8 +613,13 @@ def _tokenize(readline, encoding): if stashed: yield stashed stashed = None - yield TokenInfo(NL if parenlev > 0 else NEWLINE, - token, spos, epos, line) + if parenlev > 0: + yield TokenInfo(NL, token, spos, epos, line) + else: + yield TokenInfo(NEWLINE, token, spos, epos, line) + if async_def: + async_def_nl = True + elif initial == '#': assert not token.endswith("\n") if stashed: @@ -644,7 +653,7 @@ def _tokenize(readline, encoding): yield TokenInfo(STRING, token, spos, epos, line) elif initial.isidentifier(): # ordinary name if token in ('async', 'await'): - if in_async: + if async_def: yield TokenInfo( ASYNC if token == 'async' else AWAIT, token, spos, epos, line) @@ -660,15 +669,13 @@ def _tokenize(readline, encoding): and stashed.type == NAME and stashed.string == 'async'): - ctx.append(('async', indents[-1])) - in_async += 1 + async_def = True + async_def_indent = indents[-1] yield TokenInfo(ASYNC, stashed.string, stashed.start, stashed.end, stashed.line) stashed = None - else: - ctx.append(('sync', indents[-1])) if stashed: yield stashed |