diff options
author | Yury Selivanov <yselivanov@sprymix.com> | 2015-07-23 12:01:58 (GMT) |
---|---|---|
committer | Yury Selivanov <yselivanov@sprymix.com> | 2015-07-23 12:01:58 (GMT) |
commit | 96ec934e755355cfc5af036db8641646b7ddb45e (patch) | |
tree | a6fd6a4cbef1b75ab0cc10db01fd91ecf2e99976 /Lib/tokenize.py | |
parent | f315c1c01676bfabb5b1c6628642668f1ef436a6 (diff) | |
download | cpython-96ec934e755355cfc5af036db8641646b7ddb45e.zip cpython-96ec934e755355cfc5af036db8641646b7ddb45e.tar.gz cpython-96ec934e755355cfc5af036db8641646b7ddb45e.tar.bz2 |
Issue #24619: Simplify async/await tokenization.
This commit simplifies async/await tokenization in tokenizer.c,
tokenize.py & lib2to3/tokenize.py. The previous solution was to keep
a stack of async-def & def blocks, whereas the new approach is just
to remember the position of the outermost async-def block.
This change won't bring any parsing performance improvements, but
it makes the code much easier to read and validate.
Diffstat (limited to 'Lib/tokenize.py')
-rw-r--r-- | Lib/tokenize.py | 39 |
1 file changed, 23 insertions(+), 16 deletions(-)
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index c3efdda..65d06e5 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -498,10 +498,11 @@ def _tokenize(readline, encoding):
     contline = None
     indents = [0]
 
-    # 'stashed' and 'ctx' are used for async/await parsing
+    # 'stashed' and 'async_*' are used for async/await parsing
     stashed = None
-    ctx = [('sync', 0)]
-    in_async = 0
+    async_def = False
+    async_def_indent = 0
+    async_def_nl = False
 
     if encoding is not None:
         if encoding == "utf-8-sig":
@@ -579,15 +580,18 @@ def _tokenize(readline, encoding):
                         ("<tokenize>", lnum, pos, line))
                 indents = indents[:-1]
 
-                cur_indent = indents[-1]
-                while len(ctx) > 1 and ctx[-1][1] >= cur_indent:
-                    if ctx[-1][0] == 'async':
-                        in_async -= 1
-                        assert in_async >= 0
-                    ctx.pop()
+                if async_def and async_def_indent >= indents[-1]:
+                    async_def = False
+                    async_def_nl = False
+                    async_def_indent = 0
 
                 yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)
 
+            if async_def and async_def_nl and async_def_indent >= indents[-1]:
+                async_def = False
+                async_def_nl = False
+                async_def_indent = 0
+
         else:                                  # continued statement
             if not line:
                 raise TokenError("EOF in multi-line statement", (lnum, 0))
@@ -609,8 +613,13 @@ def _tokenize(readline, encoding):
                 if stashed:
                     yield stashed
                     stashed = None
-                yield TokenInfo(NL if parenlev > 0 else NEWLINE,
-                                token, spos, epos, line)
+                if parenlev > 0:
+                    yield TokenInfo(NL, token, spos, epos, line)
+                else:
+                    yield TokenInfo(NEWLINE, token, spos, epos, line)
+                    if async_def:
+                        async_def_nl = True
+
             elif initial == '#':
                 assert not token.endswith("\n")
                 if stashed:
@@ -644,7 +653,7 @@ def _tokenize(readline, encoding):
                     yield TokenInfo(STRING, token, spos, epos, line)
                 elif initial.isidentifier():               # ordinary name
                     if token in ('async', 'await'):
-                        if in_async:
+                        if async_def:
                             yield TokenInfo(
                                 ASYNC if token == 'async' else AWAIT,
                                 token, spos, epos, line)
@@ -660,15 +669,13 @@ def _tokenize(readline, encoding):
                                 and stashed.type == NAME
                                 and stashed.string == 'async'):
 
-                            ctx.append(('async', indents[-1]))
-                            in_async += 1
+                            async_def = True
+                            async_def_indent = indents[-1]
 
                             yield TokenInfo(ASYNC, stashed.string,
                                             stashed.start, stashed.end,
                                             stashed.line)
                             stashed = None
-                    else:
-                        ctx.append(('sync', indents[-1]))
 
                 if stashed:
                     yield stashed