author     Yury Selivanov <yselivanov@sprymix.com>   2015-07-23 12:01:58 (GMT)
committer  Yury Selivanov <yselivanov@sprymix.com>   2015-07-23 12:01:58 (GMT)
commit     96ec934e755355cfc5af036db8641646b7ddb45e
tree       a6fd6a4cbef1b75ab0cc10db01fd91ecf2e99976
parent     f315c1c01676bfabb5b1c6628642668f1ef436a6
Issue #24619: Simplify async/await tokenization.
This commit simplifies async/await tokenization in tokenizer.c, tokenize.py and lib2to3/tokenize.py. The previous solution kept a stack of async-def and def blocks, whereas the new approach simply remembers the position of the outermost async-def block. This change won't bring any parsing performance improvements, but it makes the code much easier to read and validate.
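
For readers skimming the patch, here is a minimal sketch of the new bookkeeping (illustrative only, not the actual tokenizer code; the helper names enter_async_def, on_newline and on_dedent are invented for this example, while the three flags are the ones the patch introduces):

    # Sketch: flag-based tracking of the outermost 'async def' block,
    # replacing the old stack of ('sync'|'async', indent) frames.
    async_def = False      # currently inside an 'async def' block?
    async_def_indent = 0   # indentation column of that block's header
    async_def_nl = False   # seen a logical NEWLINE since the header?

    def enter_async_def(indent):
        # Called when 'def' is tokenized right after a stashed 'async'.
        global async_def, async_def_indent
        async_def = True
        async_def_indent = indent

    def on_newline():
        # Called when a NEWLINE token is yielded at paren level 0.
        global async_def_nl
        if async_def:
            async_def_nl = True

    def on_dedent(new_indent):
        # Dedenting back to (or past) the header's indentation means
        # 'async'/'await' stop being treated as keywords.
        global async_def, async_def_nl, async_def_indent
        if async_def and async_def_nl and async_def_indent >= new_indent:
            async_def = False
            async_def_nl = False
            async_def_indent = 0

    # Example: 'async def f():' at column 0, body indented to column 4.
    enter_async_def(0)
    on_newline()
    on_dedent(0)           # back at column 0: the flags are cleared
    assert not async_def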
Diffstat (limited to 'Lib/tokenize.py')
Lib/tokenize.py | 39
1 file changed, 23 insertions(+), 16 deletions(-)
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index c3efdda..65d06e5 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -498,10 +498,11 @@ def _tokenize(readline, encoding):
contline = None
indents = [0]
- # 'stashed' and 'ctx' are used for async/await parsing
+ # 'stashed' and 'async_*' are used for async/await parsing
stashed = None
- ctx = [('sync', 0)]
- in_async = 0
+ async_def = False
+ async_def_indent = 0
+ async_def_nl = False
if encoding is not None:
if encoding == "utf-8-sig":
@@ -579,15 +580,18 @@ def _tokenize(readline, encoding):
("<tokenize>", lnum, pos, line))
indents = indents[:-1]
- cur_indent = indents[-1]
- while len(ctx) > 1 and ctx[-1][1] >= cur_indent:
- if ctx[-1][0] == 'async':
- in_async -= 1
- assert in_async >= 0
- ctx.pop()
+ if async_def and async_def_indent >= indents[-1]:
+ async_def = False
+ async_def_nl = False
+ async_def_indent = 0
yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)
+ if async_def and async_def_nl and async_def_indent >= indents[-1]:
+ async_def = False
+ async_def_nl = False
+ async_def_indent = 0
+
else: # continued statement
if not line:
raise TokenError("EOF in multi-line statement", (lnum, 0))
@@ -609,8 +613,13 @@ def _tokenize(readline, encoding):
if stashed:
yield stashed
stashed = None
- yield TokenInfo(NL if parenlev > 0 else NEWLINE,
- token, spos, epos, line)
+ if parenlev > 0:
+ yield TokenInfo(NL, token, spos, epos, line)
+ else:
+ yield TokenInfo(NEWLINE, token, spos, epos, line)
+ if async_def:
+ async_def_nl = True
+
elif initial == '#':
assert not token.endswith("\n")
if stashed:
@@ -644,7 +653,7 @@ def _tokenize(readline, encoding):
yield TokenInfo(STRING, token, spos, epos, line)
elif initial.isidentifier(): # ordinary name
if token in ('async', 'await'):
- if in_async:
+ if async_def:
yield TokenInfo(
ASYNC if token == 'async' else AWAIT,
token, spos, epos, line)
@@ -660,15 +669,13 @@ def _tokenize(readline, encoding):
and stashed.type == NAME
and stashed.string == 'async'):
- ctx.append(('async', indents[-1]))
- in_async += 1
+ async_def = True
+ async_def_indent = indents[-1]
yield TokenInfo(ASYNC, stashed.string,
stashed.start, stashed.end,
stashed.line)
stashed = None
- else:
- ctx.append(('sync', indents[-1]))
if stashed:
yield stashed
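
The user-visible behavior is unchanged: inside an 'async def' body, 'async' and 'await' still tokenize as the dedicated ASYNC/AWAIT token types, while elsewhere they remain ordinary NAME tokens. A quick way to check (assumes Python 3.5/3.6, where this code shipped; on 3.7+ async/await became true keywords and this special-casing was dropped):

    import io
    import tokenize

    src = b"async def f():\n    await g()\n\nawait = 1\n"
    for tok in tokenize.tokenize(io.BytesIO(src).readline):
        # 'await' inside f() should print as AWAIT; the module-level
        # 'await' on the last line should print as a plain NAME.
        print(tokenize.tok_name[tok.exact_type], repr(tok.string))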