summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--Doc/reference/compound_stmts.rst4
-rw-r--r--Lib/lib2to3/pgen2/tokenize.py12
-rw-r--r--Lib/test/badsyntax_async1.py5
-rw-r--r--Lib/test/badsyntax_async2.py5
-rw-r--r--Lib/test/badsyntax_async4.py2
-rw-r--r--Lib/test/badsyntax_async9.py2
-rw-r--r--Lib/test/test_coroutines.py226
-rw-r--r--Lib/test/test_grammar.py5
-rw-r--r--Lib/test/test_tokenize.py15
-rw-r--r--Lib/tokenize.py7
-rw-r--r--Misc/NEWS3
-rw-r--r--Parser/tokenizer.c105
-rw-r--r--Parser/tokenizer.h21
13 files changed, 343 insertions, 69 deletions
diff --git a/Doc/reference/compound_stmts.rst b/Doc/reference/compound_stmts.rst
index 76b3850..71f240f 100644
--- a/Doc/reference/compound_stmts.rst
+++ b/Doc/reference/compound_stmts.rst
@@ -685,9 +685,7 @@ Execution of Python coroutines can be suspended and resumed at many points
(see :term:`coroutine`). In the body of a coroutine, any ``await`` and
``async`` identifiers become reserved keywords; :keyword:`await` expressions,
:keyword:`async for` and :keyword:`async with` can only be used in
-coroutine bodies. However, to simplify the parser, these keywords cannot
-be used on the same line as a function or coroutine (:keyword:`def`
-statement) header.
+coroutine bodies.
Functions defined with ``async def`` syntax are always coroutine functions,
even if they do not contain ``await`` or ``async`` keywords.
diff --git a/Lib/lib2to3/pgen2/tokenize.py b/Lib/lib2to3/pgen2/tokenize.py
index 690fec4..896b0fa 100644
--- a/Lib/lib2to3/pgen2/tokenize.py
+++ b/Lib/lib2to3/pgen2/tokenize.py
@@ -369,6 +369,7 @@ def generate_tokens(readline):
# 'stashed' and 'ctx' are used for async/await parsing
stashed = None
ctx = [('sync', 0)]
+ in_async = 0
while 1: # loop over lines in stream
try:
@@ -436,6 +437,14 @@ def generate_tokens(readline):
"unindent does not match any outer indentation level",
("<tokenize>", lnum, pos, line))
indents = indents[:-1]
+
+ cur_indent = indents[-1]
+ while len(ctx) > 1 and ctx[-1][1] >= cur_indent:
+ if ctx[-1][0] == 'async':
+ in_async -= 1
+ assert in_async >= 0
+ ctx.pop()
+
yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
else: # continued statement
@@ -499,7 +508,7 @@ def generate_tokens(readline):
yield (STRING, token, spos, epos, line)
elif initial in namechars: # ordinary name
if token in ('async', 'await'):
- if ctx[-1][0] == 'async' and ctx[-1][1] < indents[-1]:
+ if in_async:
yield (ASYNC if token == 'async' else AWAIT,
token, spos, epos, line)
continue
@@ -515,6 +524,7 @@ def generate_tokens(readline):
and stashed[1] == 'async'):
ctx.append(('async', indents[-1]))
+ in_async += 1
yield (ASYNC, stashed[1],
stashed[2], stashed[3],
diff --git a/Lib/test/badsyntax_async1.py b/Lib/test/badsyntax_async1.py
index 970445d..fb85e29 100644
--- a/Lib/test/badsyntax_async1.py
+++ b/Lib/test/badsyntax_async1.py
@@ -1,3 +1,2 @@
-async def foo():
- def foo(a=await something()):
- pass
+async def foo(a=await something()):
+ pass
diff --git a/Lib/test/badsyntax_async2.py b/Lib/test/badsyntax_async2.py
index 1e62a3e..6f6f4f5 100644
--- a/Lib/test/badsyntax_async2.py
+++ b/Lib/test/badsyntax_async2.py
@@ -1,3 +1,2 @@
-async def foo():
- def foo(a:await something()):
- pass
+async def foo(a:await something()):
+ pass
diff --git a/Lib/test/badsyntax_async4.py b/Lib/test/badsyntax_async4.py
index 4afda40..d033b28 100644
--- a/Lib/test/badsyntax_async4.py
+++ b/Lib/test/badsyntax_async4.py
@@ -1,2 +1,2 @@
async def foo():
- async def foo(): await something()
+ await
diff --git a/Lib/test/badsyntax_async9.py b/Lib/test/badsyntax_async9.py
deleted file mode 100644
index d033b28..0000000
--- a/Lib/test/badsyntax_async9.py
+++ /dev/null
@@ -1,2 +0,0 @@
-async def foo():
- await
diff --git a/Lib/test/test_coroutines.py b/Lib/test/test_coroutines.py
index 9d97123..3ba2f23 100644
--- a/Lib/test/test_coroutines.py
+++ b/Lib/test/test_coroutines.py
@@ -67,11 +67,11 @@ def silence_coro_gc():
class AsyncBadSyntaxTest(unittest.TestCase):
def test_badsyntax_1(self):
- with self.assertRaisesRegex(SyntaxError, 'invalid syntax'):
+ with self.assertRaisesRegex(SyntaxError, "'await' outside"):
import test.badsyntax_async1
def test_badsyntax_2(self):
- with self.assertRaisesRegex(SyntaxError, 'invalid syntax'):
+ with self.assertRaisesRegex(SyntaxError, "'await' outside"):
import test.badsyntax_async2
def test_badsyntax_3(self):
@@ -103,10 +103,6 @@ class AsyncBadSyntaxTest(unittest.TestCase):
import test.badsyntax_async8
def test_badsyntax_9(self):
- with self.assertRaisesRegex(SyntaxError, 'invalid syntax'):
- import test.badsyntax_async9
-
- def test_badsyntax_10(self):
ns = {}
for comp in {'(await a for a in b)',
'[await a for a in b]',
@@ -116,6 +112,221 @@ class AsyncBadSyntaxTest(unittest.TestCase):
with self.assertRaisesRegex(SyntaxError, 'await.*in comprehen'):
exec('async def f():\n\t{}'.format(comp), ns, ns)
+ def test_badsyntax_10(self):
+ # Tests for issue 24619
+
+ samples = [
+ """async def foo():
+ def bar(): pass
+ await = 1
+ """,
+
+ """async def foo():
+
+ def bar(): pass
+ await = 1
+ """,
+
+ """async def foo():
+ def bar(): pass
+ if 1:
+ await = 1
+ """,
+
+ """def foo():
+ async def bar(): pass
+ if 1:
+ await a
+ """,
+
+ """def foo():
+ async def bar(): pass
+ await a
+ """,
+
+ """def foo():
+ def baz(): pass
+ async def bar(): pass
+ await a
+ """,
+
+ """def foo():
+ def baz(): pass
+ # 456
+ async def bar(): pass
+ # 123
+ await a
+ """,
+
+ """async def foo():
+ def baz(): pass
+ # 456
+ async def bar(): pass
+ # 123
+ await = 2
+ """,
+
+ """def foo():
+
+ def baz(): pass
+
+ async def bar(): pass
+
+ await a
+ """,
+
+ """async def foo():
+
+ def baz(): pass
+
+ async def bar(): pass
+
+ await = 2
+ """,
+
+ """async def foo():
+ def async(): pass
+ """,
+
+ """async def foo():
+ def await(): pass
+ """,
+
+ """async def foo():
+ def bar():
+ await
+ """,
+
+ """async def foo():
+ return lambda async: await
+ """,
+
+ """async def foo():
+ return lambda a: await
+ """,
+
+ """async def foo(a: await b):
+ pass
+ """,
+
+ """def baz():
+ async def foo(a: await b):
+ pass
+ """,
+
+ """async def foo(async):
+ pass
+ """,
+
+ """async def foo():
+ def bar():
+ def baz():
+ async = 1
+ """,
+
+ """async def foo():
+ def bar():
+ def baz():
+ pass
+ async = 1
+ """,
+
+ """def foo():
+ async def bar():
+
+ async def baz():
+ pass
+
+ def baz():
+ 42
+
+ async = 1
+ """,
+
+ """async def foo():
+ def bar():
+ def baz():
+ pass\nawait foo()
+ """,
+
+ """def foo():
+ def bar():
+ async def baz():
+ pass\nawait foo()
+ """,
+
+ """async def foo(await):
+ pass
+ """,
+
+ """def foo():
+
+ async def bar(): pass
+
+ await a
+ """,
+
+ """def foo():
+ async def bar():
+ pass\nawait a
+ """]
+
+ ns = {}
+ for code in samples:
+ with self.subTest(code=code), self.assertRaises(SyntaxError):
+ exec(code, ns, ns)
+
+ def test_goodsyntax_1(self):
+ # Tests for issue 24619
+
+ def foo(await):
+ async def foo(): pass
+ async def foo():
+ pass
+ return await + 1
+ self.assertEqual(foo(10), 11)
+
+ def foo(await):
+ async def foo(): pass
+ async def foo(): pass
+ return await + 2
+ self.assertEqual(foo(20), 22)
+
+ def foo(await):
+
+ async def foo(): pass
+
+ async def foo(): pass
+
+ return await + 2
+ self.assertEqual(foo(20), 22)
+
+ def foo(await):
+ """spam"""
+ async def foo(): \
+ pass
+ # 123
+ async def foo(): pass
+ # 456
+ return await + 2
+ self.assertEqual(foo(20), 22)
+
+ def foo(await):
+ def foo(): pass
+ def foo(): pass
+ async def bar(): return await_
+ await_ = await
+ try:
+ bar().send(None)
+ except StopIteration as ex:
+ return ex.args[0]
+ self.assertEqual(foo(42), 42)
+
+ async def f():
+ async def g(): pass
+ await z
+ self.assertTrue(inspect.iscoroutinefunction(f))
+
class TokenizerRegrTest(unittest.TestCase):
@@ -461,8 +672,7 @@ class CoroutineTest(unittest.TestCase):
class Awaitable:
pass
- async def foo():
- return (await Awaitable())
+ async def foo(): return await Awaitable()
with self.assertRaisesRegex(
TypeError, "object Awaitable can't be used in 'await' expression"):
diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py
index 2af7390..ca6b5d0 100644
--- a/Lib/test/test_grammar.py
+++ b/Lib/test/test_grammar.py
@@ -1051,10 +1051,7 @@ class GrammarTests(unittest.TestCase):
async def test():
def sum():
- async = 1
- await = 41
- return async + await
-
+ pass
if 1:
await someobj()
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 42fc78f..e320562 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -786,12 +786,12 @@ Async/await extension:
NAME 'def' (2, 2) (2, 5)
NAME 'foo' (2, 6) (2, 9)
OP '(' (2, 9) (2, 10)
- NAME 'await' (2, 10) (2, 15)
+ AWAIT 'await' (2, 10) (2, 15)
OP ')' (2, 15) (2, 16)
OP ':' (2, 16) (2, 17)
NEWLINE '\\n' (2, 17) (2, 18)
INDENT ' ' (3, 0) (3, 4)
- NAME 'await' (3, 4) (3, 9)
+ AWAIT 'await' (3, 4) (3, 9)
OP '=' (3, 10) (3, 11)
NUMBER '1' (3, 12) (3, 13)
NEWLINE '\\n' (3, 13) (3, 14)
@@ -829,6 +829,17 @@ Async/await extension:
OP ':' (2, 18) (2, 19)
NAME 'pass' (2, 20) (2, 24)
DEDENT '' (3, 0) (3, 0)
+
+ >>> dump_tokens('''async def foo(async): await''')
+ ENCODING 'utf-8' (0, 0) (0, 0)
+ ASYNC 'async' (1, 0) (1, 5)
+ NAME 'def' (1, 6) (1, 9)
+ NAME 'foo' (1, 10) (1, 13)
+ OP '(' (1, 13) (1, 14)
+ ASYNC 'async' (1, 14) (1, 19)
+ OP ')' (1, 19) (1, 20)
+ OP ':' (1, 20) (1, 21)
+ AWAIT 'await' (1, 22) (1, 27)
"""
from test import support
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 3ec9018..c3efdda 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -501,6 +501,7 @@ def _tokenize(readline, encoding):
# 'stashed' and 'ctx' are used for async/await parsing
stashed = None
ctx = [('sync', 0)]
+ in_async = 0
if encoding is not None:
if encoding == "utf-8-sig":
@@ -580,6 +581,9 @@ def _tokenize(readline, encoding):
cur_indent = indents[-1]
while len(ctx) > 1 and ctx[-1][1] >= cur_indent:
+ if ctx[-1][0] == 'async':
+ in_async -= 1
+ assert in_async >= 0
ctx.pop()
yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)
@@ -640,7 +644,7 @@ def _tokenize(readline, encoding):
yield TokenInfo(STRING, token, spos, epos, line)
elif initial.isidentifier(): # ordinary name
if token in ('async', 'await'):
- if ctx[-1][0] == 'async' and ctx[-1][1] < indents[-1]:
+ if in_async:
yield TokenInfo(
ASYNC if token == 'async' else AWAIT,
token, spos, epos, line)
@@ -657,6 +661,7 @@ def _tokenize(readline, encoding):
and stashed.string == 'async'):
ctx.append(('async', indents[-1]))
+ in_async += 1
yield TokenInfo(ASYNC, stashed.string,
stashed.start, stashed.end,
diff --git a/Misc/NEWS b/Misc/NEWS
index f69138e..ba27a55 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -19,6 +19,9 @@ Core and Builtins
- Issue #24407: Fix crash when dict is mutated while being updated.
+- Issue #24619: New approach for tokenizing async/await. As a consequence,
+  it is now possible to have one-line 'async def foo(): await ..' functions.
+
Library
-------
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index d4476ae..46c0580 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -31,6 +31,12 @@
|| c == '_'\
|| (c >= 128))
+/* The following DEFTYPE* flags are used in 'tok_state->deftypestack',
+ and should be removed in 3.7, when async/await are regular
+ keywords. */
+#define DEFTYPE_ASYNC 1
+#define DEFTYPE_HAS_NL 2
+
extern char *PyOS_Readline(FILE *, FILE *, const char *);
/* Return malloc'ed string including trailing \n;
empty malloc'ed string for EOF;
@@ -130,6 +136,8 @@ tok_new(void)
tok->def = 0;
tok->defstack[0] = 0;
tok->deftypestack[0] = 0;
+ tok->def_async_behind = 0;
+ tok->def_in_async = 0;
tok->atbol = 1;
tok->pendin = 0;
@@ -1436,7 +1444,12 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
tok->pendin++;
while (tok->def && tok->defstack[tok->def] >= tok->indent) {
+ if (tok->deftypestack[tok->def] & DEFTYPE_ASYNC) {
+ tok->def_in_async--;
+ assert(tok->def_in_async >= 0);
+ }
tok->def--;
+ assert(tok->def >= 0);
}
return DEDENT;
@@ -1447,6 +1460,22 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
}
}
+ if (!blankline && tok->level == 0
+ && tok->def && tok->deftypestack[tok->def] & DEFTYPE_HAS_NL
+ && tok->defstack[tok->def] >= tok->indent)
+ {
+ /* The top function on the stack did have a NEWLINE
+ token, but didn't have an INDENT. That means that
+ it's a one-line function and it should now be removed
+ from the stack. */
+ if (tok->deftypestack[tok->def] & DEFTYPE_ASYNC) {
+ tok->def_in_async--;
+ assert(tok->def_in_async >= 0);
+ }
+ tok->def--;
+ assert(tok->def >= 0);
+ }
+
again:
tok->start = NULL;
/* Skip spaces */
@@ -1501,59 +1530,58 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
tok_len = tok->cur - tok->start;
if (tok_len == 3 && memcmp(tok->start, "def", 3) == 0) {
- if (tok->def && tok->deftypestack[tok->def] == 3) {
- tok->deftypestack[tok->def] = 2;
+ /* The current token is 'def'. */
+ if (tok->def + 1 >= MAXINDENT) {
+ tok->done = E_TOODEEP;
+ tok->cur = tok->inp;
+ return ERRORTOKEN;
}
- else if (tok->defstack[tok->def] < tok->indent) {
- /* We advance defs stack only when we see "def" *and*
- the indentation level was increased relative to the
- previous "def". */
- if (tok->def + 1 >= MAXINDENT) {
- tok->done = E_TOODEEP;
- tok->cur = tok->inp;
- return ERRORTOKEN;
- }
+ /* Advance defs stack. */
+ tok->def++;
+ tok->defstack[tok->def] = tok->indent;
- tok->def++;
- tok->defstack[tok->def] = tok->indent;
- tok->deftypestack[tok->def] = 1;
+ if (tok->def_async_behind) {
+ /* The previous token was 'async'. */
+ tok->def_async_behind = 0;
+ tok->deftypestack[tok->def] = DEFTYPE_ASYNC;
+ tok->def_in_async++;
+ }
+ else {
+ /* This is a regular function (not async def). */
+ tok->deftypestack[tok->def] = 0;
}
}
else if (tok_len == 5) {
if (memcmp(tok->start, "async", 5) == 0) {
+ /* The current token is 'async'. */
memcpy(&ahead_tok, tok, sizeof(ahead_tok));
+ /* Try to look ahead one token. */
ahead_tok_kind = tok_get(&ahead_tok, &ahead_tok_start,
&ahead_top_end);
- if (ahead_tok_kind == NAME &&
- ahead_tok.cur - ahead_tok.start == 3 &&
- memcmp(ahead_tok.start, "def", 3) == 0) {
-
- if (tok->def + 1 >= MAXINDENT) {
- tok->done = E_TOODEEP;
- tok->cur = tok->inp;
- return ERRORTOKEN;
- }
-
- tok->def++;
- tok->defstack[tok->def] = tok->indent;
- tok->deftypestack[tok->def] = 3;
-
+ if (ahead_tok_kind == NAME
+ && ahead_tok.cur - ahead_tok.start == 3
+ && memcmp(ahead_tok.start, "def", 3) == 0)
+ {
+ /* The next token is going to be 'def', so instead of
+ returning 'async' NAME token, we return ASYNC. */
+ tok->def_async_behind = 1;
return ASYNC;
}
- else if (tok->def && tok->deftypestack[tok->def] == 2
- && tok->defstack[tok->def] < tok->indent) {
-
+ else if (tok->def_in_async)
+ {
+ /* We're inside an 'async def' function, so we treat
+ 'async' token as ASYNC, instead of NAME. */
return ASYNC;
}
}
- else if (memcmp(tok->start, "await", 5) == 0
- && tok->def && tok->deftypestack[tok->def] == 2
- && tok->defstack[tok->def] < tok->indent) {
-
+ else if (memcmp(tok->start, "await", 5) == 0 && tok->def_in_async)
+ {
+ /* We're inside an 'async def' function, so we treat
+ 'await' token as AWAIT, instead of NAME. */
return AWAIT;
}
}
@@ -1569,6 +1597,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
*p_start = tok->start;
*p_end = tok->cur - 1; /* Leave '\n' out of the string */
tok->cont_line = 0;
+ if (tok->def) {
+ /* Mark the top function on the stack that it had
+ at least one NEWLINE. That will help us to
+ distinguish one-line functions from functions
+ with multiple statements. */
+ tok->deftypestack[tok->def] |= DEFTYPE_HAS_NL;
+ }
return NEWLINE;
}
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 3bcdad6..e198a0b 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -66,12 +66,21 @@ struct tok_state {
const char* str;
const char* input; /* Tokenizer's newline translated copy of the string. */
- int defstack[MAXINDENT]; /* stack if funcs & indents where they
- were defined */
- int deftypestack[MAXINDENT]; /* stack of func types
- (0 not func; 1: "def name";
- 2: "async def name") */
- int def; /* Length of stack of func types */
+ /* `def*` fields are for parsing async/await in a backwards compatible
+ way. They should be removed in 3.7, when they will become
+ regular constants. See PEP 492 for more details. */
+ int defstack[MAXINDENT]; /* Stack of funcs & indents where they
+ were defined. */
+ int deftypestack[MAXINDENT]; /* Stack of func flags, see DEFTYPE_*
+ constants. */
+ int def; /* Length of stack of func types/flags. */
+ int def_async_behind; /* 1 if there was an 'async' token before
+ a 'def' token. */
+ int def_in_async; /* Counter of how deep 'async def's
+ are nested. If greater than 0,
+ we are somewhere in an 'async def'
+ body, so 'async' and 'await' should
+ be parsed as keywords.*/
};
extern struct tok_state *PyTokenizer_FromString(const char *, int);