diff options
author | Yury Selivanov <yselivanov@sprymix.com> | 2015-07-22 10:33:45 (GMT) |
---|---|---|
committer | Yury Selivanov <yselivanov@sprymix.com> | 2015-07-22 10:33:45 (GMT) |
commit | 8fb307cd650511ba019c4493275cb6684ad308bc (patch) | |
tree | dc1138644436a3e2c0592f096c6b8d0e47aec5ef /Lib | |
parent | 80acc3ebbc4c81f9c1bff864eca076d6bdbe9ec6 (diff) | |
download | cpython-8fb307cd650511ba019c4493275cb6684ad308bc.zip cpython-8fb307cd650511ba019c4493275cb6684ad308bc.tar.gz cpython-8fb307cd650511ba019c4493275cb6684ad308bc.tar.bz2 |
Issue #24619: New approach for tokenizing async/await.
This commit fixes how one-line async-defs and defs are tracked
by the tokenizer. This makes it possible to correctly reject
invalid code such as:
>>> async def f():
...     def g(): pass
...     async = 10
and to correctly accept valid code such as:
>>> async def f():
...     async def g(): pass
...     await z
As a consequence, it is now possible to have one-line
'async def foo(): await ..' functions:
>>> async def foo(): return await bar()
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/lib2to3/pgen2/tokenize.py | 12 | ||||
-rw-r--r-- | Lib/test/badsyntax_async1.py | 5 | ||||
-rw-r--r-- | Lib/test/badsyntax_async2.py | 5 | ||||
-rw-r--r-- | Lib/test/badsyntax_async4.py | 2 | ||||
-rw-r--r-- | Lib/test/badsyntax_async9.py | 2 | ||||
-rw-r--r-- | Lib/test/test_coroutines.py | 226 | ||||
-rw-r--r-- | Lib/test/test_grammar.py | 5 | ||||
-rw-r--r-- | Lib/test/test_tokenize.py | 15 | ||||
-rw-r--r-- | Lib/tokenize.py | 7 |
9 files changed, 254 insertions, 25 deletions
diff --git a/Lib/lib2to3/pgen2/tokenize.py b/Lib/lib2to3/pgen2/tokenize.py index 690fec4..896b0fa 100644 --- a/Lib/lib2to3/pgen2/tokenize.py +++ b/Lib/lib2to3/pgen2/tokenize.py @@ -369,6 +369,7 @@ def generate_tokens(readline): # 'stashed' and 'ctx' are used for async/await parsing stashed = None ctx = [('sync', 0)] + in_async = 0 while 1: # loop over lines in stream try: @@ -436,6 +437,14 @@ def generate_tokens(readline): "unindent does not match any outer indentation level", ("<tokenize>", lnum, pos, line)) indents = indents[:-1] + + cur_indent = indents[-1] + while len(ctx) > 1 and ctx[-1][1] >= cur_indent: + if ctx[-1][0] == 'async': + in_async -= 1 + assert in_async >= 0 + ctx.pop() + yield (DEDENT, '', (lnum, pos), (lnum, pos), line) else: # continued statement @@ -499,7 +508,7 @@ def generate_tokens(readline): yield (STRING, token, spos, epos, line) elif initial in namechars: # ordinary name if token in ('async', 'await'): - if ctx[-1][0] == 'async' and ctx[-1][1] < indents[-1]: + if in_async: yield (ASYNC if token == 'async' else AWAIT, token, spos, epos, line) continue @@ -515,6 +524,7 @@ def generate_tokens(readline): and stashed[1] == 'async'): ctx.append(('async', indents[-1])) + in_async += 1 yield (ASYNC, stashed[1], stashed[2], stashed[3], diff --git a/Lib/test/badsyntax_async1.py b/Lib/test/badsyntax_async1.py index 970445d..fb85e29 100644 --- a/Lib/test/badsyntax_async1.py +++ b/Lib/test/badsyntax_async1.py @@ -1,3 +1,2 @@ -async def foo(): - def foo(a=await something()): - pass +async def foo(a=await something()): + pass diff --git a/Lib/test/badsyntax_async2.py b/Lib/test/badsyntax_async2.py index 1e62a3e..6f6f4f5 100644 --- a/Lib/test/badsyntax_async2.py +++ b/Lib/test/badsyntax_async2.py @@ -1,3 +1,2 @@ -async def foo(): - def foo(a:await something()): - pass +async def foo(a:await something()): + pass diff --git a/Lib/test/badsyntax_async4.py b/Lib/test/badsyntax_async4.py index 4afda40..d033b28 100644 --- a/Lib/test/badsyntax_async4.py +++ 
b/Lib/test/badsyntax_async4.py @@ -1,2 +1,2 @@ async def foo(): - async def foo(): await something() + await diff --git a/Lib/test/badsyntax_async9.py b/Lib/test/badsyntax_async9.py deleted file mode 100644 index d033b28..0000000 --- a/Lib/test/badsyntax_async9.py +++ /dev/null @@ -1,2 +0,0 @@ -async def foo(): - await diff --git a/Lib/test/test_coroutines.py b/Lib/test/test_coroutines.py index 9d97123..3ba2f23 100644 --- a/Lib/test/test_coroutines.py +++ b/Lib/test/test_coroutines.py @@ -67,11 +67,11 @@ def silence_coro_gc(): class AsyncBadSyntaxTest(unittest.TestCase): def test_badsyntax_1(self): - with self.assertRaisesRegex(SyntaxError, 'invalid syntax'): + with self.assertRaisesRegex(SyntaxError, "'await' outside"): import test.badsyntax_async1 def test_badsyntax_2(self): - with self.assertRaisesRegex(SyntaxError, 'invalid syntax'): + with self.assertRaisesRegex(SyntaxError, "'await' outside"): import test.badsyntax_async2 def test_badsyntax_3(self): @@ -103,10 +103,6 @@ class AsyncBadSyntaxTest(unittest.TestCase): import test.badsyntax_async8 def test_badsyntax_9(self): - with self.assertRaisesRegex(SyntaxError, 'invalid syntax'): - import test.badsyntax_async9 - - def test_badsyntax_10(self): ns = {} for comp in {'(await a for a in b)', '[await a for a in b]', @@ -116,6 +112,221 @@ class AsyncBadSyntaxTest(unittest.TestCase): with self.assertRaisesRegex(SyntaxError, 'await.*in comprehen'): exec('async def f():\n\t{}'.format(comp), ns, ns) + def test_badsyntax_10(self): + # Tests for issue 24619 + + samples = [ + """async def foo(): + def bar(): pass + await = 1 + """, + + """async def foo(): + + def bar(): pass + await = 1 + """, + + """async def foo(): + def bar(): pass + if 1: + await = 1 + """, + + """def foo(): + async def bar(): pass + if 1: + await a + """, + + """def foo(): + async def bar(): pass + await a + """, + + """def foo(): + def baz(): pass + async def bar(): pass + await a + """, + + """def foo(): + def baz(): pass + # 456 + async def bar(): 
pass + # 123 + await a + """, + + """async def foo(): + def baz(): pass + # 456 + async def bar(): pass + # 123 + await = 2 + """, + + """def foo(): + + def baz(): pass + + async def bar(): pass + + await a + """, + + """async def foo(): + + def baz(): pass + + async def bar(): pass + + await = 2 + """, + + """async def foo(): + def async(): pass + """, + + """async def foo(): + def await(): pass + """, + + """async def foo(): + def bar(): + await + """, + + """async def foo(): + return lambda async: await + """, + + """async def foo(): + return lambda a: await + """, + + """async def foo(a: await b): + pass + """, + + """def baz(): + async def foo(a: await b): + pass + """, + + """async def foo(async): + pass + """, + + """async def foo(): + def bar(): + def baz(): + async = 1 + """, + + """async def foo(): + def bar(): + def baz(): + pass + async = 1 + """, + + """def foo(): + async def bar(): + + async def baz(): + pass + + def baz(): + 42 + + async = 1 + """, + + """async def foo(): + def bar(): + def baz(): + pass\nawait foo() + """, + + """def foo(): + def bar(): + async def baz(): + pass\nawait foo() + """, + + """async def foo(await): + pass + """, + + """def foo(): + + async def bar(): pass + + await a + """, + + """def foo(): + async def bar(): + pass\nawait a + """] + + ns = {} + for code in samples: + with self.subTest(code=code), self.assertRaises(SyntaxError): + exec(code, ns, ns) + + def test_goodsyntax_1(self): + # Tests for issue 24619 + + def foo(await): + async def foo(): pass + async def foo(): + pass + return await + 1 + self.assertEqual(foo(10), 11) + + def foo(await): + async def foo(): pass + async def foo(): pass + return await + 2 + self.assertEqual(foo(20), 22) + + def foo(await): + + async def foo(): pass + + async def foo(): pass + + return await + 2 + self.assertEqual(foo(20), 22) + + def foo(await): + """spam""" + async def foo(): \ + pass + # 123 + async def foo(): pass + # 456 + return await + 2 + self.assertEqual(foo(20), 22) + + 
def foo(await): + def foo(): pass + def foo(): pass + async def bar(): return await_ + await_ = await + try: + bar().send(None) + except StopIteration as ex: + return ex.args[0] + self.assertEqual(foo(42), 42) + + async def f(): + async def g(): pass + await z + self.assertTrue(inspect.iscoroutinefunction(f)) + class TokenizerRegrTest(unittest.TestCase): @@ -461,8 +672,7 @@ class CoroutineTest(unittest.TestCase): class Awaitable: pass - async def foo(): - return (await Awaitable()) + async def foo(): return await Awaitable() with self.assertRaisesRegex( TypeError, "object Awaitable can't be used in 'await' expression"): diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py index 2af7390..ca6b5d0 100644 --- a/Lib/test/test_grammar.py +++ b/Lib/test/test_grammar.py @@ -1051,10 +1051,7 @@ class GrammarTests(unittest.TestCase): async def test(): def sum(): - async = 1 - await = 41 - return async + await - + pass if 1: await someobj() diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 42fc78f..e320562 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -786,12 +786,12 @@ Async/await extension: NAME 'def' (2, 2) (2, 5) NAME 'foo' (2, 6) (2, 9) OP '(' (2, 9) (2, 10) - NAME 'await' (2, 10) (2, 15) + AWAIT 'await' (2, 10) (2, 15) OP ')' (2, 15) (2, 16) OP ':' (2, 16) (2, 17) NEWLINE '\\n' (2, 17) (2, 18) INDENT ' ' (3, 0) (3, 4) - NAME 'await' (3, 4) (3, 9) + AWAIT 'await' (3, 4) (3, 9) OP '=' (3, 10) (3, 11) NUMBER '1' (3, 12) (3, 13) NEWLINE '\\n' (3, 13) (3, 14) @@ -829,6 +829,17 @@ Async/await extension: OP ':' (2, 18) (2, 19) NAME 'pass' (2, 20) (2, 24) DEDENT '' (3, 0) (3, 0) + + >>> dump_tokens('''async def foo(async): await''') + ENCODING 'utf-8' (0, 0) (0, 0) + ASYNC 'async' (1, 0) (1, 5) + NAME 'def' (1, 6) (1, 9) + NAME 'foo' (1, 10) (1, 13) + OP '(' (1, 13) (1, 14) + ASYNC 'async' (1, 14) (1, 19) + OP ')' (1, 19) (1, 20) + OP ':' (1, 20) (1, 21) + AWAIT 'await' (1, 22) (1, 27) """ from test import support 
diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 3ec9018..c3efdda 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -501,6 +501,7 @@ def _tokenize(readline, encoding): # 'stashed' and 'ctx' are used for async/await parsing stashed = None ctx = [('sync', 0)] + in_async = 0 if encoding is not None: if encoding == "utf-8-sig": @@ -580,6 +581,9 @@ def _tokenize(readline, encoding): cur_indent = indents[-1] while len(ctx) > 1 and ctx[-1][1] >= cur_indent: + if ctx[-1][0] == 'async': + in_async -= 1 + assert in_async >= 0 ctx.pop() yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line) @@ -640,7 +644,7 @@ def _tokenize(readline, encoding): yield TokenInfo(STRING, token, spos, epos, line) elif initial.isidentifier(): # ordinary name if token in ('async', 'await'): - if ctx[-1][0] == 'async' and ctx[-1][1] < indents[-1]: + if in_async: yield TokenInfo( ASYNC if token == 'async' else AWAIT, token, spos, epos, line) @@ -657,6 +661,7 @@ def _tokenize(readline, encoding): and stashed.string == 'async'): ctx.append(('async', indents[-1])) + in_async += 1 yield TokenInfo(ASYNC, stashed.string, stashed.start, stashed.end, |