diff options
author | Yury Selivanov <yselivanov@sprymix.com> | 2015-07-22 10:33:45 (GMT) |
---|---|---|
committer | Yury Selivanov <yselivanov@sprymix.com> | 2015-07-22 10:33:45 (GMT) |
commit | 8fb307cd650511ba019c4493275cb6684ad308bc (patch) | |
tree | dc1138644436a3e2c0592f096c6b8d0e47aec5ef /Parser | |
parent | 80acc3ebbc4c81f9c1bff864eca076d6bdbe9ec6 (diff) | |
download | cpython-8fb307cd650511ba019c4493275cb6684ad308bc.zip cpython-8fb307cd650511ba019c4493275cb6684ad308bc.tar.gz cpython-8fb307cd650511ba019c4493275cb6684ad308bc.tar.bz2 |
Issue #24619: New approach for tokenizing async/await.
This commit fixes how one-line async-defs and defs are tracked
by the tokenizer. It makes it possible to correctly handle invalid
code such as:
>>> async def f():
...     def g(): pass
...     async = 10
and valid code such as:
>>> async def f():
...     async def g(): pass
...     await z
As a consequence, it is now possible to have one-line
'async def foo(): await ..' functions:
>>> async def foo(): return await bar()
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/tokenizer.c | 105 | ||||
-rw-r--r-- | Parser/tokenizer.h | 21 |
2 files changed, 85 insertions, 41 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index d4476ae..46c0580 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -31,6 +31,12 @@ || c == '_'\ || (c >= 128)) +/* The following DEFTYPE* flags are used in 'tok_state->deftypestack', + and should be removed in 3.7, when async/await are regular + keywords. */ +#define DEFTYPE_ASYNC 1 +#define DEFTYPE_HAS_NL 2 + extern char *PyOS_Readline(FILE *, FILE *, const char *); /* Return malloc'ed string including trailing \n; empty malloc'ed string for EOF; @@ -130,6 +136,8 @@ tok_new(void) tok->def = 0; tok->defstack[0] = 0; tok->deftypestack[0] = 0; + tok->def_async_behind = 0; + tok->def_in_async = 0; tok->atbol = 1; tok->pendin = 0; @@ -1436,7 +1444,12 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) tok->pendin++; while (tok->def && tok->defstack[tok->def] >= tok->indent) { + if (tok->deftypestack[tok->def] & DEFTYPE_ASYNC) { + tok->def_in_async--; + assert(tok->def_in_async >= 0); + } tok->def--; + assert(tok->def >= 0); } return DEDENT; @@ -1447,6 +1460,22 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) } } + if (!blankline && tok->level == 0 + && tok->def && tok->deftypestack[tok->def] & DEFTYPE_HAS_NL + && tok->defstack[tok->def] >= tok->indent) + { + /* The top function on the stack did have a NEWLINE + token, but didn't have an INDENT. That means that + it's a one-line function and it should now be removed + from the stack. */ + if (tok->deftypestack[tok->def] & DEFTYPE_ASYNC) { + tok->def_in_async--; + assert(tok->def_in_async >= 0); + } + tok->def--; + assert(tok->def >= 0); + } + again: tok->start = NULL; /* Skip spaces */ @@ -1501,59 +1530,58 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) tok_len = tok->cur - tok->start; if (tok_len == 3 && memcmp(tok->start, "def", 3) == 0) { - if (tok->def && tok->deftypestack[tok->def] == 3) { - tok->deftypestack[tok->def] = 2; + /* The current token is 'def'. 
*/ + if (tok->def + 1 >= MAXINDENT) { + tok->done = E_TOODEEP; + tok->cur = tok->inp; + return ERRORTOKEN; } - else if (tok->defstack[tok->def] < tok->indent) { - /* We advance defs stack only when we see "def" *and* - the indentation level was increased relative to the - previous "def". */ - if (tok->def + 1 >= MAXINDENT) { - tok->done = E_TOODEEP; - tok->cur = tok->inp; - return ERRORTOKEN; - } + /* Advance defs stack. */ + tok->def++; + tok->defstack[tok->def] = tok->indent; - tok->def++; - tok->defstack[tok->def] = tok->indent; - tok->deftypestack[tok->def] = 1; + if (tok->def_async_behind) { + /* The previous token was 'async'. */ + tok->def_async_behind = 0; + tok->deftypestack[tok->def] = DEFTYPE_ASYNC; + tok->def_in_async++; + } + else { + /* This is a regular function (not async def). */ + tok->deftypestack[tok->def] = 0; } } else if (tok_len == 5) { if (memcmp(tok->start, "async", 5) == 0) { + /* The current token is 'async'. */ memcpy(&ahead_tok, tok, sizeof(ahead_tok)); + /* Try to look ahead one token. */ ahead_tok_kind = tok_get(&ahead_tok, &ahead_tok_start, &ahead_top_end); - if (ahead_tok_kind == NAME && - ahead_tok.cur - ahead_tok.start == 3 && - memcmp(ahead_tok.start, "def", 3) == 0) { - - if (tok->def + 1 >= MAXINDENT) { - tok->done = E_TOODEEP; - tok->cur = tok->inp; - return ERRORTOKEN; - } - - tok->def++; - tok->defstack[tok->def] = tok->indent; - tok->deftypestack[tok->def] = 3; - + if (ahead_tok_kind == NAME + && ahead_tok.cur - ahead_tok.start == 3 + && memcmp(ahead_tok.start, "def", 3) == 0) + { + /* The next token is going to be 'def', so instead of + returning 'async' NAME token, we return ASYNC. */ + tok->def_async_behind = 1; return ASYNC; } - else if (tok->def && tok->deftypestack[tok->def] == 2 - && tok->defstack[tok->def] < tok->indent) { - + else if (tok->def_in_async) + { + /* We're inside an 'async def' function, so we treat + 'async' token as ASYNC, instead of NAME. 
*/ return ASYNC; } } - else if (memcmp(tok->start, "await", 5) == 0 - && tok->def && tok->deftypestack[tok->def] == 2 - && tok->defstack[tok->def] < tok->indent) { - + else if (memcmp(tok->start, "await", 5) == 0 && tok->def_in_async) + { + /* We're inside an 'async def' function, so we treat + 'await' token as AWAIT, instead of NAME. */ return AWAIT; } } @@ -1569,6 +1597,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) *p_start = tok->start; *p_end = tok->cur - 1; /* Leave '\n' out of the string */ tok->cont_line = 0; + if (tok->def) { + /* Mark the top function on the stack that it had + at least one NEWLINE. That will help us to + distinguish one-line functions from functions + with multiple statements. */ + tok->deftypestack[tok->def] |= DEFTYPE_HAS_NL; + } return NEWLINE; } diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 3bcdad6..e198a0b 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -66,12 +66,21 @@ struct tok_state { const char* str; const char* input; /* Tokenizer's newline translated copy of the string. */ - int defstack[MAXINDENT]; /* stack if funcs & indents where they - were defined */ - int deftypestack[MAXINDENT]; /* stack of func types - (0 not func; 1: "def name"; - 2: "async def name") */ - int def; /* Length of stack of func types */ + /* `def*` fields are for parsing async/await in a backwards compatible + way. They should be removed in 3.7, when they will become + regular constants. See PEP 492 for more details. */ + int defstack[MAXINDENT]; /* Stack of funcs & indents where they + were defined. */ + int deftypestack[MAXINDENT]; /* Stack of func flags, see DEFTYPE_* + constants. */ + int def; /* Length of stack of func types/flags. */ + int def_async_behind; /* 1 if there was an 'async' token before + a 'def' token. */ + int def_in_async; /* Counter of how deep 'async def's + are nested. 
If greater than 0, + we are somewhere in an 'async def' + body, so 'async' and 'await' should + be parsed as keywords.*/ }; extern struct tok_state *PyTokenizer_FromString(const char *, int); |