diff options
author | Yury Selivanov <yselivanov@sprymix.com> | 2015-07-23 12:01:58 (GMT) |
---|---|---|
committer | Yury Selivanov <yselivanov@sprymix.com> | 2015-07-23 12:01:58 (GMT) |
commit | 96ec934e755355cfc5af036db8641646b7ddb45e (patch) | |
tree | a6fd6a4cbef1b75ab0cc10db01fd91ecf2e99976 /Parser | |
parent | f315c1c01676bfabb5b1c6628642668f1ef436a6 (diff) | |
download | cpython-96ec934e755355cfc5af036db8641646b7ddb45e.zip cpython-96ec934e755355cfc5af036db8641646b7ddb45e.tar.gz cpython-96ec934e755355cfc5af036db8641646b7ddb45e.tar.bz2 |
Issue #24619: Simplify async/await tokenization.
This commit simplifies async/await tokenization in tokenizer.c,
tokenize.py & lib2to3/tokenize.py. The previous solution was to keep
a stack of async-def & def blocks, whereas the new approach is just
to remember the position of the outermost async-def block.
This change won't bring any parsing performance improvements, but
it makes the code much easier to read and validate.
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/tokenizer.c | 126 | ||||
-rw-r--r-- | Parser/tokenizer.h | 21 |
2 files changed, 45 insertions, 102 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 46c0580..04baeaf 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -31,12 +31,6 @@ || c == '_'\ || (c >= 128)) -/* The following DEFTYPE* flags are used in 'tok_state->deftypestack', - and should be removed in 3.7, when async/await are regular - keywords. */ -#define DEFTYPE_ASYNC 1 -#define DEFTYPE_HAS_NL 2 - extern char *PyOS_Readline(FILE *, FILE *, const char *); /* Return malloc'ed string including trailing \n; empty malloc'ed string for EOF; @@ -133,12 +127,6 @@ tok_new(void) tok->indent = 0; tok->indstack[0] = 0; - tok->def = 0; - tok->defstack[0] = 0; - tok->deftypestack[0] = 0; - tok->def_async_behind = 0; - tok->def_in_async = 0; - tok->atbol = 1; tok->pendin = 0; tok->prompt = tok->nextprompt = NULL; @@ -159,6 +147,11 @@ tok_new(void) tok->decoding_readline = NULL; tok->decoding_buffer = NULL; #endif + + tok->async_def = 0; + tok->async_def_indent = 0; + tok->async_def_nl = 0; + return tok; } @@ -1350,11 +1343,6 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) int c; int blankline, nonascii; - int tok_len; - struct tok_state ahead_tok; - char *ahead_tok_start = NULL, *ahead_top_end = NULL; - int ahead_tok_kind; - *p_start = *p_end = NULL; nextline: tok->start = NULL; @@ -1442,16 +1430,6 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) if (tok->pendin != 0) { if (tok->pendin < 0) { tok->pendin++; - - while (tok->def && tok->defstack[tok->def] >= tok->indent) { - if (tok->deftypestack[tok->def] & DEFTYPE_ASYNC) { - tok->def_in_async--; - assert(tok->def_in_async >= 0); - } - tok->def--; - assert(tok->def >= 0); - } - return DEDENT; } else { @@ -1460,20 +1438,19 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) } } - if (!blankline && tok->level == 0 - && tok->def && tok->deftypestack[tok->def] & DEFTYPE_HAS_NL - && tok->defstack[tok->def] >= tok->indent) + if (tok->async_def + && !blankline + && tok->level == 0 + /* There was a NEWLINE after 
ASYNC DEF, + so we're past the signature. */ + && tok->async_def_nl + /* Current indentation level is less than where + the async function was defined */ + && tok->async_def_indent >= tok->indent) { - /* The top function on the stack did have a NEWLINE - token, but didn't have an INDENT. That means that - it's a one-line function and it should now be removed - from the stack. */ - if (tok->deftypestack[tok->def] & DEFTYPE_ASYNC) { - tok->def_in_async--; - assert(tok->def_in_async >= 0); - } - tok->def--; - assert(tok->def >= 0); + tok->async_def = 0; + tok->async_def_indent = 0; + tok->async_def_nl = 0; } again: @@ -1528,38 +1505,27 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) *p_start = tok->start; *p_end = tok->cur; - tok_len = tok->cur - tok->start; - if (tok_len == 3 && memcmp(tok->start, "def", 3) == 0) { - /* The current token is 'def'. */ - if (tok->def + 1 >= MAXINDENT) { - tok->done = E_TOODEEP; - tok->cur = tok->inp; - return ERRORTOKEN; + /* async/await parsing block. */ + if (tok->cur - tok->start == 5) { + /* Current token length is 5. */ + if (tok->async_def) { + /* We're inside an 'async def' function. */ + if (memcmp(tok->start, "async", 5) == 0) + return ASYNC; + if (memcmp(tok->start, "await", 5) == 0) + return AWAIT; } + else if (memcmp(tok->start, "async", 5) == 0) { + /* The current token is 'async'. + Look ahead one token.*/ - /* Advance defs stack. */ - tok->def++; - tok->defstack[tok->def] = tok->indent; + struct tok_state ahead_tok; + char *ahead_tok_start = NULL, *ahead_tok_end = NULL; + int ahead_tok_kind; - if (tok->def_async_behind) { - /* The previous token was 'async'. */ - tok->def_async_behind = 0; - tok->deftypestack[tok->def] = DEFTYPE_ASYNC; - tok->def_in_async++; - } - else { - /* This is a regular function (not async def). */ - tok->deftypestack[tok->def] = 0; - } - } - else if (tok_len == 5) { - if (memcmp(tok->start, "async", 5) == 0) { - /* The current token is 'async'. 
*/ memcpy(&ahead_tok, tok, sizeof(ahead_tok)); - - /* Try to look ahead one token. */ ahead_tok_kind = tok_get(&ahead_tok, &ahead_tok_start, - &ahead_top_end); + &ahead_tok_end); if (ahead_tok_kind == NAME && ahead_tok.cur - ahead_tok.start == 3 @@ -1567,22 +1533,10 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) { /* The next token is going to be 'def', so instead of returning 'async' NAME token, we return ASYNC. */ - tok->def_async_behind = 1; + tok->async_def_indent = tok->indent; + tok->async_def = 1; return ASYNC; } - else if (tok->def_in_async) - { - /* We're inside an 'async def' function, so we treat - 'async' token as ASYNC, instead of NAME. */ - return ASYNC; - } - - } - else if (memcmp(tok->start, "await", 5) == 0 && tok->def_in_async) - { - /* We're inside an 'async def' function, so we treat - 'await' token as AWAIT, instead of NAME. */ - return AWAIT; } } @@ -1597,12 +1551,10 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) *p_start = tok->start; *p_end = tok->cur - 1; /* Leave '\n' out of the string */ tok->cont_line = 0; - if (tok->def) { - /* Mark the top function on the stack that it had - at least one NEWLINE. That will help us to - distinguish one-line functions from functions - with multiple statements. */ - tok->deftypestack[tok->def] |= DEFTYPE_HAS_NL; + if (tok->async_def) { + /* We're somewhere inside an 'async def' function, and + we've encountered a NEWLINE after its signature. */ + tok->async_def_nl = 1; } return NEWLINE; } diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index e198a0b..af053e2 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -66,21 +66,12 @@ struct tok_state { const char* str; const char* input; /* Tokenizer's newline translated copy of the string. */ - /* `def*` fields are for parsing async/await in a backwards compatible - way. They should be removed in 3.7, when they will become - regular constants. See PEP 492 for more details. 
*/ - int defstack[MAXINDENT]; /* Stack of funcs & indents where they - were defined. */ - int deftypestack[MAXINDENT]; /* Stack of func flags, see DEFTYPE_* - constants. */ - int def; /* Length of stack of func types/flags. */ - int def_async_behind; /* 1 if there was an 'async' token before - a 'def' token. */ - int def_in_async; /* Counter of how deep 'async def's - are nested. If greater than 0, - we are somewhere in an 'async def' - body, so 'async' and 'await' should - be parsed as keywords.*/ + /* async/await related fields; can be removed in 3.7 when async and await + become normal keywords. */ + int async_def; /* =1 if tokens are inside an 'async def' body. */ + int async_def_indent; /* Indentation level of the outermost 'async def'. */ + int async_def_nl; /* =1 if the outermost 'async def' had at least one + NEWLINE token after it. */ }; extern struct tok_state *PyTokenizer_FromString(const char *, int); |