summaryrefslogtreecommitdiffstats
path: root/Parser
diff options
context:
space:
mode:
authorYury Selivanov <yselivanov@sprymix.com>2015-07-22 10:33:45 (GMT)
committerYury Selivanov <yselivanov@sprymix.com>2015-07-22 10:33:45 (GMT)
commit8fb307cd650511ba019c4493275cb6684ad308bc (patch)
treedc1138644436a3e2c0592f096c6b8d0e47aec5ef /Parser
parent80acc3ebbc4c81f9c1bff864eca076d6bdbe9ec6 (diff)
downloadcpython-8fb307cd650511ba019c4493275cb6684ad308bc.zip
cpython-8fb307cd650511ba019c4493275cb6684ad308bc.tar.gz
cpython-8fb307cd650511ba019c4493275cb6684ad308bc.tar.bz2
Issue #24619: New approach for tokenizing async/await.
This commit fixes how one-line async-defs and defs are tracked by tokenizer. It allows to correctly parse invalid code such as: >>> async def f(): ... def g(): pass ... async = 10 and valid code such as: >>> async def f(): ... async def g(): pass ... await z As a consequence, is is now possible to have one-line 'async def foo(): await ..' functions: >>> async def foo(): return await bar()
Diffstat (limited to 'Parser')
-rw-r--r--Parser/tokenizer.c105
-rw-r--r--Parser/tokenizer.h21
2 files changed, 85 insertions, 41 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index d4476ae..46c0580 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -31,6 +31,12 @@
|| c == '_'\
|| (c >= 128))
+/* The following DEFTYPE* flags are used in 'tok_state->deftypestack',
+ and should be removed in 3.7, when async/await are regular
+ keywords. */
+#define DEFTYPE_ASYNC 1
+#define DEFTYPE_HAS_NL 2
+
extern char *PyOS_Readline(FILE *, FILE *, const char *);
/* Return malloc'ed string including trailing \n;
empty malloc'ed string for EOF;
@@ -130,6 +136,8 @@ tok_new(void)
tok->def = 0;
tok->defstack[0] = 0;
tok->deftypestack[0] = 0;
+ tok->def_async_behind = 0;
+ tok->def_in_async = 0;
tok->atbol = 1;
tok->pendin = 0;
@@ -1436,7 +1444,12 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
tok->pendin++;
while (tok->def && tok->defstack[tok->def] >= tok->indent) {
+ if (tok->deftypestack[tok->def] & DEFTYPE_ASYNC) {
+ tok->def_in_async--;
+ assert(tok->def_in_async >= 0);
+ }
tok->def--;
+ assert(tok->def >= 0);
}
return DEDENT;
@@ -1447,6 +1460,22 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
}
}
+ if (!blankline && tok->level == 0
+ && tok->def && tok->deftypestack[tok->def] & DEFTYPE_HAS_NL
+ && tok->defstack[tok->def] >= tok->indent)
+ {
+ /* The top function on the stack did have a NEWLINE
+ token, but didn't have an INDENT. That means that
+ it's a one-line function and it should now be removed
+ from the stack. */
+ if (tok->deftypestack[tok->def] & DEFTYPE_ASYNC) {
+ tok->def_in_async--;
+ assert(tok->def_in_async >= 0);
+ }
+ tok->def--;
+ assert(tok->def >= 0);
+ }
+
again:
tok->start = NULL;
/* Skip spaces */
@@ -1501,59 +1530,58 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
tok_len = tok->cur - tok->start;
if (tok_len == 3 && memcmp(tok->start, "def", 3) == 0) {
- if (tok->def && tok->deftypestack[tok->def] == 3) {
- tok->deftypestack[tok->def] = 2;
+ /* The current token is 'def'. */
+ if (tok->def + 1 >= MAXINDENT) {
+ tok->done = E_TOODEEP;
+ tok->cur = tok->inp;
+ return ERRORTOKEN;
}
- else if (tok->defstack[tok->def] < tok->indent) {
- /* We advance defs stack only when we see "def" *and*
- the indentation level was increased relative to the
- previous "def". */
- if (tok->def + 1 >= MAXINDENT) {
- tok->done = E_TOODEEP;
- tok->cur = tok->inp;
- return ERRORTOKEN;
- }
+ /* Advance defs stack. */
+ tok->def++;
+ tok->defstack[tok->def] = tok->indent;
- tok->def++;
- tok->defstack[tok->def] = tok->indent;
- tok->deftypestack[tok->def] = 1;
+ if (tok->def_async_behind) {
+ /* The previous token was 'async'. */
+ tok->def_async_behind = 0;
+ tok->deftypestack[tok->def] = DEFTYPE_ASYNC;
+ tok->def_in_async++;
+ }
+ else {
+ /* This is a regular function (not async def). */
+ tok->deftypestack[tok->def] = 0;
}
}
else if (tok_len == 5) {
if (memcmp(tok->start, "async", 5) == 0) {
+ /* The current token is 'async'. */
memcpy(&ahead_tok, tok, sizeof(ahead_tok));
+ /* Try to look ahead one token. */
ahead_tok_kind = tok_get(&ahead_tok, &ahead_tok_start,
&ahead_top_end);
- if (ahead_tok_kind == NAME &&
- ahead_tok.cur - ahead_tok.start == 3 &&
- memcmp(ahead_tok.start, "def", 3) == 0) {
-
- if (tok->def + 1 >= MAXINDENT) {
- tok->done = E_TOODEEP;
- tok->cur = tok->inp;
- return ERRORTOKEN;
- }
-
- tok->def++;
- tok->defstack[tok->def] = tok->indent;
- tok->deftypestack[tok->def] = 3;
-
+ if (ahead_tok_kind == NAME
+ && ahead_tok.cur - ahead_tok.start == 3
+ && memcmp(ahead_tok.start, "def", 3) == 0)
+ {
+ /* The next token is going to be 'def', so instead of
+ returning 'async' NAME token, we return ASYNC. */
+ tok->def_async_behind = 1;
return ASYNC;
}
- else if (tok->def && tok->deftypestack[tok->def] == 2
- && tok->defstack[tok->def] < tok->indent) {
-
+ else if (tok->def_in_async)
+ {
+ /* We're inside an 'async def' function, so we treat
+ 'async' token as ASYNC, instead of NAME. */
return ASYNC;
}
}
- else if (memcmp(tok->start, "await", 5) == 0
- && tok->def && tok->deftypestack[tok->def] == 2
- && tok->defstack[tok->def] < tok->indent) {
-
+ else if (memcmp(tok->start, "await", 5) == 0 && tok->def_in_async)
+ {
+ /* We're inside an 'async def' function, so we treat
+ 'await' token as AWAIT, instead of NAME. */
return AWAIT;
}
}
@@ -1569,6 +1597,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
*p_start = tok->start;
*p_end = tok->cur - 1; /* Leave '\n' out of the string */
tok->cont_line = 0;
+ if (tok->def) {
+ /* Mark the top function on the stack that it had
+ at least one NEWLINE. That will help us to
+ distinguish one-line functions from functions
+ with multiple statements. */
+ tok->deftypestack[tok->def] |= DEFTYPE_HAS_NL;
+ }
return NEWLINE;
}
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 3bcdad6..e198a0b 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -66,12 +66,21 @@ struct tok_state {
const char* str;
const char* input; /* Tokenizer's newline translated copy of the string. */
- int defstack[MAXINDENT]; /* stack if funcs & indents where they
- were defined */
- int deftypestack[MAXINDENT]; /* stack of func types
- (0 not func; 1: "def name";
- 2: "async def name") */
- int def; /* Length of stack of func types */
+ /* `def*` fields are for parsing async/await in a backwards compatible
+ way. They should be removed in 3.7, when they will become
+ regular constants. See PEP 492 for more details. */
+ int defstack[MAXINDENT]; /* Stack of funcs & indents where they
+ were defined. */
+ int deftypestack[MAXINDENT]; /* Stack of func flags, see DEFTYPE_*
+ constants. */
+ int def; /* Length of stack of func types/flags. */
+ int def_async_behind; /* 1 if there was an 'async' token before
+ a 'def' token. */
+ int def_in_async; /* Counter of how deep 'async def's
+ are nested. If greater than 0,
+ we are somewhere in an 'async def'
+ body, so 'async' and 'await' should
+ be parsed as keywords.*/
};
extern struct tok_state *PyTokenizer_FromString(const char *, int);