author    | Guido van Rossum <guido@python.org> | 2019-03-07 20:38:08 (GMT)
committer | Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> | 2019-03-07 20:38:08 (GMT)
commit    | 495da292255b92dd73758fdd0e4c7d27d82b1e57 (patch)
tree      | 1378cf049d2d125593fa970ea1e9a9f77604fab1 /Parser
parent    | bf94cc7b496a379e1f604aa2e4080bb70ca4020e (diff)
bpo-35975: Support parsing earlier minor versions of Python 3 (GH-12086)
This adds a `feature_version` flag to `ast.parse()` (documented) and `compile()` (hidden) that allows tweaking the parser to support older versions of the grammar. In particular, if `feature_version` is 5 or 6, the hacks for the `async` and `await` keywords from PEP 492 are reinstated. (For 7 or higher, these are unconditionally treated as keywords, but they are still special tokens rather than `NAME` tokens that the parser driver recognizes.)
https://bugs.python.org/issue35975
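
As a rough usage sketch (not part of the commit; it assumes an interpreter built with this change and uses the integer `feature_version` values quoted above):

    import ast

    # feature_version=6 selects the Python 3.6 rules, so the PEP 492
    # hacks are reinstated and 'async' is accepted as a plain identifier.
    tree = ast.parse("async = 1", feature_version=6)

    # Under the default (3.7+) rules the same source is rejected,
    # because 'async' is unconditionally a keyword.
    try:
        ast.parse("async = 1")
    except SyntaxError:
        print("rejected under the current grammar")
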
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/asdl_c.py   |  6
-rw-r--r-- | Parser/parsetok.c  |  2
-rw-r--r-- | Parser/token.c     |  2
-rw-r--r-- | Parser/tokenizer.c | 79
-rw-r--r-- | Parser/tokenizer.h |  7
5 files changed, 96 insertions, 0 deletions
diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py
index 1526995..5224755 100644
--- a/Parser/asdl_c.py
+++ b/Parser/asdl_c.py
@@ -1189,6 +1189,11 @@ PyObject* PyAST_mod2obj(mod_ty t)
 /* mode is 0 for "exec", 1 for "eval" and 2 for "single" input */
 mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode)
 {
+    return PyAST_obj2mod_ex(ast, arena, mode, PY_MINOR_VERSION);
+}
+
+mod_ty PyAST_obj2mod_ex(PyObject* ast, PyArena* arena, int mode, int feature_version)
+{
     mod_ty res;
     PyObject *req_type[3];
     char *req_name[] = {"Module", "Expression", "Interactive"};
@@ -1269,6 +1274,7 @@ def main(srcfile, dump_module=False):
             f.write("\n")
             f.write("PyObject* PyAST_mod2obj(mod_ty t);\n")
             f.write("mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode);\n")
+            f.write("mod_ty PyAST_obj2mod_ex(PyObject* ast, PyArena* arena, int mode, int feature_version);\n")
             f.write("int PyAST_Check(PyObject* obj);\n")
             f.write('\n')
             f.write('#ifdef __cplusplus\n')
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 7a6c886..ba33a9a 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -101,6 +101,8 @@ PyParser_ParseStringObject(const char *s, PyObject *filename,
         Py_INCREF(err_ret->filename);
     tok->filename = err_ret->filename;
+    if (*flags & PyPARSE_ASYNC_HACKS)
+        tok->async_hacks = 1;
 
     return parsetok(tok, g, start, err_ret, flags);
 }
diff --git a/Parser/token.c b/Parser/token.c
index 228ecff..a489668 100644
--- a/Parser/token.c
+++ b/Parser/token.c
@@ -61,6 +61,8 @@ const char * const _PyParser_TokenNames[] = {
     "ELLIPSIS",
     "COLONEQUAL",
     "OP",
+    "AWAIT",
+    "ASYNC",
     "TYPE_IGNORE",
     "TYPE_COMMENT",
     "<ERRORTOKEN>",
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 44ec415..8f0a9c8 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -84,6 +84,11 @@ tok_new(void)
     tok->decoding_buffer = NULL;
     tok->type_comments = 0;
 
+    tok->async_hacks = 0;
+    tok->async_def = 0;
+    tok->async_def_indent = 0;
+    tok->async_def_nl = 0;
+
     return tok;
 }
@@ -1196,6 +1201,31 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
         }
     }
 
+    /* Peek ahead at the next character */
+    c = tok_nextc(tok);
+    tok_backup(tok, c);
+    /* Check if we are closing an async function */
+    if (tok->async_def
+        && !blankline
+        /* Due to some implementation artifacts of type comments,
+         * a TYPE_COMMENT at the start of a function won't set an
+         * indentation level and it will produce a NEWLINE after it.
+         * To avoid spuriously ending an async function due to this,
+         * wait until we have some non-newline char in front of us. */
+        && c != '\n'
+        && tok->level == 0
+        /* There was a NEWLINE after ASYNC DEF,
+           so we're past the signature. */
+        && tok->async_def_nl
+        /* Current indentation level is less than where
+           the async function was defined */
+        && tok->async_def_indent >= tok->indent)
+    {
+        tok->async_def = 0;
+        tok->async_def_indent = 0;
+        tok->async_def_nl = 0;
+    }
+
  again:
     tok->start = NULL;
     /* Skip spaces */
@@ -1310,6 +1340,50 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
         *p_start = tok->start;
         *p_end = tok->cur;
 
+        /* async/await parsing block. */
+        if (tok->cur - tok->start == 5 && tok->start[0] == 'a') {
+            /* May be an 'async' or 'await' token.  For Python 3.7 or
+               later we recognize them unconditionally.  For Python
+               3.5 or 3.6 we recognize 'async' in front of 'def', and
+               either one inside of 'async def'.  (Technically we
+               shouldn't recognize these at all for 3.4 or earlier,
+               but there's no *valid* Python 3.4 code that would be
+               rejected, and async functions will be rejected in a
+               later phase.) */
+            if (!tok->async_hacks || tok->async_def) {
+                /* Always recognize the keywords. */
+                if (memcmp(tok->start, "async", 5) == 0) {
+                    return ASYNC;
+                }
+                if (memcmp(tok->start, "await", 5) == 0) {
+                    return AWAIT;
+                }
+            }
+            else if (memcmp(tok->start, "async", 5) == 0) {
+                /* The current token is 'async'.
+                   Look ahead one token to see if that is 'def'. */
+
+                struct tok_state ahead_tok;
+                char *ahead_tok_start = NULL, *ahead_tok_end = NULL;
+                int ahead_tok_kind;
+
+                memcpy(&ahead_tok, tok, sizeof(ahead_tok));
+                ahead_tok_kind = tok_get(&ahead_tok, &ahead_tok_start,
+                                         &ahead_tok_end);
+
+                if (ahead_tok_kind == NAME
+                    && ahead_tok.cur - ahead_tok.start == 3
+                    && memcmp(ahead_tok.start, "def", 3) == 0)
+                {
+                    /* The next token is going to be 'def', so instead of
+                       returning a plain NAME token, return ASYNC. */
+                    tok->async_def_indent = tok->indent;
+                    tok->async_def = 1;
+                    return ASYNC;
+                }
+            }
+        }
+
         return NAME;
     }
 
@@ -1322,6 +1396,11 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
         *p_start = tok->start;
         *p_end = tok->cur - 1; /* Leave '\n' out of the string */
         tok->cont_line = 0;
+        if (tok->async_def) {
+            /* We're somewhere inside an 'async def' function, and
+               we've encountered a NEWLINE after its signature. */
+            tok->async_def_nl = 1;
+        }
         return NEWLINE;
     }
 
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 22e91f7..06c7a14 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -64,6 +64,13 @@ struct tok_state {
     const char* input;    /* Tokenizer's newline translated copy of the string. */
     int type_comments;    /* Whether to look for type comments */
+
+    /* async/await related fields (still needed depending on feature_version) */
+    int async_hacks;      /* =1 if async/await aren't always keywords */
+    int async_def;        /* =1 if tokens are inside an 'async def' body. */
+    int async_def_indent; /* Indentation level of the outermost 'async def'. */
+    int async_def_nl;     /* =1 if the outermost 'async def' had at least one
+                             NEWLINE token after it. */
 };
 
 extern struct tok_state *PyTokenizer_FromString(const char *, int);
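
For orientation, the tokenizer changes above roughly translate into the following observable behaviour at the Python level (a sketch, assuming an interpreter built with this patch; exact error messages may differ):

    import ast

    # Even under the 3.6 rules, 'async' directly in front of 'def' becomes
    # an ASYNC token via the one-token look-ahead, and 'await' is honored
    # inside the resulting 'async def' body.
    ast.parse("async def f():\n    await g()\n", feature_version=6)

    # Outside an 'async def' body both words stay ordinary NAME tokens,
    # so code that is illegal on 3.7+ still parses:
    ast.parse("await = 2\n", feature_version=6)
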