summaryrefslogtreecommitdiffstats
path: root/Parser
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>2019-03-07 20:38:08 (GMT)
committerMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>2019-03-07 20:38:08 (GMT)
commit495da292255b92dd73758fdd0e4c7d27d82b1e57 (patch)
tree1378cf049d2d125593fa970ea1e9a9f77604fab1 /Parser
parentbf94cc7b496a379e1f604aa2e4080bb70ca4020e (diff)
downloadcpython-495da292255b92dd73758fdd0e4c7d27d82b1e57.zip
cpython-495da292255b92dd73758fdd0e4c7d27d82b1e57.tar.gz
cpython-495da292255b92dd73758fdd0e4c7d27d82b1e57.tar.bz2
bpo-35975: Support parsing earlier minor versions of Python 3 (GH-12086)
This adds a `feature_version` flag to `ast.parse()` (documented) and `compile()` (hidden) that allow tweaking the parser to support older versions of the grammar. In particular if `feature_version` is 5 or 6, the hacks for the `async` and `await` keyword from PEP 492 are reinstated. (For 7 or higher, these are unconditionally treated as keywords, but they are still special tokens rather than `NAME` tokens that the parser driver recognizes.) https://bugs.python.org/issue35975
Diffstat (limited to 'Parser')
-rw-r--r--Parser/asdl_c.py6
-rw-r--r--Parser/parsetok.c2
-rw-r--r--Parser/token.c2
-rw-r--r--Parser/tokenizer.c79
-rw-r--r--Parser/tokenizer.h7
5 files changed, 96 insertions, 0 deletions
diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py
index 1526995..5224755 100644
--- a/Parser/asdl_c.py
+++ b/Parser/asdl_c.py
@@ -1189,6 +1189,11 @@ PyObject* PyAST_mod2obj(mod_ty t)
/* mode is 0 for "exec", 1 for "eval" and 2 for "single" input */
mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode)
{
+ return PyAST_obj2mod_ex(ast, arena, mode, PY_MINOR_VERSION);
+}
+
+mod_ty PyAST_obj2mod_ex(PyObject* ast, PyArena* arena, int mode, int feature_version)
+{
mod_ty res;
PyObject *req_type[3];
char *req_name[] = {"Module", "Expression", "Interactive"};
@@ -1269,6 +1274,7 @@ def main(srcfile, dump_module=False):
f.write("\n")
f.write("PyObject* PyAST_mod2obj(mod_ty t);\n")
f.write("mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode);\n")
+ f.write("mod_ty PyAST_obj2mod_ex(PyObject* ast, PyArena* arena, int mode, int feature_version);\n")
f.write("int PyAST_Check(PyObject* obj);\n")
f.write('\n')
f.write('#ifdef __cplusplus\n')
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 7a6c886..ba33a9a 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -101,6 +101,8 @@ PyParser_ParseStringObject(const char *s, PyObject *filename,
Py_INCREF(err_ret->filename);
tok->filename = err_ret->filename;
+ if (*flags & PyPARSE_ASYNC_HACKS)
+ tok->async_hacks = 1;
return parsetok(tok, g, start, err_ret, flags);
}
diff --git a/Parser/token.c b/Parser/token.c
index 228ecff..a489668 100644
--- a/Parser/token.c
+++ b/Parser/token.c
@@ -61,6 +61,8 @@ const char * const _PyParser_TokenNames[] = {
"ELLIPSIS",
"COLONEQUAL",
"OP",
+ "AWAIT",
+ "ASYNC",
"TYPE_IGNORE",
"TYPE_COMMENT",
"<ERRORTOKEN>",
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 44ec415..8f0a9c8 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -84,6 +84,11 @@ tok_new(void)
tok->decoding_buffer = NULL;
tok->type_comments = 0;
+ tok->async_hacks = 0;
+ tok->async_def = 0;
+ tok->async_def_indent = 0;
+ tok->async_def_nl = 0;
+
return tok;
}
@@ -1196,6 +1201,31 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
}
}
+ /* Peek ahead at the next character */
+ c = tok_nextc(tok);
+ tok_backup(tok, c);
+ /* Check if we are closing an async function */
+ if (tok->async_def
+ && !blankline
+ /* Due to some implementation artifacts of type comments,
+ * a TYPE_COMMENT at the start of a function won't set an
+ * indentation level and it will produce a NEWLINE after it.
+ * To avoid spuriously ending an async function due to this,
+ * wait until we have some non-newline char in front of us. */
+ && c != '\n'
+ && tok->level == 0
+ /* There was a NEWLINE after ASYNC DEF,
+ so we're past the signature. */
+ && tok->async_def_nl
+ /* Current indentation level is less than where
+ the async function was defined */
+ && tok->async_def_indent >= tok->indent)
+ {
+ tok->async_def = 0;
+ tok->async_def_indent = 0;
+ tok->async_def_nl = 0;
+ }
+
again:
tok->start = NULL;
/* Skip spaces */
@@ -1310,6 +1340,50 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
*p_start = tok->start;
*p_end = tok->cur;
+ /* async/await parsing block. */
+ if (tok->cur - tok->start == 5 && tok->start[0] == 'a') {
+ /* May be an 'async' or 'await' token. For Python 3.7 or
+ later we recognize them unconditionally. For Python
+ 3.5 or 3.6 we recognize 'async' in front of 'def', and
+ either one inside of 'async def'. (Technically we
+ shouldn't recognize these at all for 3.4 or earlier,
+ but there's no *valid* Python 3.4 code that would be
+ rejected, and async functions will be rejected in a
+ later phase.) */
+ if (!tok->async_hacks || tok->async_def) {
+ /* Always recognize the keywords. */
+ if (memcmp(tok->start, "async", 5) == 0) {
+ return ASYNC;
+ }
+ if (memcmp(tok->start, "await", 5) == 0) {
+ return AWAIT;
+ }
+ }
+ else if (memcmp(tok->start, "async", 5) == 0) {
+ /* The current token is 'async'.
+ Look ahead one token to see if that is 'def'. */
+
+ struct tok_state ahead_tok;
+ char *ahead_tok_start = NULL, *ahead_tok_end = NULL;
+ int ahead_tok_kind;
+
+ memcpy(&ahead_tok, tok, sizeof(ahead_tok));
+ ahead_tok_kind = tok_get(&ahead_tok, &ahead_tok_start,
+ &ahead_tok_end);
+
+ if (ahead_tok_kind == NAME
+ && ahead_tok.cur - ahead_tok.start == 3
+ && memcmp(ahead_tok.start, "def", 3) == 0)
+ {
+ /* The next token is going to be 'def', so instead of
+ returning a plain NAME token, return ASYNC. */
+ tok->async_def_indent = tok->indent;
+ tok->async_def = 1;
+ return ASYNC;
+ }
+ }
+ }
+
return NAME;
}
@@ -1322,6 +1396,11 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
*p_start = tok->start;
*p_end = tok->cur - 1; /* Leave '\n' out of the string */
tok->cont_line = 0;
+ if (tok->async_def) {
+ /* We're somewhere inside an 'async def' function, and
+ we've encountered a NEWLINE after its signature. */
+ tok->async_def_nl = 1;
+ }
return NEWLINE;
}
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 22e91f7..06c7a14 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -64,6 +64,13 @@ struct tok_state {
const char* input; /* Tokenizer's newline translated copy of the string. */
int type_comments; /* Whether to look for type comments */
+
+ /* async/await related fields (still needed depending on feature_version) */
+ int async_hacks; /* =1 if async/await aren't always keywords */
+ int async_def; /* =1 if tokens are inside an 'async def' body. */
+ int async_def_indent; /* Indentation level of the outermost 'async def'. */
+ int async_def_nl; /* =1 if the outermost 'async def' had at least one
+ NEWLINE token after it. */
};
extern struct tok_state *PyTokenizer_FromString(const char *, int);