author | Guido van Rossum <guido@python.org> | 2019-01-31 11:40:27 (GMT) |
---|---|---|
committer | Łukasz Langa <lukasz@langa.pl> | 2019-01-31 11:40:27 (GMT) |
commit | dcfcd146f8e6fc5c2fc16a4c192a0c5f5ca8c53c (patch) | |
tree | 07829c4f286194d0e3d08151a26ef1f3494a849b /Parser | |
parent | d97daebfa69b4df95231bcae4123eacad6a48d14 (diff) | |
download | cpython-dcfcd146f8e6fc5c2fc16a4c192a0c5f5ca8c53c.zip cpython-dcfcd146f8e6fc5c2fc16a4c192a0c5f5ca8c53c.tar.gz cpython-dcfcd146f8e6fc5c2fc16a4c192a0c5f5ca8c53c.tar.bz2 |
bpo-35766: Merge typed_ast back into CPython (GH-11645)
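In user-visible terms, this merge is the parser-side half of what Python 3.8 exposes as `ast.parse(..., type_comments=True)`. A minimal sketch of the resulting behavior, assuming a Python 3.8+ interpreter (the source string is illustrative):

```python
import ast

# With type-comment support enabled, the parser attaches the text after
# "# type:" to the new `type_comment` field instead of discarding it.
tree = ast.parse("x = 3  # type: int\n", type_comments=True)
assign = tree.body[0]
print(type(assign).__name__)   # Assign
print(assign.type_comment)     # 'int'
```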
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/Python.asdl | 23
-rw-r--r-- | Parser/asdl_c.py | 16
-rw-r--r-- | Parser/parser.c | 13
-rw-r--r-- | Parser/parsetok.c | 78
-rw-r--r-- | Parser/token.c | 2
-rw-r--r-- | Parser/tokenizer.c | 57
-rw-r--r-- | Parser/tokenizer.h | 2
7 files changed, 177 insertions, 14 deletions
```diff
diff --git a/Parser/Python.asdl b/Parser/Python.asdl
index 7b2a873..85b686d 100644
--- a/Parser/Python.asdl
+++ b/Parser/Python.asdl
@@ -3,17 +3,20 @@
 
 module Python
 {
-    mod = Module(stmt* body)
+    mod = Module(stmt* body, type_ignore *type_ignores)
         | Interactive(stmt* body)
         | Expression(expr body)
+        | FunctionType(expr* argtypes, expr returns)
 
         -- not really an actual node but useful in Jython's typesystem.
         | Suite(stmt* body)
 
     stmt = FunctionDef(identifier name, arguments args,
-                       stmt* body, expr* decorator_list, expr? returns)
+                       stmt* body, expr* decorator_list, expr? returns,
+                       string? type_comment)
           | AsyncFunctionDef(identifier name, arguments args,
-                             stmt* body, expr* decorator_list, expr? returns)
+                             stmt* body, expr* decorator_list, expr? returns,
+                             string? type_comment)
 
           | ClassDef(identifier name,
              expr* bases,
@@ -23,18 +26,18 @@ module Python
           | Return(expr? value)
 
           | Delete(expr* targets)
-          | Assign(expr* targets, expr value)
+          | Assign(expr* targets, expr value, string? type_comment)
           | AugAssign(expr target, operator op, expr value)
           -- 'simple' indicates that we annotate simple name without parens
           | AnnAssign(expr target, expr annotation, expr? value, int simple)
 
           -- use 'orelse' because else is a keyword in target languages
-          | For(expr target, expr iter, stmt* body, stmt* orelse)
-          | AsyncFor(expr target, expr iter, stmt* body, stmt* orelse)
+          | For(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment)
+          | AsyncFor(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment)
           | While(expr test, stmt* body, stmt* orelse)
           | If(expr test, stmt* body, stmt* orelse)
-          | With(withitem* items, stmt* body)
-          | AsyncWith(withitem* items, stmt* body)
+          | With(withitem* items, stmt* body, string? type_comment)
+          | AsyncWith(withitem* items, stmt* body, string? type_comment)
 
           | Raise(expr? exc, expr? cause)
           | Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody)
@@ -111,7 +114,7 @@ module Python
     arguments = (arg* args, arg? vararg, arg* kwonlyargs, expr* kw_defaults,
                  arg? kwarg, expr* defaults)
 
-    arg = (identifier arg, expr? annotation)
+    arg = (identifier arg, expr? annotation, string? type_comment)
            attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
 
     -- keyword arguments supplied to call (NULL identifier for **kwargs)
@@ -121,5 +124,7 @@ module Python
     -- import name with optional 'as' alias.
     alias = (identifier name, identifier? asname)
 
     withitem = (expr context_expr, expr? optional_vars)
+
+    type_ignore = TypeIgnore(int lineno)
 }
```
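The new ASDL surface above is observable from Python: `FunctionType` backs the `func_type` parse mode added for function type comments. A minimal sketch, assuming Python 3.8+ where this mode is exposed through `ast.parse`:

```python
import ast

# mode="func_type" parses the *body* of a function type comment,
# e.g. "(int, str) -> bool", producing the new FunctionType root node.
ft = ast.parse("(int, str) -> bool", mode="func_type")
print([type(t).__name__ for t in ft.argtypes])  # ['Name', 'Name']
print(type(ft.returns).__name__)                # 'Name'
```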
```diff
diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py
index 8640b29..a51a5db 100644
--- a/Parser/asdl_c.py
+++ b/Parser/asdl_c.py
@@ -890,6 +890,15 @@ static int obj2ast_identifier(PyObject* obj, PyObject** out, PyArena* arena)
     return obj2ast_object(obj, out, arena);
 }
 
+static int obj2ast_string(PyObject* obj, PyObject** out, PyArena* arena)
+{
+    if (!PyUnicode_CheckExact(obj) && !PyBytes_CheckExact(obj)) {
+        PyErr_SetString(PyExc_TypeError, "AST string must be of type str");
+        return 1;
+    }
+    return obj2ast_object(obj, out, arena);
+}
+
 static int obj2ast_int(PyObject* obj, int* out, PyArena* arena)
 {
     int i;
@@ -993,6 +1002,8 @@ class ASTModuleVisitor(PickleVisitor):
         self.emit('if (PyDict_SetItemString(d, "AST", (PyObject*)&AST_type) < 0) return NULL;', 1)
         self.emit('if (PyModule_AddIntMacro(m, PyCF_ONLY_AST) < 0)', 1)
         self.emit("return NULL;", 2)
+        self.emit('if (PyModule_AddIntMacro(m, PyCF_TYPE_COMMENTS) < 0)', 1)
+        self.emit("return NULL;", 2)
         for dfn in mod.dfns:
             self.visit(dfn)
         self.emit("return m;", 1)
@@ -1176,18 +1187,19 @@ PyObject* PyAST_mod2obj(mod_ty t)
 }
 
 /* mode is 0 for "exec", 1 for "eval" and 2 for "single" input */
+/* and 3 for "func_type" */
 mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode)
 {
     mod_ty res;
     PyObject *req_type[3];
-    char *req_name[] = {"Module", "Expression", "Interactive"};
+    char *req_name[] = {"Module", "Expression", "Interactive", "FunctionType"};
     int isinstance;
 
     req_type[0] = (PyObject*)Module_type;
     req_type[1] = (PyObject*)Expression_type;
     req_type[2] = (PyObject*)Interactive_type;
 
-    assert(0 <= mode && mode <= 2);
+    assert(0 <= mode && mode <= 3);
 
     if (!init_types())
         return NULL;
diff --git a/Parser/parser.c b/Parser/parser.c
index a9916d3..fa4a8f0 100644
--- a/Parser/parser.c
+++ b/Parser/parser.c
@@ -12,6 +12,7 @@
 #include "node.h"
 #include "parser.h"
 #include "errcode.h"
+#include "graminit.h"
 
 
 #ifdef Py_DEBUG
@@ -260,7 +261,15 @@ PyParser_AddToken(parser_state *ps, int type, char *str,
             /* Push non-terminal */
             int nt = (x >> 8) + NT_OFFSET;
             int arrow = x & ((1<<7)-1);
-            dfa *d1 = PyGrammar_FindDFA(
+            dfa *d1;
+            if (nt == func_body_suite && !(ps->p_flags & PyCF_TYPE_COMMENTS)) {
+                /* When parsing type comments is not requested,
+                   we can provide better errors about bad indentation
+                   by using 'suite' for the body of a funcdef */
+                D(printf(" [switch func_body_suite to suite]"));
+                nt = suite;
+            }
+            d1 = PyGrammar_FindDFA(
                 ps->p_grammar, nt);
             if ((err = push(&ps->p_stack, nt, d1,
                             arrow, lineno, col_offset,
@@ -268,7 +277,7 @@ PyParser_AddToken(parser_state *ps, int type, char *str,
                 D(printf(" MemError: push\n"));
                 return err;
             }
-            D(printf(" Push ...\n"));
+            D(printf(" Push '%s'\n", d1->d_name));
             continue;
         }
 
```
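`PyCF_TYPE_COMMENTS`, registered on the `_ast` module in the `asdl_c.py` hunk and consulted by the parser above, can also be passed directly to `compile()`. A sketch assuming Python 3.8+, where `ast` re-exports the flag; the filename is illustrative:

```python
import ast

code = "def f(a):\n    # type: (int) -> int\n    return a\n"
# PyCF_ONLY_AST stops compilation after parsing; PyCF_TYPE_COMMENTS makes
# the tokenizer keep "# type:" comments instead of discarding them.
tree = compile(code, "<demo>", "exec", ast.PyCF_ONLY_AST | ast.PyCF_TYPE_COMMENTS)
print(tree.body[0].type_comment)  # '(int) -> int'
```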
```diff
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 2b5254a..7fddc5a 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -15,6 +15,42 @@ static node *parsetok(struct tok_state *, grammar *, int, perrdetail *,
                       int *);
 static int initerr(perrdetail *err_ret, PyObject * filename);
 
+typedef struct {
+    int *items;
+    size_t size;
+    size_t num_items;
+} growable_int_array;
+
+static int
+growable_int_array_init(growable_int_array *arr, size_t initial_size) {
+    assert(initial_size > 0);
+    arr->items = malloc(initial_size * sizeof(*arr->items));
+    arr->size = initial_size;
+    arr->num_items = 0;
+
+    return arr->items != NULL;
+}
+
+static int
+growable_int_array_add(growable_int_array *arr, int item) {
+    if (arr->num_items >= arr->size) {
+        arr->size *= 2;
+        arr->items = realloc(arr->items, arr->size * sizeof(*arr->items));
+        if (!arr->items) {
+            return 0;
+        }
+    }
+
+    arr->items[arr->num_items] = item;
+    arr->num_items++;
+    return 1;
+}
+
+static void
+growable_int_array_deallocate(growable_int_array *arr) {
+    free(arr->items);
+}
+
 
 /* Parse input coming from a string.  Return error code, print some errors. */
 node *
 PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
@@ -59,6 +95,9 @@ PyParser_ParseStringObject(const char *s, PyObject *filename,
         err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
         return NULL;
     }
+    if (*flags & PyPARSE_TYPE_COMMENTS) {
+        tok->type_comments = 1;
+    }
 
 #ifndef PGEN
     Py_INCREF(err_ret->filename);
@@ -127,6 +166,9 @@ PyParser_ParseFileObject(FILE *fp, PyObject *filename,
         err_ret->error = E_NOMEM;
         return NULL;
     }
+    if (*flags & PyPARSE_TYPE_COMMENTS) {
+        tok->type_comments = 1;
+    }
 #ifndef PGEN
     Py_INCREF(err_ret->filename);
     tok->filename = err_ret->filename;
@@ -188,6 +230,13 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
     node *n;
     int started = 0;
     int col_offset, end_col_offset;
+    growable_int_array type_ignores;
+
+    if (!growable_int_array_init(&type_ignores, 10)) {
+        err_ret->error = E_NOMEM;
+        PyTokenizer_Free(tok);
+        return NULL;
+    }
 
     if ((ps = PyParser_New(g, start)) == NULL) {
         err_ret->error = E_NOMEM;
@@ -197,6 +246,8 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
     if (*flags & PyPARSE_BARRY_AS_BDFL)
         ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
+    if (*flags & PyPARSE_TYPE_COMMENTS)
+        ps->p_flags |= PyCF_TYPE_COMMENTS;
 #endif
 
     for (;;) {
@@ -277,6 +328,15 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
         else {
             end_col_offset = -1;
         }
+
+        if (type == TYPE_IGNORE) {
+            if (!growable_int_array_add(&type_ignores, tok->lineno)) {
+                err_ret->error = E_NOMEM;
+                break;
+            }
+            continue;
+        }
+
         if ((err_ret->error =
              PyParser_AddToken(ps, (int)type, str,
                                lineno, col_offset, tok->lineno, end_col_offset,
@@ -293,6 +353,24 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
     n = ps->p_tree;
     ps->p_tree = NULL;
 
+    if (n->n_type == file_input) {
+        /* Put type_ignore nodes in the ENDMARKER of file_input. */
+        int num;
+        node *ch;
+        size_t i;
+
+        num = NCH(n);
+        ch = CHILD(n, num - 1);
+        REQ(ch, ENDMARKER);
+
+        for (i = 0; i < type_ignores.num_items; i++) {
+            PyNode_AddChild(ch, TYPE_IGNORE, NULL,
+                            type_ignores.items[i], 0,
+                            type_ignores.items[i], 0);
+        }
+    }
+    growable_int_array_deallocate(&type_ignores);
+
 #ifndef PGEN
     /* Check that the source for a single input statement really
        is a single statement by looking at what is left in the
diff --git a/Parser/token.c b/Parser/token.c
index d27f98a..228ecff 100644
--- a/Parser/token.c
+++ b/Parser/token.c
@@ -61,6 +61,8 @@ const char * const _PyParser_TokenNames[] = {
     "ELLIPSIS",
     "COLONEQUAL",
     "OP",
+    "TYPE_IGNORE",
+    "TYPE_COMMENT",
     "<ERRORTOKEN>",
     "<COMMENT>",
     "<NL>",
```
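The `TYPE_IGNORE` tokens that `parsetok()` collects into its growable array surface as `TypeIgnore` nodes hanging off the `Module`. A sketch, again assuming Python 3.8+ (the module name is made up):

```python
import ast

src = "import missing_module  # type: ignore\nx = 1\n"
tree = ast.parse(src, type_comments=True)
# Each "# type: ignore" is recorded by line number on the Module node,
# not on the statement it annotates.
print([ti.lineno for ti in tree.type_ignores])  # [1]
```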
```diff
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 3e3cf2c..1ded9ad 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -48,6 +48,10 @@ static int tok_nextc(struct tok_state *tok);
 static void tok_backup(struct tok_state *tok, int c);
 
 
+/* Spaces in this constant are treated as "zero or more spaces or tabs" when
+   tokenizing. */
+static const char* type_comment_prefix = "# type: ";
+
 /* Create and initialize a new tok_state structure */
 
 static struct tok_state *
@@ -82,6 +86,7 @@ tok_new(void)
     tok->decoding_readline = NULL;
     tok->decoding_buffer = NULL;
 #endif
+    tok->type_comments = 0;
 
     return tok;
 }
@@ -1245,11 +1250,61 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
     /* Set start of current token */
     tok->start = tok->cur - 1;
 
-    /* Skip comment */
+    /* Skip comment, unless it's a type comment */
     if (c == '#') {
+        const char *prefix, *p, *type_start;
+
         while (c != EOF && c != '\n') {
             c = tok_nextc(tok);
         }
+
+        if (tok->type_comments) {
+            p = tok->start;
+            prefix = type_comment_prefix;
+            while (*prefix && p < tok->cur) {
+                if (*prefix == ' ') {
+                    while (*p == ' ' || *p == '\t') {
+                        p++;
+                    }
+                } else if (*prefix == *p) {
+                    p++;
+                } else {
+                    break;
+                }
+
+                prefix++;
+            }
+
+            /* This is a type comment if we matched all of type_comment_prefix. */
+            if (!*prefix) {
+                int is_type_ignore = 1;
+                tok_backup(tok, c);  /* don't eat the newline or EOF */
+
+                type_start = p;
+
+                is_type_ignore = tok->cur >= p + 6 && memcmp(p, "ignore", 6) == 0;
+                p += 6;
+                while (is_type_ignore && p < tok->cur) {
+                    if (*p == '#')
+                        break;
+                    is_type_ignore = is_type_ignore && (*p == ' ' || *p == '\t');
+                    p++;
+                }
+
+                if (is_type_ignore) {
+                    /* If this type ignore is the only thing on the line,
+                       consume the newline also. */
+                    if (blankline) {
+                        tok_nextc(tok);
+                        tok->atbol = 1;
+                    }
+                    return TYPE_IGNORE;
+                } else {
+                    *p_start = (char *) type_start;  /* after type_comment_prefix */
+                    *p_end = tok->cur;
+                    return TYPE_COMMENT;
+                }
+            }
+        }
     }
 
     /* Check for EOF and errors now */
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 096ce68..9639c65 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -70,6 +70,8 @@ struct tok_state {
     const char* enc;        /* Encoding for the current str. */
     const char* str;
     const char* input;      /* Tokenizer's newline translated copy of the string. */
+
+    int type_comments;      /* Whether to look for type comments */
 };
 
 extern struct tok_state *PyTokenizer_FromString(const char *, int);
```
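One consequence of treating each space in `type_comment_prefix` as "zero or more spaces or tabs": spacing variants of a type comment should all be recognized. A quick check of that reading, assuming Python 3.8+; treat it as a sketch rather than an exhaustive test:

```python
import ast

# Per the matching loop above, "# type: " also matches "#type:", since
# each space in the prefix may match zero spaces/tabs in the source.
for line in ("x = 1  # type: int\n", "x = 1  #type:int\n"):
    tree = ast.parse(line, type_comments=True)
    print(tree.body[0].type_comment)  # 'int' in both cases
```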