summary | refs | log | tree | commit | diff | stats
path: root/Parser
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org> 2019-01-31 11:40:27 (GMT)
committer: Łukasz Langa <lukasz@langa.pl> 2019-01-31 11:40:27 (GMT)
commit: dcfcd146f8e6fc5c2fc16a4c192a0c5f5ca8c53c (patch)
tree: 07829c4f286194d0e3d08151a26ef1f3494a849b /Parser
parent: d97daebfa69b4df95231bcae4123eacad6a48d14 (diff)
download: cpython-dcfcd146f8e6fc5c2fc16a4c192a0c5f5ca8c53c.zip
cpython-dcfcd146f8e6fc5c2fc16a4c192a0c5f5ca8c53c.tar.gz
cpython-dcfcd146f8e6fc5c2fc16a4c192a0c5f5ca8c53c.tar.bz2
bpo-35766: Merge typed_ast back into CPython (GH-11645)
Diffstat (limited to 'Parser')
-rw-r--r-- Parser/Python.asdl 23
-rw-r--r-- Parser/asdl_c.py 16
-rw-r--r-- Parser/parser.c 13
-rw-r--r-- Parser/parsetok.c 78
-rw-r--r-- Parser/token.c 2
-rw-r--r-- Parser/tokenizer.c 57
-rw-r--r-- Parser/tokenizer.h 2
7 files changed, 177 insertions, 14 deletions
diff --git a/Parser/Python.asdl b/Parser/Python.asdl
index 7b2a873..85b686d 100644
--- a/Parser/Python.asdl
+++ b/Parser/Python.asdl
@@ -3,17 +3,20 @@
module Python
{
- mod = Module(stmt* body)
+ mod = Module(stmt* body, type_ignore *type_ignores)
| Interactive(stmt* body)
| Expression(expr body)
+ | FunctionType(expr* argtypes, expr returns)
-- not really an actual node but useful in Jython's typesystem.
| Suite(stmt* body)
stmt = FunctionDef(identifier name, arguments args,
- stmt* body, expr* decorator_list, expr? returns)
+ stmt* body, expr* decorator_list, expr? returns,
+ string? type_comment)
| AsyncFunctionDef(identifier name, arguments args,
- stmt* body, expr* decorator_list, expr? returns)
+ stmt* body, expr* decorator_list, expr? returns,
+ string? type_comment)
| ClassDef(identifier name,
expr* bases,
@@ -23,18 +26,18 @@ module Python
| Return(expr? value)
| Delete(expr* targets)
- | Assign(expr* targets, expr value)
+ | Assign(expr* targets, expr value, string? type_comment)
| AugAssign(expr target, operator op, expr value)
-- 'simple' indicates that we annotate simple name without parens
| AnnAssign(expr target, expr annotation, expr? value, int simple)
-- use 'orelse' because else is a keyword in target languages
- | For(expr target, expr iter, stmt* body, stmt* orelse)
- | AsyncFor(expr target, expr iter, stmt* body, stmt* orelse)
+ | For(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment)
+ | AsyncFor(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment)
| While(expr test, stmt* body, stmt* orelse)
| If(expr test, stmt* body, stmt* orelse)
- | With(withitem* items, stmt* body)
- | AsyncWith(withitem* items, stmt* body)
+ | With(withitem* items, stmt* body, string? type_comment)
+ | AsyncWith(withitem* items, stmt* body, string? type_comment)
| Raise(expr? exc, expr? cause)
| Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody)
@@ -111,7 +114,7 @@ module Python
arguments = (arg* args, arg? vararg, arg* kwonlyargs, expr* kw_defaults,
arg? kwarg, expr* defaults)
- arg = (identifier arg, expr? annotation)
+ arg = (identifier arg, expr? annotation, string? type_comment)
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
-- keyword arguments supplied to call (NULL identifier for **kwargs)
@@ -121,5 +124,7 @@ module Python
alias = (identifier name, identifier? asname)
withitem = (expr context_expr, expr? optional_vars)
+
+ type_ignore = TypeIgnore(int lineno)
}
diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py
index 8640b29..a51a5db 100644
--- a/Parser/asdl_c.py
+++ b/Parser/asdl_c.py
@@ -890,6 +890,15 @@ static int obj2ast_identifier(PyObject* obj, PyObject** out, PyArena* arena)
return obj2ast_object(obj, out, arena);
}
+static int obj2ast_string(PyObject* obj, PyObject** out, PyArena* arena)
+{
+ if (!PyUnicode_CheckExact(obj) && !PyBytes_CheckExact(obj)) {
+ PyErr_SetString(PyExc_TypeError, "AST string must be of type str");
+ return 1;
+ }
+ return obj2ast_object(obj, out, arena);
+}
+
static int obj2ast_int(PyObject* obj, int* out, PyArena* arena)
{
int i;
@@ -993,6 +1002,8 @@ class ASTModuleVisitor(PickleVisitor):
self.emit('if (PyDict_SetItemString(d, "AST", (PyObject*)&AST_type) < 0) return NULL;', 1)
self.emit('if (PyModule_AddIntMacro(m, PyCF_ONLY_AST) < 0)', 1)
self.emit("return NULL;", 2)
+ self.emit('if (PyModule_AddIntMacro(m, PyCF_TYPE_COMMENTS) < 0)', 1)
+ self.emit("return NULL;", 2)
for dfn in mod.dfns:
self.visit(dfn)
self.emit("return m;", 1)
@@ -1176,18 +1187,19 @@ PyObject* PyAST_mod2obj(mod_ty t)
}
/* mode is 0 for "exec", 1 for "eval" and 2 for "single" input */
+/* and 3 for "func_type" */
mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode)
{
mod_ty res;
PyObject *req_type[3];
- char *req_name[] = {"Module", "Expression", "Interactive"};
+ char *req_name[] = {"Module", "Expression", "Interactive", "FunctionType"};
int isinstance;
req_type[0] = (PyObject*)Module_type;
req_type[1] = (PyObject*)Expression_type;
req_type[2] = (PyObject*)Interactive_type;
- assert(0 <= mode && mode <= 2);
+ assert(0 <= mode && mode <= 3);
if (!init_types())
return NULL;
diff --git a/Parser/parser.c b/Parser/parser.c
index a9916d3..fa4a8f0 100644
--- a/Parser/parser.c
+++ b/Parser/parser.c
@@ -12,6 +12,7 @@
#include "node.h"
#include "parser.h"
#include "errcode.h"
+#include "graminit.h"
#ifdef Py_DEBUG
@@ -260,7 +261,15 @@ PyParser_AddToken(parser_state *ps, int type, char *str,
/* Push non-terminal */
int nt = (x >> 8) + NT_OFFSET;
int arrow = x & ((1<<7)-1);
- dfa *d1 = PyGrammar_FindDFA(
+ dfa *d1;
+ if (nt == func_body_suite && !(ps->p_flags & PyCF_TYPE_COMMENTS)) {
+ /* When parsing type comments is not requested,
+ we can provide better errors about bad indentation
+ by using 'suite' for the body of a funcdef */
+ D(printf(" [switch func_body_suite to suite]"));
+ nt = suite;
+ }
+ d1 = PyGrammar_FindDFA(
ps->p_grammar, nt);
if ((err = push(&ps->p_stack, nt, d1,
arrow, lineno, col_offset,
@@ -268,7 +277,7 @@ PyParser_AddToken(parser_state *ps, int type, char *str,
D(printf(" MemError: push\n"));
return err;
}
- D(printf(" Push ...\n"));
+ D(printf(" Push '%s'\n", d1->d_name));
continue;
}
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 2b5254a..7fddc5a 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -15,6 +15,42 @@
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
static int initerr(perrdetail *err_ret, PyObject * filename);
+typedef struct {
+ int *items;
+ size_t size;
+ size_t num_items;
+} growable_int_array;
+
+static int
+growable_int_array_init(growable_int_array *arr, size_t initial_size) {
+ assert(initial_size > 0);
+ arr->items = malloc(initial_size * sizeof(*arr->items));
+ arr->size = initial_size;
+ arr->num_items = 0;
+
+ return arr->items != NULL;
+}
+
+static int
+growable_int_array_add(growable_int_array *arr, int item) {
+ if (arr->num_items >= arr->size) {
+ arr->size *= 2;
+ arr->items = realloc(arr->items, arr->size * sizeof(*arr->items));
+ if (!arr->items) {
+ return 0;
+ }
+ }
+
+ arr->items[arr->num_items] = item;
+ arr->num_items++;
+ return 1;
+}
+
+static void
+growable_int_array_deallocate(growable_int_array *arr) {
+ free(arr->items);
+}
+
/* Parse input coming from a string. Return error code, print some errors. */
node *
PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
@@ -59,6 +95,9 @@ PyParser_ParseStringObject(const char *s, PyObject *filename,
err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
return NULL;
}
+ if (*flags & PyPARSE_TYPE_COMMENTS) {
+ tok->type_comments = 1;
+ }
#ifndef PGEN
Py_INCREF(err_ret->filename);
@@ -127,6 +166,9 @@ PyParser_ParseFileObject(FILE *fp, PyObject *filename,
err_ret->error = E_NOMEM;
return NULL;
}
+ if (*flags & PyPARSE_TYPE_COMMENTS) {
+ tok->type_comments = 1;
+ }
#ifndef PGEN
Py_INCREF(err_ret->filename);
tok->filename = err_ret->filename;
@@ -188,6 +230,13 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
node *n;
int started = 0;
int col_offset, end_col_offset;
+ growable_int_array type_ignores;
+
+ if (!growable_int_array_init(&type_ignores, 10)) {
+ err_ret->error = E_NOMEM;
+ PyTokenizer_Free(tok);
+ return NULL;
+ }
if ((ps = PyParser_New(g, start)) == NULL) {
err_ret->error = E_NOMEM;
@@ -197,6 +246,8 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
if (*flags & PyPARSE_BARRY_AS_BDFL)
ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
+ if (*flags & PyPARSE_TYPE_COMMENTS)
+ ps->p_flags |= PyCF_TYPE_COMMENTS;
#endif
for (;;) {
@@ -277,6 +328,15 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
else {
end_col_offset = -1;
}
+
+ if (type == TYPE_IGNORE) {
+ if (!growable_int_array_add(&type_ignores, tok->lineno)) {
+ err_ret->error = E_NOMEM;
+ break;
+ }
+ continue;
+ }
+
if ((err_ret->error =
PyParser_AddToken(ps, (int)type, str,
lineno, col_offset, tok->lineno, end_col_offset,
@@ -293,6 +353,24 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
n = ps->p_tree;
ps->p_tree = NULL;
+ if (n->n_type == file_input) {
+ /* Put type_ignore nodes in the ENDMARKER of file_input. */
+ int num;
+ node *ch;
+ size_t i;
+
+ num = NCH(n);
+ ch = CHILD(n, num - 1);
+ REQ(ch, ENDMARKER);
+
+ for (i = 0; i < type_ignores.num_items; i++) {
+ PyNode_AddChild(ch, TYPE_IGNORE, NULL,
+ type_ignores.items[i], 0,
+ type_ignores.items[i], 0);
+ }
+ }
+ growable_int_array_deallocate(&type_ignores);
+
#ifndef PGEN
/* Check that the source for a single input statement really
is a single statement by looking at what is left in the
diff --git a/Parser/token.c b/Parser/token.c
index d27f98a..228ecff 100644
--- a/Parser/token.c
+++ b/Parser/token.c
@@ -61,6 +61,8 @@ const char * const _PyParser_TokenNames[] = {
"ELLIPSIS",
"COLONEQUAL",
"OP",
+ "TYPE_IGNORE",
+ "TYPE_COMMENT",
"<ERRORTOKEN>",
"<COMMENT>",
"<NL>",
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 3e3cf2c..1ded9ad 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -48,6 +48,10 @@ static int tok_nextc(struct tok_state *tok);
static void tok_backup(struct tok_state *tok, int c);
+/* Spaces in this constant are treated as "zero or more spaces or tabs" when
+ tokenizing. */
+static const char* type_comment_prefix = "# type: ";
+
/* Create and initialize a new tok_state structure */
static struct tok_state *
@@ -82,6 +86,7 @@ tok_new(void)
tok->decoding_readline = NULL;
tok->decoding_buffer = NULL;
#endif
+ tok->type_comments = 0;
return tok;
}
@@ -1245,11 +1250,61 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
/* Set start of current token */
tok->start = tok->cur - 1;
- /* Skip comment */
+ /* Skip comment, unless it's a type comment */
if (c == '#') {
+ const char *prefix, *p, *type_start;
+
while (c != EOF && c != '\n') {
c = tok_nextc(tok);
}
+
+ if (tok->type_comments) {
+ p = tok->start;
+ prefix = type_comment_prefix;
+ while (*prefix && p < tok->cur) {
+ if (*prefix == ' ') {
+ while (*p == ' ' || *p == '\t') {
+ p++;
+ }
+ } else if (*prefix == *p) {
+ p++;
+ } else {
+ break;
+ }
+
+ prefix++;
+ }
+
+ /* This is a type comment if we matched all of type_comment_prefix. */
+ if (!*prefix) {
+ int is_type_ignore = 1;
+ tok_backup(tok, c); /* don't eat the newline or EOF */
+
+ type_start = p;
+
+ is_type_ignore = tok->cur >= p + 6 && memcmp(p, "ignore", 6) == 0;
+ p += 6;
+ while (is_type_ignore && p < tok->cur) {
+ if (*p == '#')
+ break;
+ is_type_ignore = is_type_ignore && (*p == ' ' || *p == '\t');
+ p++;
+ }
+
+ if (is_type_ignore) {
+ /* If this type ignore is the only thing on the line, consume the newline also. */
+ if (blankline) {
+ tok_nextc(tok);
+ tok->atbol = 1;
+ }
+ return TYPE_IGNORE;
+ } else {
+ *p_start = (char *) type_start; /* after type_comment_prefix */
+ *p_end = tok->cur;
+ return TYPE_COMMENT;
+ }
+ }
+ }
}
/* Check for EOF and errors now */
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 096ce68..9639c65 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -70,6 +70,8 @@ struct tok_state {
const char* enc; /* Encoding for the current str. */
const char* str;
const char* input; /* Tokenizer's newline translated copy of the string. */
+
+ int type_comments; /* Whether to look for type comments */
};
extern struct tok_state *PyTokenizer_FromString(const char *, int);