diff options
author | Benjamin Peterson <benjamin@python.org> | 2011-08-09 21:15:04 (GMT) |
---|---|---|
committer | Benjamin Peterson <benjamin@python.org> | 2011-08-09 21:15:04 (GMT) |
commit | 832bfe2ebd5ecfa92031cd40c8b41835ba90487f (patch) | |
tree | 7677d685b8f7a7ca7901f2589d92753513b66e01 /Python | |
parent | 450bb594c8be8aca2782c0a862b27d9fa79160b0 (diff) | |
download | cpython-832bfe2ebd5ecfa92031cd40c8b41835ba90487f.zip cpython-832bfe2ebd5ecfa92031cd40c8b41835ba90487f.tar.gz cpython-832bfe2ebd5ecfa92031cd40c8b41835ba90487f.tar.bz2 |
add a AST validator (closes #12575)
Diffstat (limited to 'Python')
-rw-r--r-- | Python/ast.c | 486 | ||||
-rw-r--r-- | Python/bltinmodule.c | 4 |
2 files changed, 486 insertions, 4 deletions
diff --git a/Python/ast.c b/Python/ast.c index 0d4d9ee..14500ee 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -1,19 +1,497 @@ /* * This file includes functions to transform a concrete syntax tree (CST) to - * an abstract syntax tree (AST). The main function is PyAST_FromNode(). + * an abstract syntax tree (AST). The main function is PyAST_FromNode(). * */ #include "Python.h" #include "Python-ast.h" -#include "grammar.h" #include "node.h" #include "ast.h" #include "token.h" -#include "parsetok.h" -#include "graminit.h" #include <assert.h> +static int validate_stmts(asdl_seq *); +static int validate_exprs(asdl_seq *, expr_context_ty, int); +static int validate_nonempty_seq(asdl_seq *, const char *, const char *); +static int validate_stmt(stmt_ty); +static int validate_expr(expr_ty, expr_context_ty); + +static int +validate_comprehension(asdl_seq *gens) +{ + int i; + if (!asdl_seq_LEN(gens)) { + PyErr_SetString(PyExc_ValueError, "comprehension with no generators"); + return 0; + } + for (i = 0; i < asdl_seq_LEN(gens); i++) { + comprehension_ty comp = asdl_seq_GET(gens, i); + if (!validate_expr(comp->target, Store) || + !validate_expr(comp->iter, Load) || + !validate_exprs(comp->ifs, Load, 0)) + return 0; + } + return 1; +} + +static int +validate_slice(slice_ty slice) +{ + switch (slice->kind) { + case Slice_kind: + return (!slice->v.Slice.lower || validate_expr(slice->v.Slice.lower, Load)) && + (!slice->v.Slice.upper || validate_expr(slice->v.Slice.upper, Load)) && + (!slice->v.Slice.step || validate_expr(slice->v.Slice.step, Load)); + case ExtSlice_kind: { + int i; + if (!validate_nonempty_seq(slice->v.ExtSlice.dims, "dims", "ExtSlice")) + return 0; + for (i = 0; i < asdl_seq_LEN(slice->v.ExtSlice.dims); i++) + if (!validate_slice(asdl_seq_GET(slice->v.ExtSlice.dims, i))) + return 0; + return 1; + } + case Index_kind: + return validate_expr(slice->v.Index.value, Load); + default: + PyErr_SetString(PyExc_SystemError, "unknown slice node"); + return 0; + } +} + +static int +validate_keywords(asdl_seq *keywords) +{ + int i; + for (i = 0; i < asdl_seq_LEN(keywords); i++) + if (!validate_expr(((keyword_ty)asdl_seq_GET(keywords, i))->value, Load)) + return 0; + return 1; +} + +static int +validate_args(asdl_seq *args) +{ + int i; + for (i = 0; i < asdl_seq_LEN(args); i++) { + arg_ty arg = asdl_seq_GET(args, i); + if (arg->annotation && !validate_expr(arg->annotation, Load)) + return 0; + } + return 1; +} + +static const char * +expr_context_name(expr_context_ty ctx) +{ + switch (ctx) { + case Load: + return "Load"; + case Store: + return "Store"; + case Del: + return "Del"; + case AugLoad: + return "AugLoad"; + case AugStore: + return "AugStore"; + case Param: + return "Param"; + default: + assert(0); + return "(unknown)"; + } +} + +static int +validate_arguments(arguments_ty args) +{ + if (!validate_args(args->args)) + return 0; + if (args->varargannotation) { + if (!args->vararg) { + PyErr_SetString(PyExc_ValueError, "varargannotation but no vararg on arguments"); + return 0; + } + if (!validate_expr(args->varargannotation, Load)) + return 0; + } + if (!validate_args(args->kwonlyargs)) + return 0; + if (args->kwargannotation) { + if (!args->kwarg) { + PyErr_SetString(PyExc_ValueError, "kwargannotation but no kwarg on arguments"); + return 0; + } + if (!validate_expr(args->kwargannotation, Load)) + return 0; + } + if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->args)) { + PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments"); + return 0; + } + if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) { + PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as " + "kw_defaults on arguments"); + return 0; + } + return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1); +} + +static int +validate_expr(expr_ty exp, expr_context_ty ctx) +{ + int check_ctx = 1; + expr_context_ty actual_ctx; + + /* First check expression context. */ + switch (exp->kind) { + case Attribute_kind: + actual_ctx = exp->v.Attribute.ctx; + break; + case Subscript_kind: + actual_ctx = exp->v.Subscript.ctx; + break; + case Starred_kind: + actual_ctx = exp->v.Starred.ctx; + break; + case Name_kind: + actual_ctx = exp->v.Name.ctx; + break; + case List_kind: + actual_ctx = exp->v.List.ctx; + break; + case Tuple_kind: + actual_ctx = exp->v.Tuple.ctx; + break; + default: + if (ctx != Load) { + PyErr_Format(PyExc_ValueError, "expression which can't be " + "assigned to in %s context", expr_context_name(ctx)); + return 0; + } + check_ctx = 0; + } + if (check_ctx && actual_ctx != ctx) { + PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead", + expr_context_name(ctx), expr_context_name(actual_ctx)); + return 0; + } + + /* Now validate expression. */ + switch (exp->kind) { + case BoolOp_kind: + if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) { + PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values"); + return 0; + } + return validate_exprs(exp->v.BoolOp.values, Load, 0); + case BinOp_kind: + return validate_expr(exp->v.BinOp.left, Load) && + validate_expr(exp->v.BinOp.right, Load); + case UnaryOp_kind: + return validate_expr(exp->v.UnaryOp.operand, Load); + case Lambda_kind: + return validate_arguments(exp->v.Lambda.args) && + validate_expr(exp->v.Lambda.body, Load); + case IfExp_kind: + return validate_expr(exp->v.IfExp.test, Load) && + validate_expr(exp->v.IfExp.body, Load) && + validate_expr(exp->v.IfExp.orelse, Load); + case Dict_kind: + if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) { + PyErr_SetString(PyExc_ValueError, + "Dict doesn't have the same number of keys as values"); + return 0; + } + return validate_exprs(exp->v.Dict.keys, Load, 0) && + validate_exprs(exp->v.Dict.values, Load, 0); + case Set_kind: + return validate_exprs(exp->v.Set.elts, Load, 0); +#define COMP(NAME) \ + case NAME ## _kind: \ + return validate_comprehension(exp->v.NAME.generators) && \ + validate_expr(exp->v.NAME.elt, Load); + COMP(ListComp) + COMP(SetComp) + COMP(GeneratorExp) +#undef COMP + case DictComp_kind: + return validate_comprehension(exp->v.DictComp.generators) && + validate_expr(exp->v.DictComp.key, Load) && + validate_expr(exp->v.DictComp.value, Load); + case Yield_kind: + return !exp->v.Yield.value || validate_expr(exp->v.Yield.value, Load); + case Compare_kind: + if (!asdl_seq_LEN(exp->v.Compare.comparators)) { + PyErr_SetString(PyExc_ValueError, "Compare with no comparators"); + return 0; + } + if (asdl_seq_LEN(exp->v.Compare.comparators) != + asdl_seq_LEN(exp->v.Compare.ops)) { + PyErr_SetString(PyExc_ValueError, "Compare has a different number " + "of comparators and operands"); + return 0; + } + return validate_exprs(exp->v.Compare.comparators, Load, 0) && + validate_expr(exp->v.Compare.left, Load); + case Call_kind: + return validate_expr(exp->v.Call.func, Load) && + validate_exprs(exp->v.Call.args, Load, 0) && + validate_keywords(exp->v.Call.keywords) && + (!exp->v.Call.starargs || validate_expr(exp->v.Call.starargs, Load)) && + (!exp->v.Call.kwargs || validate_expr(exp->v.Call.kwargs, Load)); + case Num_kind: { + PyObject *n = exp->v.Num.n; + if (!PyLong_CheckExact(n) && !PyFloat_CheckExact(n) && + !PyComplex_CheckExact(n)) { + PyErr_SetString(PyExc_TypeError, "non-numeric type in Num"); + return 0; + } + return 1; + } + case Str_kind: { + PyObject *s = exp->v.Str.s; + if (!PyUnicode_CheckExact(s)) { + PyErr_SetString(PyExc_TypeError, "non-string type in Str"); + return 0; + } + return 1; + } + case Bytes_kind: { + PyObject *b = exp->v.Bytes.s; + if (!PyBytes_CheckExact(b)) { + PyErr_SetString(PyExc_TypeError, "non-bytes type in Bytes"); + return 0; + } + return 1; + } + case Attribute_kind: + return validate_expr(exp->v.Attribute.value, Load); + case Subscript_kind: + return validate_slice(exp->v.Subscript.slice) && + validate_expr(exp->v.Subscript.value, Load); + case Starred_kind: + return validate_expr(exp->v.Starred.value, ctx); + case List_kind: + return validate_exprs(exp->v.List.elts, ctx, 0); + case Tuple_kind: + return validate_exprs(exp->v.Tuple.elts, ctx, 0); + /* These last cases don't have any checking. */ + case Name_kind: + case Ellipsis_kind: + return 1; + default: + PyErr_SetString(PyExc_SystemError, "unexpected expression"); + return 0; + } +} + +static int +validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner) +{ + if (asdl_seq_LEN(seq)) + return 1; + PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner); + return 0; +} + +static int +validate_assignlist(asdl_seq *targets, expr_context_ty ctx) +{ + return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") && + validate_exprs(targets, ctx, 0); +} + +static int +validate_body(asdl_seq *body, const char *owner) +{ + return validate_nonempty_seq(body, "body", owner) && validate_stmts(body); +} + +static int +validate_stmt(stmt_ty stmt) +{ + int i; + switch (stmt->kind) { + case FunctionDef_kind: + return validate_body(stmt->v.FunctionDef.body, "FunctionDef") && + validate_arguments(stmt->v.FunctionDef.args) && + validate_exprs(stmt->v.FunctionDef.decorator_list, Load, 0) && + (!stmt->v.FunctionDef.returns || + validate_expr(stmt->v.FunctionDef.returns, Load)); + case ClassDef_kind: + return validate_body(stmt->v.ClassDef.body, "ClassDef") && + validate_exprs(stmt->v.ClassDef.bases, Load, 0) && + validate_keywords(stmt->v.ClassDef.keywords) && + validate_exprs(stmt->v.ClassDef.decorator_list, Load, 0) && + (!stmt->v.ClassDef.starargs || validate_expr(stmt->v.ClassDef.starargs, Load)) && + (!stmt->v.ClassDef.kwargs || validate_expr(stmt->v.ClassDef.kwargs, Load)); + case Return_kind: + return !stmt->v.Return.value || validate_expr(stmt->v.Return.value, Load); + case Delete_kind: + return validate_assignlist(stmt->v.Delete.targets, Del); + case Assign_kind: + return validate_assignlist(stmt->v.Assign.targets, Store) && + validate_expr(stmt->v.Assign.value, Load); + case AugAssign_kind: + return validate_expr(stmt->v.AugAssign.target, Store) && + validate_expr(stmt->v.AugAssign.value, Load); + case For_kind: + return validate_expr(stmt->v.For.target, Store) && + validate_expr(stmt->v.For.iter, Load) && + validate_body(stmt->v.For.body, "For") && + validate_stmts(stmt->v.For.orelse); + case While_kind: + return validate_expr(stmt->v.While.test, Load) && + validate_body(stmt->v.While.body, "While") && + validate_stmts(stmt->v.While.orelse); + case If_kind: + return validate_expr(stmt->v.If.test, Load) && + validate_body(stmt->v.If.body, "If") && + validate_stmts(stmt->v.If.orelse); + case With_kind: + if (!validate_nonempty_seq(stmt->v.With.items, "items", "With")) + return 0; + for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) { + withitem_ty item = asdl_seq_GET(stmt->v.With.items, i); + if (!validate_expr(item->context_expr, Load) || + (item->optional_vars && !validate_expr(item->optional_vars, Store))) + return 0; + } + return validate_body(stmt->v.With.body, "With"); + case Raise_kind: + if (stmt->v.Raise.exc) { + return validate_expr(stmt->v.Raise.exc, Load) && + (!stmt->v.Raise.cause || validate_expr(stmt->v.Raise.cause, Load)); + } + if (stmt->v.Raise.cause) { + PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception"); + return 0; + } + return 1; + case Try_kind: + if (!validate_body(stmt->v.Try.body, "Try")) + return 0; + if (!asdl_seq_LEN(stmt->v.Try.handlers) && + !asdl_seq_LEN(stmt->v.Try.finalbody)) { + PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody"); + return 0; + } + if (!asdl_seq_LEN(stmt->v.Try.handlers) && + asdl_seq_LEN(stmt->v.Try.orelse)) { + PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers"); + return 0; + } + for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) { + excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i); + if ((handler->v.ExceptHandler.type && + !validate_expr(handler->v.ExceptHandler.type, Load)) || + !validate_body(handler->v.ExceptHandler.body, "ExceptHandler")) + return 0; + } + return (!asdl_seq_LEN(stmt->v.Try.finalbody) || + validate_stmts(stmt->v.Try.finalbody)) && + (!asdl_seq_LEN(stmt->v.Try.orelse) || + validate_stmts(stmt->v.Try.orelse)); + case Assert_kind: + return validate_expr(stmt->v.Assert.test, Load) && + (!stmt->v.Assert.msg || validate_expr(stmt->v.Assert.msg, Load)); + case Import_kind: + return validate_nonempty_seq(stmt->v.Import.names, "names", "Import"); + case ImportFrom_kind: + if (stmt->v.ImportFrom.level < -1) { + PyErr_SetString(PyExc_ValueError, "ImportFrom level less than -1"); + return 0; + } + return validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom"); + case Global_kind: + return validate_nonempty_seq(stmt->v.Global.names, "names", "Global"); + case Nonlocal_kind: + return validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal"); + case Expr_kind: + return validate_expr(stmt->v.Expr.value, Load); + case Pass_kind: + case Break_kind: + case Continue_kind: + return 1; + default: + PyErr_SetString(PyExc_SystemError, "unexpected statement"); + return 0; + } +} + +static int +validate_stmts(asdl_seq *seq) +{ + int i; + for (i = 0; i < asdl_seq_LEN(seq); i++) { + stmt_ty stmt = asdl_seq_GET(seq, i); + if (stmt) { + if (!validate_stmt(stmt)) + return 0; + } + else { + PyErr_SetString(PyExc_ValueError, + "None disallowed in statement list"); + return 0; + } + } + return 1; +} + +static int +validate_exprs(asdl_seq *exprs, expr_context_ty ctx, int null_ok) +{ + int i; + for (i = 0; i < asdl_seq_LEN(exprs); i++) { + expr_ty expr = asdl_seq_GET(exprs, i); + if (expr) { + if (!validate_expr(expr, ctx)) + return 0; + } + else if (!null_ok) { + PyErr_SetString(PyExc_ValueError, + "None disallowed in expression list"); + return 0; + } + + } + return 1; +} + +int +PyAST_Validate(mod_ty mod) +{ + int res = 0; + + switch (mod->kind) { + case Module_kind: + res = validate_stmts(mod->v.Module.body); + break; + case Interactive_kind: + res = validate_stmts(mod->v.Interactive.body); + break; + case Expression_kind: + res = validate_expr(mod->v.Expression.body, Load); + break; + case Suite_kind: + PyErr_SetString(PyExc_ValueError, "Suite is not valid in the CPython compiler"); + break; + default: + PyErr_SetString(PyExc_SystemError, "impossible module node"); + res = 0; + break; + } + return res; +} + +/* This is down here, so defines like "test" don't intefere with access AST above. */ +#include "grammar.h" +#include "parsetok.h" +#include "graminit.h" + /* Data structure used internally */ struct compiling { char *c_encoding; /* source encoding */ diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 152210b..94b2798 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -604,6 +604,10 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) PyArena_Free(arena); goto error; } + if (!PyAST_Validate(mod)) { + PyArena_Free(arena); + goto error; + } result = (PyObject*)PyAST_CompileEx(mod, filename, &cf, optimize, arena); PyArena_Free(arena); |