diff options
Diffstat (limited to 'Python/ast.c')
| -rw-r--r-- | Python/ast.c | 925 | 
1 files changed, 688 insertions, 237 deletions
| diff --git a/Python/ast.c b/Python/ast.c index d2f063b..0c0c1a6 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -1,24 +1,507 @@  /*   * This file includes functions to transform a concrete syntax tree (CST) to - * an abstract syntax tree (AST).  The main function is PyAST_FromNode(). + * an abstract syntax tree (AST). The main function is PyAST_FromNode().   *   */  #include "Python.h"  #include "Python-ast.h" -#include "grammar.h"  #include "node.h"  #include "ast.h"  #include "token.h" -#include "parsetok.h" -#include "graminit.h"  #include <assert.h> +static int validate_stmts(asdl_seq *); +static int validate_exprs(asdl_seq *, expr_context_ty, int); +static int validate_nonempty_seq(asdl_seq *, const char *, const char *); +static int validate_stmt(stmt_ty); +static int validate_expr(expr_ty, expr_context_ty); + +static int +validate_comprehension(asdl_seq *gens) +{ +    int i; +    if (!asdl_seq_LEN(gens)) { +        PyErr_SetString(PyExc_ValueError, "comprehension with no generators"); +        return 0; +    } +    for (i = 0; i < asdl_seq_LEN(gens); i++) { +        comprehension_ty comp = asdl_seq_GET(gens, i); +        if (!validate_expr(comp->target, Store) || +            !validate_expr(comp->iter, Load) || +            !validate_exprs(comp->ifs, Load, 0)) +            return 0; +    } +    return 1; +} + +static int +validate_slice(slice_ty slice) +{ +    switch (slice->kind) { +    case Slice_kind: +        return (!slice->v.Slice.lower || validate_expr(slice->v.Slice.lower, Load)) && +            (!slice->v.Slice.upper || validate_expr(slice->v.Slice.upper, Load)) && +            (!slice->v.Slice.step || validate_expr(slice->v.Slice.step, Load)); +    case ExtSlice_kind: { +        int i; +        if (!validate_nonempty_seq(slice->v.ExtSlice.dims, "dims", "ExtSlice")) +            return 0; +        for (i = 0; i < asdl_seq_LEN(slice->v.ExtSlice.dims); i++) +            if (!validate_slice(asdl_seq_GET(slice->v.ExtSlice.dims, i))) +                return 0; +        return 1; +    } +    case Index_kind: +        return validate_expr(slice->v.Index.value, Load); +    default: +        PyErr_SetString(PyExc_SystemError, "unknown slice node"); +        return 0; +    } +} + +static int +validate_keywords(asdl_seq *keywords) +{ +    int i; +    for (i = 0; i < asdl_seq_LEN(keywords); i++) +        if (!validate_expr(((keyword_ty)asdl_seq_GET(keywords, i))->value, Load)) +            return 0; +    return 1; +} + +static int +validate_args(asdl_seq *args) +{ +    int i; +    for (i = 0; i < asdl_seq_LEN(args); i++) { +        arg_ty arg = asdl_seq_GET(args, i); +        if (arg->annotation && !validate_expr(arg->annotation, Load)) +            return 0; +    } +    return 1; +} + +static const char * +expr_context_name(expr_context_ty ctx) +{ +    switch (ctx) { +    case Load: +        return "Load"; +    case Store: +        return "Store"; +    case Del: +        return "Del"; +    case AugLoad: +        return "AugLoad"; +    case AugStore: +        return "AugStore"; +    case Param: +        return "Param"; +    default: +        assert(0); +        return "(unknown)"; +    } +} + +static int +validate_arguments(arguments_ty args) +{ +    if (!validate_args(args->args)) +        return 0; +    if (args->varargannotation) { +        if (!args->vararg) { +            PyErr_SetString(PyExc_ValueError, "varargannotation but no vararg on arguments"); +            return 0; +        } +        if (!validate_expr(args->varargannotation, Load)) +            return 0; +    } +    if (!validate_args(args->kwonlyargs)) +        return 0; +    if (args->kwargannotation) { +        if (!args->kwarg) { +            PyErr_SetString(PyExc_ValueError, "kwargannotation but no kwarg on arguments"); +            return 0; +        } +        if (!validate_expr(args->kwargannotation, Load)) +            return 0; +    } +    if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->args)) { +        PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments"); +        return 0; +    } +    if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) { +        PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as " +                        "kw_defaults on arguments"); +        return 0; +    } +    return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1); +} + +static int +validate_expr(expr_ty exp, expr_context_ty ctx) +{ +    int check_ctx = 1; +    expr_context_ty actual_ctx; + +    /* First check expression context. */ +    switch (exp->kind) { +    case Attribute_kind: +        actual_ctx = exp->v.Attribute.ctx; +        break; +    case Subscript_kind: +        actual_ctx = exp->v.Subscript.ctx; +        break; +    case Starred_kind: +        actual_ctx = exp->v.Starred.ctx; +        break; +    case Name_kind: +        actual_ctx = exp->v.Name.ctx; +        break; +    case List_kind: +        actual_ctx = exp->v.List.ctx; +        break; +    case Tuple_kind: +        actual_ctx = exp->v.Tuple.ctx; +        break; +    default: +        if (ctx != Load) { +            PyErr_Format(PyExc_ValueError, "expression which can't be " +                         "assigned to in %s context", expr_context_name(ctx)); +            return 0; +        } +        check_ctx = 0; +    } +    if (check_ctx && actual_ctx != ctx) { +        PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead", +                     expr_context_name(ctx), expr_context_name(actual_ctx)); +        return 0; +    } + +    /* Now validate expression. */ +    switch (exp->kind) { +    case BoolOp_kind: +        if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) { +            PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values"); +            return 0; +        } +        return validate_exprs(exp->v.BoolOp.values, Load, 0); +    case BinOp_kind: +        return validate_expr(exp->v.BinOp.left, Load) && +            validate_expr(exp->v.BinOp.right, Load); +    case UnaryOp_kind: +        return validate_expr(exp->v.UnaryOp.operand, Load); +    case Lambda_kind: +        return validate_arguments(exp->v.Lambda.args) && +            validate_expr(exp->v.Lambda.body, Load); +    case IfExp_kind: +        return validate_expr(exp->v.IfExp.test, Load) && +            validate_expr(exp->v.IfExp.body, Load) && +            validate_expr(exp->v.IfExp.orelse, Load); +    case Dict_kind: +        if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) { +            PyErr_SetString(PyExc_ValueError, +                            "Dict doesn't have the same number of keys as values"); +            return 0; +        } +        return validate_exprs(exp->v.Dict.keys, Load, 0) && +            validate_exprs(exp->v.Dict.values, Load, 0); +    case Set_kind: +        return validate_exprs(exp->v.Set.elts, Load, 0); +#define COMP(NAME) \ +        case NAME ## _kind: \ +            return validate_comprehension(exp->v.NAME.generators) && \ +                validate_expr(exp->v.NAME.elt, Load); +    COMP(ListComp) +    COMP(SetComp) +    COMP(GeneratorExp) +#undef COMP +    case DictComp_kind: +        return validate_comprehension(exp->v.DictComp.generators) && +            validate_expr(exp->v.DictComp.key, Load) && +            validate_expr(exp->v.DictComp.value, Load); +    case Yield_kind: +        return !exp->v.Yield.value || validate_expr(exp->v.Yield.value, Load); +    case YieldFrom_kind: +        return !exp->v.YieldFrom.value || +            validate_expr(exp->v.YieldFrom.value, Load); +    case Compare_kind: +        if (!asdl_seq_LEN(exp->v.Compare.comparators)) { +            PyErr_SetString(PyExc_ValueError, "Compare with no comparators"); +            return 0; +        } +        if (asdl_seq_LEN(exp->v.Compare.comparators) != +            asdl_seq_LEN(exp->v.Compare.ops)) { +            PyErr_SetString(PyExc_ValueError, "Compare has a different number " +                            "of comparators and operands"); +            return 0; +        } +        return validate_exprs(exp->v.Compare.comparators, Load, 0) && +            validate_expr(exp->v.Compare.left, Load); +    case Call_kind: +        return validate_expr(exp->v.Call.func, Load) && +            validate_exprs(exp->v.Call.args, Load, 0) && +            validate_keywords(exp->v.Call.keywords) && +            (!exp->v.Call.starargs || validate_expr(exp->v.Call.starargs, Load)) && +            (!exp->v.Call.kwargs || validate_expr(exp->v.Call.kwargs, Load)); +    case Num_kind: { +        PyObject *n = exp->v.Num.n; +        if (!PyLong_CheckExact(n) && !PyFloat_CheckExact(n) && +            !PyComplex_CheckExact(n)) { +            PyErr_SetString(PyExc_TypeError, "non-numeric type in Num"); +            return 0; +        } +        return 1; +    } +    case Str_kind: { +        PyObject *s = exp->v.Str.s; +        if (!PyUnicode_CheckExact(s)) { +            PyErr_SetString(PyExc_TypeError, "non-string type in Str"); +            return 0; +        } +        return 1; +    } +    case Bytes_kind: { +        PyObject *b = exp->v.Bytes.s; +        if (!PyBytes_CheckExact(b)) { +            PyErr_SetString(PyExc_TypeError, "non-bytes type in Bytes"); +            return 0; +        } +        return 1; +    } +    case Attribute_kind: +        return validate_expr(exp->v.Attribute.value, Load); +    case Subscript_kind: +        return validate_slice(exp->v.Subscript.slice) && +            validate_expr(exp->v.Subscript.value, Load); +    case Starred_kind: +        return validate_expr(exp->v.Starred.value, ctx); +    case List_kind: +        return validate_exprs(exp->v.List.elts, ctx, 0); +    case Tuple_kind: +        return validate_exprs(exp->v.Tuple.elts, ctx, 0); +    /* These last cases don't have any checking. */ +    case Name_kind: +    case Ellipsis_kind: +        return 1; +    default: +        PyErr_SetString(PyExc_SystemError, "unexpected expression"); +        return 0; +    } +} + +static int +validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner) +{ +    if (asdl_seq_LEN(seq)) +        return 1; +    PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner); +    return 0; +} + +static int +validate_assignlist(asdl_seq *targets, expr_context_ty ctx) +{ +    return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") && +        validate_exprs(targets, ctx, 0); +} + +static int +validate_body(asdl_seq *body, const char *owner) +{ +    return validate_nonempty_seq(body, "body", owner) && validate_stmts(body); +} + +static int +validate_stmt(stmt_ty stmt) +{ +    int i; +    switch (stmt->kind) { +    case FunctionDef_kind: +        return validate_body(stmt->v.FunctionDef.body, "FunctionDef") && +            validate_arguments(stmt->v.FunctionDef.args) && +            validate_exprs(stmt->v.FunctionDef.decorator_list, Load, 0) && +            (!stmt->v.FunctionDef.returns || +             validate_expr(stmt->v.FunctionDef.returns, Load)); +    case ClassDef_kind: +        return validate_body(stmt->v.ClassDef.body, "ClassDef") && +            validate_exprs(stmt->v.ClassDef.bases, Load, 0) && +            validate_keywords(stmt->v.ClassDef.keywords) && +            validate_exprs(stmt->v.ClassDef.decorator_list, Load, 0) && +            (!stmt->v.ClassDef.starargs || validate_expr(stmt->v.ClassDef.starargs, Load)) && +            (!stmt->v.ClassDef.kwargs || validate_expr(stmt->v.ClassDef.kwargs, Load)); +    case Return_kind: +        return !stmt->v.Return.value || validate_expr(stmt->v.Return.value, Load); +    case Delete_kind: +        return validate_assignlist(stmt->v.Delete.targets, Del); +    case Assign_kind: +        return validate_assignlist(stmt->v.Assign.targets, Store) && +            validate_expr(stmt->v.Assign.value, Load); +    case AugAssign_kind: +        return validate_expr(stmt->v.AugAssign.target, Store) && +            validate_expr(stmt->v.AugAssign.value, Load); +    case For_kind: +        return validate_expr(stmt->v.For.target, Store) && +            validate_expr(stmt->v.For.iter, Load) && +            validate_body(stmt->v.For.body, "For") && +            validate_stmts(stmt->v.For.orelse); +    case While_kind: +        return validate_expr(stmt->v.While.test, Load) && +            validate_body(stmt->v.While.body, "While") && +            validate_stmts(stmt->v.While.orelse); +    case If_kind: +        return validate_expr(stmt->v.If.test, Load) && +            validate_body(stmt->v.If.body, "If") && +            validate_stmts(stmt->v.If.orelse); +    case With_kind: +        if (!validate_nonempty_seq(stmt->v.With.items, "items", "With")) +            return 0; +        for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) { +            withitem_ty item = asdl_seq_GET(stmt->v.With.items, i); +            if (!validate_expr(item->context_expr, Load) || +                (item->optional_vars && !validate_expr(item->optional_vars, Store))) +                return 0; +        } +        return validate_body(stmt->v.With.body, "With"); +    case Raise_kind: +        if (stmt->v.Raise.exc) { +            return validate_expr(stmt->v.Raise.exc, Load) && +                (!stmt->v.Raise.cause || validate_expr(stmt->v.Raise.cause, Load)); +        } +        if (stmt->v.Raise.cause) { +            PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception"); +            return 0; +        } +        return 1; +    case Try_kind: +        if (!validate_body(stmt->v.Try.body, "Try")) +            return 0; +        if (!asdl_seq_LEN(stmt->v.Try.handlers) && +            !asdl_seq_LEN(stmt->v.Try.finalbody)) { +            PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody"); +            return 0; +        } +        if (!asdl_seq_LEN(stmt->v.Try.handlers) && +            asdl_seq_LEN(stmt->v.Try.orelse)) { +            PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers"); +            return 0; +        } +        for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) { +            excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i); +            if ((handler->v.ExceptHandler.type && +                 !validate_expr(handler->v.ExceptHandler.type, Load)) || +                !validate_body(handler->v.ExceptHandler.body, "ExceptHandler")) +                return 0; +        } +        return (!asdl_seq_LEN(stmt->v.Try.finalbody) || +                validate_stmts(stmt->v.Try.finalbody)) && +            (!asdl_seq_LEN(stmt->v.Try.orelse) || +             validate_stmts(stmt->v.Try.orelse)); +    case Assert_kind: +        return validate_expr(stmt->v.Assert.test, Load) && +            (!stmt->v.Assert.msg || validate_expr(stmt->v.Assert.msg, Load)); +    case Import_kind: +        return validate_nonempty_seq(stmt->v.Import.names, "names", "Import"); +    case ImportFrom_kind: +        if (stmt->v.ImportFrom.level < -1) { +            PyErr_SetString(PyExc_ValueError, "ImportFrom level less than -1"); +            return 0; +        } +        return validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom"); +    case Global_kind: +        return validate_nonempty_seq(stmt->v.Global.names, "names", "Global"); +    case Nonlocal_kind: +        return validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal"); +    case Expr_kind: +        return validate_expr(stmt->v.Expr.value, Load); +    case Pass_kind: +    case Break_kind: +    case Continue_kind: +        return 1; +    default: +        PyErr_SetString(PyExc_SystemError, "unexpected statement"); +        return 0; +    } +} + +static int +validate_stmts(asdl_seq *seq) +{ +    int i; +    for (i = 0; i < asdl_seq_LEN(seq); i++) { +        stmt_ty stmt = asdl_seq_GET(seq, i); +        if (stmt) { +            if (!validate_stmt(stmt)) +                return 0; +        } +        else { +            PyErr_SetString(PyExc_ValueError, +                            "None disallowed in statement list"); +            return 0; +        } +    } +    return 1; +} + +static int +validate_exprs(asdl_seq *exprs, expr_context_ty ctx, int null_ok) +{ +    int i; +    for (i = 0; i < asdl_seq_LEN(exprs); i++) { +        expr_ty expr = asdl_seq_GET(exprs, i); +        if (expr) { +            if (!validate_expr(expr, ctx)) +                return 0; +        } +        else if (!null_ok) { +            PyErr_SetString(PyExc_ValueError, +                            "None disallowed in expression list"); +            return 0; +        } +             +    } +    return 1; +} + +int +PyAST_Validate(mod_ty mod) +{ +    int res = 0; + +    switch (mod->kind) { +    case Module_kind: +        res = validate_stmts(mod->v.Module.body); +        break; +    case Interactive_kind: +        res = validate_stmts(mod->v.Interactive.body); +        break; +    case Expression_kind: +        res = validate_expr(mod->v.Expression.body, Load); +        break; +    case Suite_kind: +        PyErr_SetString(PyExc_ValueError, "Suite is not valid in the CPython compiler"); +        break; +    default: +        PyErr_SetString(PyExc_SystemError, "impossible module node"); +        res = 0; +        break; +    } +    return res; +} + +/* This is done here, so defines like "test" don't interfere with AST use above. */ +#include "grammar.h" +#include "parsetok.h" +#include "graminit.h" +  /* Data structure used internally */  struct compiling {      char *c_encoding; /* source encoding */      PyArena *c_arena; /* arena for allocating memeory */      const char *c_filename; /* filename */ +    PyObject *c_normalize; /* Normalization function from unicodedata. */ +    PyObject *c_normalize_args; /* Normalization argument tuple. */  };  static asdl_seq *seq_for_testlist(struct compiling *, const node *); @@ -38,127 +521,96 @@ static PyObject *parsestr(struct compiling *, const node *n, int *bytesmode);  static PyObject *parsestrplus(struct compiling *, const node *n,                                int *bytesmode); -#ifndef LINENO -#define LINENO(n)       ((n)->n_lineno) -#endif -  #define COMP_GENEXP   0  #define COMP_LISTCOMP 1  #define COMP_SETCOMP  2 +static int +init_normalization(struct compiling *c) +{ +    PyObject *m = PyImport_ImportModuleNoBlock("unicodedata"); +    if (!m) +        return 0; +    c->c_normalize = PyObject_GetAttrString(m, "normalize"); +    Py_DECREF(m); +    if (!c->c_normalize) +        return 0; +    c->c_normalize_args = Py_BuildValue("(sN)", "NFKC", Py_None); +    PyTuple_SET_ITEM(c->c_normalize_args, 1, NULL); +    if (!c->c_normalize_args) { +        Py_CLEAR(c->c_normalize); +        return 0; +    } +    return 1; +} +  static identifier -new_identifier(const char *n, PyArena *arena) +new_identifier(const char *n, struct compiling *c)  {      PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL); -    Py_UNICODE *u;      if (!id)          return NULL; -    u = PyUnicode_AS_UNICODE(id); +    /* PyUnicode_DecodeUTF8 should always return a ready string. */ +    assert(PyUnicode_IS_READY(id));      /* Check whether there are non-ASCII characters in the         identifier; if so, normalize to NFKC. */ -    for (; *u; u++) { -        if (*u >= 128) { -            PyObject *m = PyImport_ImportModuleNoBlock("unicodedata"); -            PyObject *id2; -            if (!m) -                return NULL; -            id2 = PyObject_CallMethod(m, "normalize", "sO", "NFKC", id); -            Py_DECREF(m); -            if (!id2) -                return NULL; +    if (!PyUnicode_IS_ASCII(id)) { +        PyObject *id2; +        if (!c->c_normalize && !init_normalization(c)) {              Py_DECREF(id); -            id = id2; -            break; +            return NULL;          } +        PyTuple_SET_ITEM(c->c_normalize_args, 1, id); +        id2 = PyObject_Call(c->c_normalize, c->c_normalize_args, NULL); +        Py_DECREF(id); +        if (!id2) +            return NULL; +        id = id2;      }      PyUnicode_InternInPlace(&id); -    PyArena_AddPyObject(arena, id); +    PyArena_AddPyObject(c->c_arena, id);      return id;  } -#define NEW_IDENTIFIER(n) new_identifier(STR(n), c->c_arena) - -/* This routine provides an invalid object for the syntax error. -   The outermost routine must unpack this error and create the -   proper object.  We do this so that we don't have to pass -   the filename to everything function. - -   XXX Maybe we should just pass the filename... -*/ +#define NEW_IDENTIFIER(n) new_identifier(STR(n), c)  static int -ast_error(const node *n, const char *errstr) +ast_error(struct compiling *c, const node *n, const char *errmsg)  { -    PyObject *u = Py_BuildValue("zii", errstr, LINENO(n), n->n_col_offset), *save; -    if (!u) -        return 0; -    /* -     * Prevent the error from being chained. PyErr_SetObject will normalize the -     * exception in order to chain it. ast_error_finish, however, requires the -     * error not to be normalized. -     */ -    save = PyThreadState_GET()->exc_value; -    PyThreadState_GET()->exc_value = NULL; -    PyErr_SetObject(PyExc_SyntaxError, u); -    PyThreadState_GET()->exc_value = save; -    Py_DECREF(u); -    return 0; -} - -static void -ast_error_finish(const char *filename) -{ -    PyObject *type, *value, *tback, *errstr, *offset, *loc, *tmp; +    PyObject *value, *errstr, *loc, *tmp;      PyObject *filename_obj; -    long lineno; - -    assert(PyErr_Occurred()); -    if (!PyErr_ExceptionMatches(PyExc_SyntaxError)) -        return; - -    PyErr_Fetch(&type, &value, &tback); -    errstr = PyTuple_GetItem(value, 0); -    if (!errstr) -        return; -    Py_INCREF(errstr); -    lineno = PyLong_AsLong(PyTuple_GetItem(value, 1)); -    if (lineno == -1) { -        Py_DECREF(errstr); -        return; -    } -    offset = PyTuple_GetItem(value, 2); -    if (!offset) { -        Py_DECREF(errstr); -        return; -    } -    Py_DECREF(value); - -    loc = PyErr_ProgramText(filename, lineno); + +    loc = PyErr_ProgramText(c->c_filename, LINENO(n));      if (!loc) {          Py_INCREF(Py_None);          loc = Py_None;      } -    if (filename != NULL) -        filename_obj = PyUnicode_DecodeFSDefault(filename); -    else { +    if (c->c_filename) { +        filename_obj = PyUnicode_DecodeFSDefault(c->c_filename); +        if (!filename_obj) { +            Py_DECREF(loc); +            return 0; +        } +    } else {          Py_INCREF(Py_None);          filename_obj = Py_None;      } -    if (filename_obj != NULL) -        tmp = Py_BuildValue("(NlOO)", filename_obj, lineno, offset, loc); -    else -        tmp = NULL; -    Py_DECREF(loc); -    if (!tmp) { -        Py_DECREF(errstr); -        return; +    tmp = Py_BuildValue("(NiiN)", filename_obj, LINENO(n), n->n_col_offset, loc); +    if (!tmp) +        return 0; +    errstr = PyUnicode_FromString(errmsg); +    if (!errstr) { +        Py_DECREF(tmp); +        return 0;      }      value = PyTuple_Pack(2, errstr, tmp);      Py_DECREF(errstr);      Py_DECREF(tmp); -    if (!value) -        return; -    PyErr_Restore(type, value, tback); +    if (value) { +        PyErr_SetObject(PyExc_SyntaxError, value); +        Py_DECREF(value); +    } +    return 0;  }  /* num_stmts() returns number of contained statements. @@ -234,13 +686,17 @@ PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,      stmt_ty s;      node *ch;      struct compiling c; +    mod_ty res = NULL; +    c.c_arena = arena; +    c.c_filename = filename; +    c.c_normalize = c.c_normalize_args = NULL;      if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) {          c.c_encoding = "utf-8";          if (TYPE(n) == encoding_decl) {  #if 0 -            ast_error(n, "encoding declaration in Unicode string"); -            goto error; +            ast_error(c, n, "encoding declaration in Unicode string"); +            goto out;  #endif              n = CHILD(n, 0);          } @@ -251,15 +707,13 @@ PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,          /* PEP 3120 */          c.c_encoding = "utf-8";      } -    c.c_arena = arena; -    c.c_filename = filename;      k = 0;      switch (TYPE(n)) {          case file_input:              stmts = asdl_seq_new(num_stmts(n), arena);              if (!stmts) -                return NULL; +                goto out;              for (i = 0; i < NCH(n) - 1; i++) {                  ch = CHILD(n, i);                  if (TYPE(ch) == NEWLINE) @@ -269,7 +723,7 @@ PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,                  if (num == 1) {                      s = ast_for_stmt(&c, ch);                      if (!s) -                        goto error; +                        goto out;                      asdl_seq_SET(stmts, k++, s);                  }                  else { @@ -278,42 +732,44 @@ PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,                      for (j = 0; j < num; j++) {                          s = ast_for_stmt(&c, CHILD(ch, j * 2));                          if (!s) -                            goto error; +                            goto out;                          asdl_seq_SET(stmts, k++, s);                      }                  }              } -            return Module(stmts, arena); +            res = Module(stmts, arena); +            break;          case eval_input: {              expr_ty testlist_ast;              /* XXX Why not comp_for here? */              testlist_ast = ast_for_testlist(&c, CHILD(n, 0));              if (!testlist_ast) -                goto error; -            return Expression(testlist_ast, arena); +                goto out; +            res = Expression(testlist_ast, arena); +            break;          }          case single_input:              if (TYPE(CHILD(n, 0)) == NEWLINE) {                  stmts = asdl_seq_new(1, arena);                  if (!stmts) -                    goto error; +                    goto out;                  asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,                                              arena));                  if (!asdl_seq_GET(stmts, 0)) -                    goto error; -                return Interactive(stmts, arena); +                    goto out; +                res = Interactive(stmts, arena);              }              else {                  n = CHILD(n, 0);                  num = num_stmts(n);                  stmts = asdl_seq_new(num, arena);                  if (!stmts) -                    goto error; +                    goto out;                  if (num == 1) {                      s = ast_for_stmt(&c, n);                      if (!s) -                        goto error; +                        goto out;                      asdl_seq_SET(stmts, 0, s);                  }                  else { @@ -324,21 +780,26 @@ PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,                              break;                          s = ast_for_stmt(&c, CHILD(n, i));                          if (!s) -                            goto error; +                            goto out;                          asdl_seq_SET(stmts, i / 2, s);                      }                  } -                return Interactive(stmts, arena); +                res = Interactive(stmts, arena);              } +            break;          default:              PyErr_Format(PyExc_SystemError,                           "invalid node %d for PyAST_FromNode", TYPE(n)); -            goto error; +            goto out;      } - error: -    ast_error_finish(filename); -    return NULL; + out: +    if (c.c_normalize) { +        Py_DECREF(c.c_normalize); +        PyTuple_SET_ITEM(c.c_normalize_args, 1, NULL); +        Py_DECREF(c.c_normalize_args); +    } +    return res;  }  /* Return the AST repr. of the operator represented as syntax (|, ^, etc.) @@ -383,18 +844,18 @@ static const char* FORBIDDEN[] = {  };  static int -forbidden_name(identifier name, const node *n, int full_checks) +forbidden_name(struct compiling *c, identifier name, const node *n, int full_checks)  {      assert(PyUnicode_Check(name));      if (PyUnicode_CompareWithASCIIString(name, "__debug__") == 0) { -        ast_error(n, "assignment to keyword"); +        ast_error(c, n, "assignment to keyword");          return 1;      }      if (full_checks) {          const char **p;          for (p = FORBIDDEN; *p; p++) {              if (PyUnicode_CompareWithASCIIString(name, *p) == 0) { -                ast_error(n, "assignment to keyword"); +                ast_error(c, n, "assignment to keyword");                  return 1;              }          } @@ -430,7 +891,7 @@ set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)      switch (e->kind) {          case Attribute_kind:              e->v.Attribute.ctx = ctx; -            if (ctx == Store && forbidden_name(e->v.Attribute.attr, n, 1)) +            if (ctx == Store && forbidden_name(c, e->v.Attribute.attr, n, 1))                  return 0;              break;          case Subscript_kind: @@ -443,7 +904,7 @@ set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)              break;          case Name_kind:              if (ctx == Store) { -                if (forbidden_name(e->v.Name.id, n, 1)) +                if (forbidden_name(c, e->v.Name.id, n, 1))                      return 0; /* forbidden_name() calls ast_error() */              }              e->v.Name.ctx = ctx; @@ -476,6 +937,7 @@ set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)              expr_name = "generator expression";              break;          case Yield_kind: +        case YieldFrom_kind:              expr_name = "yield expression";              break;          case ListComp_kind: @@ -516,7 +978,7 @@ set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)                        "can't %s %s",                        ctx == Store ? "assign to" : "delete",                        expr_name); -        return ast_error(n, buf); +        return ast_error(c, n, buf);      }      /* If the LHS is a list or tuple, we need to set the assignment @@ -664,7 +1126,7 @@ ast_for_arg(struct compiling *c, const node *n)      name = NEW_IDENTIFIER(ch);      if (!name)          return NULL; -    if (forbidden_name(name, ch, 0)) +    if (forbidden_name(c, name, ch, 0))          return NULL;      if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == COLON) { @@ -694,7 +1156,7 @@ handle_keywordonly_args(struct compiling *c, const node *n, int start,      int j = 0; /* index for kwdefaults and kwonlyargs */      if (kwonlyargs == NULL) { -        ast_error(CHILD(n, start), "named arguments must follow bare *"); +        ast_error(c, CHILD(n, start), "named arguments must follow bare *");          return -1;      }      assert(kwdefaults != NULL); @@ -726,7 +1188,7 @@ handle_keywordonly_args(struct compiling *c, const node *n, int start,                  argname = NEW_IDENTIFIER(ch);                  if (!argname)                      goto error; -                if (forbidden_name(argname, ch, 0)) +                if (forbidden_name(c, argname, ch, 0))                      goto error;                  arg = arg(argname, annotation, c->c_arena);                  if (!arg) @@ -737,7 +1199,7 @@ handle_keywordonly_args(struct compiling *c, const node *n, int start,              case DOUBLESTAR:                  return i;              default: -                ast_error(ch, "unexpected node"); +                ast_error(c, ch, "unexpected node");                  goto error;          }      } @@ -829,7 +1291,7 @@ ast_for_arguments(struct compiling *c, const node *n)          return NULL;      if (nposargs + nkwonlyargs > 255) { -        ast_error(n, "more than 255 arguments"); +        ast_error(c, n, "more than 255 arguments");          return NULL;      } @@ -857,7 +1319,7 @@ ast_for_arguments(struct compiling *c, const node *n)                      found_default = 1;                  }                  else if (found_default) { -                    ast_error(n, +                    ast_error(c, n,                               "non-default argument follows default argument");                      return NULL;                  } @@ -869,7 +1331,7 @@ ast_for_arguments(struct compiling *c, const node *n)                  break;              case STAR:                  if (i+1 >= NCH(n)) { -                    ast_error(CHILD(n, i), +                    ast_error(c, CHILD(n, i),                          "named arguments must follow bare *");                      return NULL;                  } @@ -886,7 +1348,7 @@ ast_for_arguments(struct compiling *c, const node *n)                      vararg = NEW_IDENTIFIER(CHILD(ch, 0));                      if (!vararg)                          return NULL; -                    if (forbidden_name(vararg, CHILD(ch, 0), 0)) +                    if (forbidden_name(c, vararg, CHILD(ch, 0), 0))                          return NULL;                      if (NCH(ch) > 1) {                          /* there is an annotation on the vararg */ @@ -917,7 +1379,7 @@ ast_for_arguments(struct compiling *c, const node *n)                      if (!kwargannotation)                          return NULL;                  } -                if (forbidden_name(kwarg, CHILD(ch, 0), 0)) +                if (forbidden_name(c, kwarg, CHILD(ch, 0), 0))                      return NULL;                  i += 3;                  break; @@ -1036,7 +1498,7 @@ ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)      name = NEW_IDENTIFIER(CHILD(n, name_i));      if (!name)          return NULL; -    if (forbidden_name(name, CHILD(n, name_i), 0)) +    if (forbidden_name(c, name, CHILD(n, name_i), 0))          return NULL;      args = ast_for_arguments(c, CHILD(n, name_i + 1));      if (!args) @@ -1377,10 +1839,10 @@ ast_for_atom(struct compiling *c, const node *n)                      char buf[128];                      s = _PyUnicode_AsString(errstr);                      PyOS_snprintf(buf, sizeof(buf), "(unicode error) %s", s); -                    ast_error(n, buf); +                    ast_error(c, n, buf);                      Py_DECREF(errstr);                  } else { -                    ast_error(n, "(unicode error) unknown error"); +                    ast_error(c, n, "(unicode error) unknown error");                  }                  Py_DECREF(type);                  Py_DECREF(value); @@ -1897,12 +2359,25 @@ ast_for_expr(struct compiling *c, const node *n)              }              return ast_for_binop(c, n);          case yield_expr: { +            node *an = NULL; +            node *en = NULL; +            int is_from = 0;              expr_ty exp = NULL; -            if (NCH(n) == 2) { -                exp = ast_for_testlist(c, CHILD(n, 1)); +            if (NCH(n) > 1) +                an = CHILD(n, 1); /* yield_arg */ +            if (an) { +                en = CHILD(an, NCH(an) - 1); +                if (NCH(an) == 2) { +                    is_from = 1; +                    exp = ast_for_expr(c, en); +                } +                else +                    exp = ast_for_testlist(c, en);                  if (!exp)                      return NULL;              } +            if (is_from) +                return YieldFrom(exp, LINENO(n), n->n_col_offset, c->c_arena);              return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena);          }          case factor: @@ -1927,7 +2402,7 @@ ast_for_call(struct compiling *c, const node *n, expr_ty func)      /*        arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]                 | '**' test) -      argument: [test '='] test [comp_for]        # Really [keyword '='] test +      argument: [test '='] (test) [comp_for]        # Really [keyword '='] test      */      int i, nargs, nkeywords, ngens; @@ -1952,14 +2427,14 @@ ast_for_call(struct compiling *c, const node *n, expr_ty func)          }      }      if (ngens > 1 || (ngens && (nargs || nkeywords))) { -        ast_error(n, "Generator expression must be parenthesized " +        ast_error(c, n, "Generator expression must be parenthesized "                    "if not sole argument");          return NULL;      }      if (nargs + nkeywords + ngens > 255) { -      ast_error(n, "more than 255 arguments"); -      return NULL; +        ast_error(c, n, "more than 255 arguments"); +        return NULL;      }      args = asdl_seq_new(nargs + ngens, c->c_arena); @@ -1976,12 +2451,12 @@ ast_for_call(struct compiling *c, const node *n, expr_ty func)              expr_ty e;              if (NCH(ch) == 1) {                  if (nkeywords) { -                    ast_error(CHILD(ch, 0), +                    ast_error(c, CHILD(ch, 0),                                "non-keyword arg after keyword arg");                      return NULL;                  }                  if (vararg) { -                    ast_error(CHILD(ch, 0), +                    ast_error(c, CHILD(ch, 0),                                "only named arguments may follow *expression");                      return NULL;                  } @@ -2011,19 +2486,19 @@ ast_for_call(struct compiling *c, const node *n, expr_ty func)                   * then is very confusing.                   */                  if (e->kind == Lambda_kind) { -                  ast_error(CHILD(ch, 0), "lambda cannot contain assignment"); -                  return NULL; +                    ast_error(c, CHILD(ch, 0), "lambda cannot contain assignment"); +                    return NULL;                  } else if (e->kind != Name_kind) { -                  ast_error(CHILD(ch, 0), "keyword can't be an expression"); -                  return NULL; -                } else if (forbidden_name(e->v.Name.id, ch, 1)) { -                  return NULL; +                    ast_error(c, CHILD(ch, 0), "keyword can't be an expression"); +                    return NULL; +                } else if (forbidden_name(c, e->v.Name.id, ch, 1)) { +                    return NULL;                  }                  key = e->v.Name.id;                  for (k = 0; k < nkeywords; k++) {                      tmp = ((keyword_ty)asdl_seq_GET(keywords, k))->arg;                      if (!PyUnicode_Compare(tmp, key)) { -                        ast_error(CHILD(ch, 0), "keyword argument repeated"); +                        ast_error(c, CHILD(ch, 0), "keyword argument repeated");                          return NULL;                      }                  } @@ -2116,7 +2591,7 @@ ast_for_expr_stmt(struct compiling *c, const node *n)              case Subscript_kind:                  break;              default: -                ast_error(ch, "illegal expression for augmented assignment"); +                ast_error(c, ch, "illegal expression for augmented assignment");                  return NULL;          } @@ -2149,7 +2624,7 @@ ast_for_expr_stmt(struct compiling *c, const node *n)              expr_ty e;              node *ch = CHILD(n, i);              if (TYPE(ch) == yield_expr) { -                ast_error(ch, "assignment to yield expression not possible"); +                ast_error(c, ch, "assignment to yield expression not possible");                  return NULL;              }              e = ast_for_testlist(c, ch); @@ -2221,7 +2696,7 @@ ast_for_flow_stmt(struct compiling *c, const node *n)        continue_stmt: 'continue'        return_stmt: 'return' [testlist]        yield_stmt: yield_expr -      yield_expr: 'yield' testlist +      yield_expr: 'yield' testlist | 'yield' 'from' test        raise_stmt: 'raise' [test [',' test [',' test]]]      */      node *ch; @@ -2296,11 +2771,11 @@ alias_for_import_name(struct compiling *c, const node *n, int store)                  str = NEW_IDENTIFIER(str_node);                  if (!str)                      return NULL; -                if (store && forbidden_name(str, str_node, 0)) +                if (store && forbidden_name(c, str, str_node, 0))                      return NULL;              }              else { -                if (forbidden_name(name, name_node, 0)) +                if (forbidden_name(c, name, name_node, 0))                      return NULL;              }              return alias(name, str, c->c_arena); @@ -2319,7 +2794,7 @@ alias_for_import_name(struct compiling *c, const node *n, int store)                  a->asname = NEW_IDENTIFIER(asname_node);                  if (!a->asname)                      return NULL; -                if (forbidden_name(a->asname, asname_node, 0)) +                if (forbidden_name(c, a->asname, asname_node, 0))                      return NULL;                  return a;              } @@ -2330,7 +2805,7 @@ alias_for_import_name(struct compiling *c, const node *n, int store)                  name = NEW_IDENTIFIER(name_node);                  if (!name)                      return NULL; -                if (store && forbidden_name(name, name_node, 0)) +                if (store && forbidden_name(c, name, name_node, 0))                      return NULL;                  return alias(name, NULL, c->c_arena);              } @@ -2459,13 +2934,13 @@ ast_for_import_stmt(struct compiling *c, const node *n)              n = CHILD(n, idx);              n_children = NCH(n);              if (n_children % 2 == 0) { -                ast_error(n, "trailing comma not allowed without" +                ast_error(c, n, "trailing comma not allowed without"                               " surrounding parentheses");                  return NULL;              }              break;          default: -            ast_error(n, "Unexpected node-type in from-import"); +            ast_error(c, n, "Unexpected node-type in from-import");              return NULL;          } @@ -2871,7 +3346,7 @@ ast_for_except_clause(struct compiling *c, const node *exc, node *body)          identifier e = NEW_IDENTIFIER(CHILD(exc, 3));          if (!e)              return NULL; -        if (forbidden_name(e, CHILD(exc, 3), 0)) +        if (forbidden_name(c, e, CHILD(exc, 3), 0))              return NULL;          expression = ast_for_expr(c, CHILD(exc, 1));          if (!expression) @@ -2895,7 +3370,7 @@ ast_for_try_stmt(struct compiling *c, const node *n)  {      const int nch = NCH(n);      int n_except = (nch - 3)/3; -    asdl_seq *body, *orelse = NULL, *finally = NULL; +    asdl_seq *body, *handlers = NULL, *orelse = NULL, *finally = NULL;      REQ(n, try_stmt); @@ -2930,15 +3405,14 @@ ast_for_try_stmt(struct compiling *c, const node *n)          }      }      else if (TYPE(CHILD(n, nch - 3)) != except_clause) { -        ast_error(n, "malformed 'try' statement"); +        ast_error(c, n, "malformed 'try' statement");          return NULL;      }      if (n_except > 0) {          int i; -        stmt_ty except_st;          /* process except statements to create a try ... except */ -        asdl_seq *handlers = asdl_seq_new(n_except, c->c_arena); +        handlers = asdl_seq_new(n_except, c->c_arena);          if (handlers == NULL)              return NULL; @@ -2949,28 +3423,15 @@ ast_for_try_stmt(struct compiling *c, const node *n)                  return NULL;              asdl_seq_SET(handlers, i, e);          } - -        except_st = TryExcept(body, handlers, orelse, LINENO(n), -                              n->n_col_offset, c->c_arena); -        if (!finally) -            return except_st; - -        /* if a 'finally' is present too, we nest the TryExcept within a -           TryFinally to emulate try ... except ... finally */ -        body = asdl_seq_new(1, c->c_arena); -        if (body == NULL) -            return NULL; -        asdl_seq_SET(body, 0, except_st);      } -    /* must be a try ... finally (except clauses are in body, if any exist) */ -    assert(finally != NULL); -    return TryFinally(body, finally, LINENO(n), n->n_col_offset, c->c_arena); +    assert(finally != NULL || asdl_seq_LEN(handlers)); +    return Try(body, handlers, orelse, finally, LINENO(n), n->n_col_offset, c->c_arena);  }  /* with_item: test ['as' expr] */ -static stmt_ty -ast_for_with_item(struct compiling *c, const node *n, asdl_seq *content) +static withitem_ty +ast_for_with_item(struct compiling *c, const node *n)  {      expr_ty context_expr, optional_vars = NULL; @@ -2989,43 +3450,34 @@ ast_for_with_item(struct compiling *c, const node *n, asdl_seq *content)          }      } -    return With(context_expr, optional_vars, content, LINENO(n), -                n->n_col_offset, c->c_arena); +    return withitem(context_expr, optional_vars, c->c_arena);  }  /* with_stmt: 'with' with_item (',' with_item)* ':' suite */  static stmt_ty  ast_for_with_stmt(struct compiling *c, const node *n)  { -    int i; -    stmt_ty ret; -    asdl_seq *inner; +    int i, n_items; +    asdl_seq *items, *body;      REQ(n, with_stmt); -    /* process the with items inside-out */ -    i = NCH(n) - 1; -    /* the suite of the innermost with item is the suite of the with stmt */ -    inner = ast_for_suite(c, CHILD(n, i)); -    if (!inner) +    n_items = (NCH(n) - 2) / 2; +    items = asdl_seq_new(n_items, c->c_arena); +    if (!items)          return NULL; - -    for (;;) { -        i -= 2; -        ret = ast_for_with_item(c, CHILD(n, i), inner); -        if (!ret) -            return NULL; -        /* was this the last item? */ -        if (i == 1) -            break; -        /* if not, wrap the result so far in a new sequence */ -        inner = asdl_seq_new(1, c->c_arena); -        if (!inner) +    for (i = 1; i < NCH(n) - 2; i += 2) { +        withitem_ty item = ast_for_with_item(c, CHILD(n, i)); +        if (!item)              return NULL; -        asdl_seq_SET(inner, 0, ret); +        asdl_seq_SET(items, (i - 1) / 2, item);      } -    return ret; +    body = ast_for_suite(c, CHILD(n, NCH(n) - 1)); +    if (!body) +        return NULL; + +    return With(items, body, LINENO(n), n->n_col_offset, c->c_arena);  }  static stmt_ty @@ -3045,7 +3497,7 @@ ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)          classname = NEW_IDENTIFIER(CHILD(n, 1));          if (!classname)              return NULL; -        if (forbidden_name(classname, CHILD(n, 3), 0)) +        if (forbidden_name(c, classname, CHILD(n, 3), 0))              return NULL;          return ClassDef(classname, NULL, NULL, NULL, NULL, s, decorator_seq,                          LINENO(n), n->n_col_offset, c->c_arena); @@ -3058,7 +3510,7 @@ ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)          classname = NEW_IDENTIFIER(CHILD(n, 1));          if (!classname)              return NULL; -        if (forbidden_name(classname, CHILD(n, 3), 0)) +        if (forbidden_name(c, classname, CHILD(n, 3), 0))              return NULL;          return ClassDef(classname, NULL, NULL, NULL, NULL, s, decorator_seq,                          LINENO(n), n->n_col_offset, c->c_arena); @@ -3083,7 +3535,7 @@ ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)      classname = NEW_IDENTIFIER(CHILD(n, 1));      if (!classname)          return NULL; -    if (forbidden_name(classname, CHILD(n, 1), 0)) +    if (forbidden_name(c, classname, CHILD(n, 1), 0))          return NULL;      return ClassDef(classname, call->v.Call.args, call->v.Call.keywords, @@ -3209,20 +3661,14 @@ parsenumber(struct compiling *c, const char *s)  }  static PyObject * -decode_utf8(struct compiling *c, const char **sPtr, const char *end, char* encoding) +decode_utf8(struct compiling *c, const char **sPtr, const char *end)  { -    PyObject *u, *v;      char *s, *t;      t = s = (char *)*sPtr;      /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */      while (s < end && (*s & 0x80)) s++;      *sPtr = s; -    u = PyUnicode_DecodeUTF8(t, s - t, NULL); -    if (u == NULL) -        return NULL; -    v = PyUnicode_AsEncodedString(u, encoding, NULL); -    Py_DECREF(u); -    return v; +    return PyUnicode_DecodeUTF8(t, s - t, NULL);  }  static PyObject * @@ -3234,7 +3680,6 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons      const char *end;      if (encoding == NULL) { -        buf = (char *)s;          u = NULL;      } else {          /* check for integer overflow */ @@ -3257,22 +3702,20 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons              }              if (*s & 0x80) { /* XXX inefficient */                  PyObject *w; -                char *r; -                Py_ssize_t rn, i; -                w = decode_utf8(c, &s, end, "utf-32-be"); +                int kind; +                void *data; +                Py_ssize_t len, i; +                w = decode_utf8(c, &s, end);                  if (w == NULL) {                      Py_DECREF(u);                      return NULL;                  } -                r = PyBytes_AS_STRING(w); -                rn = Py_SIZE(w); -                assert(rn % 4 == 0); -                for (i = 0; i < rn; i += 4) { -                    sprintf(p, "\\U%02x%02x%02x%02x", -                            r[i + 0] & 0xFF, -                            r[i + 1] & 0xFF, -                            r[i + 2] & 0xFF, -                            r[i + 3] & 0xFF); +                kind = PyUnicode_KIND(w); +                data = PyUnicode_DATA(w); +                len = PyUnicode_GET_LENGTH(w); +                for (i = 0; i < len; i++) { +                    Py_UCS4 chr = PyUnicode_READ(kind, data, i); +                    sprintf(p, "\\U%08x", chr);                      p += 10;                  }                  /* Should be impossible to overflow */ @@ -3306,13 +3749,21 @@ parsestr(struct compiling *c, const node *n, int *bytesmode)      int rawmode = 0;      int need_encoding;      if (isalpha(quote)) { -        if (quote == 'b' || quote == 'B') { -            quote = *++s; -            *bytesmode = 1; -        } -        if (quote == 'r' || quote == 'R') { -            quote = *++s; -            rawmode = 1; +        while (!*bytesmode || !rawmode) { +            if (quote == 'b' || quote == 'B') { +                quote = *++s; +                *bytesmode = 1; +            } +            else if (quote == 'u' || quote == 'U') { +                quote = *++s; +            } +            else if (quote == 'r' || quote == 'R') { +                quote = *++s; +                rawmode = 1; +            } +            else { +                break; +            }          }      }      if (quote != '\'' && quote != '\"') { @@ -3343,10 +3794,10 @@ parsestr(struct compiling *c, const node *n, int *bytesmode)      }      if (*bytesmode) {          /* Disallow non-ascii characters (but not escapes) */ -        const char *c; -        for (c = s; *c; c++) { -            if (Py_CHARMASK(*c) >= 0x80) { -                ast_error(n, "bytes can only contain ASCII " +        const char *ch; +        for (ch = s; *ch; ch++) { +            if (Py_CHARMASK(*ch) >= 0x80) { +                ast_error(c, n, "bytes can only contain ASCII "                            "literal characters.");                  return NULL;              } @@ -3394,7 +3845,7 @@ parsestrplus(struct compiling *c, const node *n, int *bytesmode)              if (s == NULL)                  goto onError;              if (*bytesmode != subbm) { -                ast_error(n, "cannot mix bytes and nonbytes literals"); +                ast_error(c, n, "cannot mix bytes and nonbytes literals");                  Py_DECREF(s);                  goto onError;              } | 
