diff options
author | Lysandros Nikolaou <lisandrosnik@gmail.com> | 2020-06-27 23:41:48 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-27 23:41:48 (GMT) |
commit | 1f0f4abb110b9fbade6175842b6a26ab0b8df6dd (patch) | |
tree | f967a28bdffe1656593c04ae0634e7cc6b3bf566 /Parser/string_parser.c | |
parent | 89e82c4a6285c89c054980591c078245a5cc6337 (diff) | |
download | cpython-1f0f4abb110b9fbade6175842b6a26ab0b8df6dd.zip cpython-1f0f4abb110b9fbade6175842b6a26ab0b8df6dd.tar.gz cpython-1f0f4abb110b9fbade6175842b6a26ab0b8df6dd.tar.bz2 |
bpo-41076: Pre-feed the parser with the f-string expression location (GH-21054)
This commit changes the parsing of f-string expressions with the new parser. The parser gets pre-fed with the location of the expression itself (not the f-string, which was what we were doing before). This allows us to completely skip the shifting of the AST nodes after the parsing is completed.
Diffstat (limited to 'Parser/string_parser.c')
-rw-r--r-- | Parser/string_parser.c | 264 |
1 files changed, 22 insertions, 242 deletions
diff --git a/Parser/string_parser.c b/Parser/string_parser.c index ed7ca7f..9f56ce2 100644 --- a/Parser/string_parser.c +++ b/Parser/string_parser.c @@ -271,235 +271,6 @@ _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result, // FSTRING STUFF -static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset); -static void fstring_shift_argument(expr_ty parent, arg_ty args, int lineno, int col_offset); - - -static inline void shift_expr(expr_ty parent, expr_ty n, int line, int col) { - if (n == NULL) { - return; - } - if (parent->lineno < n->lineno) { - col = 0; - } - fstring_shift_expr_locations(n, line, col); -} - -static inline void shift_arg(expr_ty parent, arg_ty n, int line, int col) { - if (parent->lineno < n->lineno) { - col = 0; - } - fstring_shift_argument(parent, n, line, col); -} - -static void fstring_shift_seq_locations(expr_ty parent, asdl_seq *seq, int lineno, int col_offset) { - for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) { - expr_ty expr = asdl_seq_GET(seq, i); - if (expr == NULL){ - continue; - } - shift_expr(parent, expr, lineno, col_offset); - } -} - -static void fstring_shift_slice_locations(expr_ty parent, expr_ty slice, int lineno, int col_offset) { - switch (slice->kind) { - case Slice_kind: - if (slice->v.Slice.lower) { - shift_expr(parent, slice->v.Slice.lower, lineno, col_offset); - } - if (slice->v.Slice.upper) { - shift_expr(parent, slice->v.Slice.upper, lineno, col_offset); - } - if (slice->v.Slice.step) { - shift_expr(parent, slice->v.Slice.step, lineno, col_offset); - } - break; - case Tuple_kind: - fstring_shift_seq_locations(parent, slice->v.Tuple.elts, lineno, col_offset); - break; - default: - break; - } -} - -static void fstring_shift_comprehension(expr_ty parent, comprehension_ty comp, int lineno, int col_offset) { - shift_expr(parent, comp->target, lineno, col_offset); - shift_expr(parent, comp->iter, lineno, col_offset); - fstring_shift_seq_locations(parent, comp->ifs, lineno, col_offset); -} - -static void fstring_shift_argument(expr_ty parent, arg_ty arg, int lineno, int col_offset) { - if (arg->annotation != NULL){ - shift_expr(parent, arg->annotation, lineno, col_offset); - } - arg->col_offset = arg->col_offset + col_offset; - arg->end_col_offset = arg->end_col_offset + col_offset; - arg->lineno = arg->lineno + lineno; - arg->end_lineno = arg->end_lineno + lineno; -} - -static void fstring_shift_arguments(expr_ty parent, arguments_ty args, int lineno, int col_offset) { - for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->posonlyargs); i < l; i++) { - arg_ty arg = asdl_seq_GET(args->posonlyargs, i); - shift_arg(parent, arg, lineno, col_offset); - } - - for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->args); i < l; i++) { - arg_ty arg = asdl_seq_GET(args->args, i); - shift_arg(parent, arg, lineno, col_offset); - } - - if (args->vararg != NULL) { - shift_arg(parent, args->vararg, lineno, col_offset); - } - - for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->kwonlyargs); i < l; i++) { - arg_ty arg = asdl_seq_GET(args->kwonlyargs, i); - shift_arg(parent, arg, lineno, col_offset); - } - - fstring_shift_seq_locations(parent, args->kw_defaults, lineno, col_offset); - - if (args->kwarg != NULL) { - shift_arg(parent, args->kwarg, lineno, col_offset); - } - - fstring_shift_seq_locations(parent, args->defaults, lineno, col_offset); -} - -static void fstring_shift_children_locations(expr_ty node, int lineno, int col_offset) { - switch (node->kind) { - case BoolOp_kind: - fstring_shift_seq_locations(node, node->v.BoolOp.values, lineno, col_offset); - break; - case NamedExpr_kind: - shift_expr(node, node->v.NamedExpr.target, lineno, col_offset); - shift_expr(node, node->v.NamedExpr.value, lineno, col_offset); - break; - case BinOp_kind: - shift_expr(node, node->v.BinOp.left, lineno, col_offset); - shift_expr(node, node->v.BinOp.right, lineno, col_offset); - break; - case UnaryOp_kind: - shift_expr(node, node->v.UnaryOp.operand, lineno, col_offset); - break; - case Lambda_kind: - fstring_shift_arguments(node, node->v.Lambda.args, lineno, col_offset); - shift_expr(node, node->v.Lambda.body, lineno, col_offset); - break; - case IfExp_kind: - shift_expr(node, node->v.IfExp.test, lineno, col_offset); - shift_expr(node, node->v.IfExp.body, lineno, col_offset); - shift_expr(node, node->v.IfExp.orelse, lineno, col_offset); - break; - case Dict_kind: - fstring_shift_seq_locations(node, node->v.Dict.keys, lineno, col_offset); - fstring_shift_seq_locations(node, node->v.Dict.values, lineno, col_offset); - break; - case Set_kind: - fstring_shift_seq_locations(node, node->v.Set.elts, lineno, col_offset); - break; - case ListComp_kind: - shift_expr(node, node->v.ListComp.elt, lineno, col_offset); - for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.ListComp.generators); i < l; i++) { - comprehension_ty comp = asdl_seq_GET(node->v.ListComp.generators, i); - fstring_shift_comprehension(node, comp, lineno, col_offset); - } - break; - case SetComp_kind: - shift_expr(node, node->v.SetComp.elt, lineno, col_offset); - for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.SetComp.generators); i < l; i++) { - comprehension_ty comp = asdl_seq_GET(node->v.SetComp.generators, i); - fstring_shift_comprehension(node, comp, lineno, col_offset); - } - break; - case DictComp_kind: - shift_expr(node, node->v.DictComp.key, lineno, col_offset); - shift_expr(node, node->v.DictComp.value, lineno, col_offset); - for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.DictComp.generators); i < l; i++) { - comprehension_ty comp = asdl_seq_GET(node->v.DictComp.generators, i); - fstring_shift_comprehension(node, comp, lineno, col_offset); - } - break; - case GeneratorExp_kind: - shift_expr(node, node->v.GeneratorExp.elt, lineno, col_offset); - for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.GeneratorExp.generators); i < l; i++) { - comprehension_ty comp = asdl_seq_GET(node->v.GeneratorExp.generators, i); - fstring_shift_comprehension(node, comp, lineno, col_offset); - } - break; - case Await_kind: - shift_expr(node, node->v.Await.value, lineno, col_offset); - break; - case Yield_kind: - shift_expr(node, node->v.Yield.value, lineno, col_offset); - break; - case YieldFrom_kind: - shift_expr(node, node->v.YieldFrom.value, lineno, col_offset); - break; - case Compare_kind: - shift_expr(node, node->v.Compare.left, lineno, col_offset); - fstring_shift_seq_locations(node, node->v.Compare.comparators, lineno, col_offset); - break; - case Call_kind: - shift_expr(node, node->v.Call.func, lineno, col_offset); - fstring_shift_seq_locations(node, node->v.Call.args, lineno, col_offset); - for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.Call.keywords); i < l; i++) { - keyword_ty keyword = asdl_seq_GET(node->v.Call.keywords, i); - shift_expr(node, keyword->value, lineno, col_offset); - } - break; - case Attribute_kind: - shift_expr(node, node->v.Attribute.value, lineno, col_offset); - break; - case Subscript_kind: - shift_expr(node, node->v.Subscript.value, lineno, col_offset); - fstring_shift_slice_locations(node, node->v.Subscript.slice, lineno, col_offset); - shift_expr(node, node->v.Subscript.slice, lineno, col_offset); - break; - case Starred_kind: - shift_expr(node, node->v.Starred.value, lineno, col_offset); - break; - case List_kind: - fstring_shift_seq_locations(node, node->v.List.elts, lineno, col_offset); - break; - case Tuple_kind: - fstring_shift_seq_locations(node, node->v.Tuple.elts, lineno, col_offset); - break; - case JoinedStr_kind: - fstring_shift_seq_locations(node, node->v.JoinedStr.values, lineno, col_offset); - break; - case FormattedValue_kind: - shift_expr(node, node->v.FormattedValue.value, lineno, col_offset); - if (node->v.FormattedValue.format_spec) { - shift_expr(node, node->v.FormattedValue.format_spec, lineno, col_offset); - } - break; - default: - return; - } -} - -/* Shift locations for the given node and all its children by adding `lineno` - and `col_offset` to existing locations. Note that n is the already parsed - expression. */ -static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset) -{ - n->col_offset = n->col_offset + col_offset; - - // The following is needed, in order for nodes spanning across multiple lines - // to be shifted correctly. An example of such a node is a Call node, the closing - // parenthesis of which is not on the same line as its name. - if (n->lineno == n->end_lineno) { - n->end_col_offset = n->end_col_offset + col_offset; - } - - fstring_shift_children_locations(n, lineno, col_offset); - n->lineno = n->lineno + lineno; - n->end_lineno = n->end_lineno + lineno; -} - /* Fix locations for the given node and its children. `parent` is the enclosing node. @@ -507,7 +278,7 @@ static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset) `expr_str` is the child node's string representation, including braces. */ static void -fstring_fix_expr_location(Token *parent, expr_ty n, char *expr_str) +fstring_find_expr_location(Token *parent, char *expr_str, int *p_lines, int *p_cols) { char *substr = NULL; char *start; @@ -552,7 +323,8 @@ fstring_fix_expr_location(Token *parent, expr_ty n, char *expr_str) } } } - fstring_shift_expr_locations(n, lines, cols); + *p_lines = lines; + *p_cols = cols; } @@ -598,11 +370,26 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end, return NULL; } - str[0] = '('; + // The call to fstring_find_expr_location is responsible for finding the column offset + // the generated AST nodes need to be shifted to the right, which is equal to the number + // of the f-string characters before the expression starts. In order to correctly compute + // this offset, strstr gets called in fstring_find_expr_location which only succeeds + // if curly braces appear before and after the f-string expression (exactly like they do + // in the f-string itself), hence the following lines. + str[0] = '{'; memcpy(str+1, expr_start, len); - str[len+1] = ')'; + str[len+1] = '}'; str[len+2] = 0; + int lines, cols; + fstring_find_expr_location(t, str, &lines, &cols); + + // The parentheses are needed in order to allow for leading whitespace withing + // the f-string expression. This consequently gets parsed as a group (see the + // group rule in python.gram). + str[0] = '('; + str[len+1] = ')'; + struct tok_state* tok = PyTokenizer_FromString(str, 1); if (tok == NULL) { PyMem_Free(str); @@ -613,21 +400,14 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end, Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, p->feature_version, NULL, p->arena); - p2->starting_lineno = p->starting_lineno + p->tok->first_lineno - 1; - p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno - ? p->starting_col_offset + t->col_offset : 0; + p2->starting_lineno = t->lineno + lines - 1; + p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno ? t->col_offset + cols : cols; expr = _PyPegen_run_parser(p2); if (expr == NULL) { goto exit; } - - /* Reuse str to find the correct column offset. */ - str[0] = '{'; - str[len+1] = '}'; - fstring_fix_expr_location(t, expr, str); - result = expr; exit: |