summary | refs | log | tree | commit | diff | stats
path: root/Parser/string_parser.c
diff options
context:
space:
mode:
author: Lysandros Nikolaou <lisandrosnik@gmail.com> 2020-06-27 23:41:48 (GMT)
committer: GitHub <noreply@github.com> 2020-06-27 23:41:48 (GMT)
commit: 1f0f4abb110b9fbade6175842b6a26ab0b8df6dd (patch)
tree: f967a28bdffe1656593c04ae0634e7cc6b3bf566 /Parser/string_parser.c
parent: 89e82c4a6285c89c054980591c078245a5cc6337 (diff)
download: cpython-1f0f4abb110b9fbade6175842b6a26ab0b8df6dd.zip
cpython-1f0f4abb110b9fbade6175842b6a26ab0b8df6dd.tar.gz
cpython-1f0f4abb110b9fbade6175842b6a26ab0b8df6dd.tar.bz2
bpo-41076: Pre-feed the parser with the f-string expression location (GH-21054)
This commit changes the parsing of f-string expressions with the new parser. The parser gets pre-fed with the location of the expression itself (not the f-string, which was what we were doing before). This allows us to completely skip the shifting of the AST nodes after the parsing is completed.
Diffstat (limited to 'Parser/string_parser.c')
-rw-r--r-- Parser/string_parser.c 264
1 files changed, 22 insertions, 242 deletions
diff --git a/Parser/string_parser.c b/Parser/string_parser.c
index ed7ca7f..9f56ce2 100644
--- a/Parser/string_parser.c
+++ b/Parser/string_parser.c
@@ -271,235 +271,6 @@ _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result,
// FSTRING STUFF
-static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset);
-static void fstring_shift_argument(expr_ty parent, arg_ty args, int lineno, int col_offset);
-
-
-static inline void shift_expr(expr_ty parent, expr_ty n, int line, int col) {
- if (n == NULL) {
- return;
- }
- if (parent->lineno < n->lineno) {
- col = 0;
- }
- fstring_shift_expr_locations(n, line, col);
-}
-
-static inline void shift_arg(expr_ty parent, arg_ty n, int line, int col) {
- if (parent->lineno < n->lineno) {
- col = 0;
- }
- fstring_shift_argument(parent, n, line, col);
-}
-
-static void fstring_shift_seq_locations(expr_ty parent, asdl_seq *seq, int lineno, int col_offset) {
- for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
- expr_ty expr = asdl_seq_GET(seq, i);
- if (expr == NULL){
- continue;
- }
- shift_expr(parent, expr, lineno, col_offset);
- }
-}
-
-static void fstring_shift_slice_locations(expr_ty parent, expr_ty slice, int lineno, int col_offset) {
- switch (slice->kind) {
- case Slice_kind:
- if (slice->v.Slice.lower) {
- shift_expr(parent, slice->v.Slice.lower, lineno, col_offset);
- }
- if (slice->v.Slice.upper) {
- shift_expr(parent, slice->v.Slice.upper, lineno, col_offset);
- }
- if (slice->v.Slice.step) {
- shift_expr(parent, slice->v.Slice.step, lineno, col_offset);
- }
- break;
- case Tuple_kind:
- fstring_shift_seq_locations(parent, slice->v.Tuple.elts, lineno, col_offset);
- break;
- default:
- break;
- }
-}
-
-static void fstring_shift_comprehension(expr_ty parent, comprehension_ty comp, int lineno, int col_offset) {
- shift_expr(parent, comp->target, lineno, col_offset);
- shift_expr(parent, comp->iter, lineno, col_offset);
- fstring_shift_seq_locations(parent, comp->ifs, lineno, col_offset);
-}
-
-static void fstring_shift_argument(expr_ty parent, arg_ty arg, int lineno, int col_offset) {
- if (arg->annotation != NULL){
- shift_expr(parent, arg->annotation, lineno, col_offset);
- }
- arg->col_offset = arg->col_offset + col_offset;
- arg->end_col_offset = arg->end_col_offset + col_offset;
- arg->lineno = arg->lineno + lineno;
- arg->end_lineno = arg->end_lineno + lineno;
-}
-
-static void fstring_shift_arguments(expr_ty parent, arguments_ty args, int lineno, int col_offset) {
- for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->posonlyargs); i < l; i++) {
- arg_ty arg = asdl_seq_GET(args->posonlyargs, i);
- shift_arg(parent, arg, lineno, col_offset);
- }
-
- for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->args); i < l; i++) {
- arg_ty arg = asdl_seq_GET(args->args, i);
- shift_arg(parent, arg, lineno, col_offset);
- }
-
- if (args->vararg != NULL) {
- shift_arg(parent, args->vararg, lineno, col_offset);
- }
-
- for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->kwonlyargs); i < l; i++) {
- arg_ty arg = asdl_seq_GET(args->kwonlyargs, i);
- shift_arg(parent, arg, lineno, col_offset);
- }
-
- fstring_shift_seq_locations(parent, args->kw_defaults, lineno, col_offset);
-
- if (args->kwarg != NULL) {
- shift_arg(parent, args->kwarg, lineno, col_offset);
- }
-
- fstring_shift_seq_locations(parent, args->defaults, lineno, col_offset);
-}
-
-static void fstring_shift_children_locations(expr_ty node, int lineno, int col_offset) {
- switch (node->kind) {
- case BoolOp_kind:
- fstring_shift_seq_locations(node, node->v.BoolOp.values, lineno, col_offset);
- break;
- case NamedExpr_kind:
- shift_expr(node, node->v.NamedExpr.target, lineno, col_offset);
- shift_expr(node, node->v.NamedExpr.value, lineno, col_offset);
- break;
- case BinOp_kind:
- shift_expr(node, node->v.BinOp.left, lineno, col_offset);
- shift_expr(node, node->v.BinOp.right, lineno, col_offset);
- break;
- case UnaryOp_kind:
- shift_expr(node, node->v.UnaryOp.operand, lineno, col_offset);
- break;
- case Lambda_kind:
- fstring_shift_arguments(node, node->v.Lambda.args, lineno, col_offset);
- shift_expr(node, node->v.Lambda.body, lineno, col_offset);
- break;
- case IfExp_kind:
- shift_expr(node, node->v.IfExp.test, lineno, col_offset);
- shift_expr(node, node->v.IfExp.body, lineno, col_offset);
- shift_expr(node, node->v.IfExp.orelse, lineno, col_offset);
- break;
- case Dict_kind:
- fstring_shift_seq_locations(node, node->v.Dict.keys, lineno, col_offset);
- fstring_shift_seq_locations(node, node->v.Dict.values, lineno, col_offset);
- break;
- case Set_kind:
- fstring_shift_seq_locations(node, node->v.Set.elts, lineno, col_offset);
- break;
- case ListComp_kind:
- shift_expr(node, node->v.ListComp.elt, lineno, col_offset);
- for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.ListComp.generators); i < l; i++) {
- comprehension_ty comp = asdl_seq_GET(node->v.ListComp.generators, i);
- fstring_shift_comprehension(node, comp, lineno, col_offset);
- }
- break;
- case SetComp_kind:
- shift_expr(node, node->v.SetComp.elt, lineno, col_offset);
- for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.SetComp.generators); i < l; i++) {
- comprehension_ty comp = asdl_seq_GET(node->v.SetComp.generators, i);
- fstring_shift_comprehension(node, comp, lineno, col_offset);
- }
- break;
- case DictComp_kind:
- shift_expr(node, node->v.DictComp.key, lineno, col_offset);
- shift_expr(node, node->v.DictComp.value, lineno, col_offset);
- for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.DictComp.generators); i < l; i++) {
- comprehension_ty comp = asdl_seq_GET(node->v.DictComp.generators, i);
- fstring_shift_comprehension(node, comp, lineno, col_offset);
- }
- break;
- case GeneratorExp_kind:
- shift_expr(node, node->v.GeneratorExp.elt, lineno, col_offset);
- for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.GeneratorExp.generators); i < l; i++) {
- comprehension_ty comp = asdl_seq_GET(node->v.GeneratorExp.generators, i);
- fstring_shift_comprehension(node, comp, lineno, col_offset);
- }
- break;
- case Await_kind:
- shift_expr(node, node->v.Await.value, lineno, col_offset);
- break;
- case Yield_kind:
- shift_expr(node, node->v.Yield.value, lineno, col_offset);
- break;
- case YieldFrom_kind:
- shift_expr(node, node->v.YieldFrom.value, lineno, col_offset);
- break;
- case Compare_kind:
- shift_expr(node, node->v.Compare.left, lineno, col_offset);
- fstring_shift_seq_locations(node, node->v.Compare.comparators, lineno, col_offset);
- break;
- case Call_kind:
- shift_expr(node, node->v.Call.func, lineno, col_offset);
- fstring_shift_seq_locations(node, node->v.Call.args, lineno, col_offset);
- for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.Call.keywords); i < l; i++) {
- keyword_ty keyword = asdl_seq_GET(node->v.Call.keywords, i);
- shift_expr(node, keyword->value, lineno, col_offset);
- }
- break;
- case Attribute_kind:
- shift_expr(node, node->v.Attribute.value, lineno, col_offset);
- break;
- case Subscript_kind:
- shift_expr(node, node->v.Subscript.value, lineno, col_offset);
- fstring_shift_slice_locations(node, node->v.Subscript.slice, lineno, col_offset);
- shift_expr(node, node->v.Subscript.slice, lineno, col_offset);
- break;
- case Starred_kind:
- shift_expr(node, node->v.Starred.value, lineno, col_offset);
- break;
- case List_kind:
- fstring_shift_seq_locations(node, node->v.List.elts, lineno, col_offset);
- break;
- case Tuple_kind:
- fstring_shift_seq_locations(node, node->v.Tuple.elts, lineno, col_offset);
- break;
- case JoinedStr_kind:
- fstring_shift_seq_locations(node, node->v.JoinedStr.values, lineno, col_offset);
- break;
- case FormattedValue_kind:
- shift_expr(node, node->v.FormattedValue.value, lineno, col_offset);
- if (node->v.FormattedValue.format_spec) {
- shift_expr(node, node->v.FormattedValue.format_spec, lineno, col_offset);
- }
- break;
- default:
- return;
- }
-}
-
-/* Shift locations for the given node and all its children by adding `lineno`
- and `col_offset` to existing locations. Note that n is the already parsed
- expression. */
-static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset)
-{
- n->col_offset = n->col_offset + col_offset;
-
- // The following is needed, in order for nodes spanning across multiple lines
- // to be shifted correctly. An example of such a node is a Call node, the closing
- // parenthesis of which is not on the same line as its name.
- if (n->lineno == n->end_lineno) {
- n->end_col_offset = n->end_col_offset + col_offset;
- }
-
- fstring_shift_children_locations(n, lineno, col_offset);
- n->lineno = n->lineno + lineno;
- n->end_lineno = n->end_lineno + lineno;
-}
-
/* Fix locations for the given node and its children.
`parent` is the enclosing node.
@@ -507,7 +278,7 @@ static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset)
`expr_str` is the child node's string representation, including braces.
*/
static void
-fstring_fix_expr_location(Token *parent, expr_ty n, char *expr_str)
+fstring_find_expr_location(Token *parent, char *expr_str, int *p_lines, int *p_cols)
{
char *substr = NULL;
char *start;
@@ -552,7 +323,8 @@ fstring_fix_expr_location(Token *parent, expr_ty n, char *expr_str)
}
}
}
- fstring_shift_expr_locations(n, lines, cols);
+ *p_lines = lines;
+ *p_cols = cols;
}
@@ -598,11 +370,26 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
return NULL;
}
- str[0] = '(';
+ // The call to fstring_find_expr_location is responsible for finding the column offset
+ // by which the generated AST nodes need to be shifted to the right, which is equal to the
+ // number of f-string characters before the expression starts. In order to correctly compute
+ // this offset, strstr gets called in fstring_find_expr_location, and it only succeeds
+ // if curly braces appear before and after the f-string expression (exactly as they do
+ // in the f-string itself), hence the following lines.
+ str[0] = '{';
memcpy(str+1, expr_start, len);
- str[len+1] = ')';
+ str[len+1] = '}';
str[len+2] = 0;
+ int lines, cols;
+ fstring_find_expr_location(t, str, &lines, &cols);
+
+ // The parentheses are needed in order to allow for leading whitespace within
+ // the f-string expression. This consequently gets parsed as a group (see the
+ // group rule in python.gram).
+ str[0] = '(';
+ str[len+1] = ')';
+
struct tok_state* tok = PyTokenizer_FromString(str, 1);
if (tok == NULL) {
PyMem_Free(str);
@@ -613,21 +400,14 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, p->feature_version,
NULL, p->arena);
- p2->starting_lineno = p->starting_lineno + p->tok->first_lineno - 1;
- p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno
- ? p->starting_col_offset + t->col_offset : 0;
+ p2->starting_lineno = t->lineno + lines - 1;
+ p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno ? t->col_offset + cols : cols;
expr = _PyPegen_run_parser(p2);
if (expr == NULL) {
goto exit;
}
-
- /* Reuse str to find the correct column offset. */
- str[0] = '{';
- str[len+1] = '}';
- fstring_fix_expr_location(t, expr, str);
-
result = expr;
exit: