summaryrefslogtreecommitdiffstats
path: root/Parser/pegen
diff options
context:
space:
mode:
author Pablo Galindo <Pablogsal@gmail.com> 2020-06-28 00:15:28 (GMT)
committer GitHub <noreply@github.com> 2020-06-28 00:15:28 (GMT)
commit dab533d0ee067159812d4ea51f6fbbb1bd37d8b7 (patch)
tree b6f71930c913781bc1e4d81ee24f6349087e9844 /Parser/pegen
parent 102ca529ef5d45b9ef70a341705ddf2577914135 (diff)
download cpython-dab533d0ee067159812d4ea51f6fbbb1bd37d8b7.zip
cpython-dab533d0ee067159812d4ea51f6fbbb1bd37d8b7.tar.gz
cpython-dab533d0ee067159812d4ea51f6fbbb1bd37d8b7.tar.bz2
[3.9] bpo-41076: Pre-feed the parser with the f-string expression location (GH-21054) (GH-21190)
This commit changes the parsing of f-string expressions with the new parser. The parser now gets pre-fed with the location of the expression itself (not the location of the enclosing f-string, which is what we were doing before). This allows us to completely skip the shifting of the AST nodes after the parsing is completed. (cherry picked from commit 1f0f4abb110b9fbade6175842b6a26ab0b8df6dd)
Diffstat (limited to 'Parser/pegen')
-rw-r--r-- Parser/pegen/parse_string.c 264
-rw-r--r-- Parser/pegen/pegen.c 3
2 files changed, 25 insertions, 242 deletions
diff --git a/Parser/pegen/parse_string.c b/Parser/pegen/parse_string.c
index 61e6044..88b10c3 100644
--- a/Parser/pegen/parse_string.c
+++ b/Parser/pegen/parse_string.c
@@ -276,235 +276,6 @@ _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result,
// FSTRING STUFF
-static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset);
-static void fstring_shift_argument(expr_ty parent, arg_ty args, int lineno, int col_offset);
-
-
-static inline void shift_expr(expr_ty parent, expr_ty n, int line, int col) {
- if (n == NULL) {
- return;
- }
- if (parent->lineno < n->lineno) {
- col = 0;
- }
- fstring_shift_expr_locations(n, line, col);
-}
-
-static inline void shift_arg(expr_ty parent, arg_ty n, int line, int col) {
- if (parent->lineno < n->lineno) {
- col = 0;
- }
- fstring_shift_argument(parent, n, line, col);
-}
-
-static void fstring_shift_seq_locations(expr_ty parent, asdl_seq *seq, int lineno, int col_offset) {
- for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
- expr_ty expr = asdl_seq_GET(seq, i);
- if (expr == NULL){
- continue;
- }
- shift_expr(parent, expr, lineno, col_offset);
- }
-}
-
-static void fstring_shift_slice_locations(expr_ty parent, expr_ty slice, int lineno, int col_offset) {
- switch (slice->kind) {
- case Slice_kind:
- if (slice->v.Slice.lower) {
- shift_expr(parent, slice->v.Slice.lower, lineno, col_offset);
- }
- if (slice->v.Slice.upper) {
- shift_expr(parent, slice->v.Slice.upper, lineno, col_offset);
- }
- if (slice->v.Slice.step) {
- shift_expr(parent, slice->v.Slice.step, lineno, col_offset);
- }
- break;
- case Tuple_kind:
- fstring_shift_seq_locations(parent, slice->v.Tuple.elts, lineno, col_offset);
- break;
- default:
- break;
- }
-}
-
-static void fstring_shift_comprehension(expr_ty parent, comprehension_ty comp, int lineno, int col_offset) {
- shift_expr(parent, comp->target, lineno, col_offset);
- shift_expr(parent, comp->iter, lineno, col_offset);
- fstring_shift_seq_locations(parent, comp->ifs, lineno, col_offset);
-}
-
-static void fstring_shift_argument(expr_ty parent, arg_ty arg, int lineno, int col_offset) {
- if (arg->annotation != NULL){
- shift_expr(parent, arg->annotation, lineno, col_offset);
- }
- arg->col_offset = arg->col_offset + col_offset;
- arg->end_col_offset = arg->end_col_offset + col_offset;
- arg->lineno = arg->lineno + lineno;
- arg->end_lineno = arg->end_lineno + lineno;
-}
-
-static void fstring_shift_arguments(expr_ty parent, arguments_ty args, int lineno, int col_offset) {
- for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->posonlyargs); i < l; i++) {
- arg_ty arg = asdl_seq_GET(args->posonlyargs, i);
- shift_arg(parent, arg, lineno, col_offset);
- }
-
- for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->args); i < l; i++) {
- arg_ty arg = asdl_seq_GET(args->args, i);
- shift_arg(parent, arg, lineno, col_offset);
- }
-
- if (args->vararg != NULL) {
- shift_arg(parent, args->vararg, lineno, col_offset);
- }
-
- for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->kwonlyargs); i < l; i++) {
- arg_ty arg = asdl_seq_GET(args->kwonlyargs, i);
- shift_arg(parent, arg, lineno, col_offset);
- }
-
- fstring_shift_seq_locations(parent, args->kw_defaults, lineno, col_offset);
-
- if (args->kwarg != NULL) {
- shift_arg(parent, args->kwarg, lineno, col_offset);
- }
-
- fstring_shift_seq_locations(parent, args->defaults, lineno, col_offset);
-}
-
-static void fstring_shift_children_locations(expr_ty node, int lineno, int col_offset) {
- switch (node->kind) {
- case BoolOp_kind:
- fstring_shift_seq_locations(node, node->v.BoolOp.values, lineno, col_offset);
- break;
- case NamedExpr_kind:
- shift_expr(node, node->v.NamedExpr.target, lineno, col_offset);
- shift_expr(node, node->v.NamedExpr.value, lineno, col_offset);
- break;
- case BinOp_kind:
- shift_expr(node, node->v.BinOp.left, lineno, col_offset);
- shift_expr(node, node->v.BinOp.right, lineno, col_offset);
- break;
- case UnaryOp_kind:
- shift_expr(node, node->v.UnaryOp.operand, lineno, col_offset);
- break;
- case Lambda_kind:
- fstring_shift_arguments(node, node->v.Lambda.args, lineno, col_offset);
- shift_expr(node, node->v.Lambda.body, lineno, col_offset);
- break;
- case IfExp_kind:
- shift_expr(node, node->v.IfExp.test, lineno, col_offset);
- shift_expr(node, node->v.IfExp.body, lineno, col_offset);
- shift_expr(node, node->v.IfExp.orelse, lineno, col_offset);
- break;
- case Dict_kind:
- fstring_shift_seq_locations(node, node->v.Dict.keys, lineno, col_offset);
- fstring_shift_seq_locations(node, node->v.Dict.values, lineno, col_offset);
- break;
- case Set_kind:
- fstring_shift_seq_locations(node, node->v.Set.elts, lineno, col_offset);
- break;
- case ListComp_kind:
- shift_expr(node, node->v.ListComp.elt, lineno, col_offset);
- for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.ListComp.generators); i < l; i++) {
- comprehension_ty comp = asdl_seq_GET(node->v.ListComp.generators, i);
- fstring_shift_comprehension(node, comp, lineno, col_offset);
- }
- break;
- case SetComp_kind:
- shift_expr(node, node->v.SetComp.elt, lineno, col_offset);
- for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.SetComp.generators); i < l; i++) {
- comprehension_ty comp = asdl_seq_GET(node->v.SetComp.generators, i);
- fstring_shift_comprehension(node, comp, lineno, col_offset);
- }
- break;
- case DictComp_kind:
- shift_expr(node, node->v.DictComp.key, lineno, col_offset);
- shift_expr(node, node->v.DictComp.value, lineno, col_offset);
- for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.DictComp.generators); i < l; i++) {
- comprehension_ty comp = asdl_seq_GET(node->v.DictComp.generators, i);
- fstring_shift_comprehension(node, comp, lineno, col_offset);
- }
- break;
- case GeneratorExp_kind:
- shift_expr(node, node->v.GeneratorExp.elt, lineno, col_offset);
- for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.GeneratorExp.generators); i < l; i++) {
- comprehension_ty comp = asdl_seq_GET(node->v.GeneratorExp.generators, i);
- fstring_shift_comprehension(node, comp, lineno, col_offset);
- }
- break;
- case Await_kind:
- shift_expr(node, node->v.Await.value, lineno, col_offset);
- break;
- case Yield_kind:
- shift_expr(node, node->v.Yield.value, lineno, col_offset);
- break;
- case YieldFrom_kind:
- shift_expr(node, node->v.YieldFrom.value, lineno, col_offset);
- break;
- case Compare_kind:
- shift_expr(node, node->v.Compare.left, lineno, col_offset);
- fstring_shift_seq_locations(node, node->v.Compare.comparators, lineno, col_offset);
- break;
- case Call_kind:
- shift_expr(node, node->v.Call.func, lineno, col_offset);
- fstring_shift_seq_locations(node, node->v.Call.args, lineno, col_offset);
- for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.Call.keywords); i < l; i++) {
- keyword_ty keyword = asdl_seq_GET(node->v.Call.keywords, i);
- shift_expr(node, keyword->value, lineno, col_offset);
- }
- break;
- case Attribute_kind:
- shift_expr(node, node->v.Attribute.value, lineno, col_offset);
- break;
- case Subscript_kind:
- shift_expr(node, node->v.Subscript.value, lineno, col_offset);
- fstring_shift_slice_locations(node, node->v.Subscript.slice, lineno, col_offset);
- shift_expr(node, node->v.Subscript.slice, lineno, col_offset);
- break;
- case Starred_kind:
- shift_expr(node, node->v.Starred.value, lineno, col_offset);
- break;
- case List_kind:
- fstring_shift_seq_locations(node, node->v.List.elts, lineno, col_offset);
- break;
- case Tuple_kind:
- fstring_shift_seq_locations(node, node->v.Tuple.elts, lineno, col_offset);
- break;
- case JoinedStr_kind:
- fstring_shift_seq_locations(node, node->v.JoinedStr.values, lineno, col_offset);
- break;
- case FormattedValue_kind:
- shift_expr(node, node->v.FormattedValue.value, lineno, col_offset);
- if (node->v.FormattedValue.format_spec) {
- shift_expr(node, node->v.FormattedValue.format_spec, lineno, col_offset);
- }
- break;
- default:
- return;
- }
-}
-
-/* Shift locations for the given node and all its children by adding `lineno`
- and `col_offset` to existing locations. Note that n is the already parsed
- expression. */
-static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset)
-{
- n->col_offset = n->col_offset + col_offset;
-
- // The following is needed, in order for nodes spanning across multiple lines
- // to be shifted correctly. An example of such a node is a Call node, the closing
- // parenthesis of which is not on the same line as its name.
- if (n->lineno == n->end_lineno) {
- n->end_col_offset = n->end_col_offset + col_offset;
- }
-
- fstring_shift_children_locations(n, lineno, col_offset);
- n->lineno = n->lineno + lineno;
- n->end_lineno = n->end_lineno + lineno;
-}
-
/* Fix locations for the given node and its children.
`parent` is the enclosing node.
@@ -512,7 +283,7 @@ static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset)
`expr_str` is the child node's string representation, including braces.
*/
static void
-fstring_fix_expr_location(Token *parent, expr_ty n, char *expr_str)
+fstring_find_expr_location(Token *parent, char *expr_str, int *p_lines, int *p_cols)
{
char *substr = NULL;
char *start;
@@ -557,7 +328,8 @@ fstring_fix_expr_location(Token *parent, expr_ty n, char *expr_str)
}
}
}
- fstring_shift_expr_locations(n, lines, cols);
+ *p_lines = lines;
+ *p_cols = cols;
}
@@ -603,11 +375,26 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
return NULL;
}
- str[0] = '(';
+ // The call to fstring_find_expr_location is responsible for finding the column offset
+ // the generated AST nodes need to be shifted to the right, which is equal to the number
+ // of the f-string characters before the expression starts. In order to correctly compute
+ // this offset, strstr gets called in fstring_find_expr_location which only succeeds
+ // if curly braces appear before and after the f-string expression (exactly like they do
+ // in the f-string itself), hence the following lines.
+ str[0] = '{';
memcpy(str+1, expr_start, len);
- str[len+1] = ')';
+ str[len+1] = '}';
str[len+2] = 0;
+ int lines, cols;
+ fstring_find_expr_location(t, str, &lines, &cols);
+
+ // The parentheses are needed in order to allow for leading whitespace within
+ // the f-string expression. This consequently gets parsed as a group (see the
+ // group rule in python.gram).
+ str[0] = '(';
+ str[len+1] = ')';
+
struct tok_state* tok = PyTokenizer_FromString(str, 1);
if (tok == NULL) {
PyMem_Free(str);
@@ -618,21 +405,14 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, p->feature_version,
NULL, p->arena);
- p2->starting_lineno = p->starting_lineno + p->tok->first_lineno - 1;
- p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno
- ? p->starting_col_offset + t->col_offset : 0;
+ p2->starting_lineno = t->lineno + lines - 1;
+ p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno ? t->col_offset + cols : cols;
expr = _PyPegen_run_parser(p2);
if (expr == NULL) {
goto exit;
}
-
- /* Reuse str to find the correct column offset. */
- str[0] = '{';
- str[len+1] = '}';
- fstring_fix_expr_location(t, expr, str);
-
result = expr;
exit:
diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c
index 53591d2..ef48ade 100644
--- a/Parser/pegen/pegen.c
+++ b/Parser/pegen/pegen.c
@@ -423,6 +423,9 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
}
}
+ if (p->start_rule == Py_fstring_input) {
+ col_offset -= p->starting_col_offset;
+ }
Py_ssize_t col_number = col_offset;
if (p->tok->encoding != NULL) {