From 4b86c9c5146c339c689830619be9d29b8f7bf417 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Thu, 12 Aug 2021 18:46:35 +0100 Subject: [3.9] bpo-44885: Correct the ast locations of f-strings with format specs and repeated expressions (GH-27729) (GH-27744) (cherry picked from commit 8e832fb2a2cb54d7262148b6ec15563dffb48d63) Co-authored-by: Pablo Galindo Salgado --- Lib/test/test_fstring.py | 40 +++++++++---- Lib/test/test_peg_parser.py | 5 -- .../2021-08-11-15-39-57.bpo-44885.i4noUO.rst | 2 + Parser/pegen/parse_string.c | 70 ++++++++++------------ 4 files changed, 62 insertions(+), 55 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-08-11-15-39-57.bpo-44885.i4noUO.rst diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index 05e7102..fbd97ff 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -212,11 +212,6 @@ f'{a * f"-{x()}-"}'""" self.assertEqual(call.col_offset, 11) def test_ast_line_numbers_duplicate_expression(self): - """Duplicate expression - - NOTE: this is currently broken, always sets location of the first - expression. - """ expr = """ a = 10 f'{a * x()} {a * x()} {a * x()}' @@ -266,9 +261,9 @@ f'{a * x()} {a * x()} {a * x()}' self.assertEqual(binop.lineno, 3) self.assertEqual(binop.left.lineno, 3) self.assertEqual(binop.right.lineno, 3) - self.assertEqual(binop.col_offset, 3) # FIXME: this is wrong - self.assertEqual(binop.left.col_offset, 3) # FIXME: this is wrong - self.assertEqual(binop.right.col_offset, 7) # FIXME: this is wrong + self.assertEqual(binop.col_offset, 13) + self.assertEqual(binop.left.col_offset, 13) + self.assertEqual(binop.right.col_offset, 17) # check the third binop location binop = t.body[1].value.values[4].value self.assertEqual(type(binop), ast.BinOp) @@ -278,9 +273,32 @@ f'{a * x()} {a * x()} {a * x()}' self.assertEqual(binop.lineno, 3) self.assertEqual(binop.left.lineno, 3) self.assertEqual(binop.right.lineno, 3) - self.assertEqual(binop.col_offset, 3) # FIXME: this is wrong - self.assertEqual(binop.left.col_offset, 3) # FIXME: this is wrong - self.assertEqual(binop.right.col_offset, 7) # FIXME: this is wrong + self.assertEqual(binop.col_offset, 23) + self.assertEqual(binop.left.col_offset, 23) + self.assertEqual(binop.right.col_offset, 27) + + def test_ast_numbers_fstring_with_formatting(self): + + t = ast.parse('f"Here is that pesky {xxx:.3f} again"') + self.assertEqual(len(t.body), 1) + self.assertEqual(t.body[0].lineno, 1) + + self.assertEqual(type(t.body[0]), ast.Expr) + self.assertEqual(type(t.body[0].value), ast.JoinedStr) + self.assertEqual(len(t.body[0].value.values), 3) + + self.assertEqual(type(t.body[0].value.values[0]), ast.Constant) + self.assertEqual(type(t.body[0].value.values[1]), ast.FormattedValue) + self.assertEqual(type(t.body[0].value.values[2]), ast.Constant) + + _, expr, _ = t.body[0].value.values + + name = expr.value + self.assertEqual(type(name), ast.Name) + self.assertEqual(name.lineno, 1) + self.assertEqual(name.end_lineno, 1) + self.assertEqual(name.col_offset, 22) + self.assertEqual(name.end_col_offset, 25) def test_ast_line_numbers_multiline_fstring(self): # See bpo-30465 for details. diff --git a/Lib/test/test_peg_parser.py b/Lib/test/test_peg_parser.py index f4aaef8..b7bd863 100644 --- a/Lib/test/test_peg_parser.py +++ b/Lib/test/test_peg_parser.py @@ -231,11 +231,6 @@ TEST_CASES = [ ('f-string_doublestarred', "f'{ {**x} }'"), ('f-string_escape_brace', "f'{{Escape'"), ('f-string_escape_closing_brace', "f'Escape}}'"), - ('f-string_repr', "f'{a!r}'"), - ('f-string_str', "f'{a!s}'"), - ('f-string_ascii', "f'{a!a}'"), - ('f-string_debug', "f'{a=}'"), - ('f-string_padding', "f'{a:03d}'"), ('f-string_multiline', """ f''' diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-08-11-15-39-57.bpo-44885.i4noUO.rst b/Misc/NEWS.d/next/Core and Builtins/2021-08-11-15-39-57.bpo-44885.i4noUO.rst new file mode 100644 index 0000000..c6abd73 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-08-11-15-39-57.bpo-44885.i4noUO.rst @@ -0,0 +1,2 @@ +Correct the ast locations of f-strings with format specs and repeated +expressions. Patch by Pablo Galindo diff --git a/Parser/pegen/parse_string.c b/Parser/pegen/parse_string.c index c852e5b..41fdc2d 100644 --- a/Parser/pegen/parse_string.c +++ b/Parser/pegen/parse_string.c @@ -284,49 +284,48 @@ _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result, /* Fix locations for the given node and its children. `parent` is the enclosing node. + `expr_start` is the starting position of the expression (pointing to the open brace). `n` is the node which locations are going to be fixed relative to parent. `expr_str` is the child node's string representation, including braces. */ static bool -fstring_find_expr_location(Token *parent, char *expr_str, int *p_lines, int *p_cols) +fstring_find_expr_location(Token *parent, const char* expr_start, char *expr_str, int *p_lines, int *p_cols) { *p_lines = 0; *p_cols = 0; + assert(expr_start != NULL && *expr_start == '{'); if (parent && parent->bytes) { char *parent_str = PyBytes_AsString(parent->bytes); if (!parent_str) { return false; } - char *substr = strstr(parent_str, expr_str); - if (substr) { - // The following is needed, in order to correctly shift the column - // offset, in the case that (disregarding any whitespace) a newline - // immediately follows the opening curly brace of the fstring expression. - bool newline_after_brace = 1; - char *start = substr + 1; - while (start && *start != '}' && *start != '\n') { - if (*start != ' ' && *start != '\t' && *start != '\f') { - newline_after_brace = 0; - break; - } - start++; + // The following is needed, in order to correctly shift the column + // offset, in the case that (disregarding any whitespace) a newline + // immediately follows the opening curly brace of the fstring expression. + bool newline_after_brace = 1; + const char *start = expr_start + 1; + while (start && *start != '}' && *start != '\n') { + if (*start != ' ' && *start != '\t' && *start != '\f') { + newline_after_brace = 0; + break; } + start++; + } - // Account for the characters from the last newline character to our - // left until the beginning of substr. - if (!newline_after_brace) { - start = substr; - while (start > parent_str && *start != '\n') { - start--; - } - *p_cols += (int)(substr - start); + // Account for the characters from the last newline character to our + // left until the beginning of expr_start. + if (!newline_after_brace) { + start = expr_start; + while (start > parent_str && *start != '\n') { + start--; } - /* adjust the start based on the number of newlines encountered - before the f-string expression */ - for (char* p = parent_str; p < substr; p++) { - if (*p == '\n') { - (*p_lines)++; - } + *p_cols += (int)(expr_start - start); + } + /* adjust the start based on the number of newlines encountered + before the f-string expression */ + for (const char *p = parent_str; p < expr_start; p++) { + if (*p == '\n') { + (*p_lines)++; } } } @@ -370,7 +369,7 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end, len = expr_end - expr_start; /* Allocate 3 extra bytes: open paren, close paren, null byte. */ - str = PyMem_Malloc(len + 3); + str = PyMem_Calloc(len + 3, sizeof(char)); if (str == NULL) { PyErr_NoMemory(); return NULL; @@ -378,18 +377,11 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end, // The call to fstring_find_expr_location is responsible for finding the column offset // the generated AST nodes need to be shifted to the right, which is equal to the number - // of the f-string characters before the expression starts. In order to correctly compute - // this offset, strstr gets called in fstring_find_expr_location which only succeeds - // if curly braces appear before and after the f-string expression (exactly like they do - // in the f-string itself), hence the following lines. - str[0] = '{'; + // of the f-string characters before the expression starts. memcpy(str+1, expr_start, len); - str[len+1] = '}'; - str[len+2] = 0; - int lines, cols; - if (!fstring_find_expr_location(t, str, &lines, &cols)) { - PyMem_FREE(str); + if (!fstring_find_expr_location(t, expr_start-1, str+1, &lines, &cols)) { + PyMem_Free(str); return NULL; } -- cgit v0.12