summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPablo Galindo Salgado <Pablogsal@gmail.com>2021-08-12 17:46:35 (GMT)
committerGitHub <noreply@github.com>2021-08-12 17:46:35 (GMT)
commit4b86c9c5146c339c689830619be9d29b8f7bf417 (patch)
tree922ade83b3de3ffa5280687c19a323bff62466bf
parentf7635f0e542c916bfd62542e7b60ee23ff681d0d (diff)
downloadcpython-4b86c9c5146c339c689830619be9d29b8f7bf417.zip
cpython-4b86c9c5146c339c689830619be9d29b8f7bf417.tar.gz
cpython-4b86c9c5146c339c689830619be9d29b8f7bf417.tar.bz2
[3.9] bpo-44885: Correct the ast locations of f-strings with format specs and repeated expressions (GH-27729) (GH-27744)
(cherry picked from commit 8e832fb2a2cb54d7262148b6ec15563dffb48d63) Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
-rw-r--r--Lib/test/test_fstring.py40
-rw-r--r--Lib/test/test_peg_parser.py5
-rw-r--r--Misc/NEWS.d/next/Core and Builtins/2021-08-11-15-39-57.bpo-44885.i4noUO.rst2
-rw-r--r--Parser/pegen/parse_string.c70
4 files changed, 62 insertions, 55 deletions
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index 05e7102..fbd97ff 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -212,11 +212,6 @@ f'{a * f"-{x()}-"}'"""
self.assertEqual(call.col_offset, 11)
def test_ast_line_numbers_duplicate_expression(self):
- """Duplicate expression
-
- NOTE: this is currently broken, always sets location of the first
- expression.
- """
expr = """
a = 10
f'{a * x()} {a * x()} {a * x()}'
@@ -266,9 +261,9 @@ f'{a * x()} {a * x()} {a * x()}'
self.assertEqual(binop.lineno, 3)
self.assertEqual(binop.left.lineno, 3)
self.assertEqual(binop.right.lineno, 3)
- self.assertEqual(binop.col_offset, 3) # FIXME: this is wrong
- self.assertEqual(binop.left.col_offset, 3) # FIXME: this is wrong
- self.assertEqual(binop.right.col_offset, 7) # FIXME: this is wrong
+ self.assertEqual(binop.col_offset, 13)
+ self.assertEqual(binop.left.col_offset, 13)
+ self.assertEqual(binop.right.col_offset, 17)
# check the third binop location
binop = t.body[1].value.values[4].value
self.assertEqual(type(binop), ast.BinOp)
@@ -278,9 +273,32 @@ f'{a * x()} {a * x()} {a * x()}'
self.assertEqual(binop.lineno, 3)
self.assertEqual(binop.left.lineno, 3)
self.assertEqual(binop.right.lineno, 3)
- self.assertEqual(binop.col_offset, 3) # FIXME: this is wrong
- self.assertEqual(binop.left.col_offset, 3) # FIXME: this is wrong
- self.assertEqual(binop.right.col_offset, 7) # FIXME: this is wrong
+ self.assertEqual(binop.col_offset, 23)
+ self.assertEqual(binop.left.col_offset, 23)
+ self.assertEqual(binop.right.col_offset, 27)
+
+ def test_ast_numbers_fstring_with_formatting(self):
+
+ t = ast.parse('f"Here is that pesky {xxx:.3f} again"')
+ self.assertEqual(len(t.body), 1)
+ self.assertEqual(t.body[0].lineno, 1)
+
+ self.assertEqual(type(t.body[0]), ast.Expr)
+ self.assertEqual(type(t.body[0].value), ast.JoinedStr)
+ self.assertEqual(len(t.body[0].value.values), 3)
+
+ self.assertEqual(type(t.body[0].value.values[0]), ast.Constant)
+ self.assertEqual(type(t.body[0].value.values[1]), ast.FormattedValue)
+ self.assertEqual(type(t.body[0].value.values[2]), ast.Constant)
+
+ _, expr, _ = t.body[0].value.values
+
+ name = expr.value
+ self.assertEqual(type(name), ast.Name)
+ self.assertEqual(name.lineno, 1)
+ self.assertEqual(name.end_lineno, 1)
+ self.assertEqual(name.col_offset, 22)
+ self.assertEqual(name.end_col_offset, 25)
def test_ast_line_numbers_multiline_fstring(self):
# See bpo-30465 for details.
diff --git a/Lib/test/test_peg_parser.py b/Lib/test/test_peg_parser.py
index f4aaef8..b7bd863 100644
--- a/Lib/test/test_peg_parser.py
+++ b/Lib/test/test_peg_parser.py
@@ -231,11 +231,6 @@ TEST_CASES = [
('f-string_doublestarred', "f'{ {**x} }'"),
('f-string_escape_brace', "f'{{Escape'"),
('f-string_escape_closing_brace', "f'Escape}}'"),
- ('f-string_repr', "f'{a!r}'"),
- ('f-string_str', "f'{a!s}'"),
- ('f-string_ascii', "f'{a!a}'"),
- ('f-string_debug', "f'{a=}'"),
- ('f-string_padding', "f'{a:03d}'"),
('f-string_multiline',
"""
f'''
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-08-11-15-39-57.bpo-44885.i4noUO.rst b/Misc/NEWS.d/next/Core and Builtins/2021-08-11-15-39-57.bpo-44885.i4noUO.rst
new file mode 100644
index 0000000..c6abd73
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-08-11-15-39-57.bpo-44885.i4noUO.rst
@@ -0,0 +1,2 @@
+Correct the ast locations of f-strings with format specs and repeated
+expressions. Patch by Pablo Galindo
diff --git a/Parser/pegen/parse_string.c b/Parser/pegen/parse_string.c
index c852e5b..41fdc2d 100644
--- a/Parser/pegen/parse_string.c
+++ b/Parser/pegen/parse_string.c
@@ -284,49 +284,48 @@ _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result,
/* Fix locations for the given node and its children.
`parent` is the enclosing node.
+ `expr_start` is the starting position of the expression (pointing to the open brace).
`n` is the node which locations are going to be fixed relative to parent.
`expr_str` is the child node's string representation, including braces.
*/
static bool
-fstring_find_expr_location(Token *parent, char *expr_str, int *p_lines, int *p_cols)
+fstring_find_expr_location(Token *parent, const char* expr_start, char *expr_str, int *p_lines, int *p_cols)
{
*p_lines = 0;
*p_cols = 0;
+ assert(expr_start != NULL && *expr_start == '{');
if (parent && parent->bytes) {
char *parent_str = PyBytes_AsString(parent->bytes);
if (!parent_str) {
return false;
}
- char *substr = strstr(parent_str, expr_str);
- if (substr) {
- // The following is needed, in order to correctly shift the column
- // offset, in the case that (disregarding any whitespace) a newline
- // immediately follows the opening curly brace of the fstring expression.
- bool newline_after_brace = 1;
- char *start = substr + 1;
- while (start && *start != '}' && *start != '\n') {
- if (*start != ' ' && *start != '\t' && *start != '\f') {
- newline_after_brace = 0;
- break;
- }
- start++;
+ // The following is needed, in order to correctly shift the column
+ // offset, in the case that (disregarding any whitespace) a newline
+ // immediately follows the opening curly brace of the fstring expression.
+ bool newline_after_brace = 1;
+ const char *start = expr_start + 1;
+ while (start && *start != '}' && *start != '\n') {
+ if (*start != ' ' && *start != '\t' && *start != '\f') {
+ newline_after_brace = 0;
+ break;
}
+ start++;
+ }
- // Account for the characters from the last newline character to our
- // left until the beginning of substr.
- if (!newline_after_brace) {
- start = substr;
- while (start > parent_str && *start != '\n') {
- start--;
- }
- *p_cols += (int)(substr - start);
+ // Account for the characters from the last newline character to our
+ // left until the beginning of expr_start.
+ if (!newline_after_brace) {
+ start = expr_start;
+ while (start > parent_str && *start != '\n') {
+ start--;
}
- /* adjust the start based on the number of newlines encountered
- before the f-string expression */
- for (char* p = parent_str; p < substr; p++) {
- if (*p == '\n') {
- (*p_lines)++;
- }
+ *p_cols += (int)(expr_start - start);
+ }
+ /* adjust the start based on the number of newlines encountered
+ before the f-string expression */
+ for (const char *p = parent_str; p < expr_start; p++) {
+ if (*p == '\n') {
+ (*p_lines)++;
}
}
}
@@ -370,7 +369,7 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
len = expr_end - expr_start;
/* Allocate 3 extra bytes: open paren, close paren, null byte. */
- str = PyMem_Malloc(len + 3);
+ str = PyMem_Calloc(len + 3, sizeof(char));
if (str == NULL) {
PyErr_NoMemory();
return NULL;
@@ -378,18 +377,11 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
// The call to fstring_find_expr_location is responsible for finding the column offset
// the generated AST nodes need to be shifted to the right, which is equal to the number
- // of the f-string characters before the expression starts. In order to correctly compute
- // this offset, strstr gets called in fstring_find_expr_location which only succeeds
- // if curly braces appear before and after the f-string expression (exactly like they do
- // in the f-string itself), hence the following lines.
- str[0] = '{';
+ // of the f-string characters before the expression starts.
memcpy(str+1, expr_start, len);
- str[len+1] = '}';
- str[len+2] = 0;
-
int lines, cols;
- if (!fstring_find_expr_location(t, str, &lines, &cols)) {
- PyMem_FREE(str);
+ if (!fstring_find_expr_location(t, expr_start-1, str+1, &lines, &cols)) {
+ PyMem_Free(str);
return NULL;
}