From cc389ef627b2a486ab89d9a11245bef48224efb1 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Thu, 5 Oct 2023 14:26:44 +0100 Subject: gh-110259: Fix f-strings with multiline expressions and format specs (#110271) Signed-off-by: Pablo Galindo --- Lib/ast.py | 11 ++- Lib/test/test_tokenize.py | 97 ++++++++++++++++++++++ Lib/test/test_unparse.py | 3 +- .../2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst | 3 + Parser/tokenizer.c | 24 ++++-- 5 files changed, 128 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst diff --git a/Lib/ast.py b/Lib/ast.py index 1f54309..f7888d1 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1270,13 +1270,15 @@ class _Unparser(NodeVisitor): quote_type = quote_types[0] self.write(f"{quote_type}{value}{quote_type}") - def _write_fstring_inner(self, node): + def _write_fstring_inner(self, node, scape_newlines=False): if isinstance(node, JoinedStr): # for both the f-string itself, and format_spec for value in node.values: - self._write_fstring_inner(value) + self._write_fstring_inner(value, scape_newlines=scape_newlines) elif isinstance(node, Constant) and isinstance(node.value, str): value = node.value.replace("{", "{{").replace("}", "}}") + if scape_newlines: + value = value.replace("\n", "\\n") self.write(value) elif isinstance(node, FormattedValue): self.visit_FormattedValue(node) @@ -1299,7 +1301,10 @@ class _Unparser(NodeVisitor): self.write(f"!{chr(node.conversion)}") if node.format_spec: self.write(":") - self._write_fstring_inner(node.format_spec) + self._write_fstring_inner( + node.format_spec, + scape_newlines=True + ) def visit_Name(self, node): self.write(node.id) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 94fb6d9..9369560 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -567,6 +567,55 @@ f'''{ OP '}' (3, 1) (3, 2) FSTRING_END "'''" (3, 2) (3, 5) """) + self.check_tokenize("""\ +f'''__{ + x:a +}__'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + FSTRING_MIDDLE '__' (1, 4) (1, 6) + OP '{' (1, 6) (1, 7) + NL '\\n' (1, 7) (1, 8) + NAME 'x' (2, 4) (2, 5) + OP ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'a\\n' (2, 6) (3, 0) + OP '}' (3, 0) (3, 1) + FSTRING_MIDDLE '__' (3, 1) (3, 3) + FSTRING_END "'''" (3, 3) (3, 6) + """) + self.check_tokenize("""\ +f'''__{ + x:a + b + c + d +}__'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + FSTRING_MIDDLE '__' (1, 4) (1, 6) + OP '{' (1, 6) (1, 7) + NL '\\n' (1, 7) (1, 8) + NAME 'x' (2, 4) (2, 5) + OP ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'a\\n b\\n c\\n d\\n' (2, 6) (6, 0) + OP '}' (6, 0) (6, 1) + FSTRING_MIDDLE '__' (6, 1) (6, 3) + FSTRING_END "'''" (6, 3) (6, 6) + """) + self.check_tokenize("""\ +f'__{ + x:d +}__'""", """\ + FSTRING_START "f'" (1, 0) (1, 2) + FSTRING_MIDDLE '__' (1, 2) (1, 4) + OP '{' (1, 4) (1, 5) + NL '\\n' (1, 5) (1, 6) + NAME 'x' (2, 4) (2, 5) + OP ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'd' (2, 6) (2, 7) + NL '\\n' (2, 7) (2, 8) + OP '}' (3, 0) (3, 1) + FSTRING_MIDDLE '__' (3, 1) (3, 3) + FSTRING_END "'" (3, 3) (3, 4) + """) def test_function(self): self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\ @@ -2279,6 +2328,54 @@ def"', """\ FSTRING_END \'"\' (1, 16) (1, 17) """) + self.check_tokenize("""\ +f'''__{ + x:a +}__'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + FSTRING_MIDDLE '__' (1, 4) (1, 6) + LBRACE '{' (1, 6) (1, 7) + NAME 'x' (2, 4) (2, 5) + COLON ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'a\\n' (2, 6) (3, 0) + RBRACE '}' (3, 0) (3, 1) + FSTRING_MIDDLE '__' (3, 1) (3, 3) + FSTRING_END "'''" (3, 3) (3, 6) + """) + + self.check_tokenize("""\ +f'''__{ + x:a + b + c + d +}__'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + FSTRING_MIDDLE '__' (1, 4) (1, 6) + LBRACE '{' (1, 6) (1, 7) + NAME 'x' (2, 4) (2, 5) + COLON ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'a\\n b\\n c\\n d\\n' (2, 6) (6, 0) + RBRACE '}' (6, 0) (6, 1) + FSTRING_MIDDLE '__' (6, 1) (6, 3) + FSTRING_END "'''" (6, 3) (6, 6) + """) + + self.check_tokenize("""\ +f'__{ + x:d +}__'""", """\ + FSTRING_START "f'" (1, 0) (1, 2) + FSTRING_MIDDLE '__' (1, 2) (1, 4) + LBRACE '{' (1, 4) (1, 5) + NAME 'x' (2, 4) (2, 5) + COLON ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'd' (2, 6) (2, 7) + RBRACE '}' (3, 0) (3, 1) + FSTRING_MIDDLE '__' (3, 1) (3, 3) + FSTRING_END "'" (3, 3) (3, 4) + """) + def test_function(self): self.check_tokenize('def d22(a, b, c=2, d=2, *k): pass', """\ diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index bdf7b05..6f698a8 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -730,7 +730,8 @@ class DirectoryTestCase(ASTTestCase): test_directories = (lib_dir, lib_dir / "test") run_always_files = {"test_grammar.py", "test_syntax.py", "test_compile.py", "test_ast.py", "test_asdl_parser.py", "test_fstring.py", - "test_patma.py", "test_type_alias.py", "test_type_params.py"} + "test_patma.py", "test_type_alias.py", "test_type_params.py", + "test_tokenize.py"} _files_to_test = None diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst b/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst new file mode 100644 index 0000000..55c743d --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst @@ -0,0 +1,3 @@ +Correctly identify the format spec in f-strings (with single or triple +quotes) that have multiple lines in the expression part and include a +formatting spec. Patch by Pablo Galindo diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 41d0d16..5e3816f 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -2690,11 +2690,28 @@ f_string_middle: if (tok->done == E_ERROR) { return MAKE_TOKEN(ERRORTOKEN); } - if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) { + int in_format_spec = ( + current_tok->last_expr_end != -1 + && + INSIDE_FSTRING_EXPR(current_tok) + ); + + if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) { if (tok->decoding_erred) { return MAKE_TOKEN(ERRORTOKEN); } + // If we are in a format spec and we found a newline, + // it means that the format spec ends here and we should + // return to the regular mode. + if (in_format_spec && c == '\n') { + tok_backup(tok, c); + TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; + p_start = tok->start; + p_end = tok->cur; + return MAKE_TOKEN(FSTRING_MIDDLE); + } + assert(tok->multi_line_start != NULL); // shift the tok_state's location into // the start of string, and report the error @@ -2726,11 +2743,6 @@ f_string_middle: end_quote_size = 0; } - int in_format_spec = ( - current_tok->last_expr_end != -1 - && - INSIDE_FSTRING_EXPR(current_tok) - ); if (c == '{') { int peek = tok_nextc(tok); if (peek != '{' || in_format_spec) { -- cgit v0.12