From cc389ef627b2a486ab89d9a11245bef48224efb1 Mon Sep 17 00:00:00 2001
From: Pablo Galindo Salgado <Pablogsal@gmail.com>
Date: Thu, 5 Oct 2023 14:26:44 +0100
Subject: gh-110259: Fix f-strings with multiline expressions and format specs
 (#110271)

Signed-off-by: Pablo Galindo <pablogsal@gmail.com>
---
 Lib/ast.py                                         | 11 ++-
 Lib/test/test_tokenize.py                          | 97 ++++++++++++++++++++++
 Lib/test/test_unparse.py                           |  3 +-
 .../2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst |  3 +
 Parser/tokenizer.c                                 | 24 ++++--
 5 files changed, 128 insertions(+), 10 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst

diff --git a/Lib/ast.py b/Lib/ast.py
index 1f54309..f7888d1 100644
--- a/Lib/ast.py
+++ b/Lib/ast.py
@@ -1270,13 +1270,15 @@ class _Unparser(NodeVisitor):
         quote_type = quote_types[0]
         self.write(f"{quote_type}{value}{quote_type}")
 
-    def _write_fstring_inner(self, node):
+    def _write_fstring_inner(self, node, scape_newlines=False):
         if isinstance(node, JoinedStr):
             # for both the f-string itself, and format_spec
             for value in node.values:
-                self._write_fstring_inner(value)
+                self._write_fstring_inner(value, scape_newlines=scape_newlines)
         elif isinstance(node, Constant) and isinstance(node.value, str):
             value = node.value.replace("{", "{{").replace("}", "}}")
+            if scape_newlines:
+                value = value.replace("\n", "\\n")
             self.write(value)
         elif isinstance(node, FormattedValue):
             self.visit_FormattedValue(node)
@@ -1299,7 +1301,10 @@ class _Unparser(NodeVisitor):
                 self.write(f"!{chr(node.conversion)}")
             if node.format_spec:
                 self.write(":")
-                self._write_fstring_inner(node.format_spec)
+                self._write_fstring_inner(
+                    node.format_spec,
+                    scape_newlines=True
+                )
 
     def visit_Name(self, node):
         self.write(node.id)
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 94fb6d9..9369560 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -567,6 +567,55 @@ f'''{
     OP         '}'           (3, 1) (3, 2)
     FSTRING_END "'''"         (3, 2) (3, 5)
     """)
+        self.check_tokenize("""\
+f'''__{
+    x:a
+}__'''""", """\
+    FSTRING_START "f'''"        (1, 0) (1, 4)
+    FSTRING_MIDDLE '__'          (1, 4) (1, 6)
+    OP         '{'           (1, 6) (1, 7)
+    NL         '\\n'          (1, 7) (1, 8)
+    NAME       'x'           (2, 4) (2, 5)
+    OP         ':'           (2, 5) (2, 6)
+    FSTRING_MIDDLE 'a\\n'         (2, 6) (3, 0)
+    OP         '}'           (3, 0) (3, 1)
+    FSTRING_MIDDLE '__'          (3, 1) (3, 3)
+    FSTRING_END "'''"         (3, 3) (3, 6)
+    """)
+        self.check_tokenize("""\
+f'''__{
+    x:a
+    b
+     c
+      d
+}__'''""", """\
+    FSTRING_START "f'''"        (1, 0) (1, 4)
+    FSTRING_MIDDLE '__'          (1, 4) (1, 6)
+    OP         '{'           (1, 6) (1, 7)
+    NL         '\\n'          (1, 7) (1, 8)
+    NAME       'x'           (2, 4) (2, 5)
+    OP         ':'           (2, 5) (2, 6)
+    FSTRING_MIDDLE 'a\\n    b\\n     c\\n      d\\n' (2, 6) (6, 0)
+    OP         '}'           (6, 0) (6, 1)
+    FSTRING_MIDDLE '__'          (6, 1) (6, 3)
+    FSTRING_END "'''"         (6, 3) (6, 6)
+    """)
+        self.check_tokenize("""\
+f'__{
+    x:d
+}__'""", """\
+    FSTRING_START "f'"          (1, 0) (1, 2)
+    FSTRING_MIDDLE '__'          (1, 2) (1, 4)
+    OP         '{'           (1, 4) (1, 5)
+    NL         '\\n'          (1, 5) (1, 6)
+    NAME       'x'           (2, 4) (2, 5)
+    OP         ':'           (2, 5) (2, 6)
+    FSTRING_MIDDLE 'd'           (2, 6) (2, 7)
+    NL         '\\n'          (2, 7) (2, 8)
+    OP         '}'           (3, 0) (3, 1)
+    FSTRING_MIDDLE '__'          (3, 1) (3, 3)
+    FSTRING_END "'"           (3, 3) (3, 4)
+    """)
 
     def test_function(self):
         self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
@@ -2279,6 +2328,54 @@ def"', """\
     FSTRING_END \'"\'           (1, 16) (1, 17)
     """)
 
+        self.check_tokenize("""\
+f'''__{
+    x:a
+}__'''""", """\
+    FSTRING_START "f'''"        (1, 0) (1, 4)
+    FSTRING_MIDDLE '__'          (1, 4) (1, 6)
+    LBRACE     '{'           (1, 6) (1, 7)
+    NAME       'x'           (2, 4) (2, 5)
+    COLON      ':'           (2, 5) (2, 6)
+    FSTRING_MIDDLE 'a\\n'         (2, 6) (3, 0)
+    RBRACE     '}'           (3, 0) (3, 1)
+    FSTRING_MIDDLE '__'          (3, 1) (3, 3)
+    FSTRING_END "'''"         (3, 3) (3, 6)
+    """)
+
+        self.check_tokenize("""\
+f'''__{
+    x:a
+    b
+     c
+      d
+}__'''""", """\
+    FSTRING_START "f'''"        (1, 0) (1, 4)
+    FSTRING_MIDDLE '__'          (1, 4) (1, 6)
+    LBRACE     '{'           (1, 6) (1, 7)
+    NAME       'x'           (2, 4) (2, 5)
+    COLON      ':'           (2, 5) (2, 6)
+    FSTRING_MIDDLE 'a\\n    b\\n     c\\n      d\\n' (2, 6) (6, 0)
+    RBRACE     '}'           (6, 0) (6, 1)
+    FSTRING_MIDDLE '__'          (6, 1) (6, 3)
+    FSTRING_END "'''"         (6, 3) (6, 6)
+    """)
+
+        self.check_tokenize("""\
+f'__{
+    x:d
+}__'""", """\
+    FSTRING_START "f'"          (1, 0) (1, 2)
+    FSTRING_MIDDLE '__'          (1, 2) (1, 4)
+    LBRACE     '{'           (1, 4) (1, 5)
+    NAME       'x'           (2, 4) (2, 5)
+    COLON      ':'           (2, 5) (2, 6)
+    FSTRING_MIDDLE 'd'           (2, 6) (2, 7)
+    RBRACE     '}'           (3, 0) (3, 1)
+    FSTRING_MIDDLE '__'          (3, 1) (3, 3)
+    FSTRING_END "'"           (3, 3) (3, 4)
+    """)
+
     def test_function(self):
 
         self.check_tokenize('def d22(a, b, c=2, d=2, *k): pass', """\
diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py
index bdf7b05..6f698a8 100644
--- a/Lib/test/test_unparse.py
+++ b/Lib/test/test_unparse.py
@@ -730,7 +730,8 @@ class DirectoryTestCase(ASTTestCase):
     test_directories = (lib_dir, lib_dir / "test")
     run_always_files = {"test_grammar.py", "test_syntax.py", "test_compile.py",
                         "test_ast.py", "test_asdl_parser.py", "test_fstring.py",
-                        "test_patma.py", "test_type_alias.py", "test_type_params.py"}
+                        "test_patma.py", "test_type_alias.py", "test_type_params.py",
+                        "test_tokenize.py"}
 
     _files_to_test = None
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst b/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst
new file mode 100644
index 0000000..55c743d
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst	
@@ -0,0 +1,3 @@
+Correctly identify the format spec in f-strings (with single or triple
+quotes) that have multiple lines in the expression part and include a
+formatting spec. Patch by Pablo Galindo
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 41d0d16..5e3816f 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -2690,11 +2690,28 @@ f_string_middle:
         if (tok->done == E_ERROR) {
             return MAKE_TOKEN(ERRORTOKEN);
         }
-        if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
+        int in_format_spec = (
+                current_tok->last_expr_end != -1
+                &&
+                INSIDE_FSTRING_EXPR(current_tok)
+        );
+
+       if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
             if (tok->decoding_erred) {
                 return MAKE_TOKEN(ERRORTOKEN);
             }
 
+            // If we are in a format spec and we found a newline,
+            // it means that the format spec ends here and we should
+            // return to the regular mode.
+            if (in_format_spec && c == '\n') {
+                tok_backup(tok, c);
+                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
+                p_start = tok->start;
+                p_end = tok->cur;
+                return MAKE_TOKEN(FSTRING_MIDDLE);
+            }
+
             assert(tok->multi_line_start != NULL);
             // shift the tok_state's location into
             // the start of string, and report the error
@@ -2726,11 +2743,6 @@ f_string_middle:
             end_quote_size = 0;
         }
 
-        int in_format_spec = (
-                current_tok->last_expr_end != -1
-                &&
-                INSIDE_FSTRING_EXPR(current_tok)
-        );
         if (c == '{') {
             int peek = tok_nextc(tok);
             if (peek != '{' || in_format_spec) {
-- 
cgit v0.12