summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Lib/ast.py11
-rw-r--r--Lib/test/test_tokenize.py97
-rw-r--r--Lib/test/test_unparse.py3
-rw-r--r--Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst3
-rw-r--r--Parser/tokenizer.c24
5 files changed, 128 insertions, 10 deletions
diff --git a/Lib/ast.py b/Lib/ast.py
index 1f54309..f7888d1 100644
--- a/Lib/ast.py
+++ b/Lib/ast.py
@@ -1270,13 +1270,15 @@ class _Unparser(NodeVisitor):
quote_type = quote_types[0]
self.write(f"{quote_type}{value}{quote_type}")
- def _write_fstring_inner(self, node):
+ def _write_fstring_inner(self, node, scape_newlines=False):
if isinstance(node, JoinedStr):
# for both the f-string itself, and format_spec
for value in node.values:
- self._write_fstring_inner(value)
+ self._write_fstring_inner(value, scape_newlines=scape_newlines)
elif isinstance(node, Constant) and isinstance(node.value, str):
value = node.value.replace("{", "{{").replace("}", "}}")
+ if scape_newlines:
+ value = value.replace("\n", "\\n")
self.write(value)
elif isinstance(node, FormattedValue):
self.visit_FormattedValue(node)
@@ -1299,7 +1301,10 @@ class _Unparser(NodeVisitor):
self.write(f"!{chr(node.conversion)}")
if node.format_spec:
self.write(":")
- self._write_fstring_inner(node.format_spec)
+ self._write_fstring_inner(
+ node.format_spec,
+ scape_newlines=True
+ )
def visit_Name(self, node):
self.write(node.id)
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 94fb6d9..9369560 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -567,6 +567,55 @@ f'''{
OP '}' (3, 1) (3, 2)
FSTRING_END "'''" (3, 2) (3, 5)
""")
+ self.check_tokenize("""\
+f'''__{
+ x:a
+}__'''""", """\
+ FSTRING_START "f'''" (1, 0) (1, 4)
+ FSTRING_MIDDLE '__' (1, 4) (1, 6)
+ OP '{' (1, 6) (1, 7)
+ NL '\\n' (1, 7) (1, 8)
+ NAME 'x' (2, 4) (2, 5)
+ OP ':' (2, 5) (2, 6)
+ FSTRING_MIDDLE 'a\\n' (2, 6) (3, 0)
+ OP '}' (3, 0) (3, 1)
+ FSTRING_MIDDLE '__' (3, 1) (3, 3)
+ FSTRING_END "'''" (3, 3) (3, 6)
+ """)
+ self.check_tokenize("""\
+f'''__{
+ x:a
+ b
+ c
+ d
+}__'''""", """\
+ FSTRING_START "f'''" (1, 0) (1, 4)
+ FSTRING_MIDDLE '__' (1, 4) (1, 6)
+ OP '{' (1, 6) (1, 7)
+ NL '\\n' (1, 7) (1, 8)
+ NAME 'x' (2, 4) (2, 5)
+ OP ':' (2, 5) (2, 6)
+ FSTRING_MIDDLE 'a\\n b\\n c\\n d\\n' (2, 6) (6, 0)
+ OP '}' (6, 0) (6, 1)
+ FSTRING_MIDDLE '__' (6, 1) (6, 3)
+ FSTRING_END "'''" (6, 3) (6, 6)
+ """)
+ self.check_tokenize("""\
+f'__{
+ x:d
+}__'""", """\
+ FSTRING_START "f'" (1, 0) (1, 2)
+ FSTRING_MIDDLE '__' (1, 2) (1, 4)
+ OP '{' (1, 4) (1, 5)
+ NL '\\n' (1, 5) (1, 6)
+ NAME 'x' (2, 4) (2, 5)
+ OP ':' (2, 5) (2, 6)
+ FSTRING_MIDDLE 'd' (2, 6) (2, 7)
+ NL '\\n' (2, 7) (2, 8)
+ OP '}' (3, 0) (3, 1)
+ FSTRING_MIDDLE '__' (3, 1) (3, 3)
+ FSTRING_END "'" (3, 3) (3, 4)
+ """)
def test_function(self):
self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
@@ -2279,6 +2328,54 @@ def"', """\
FSTRING_END \'"\' (1, 16) (1, 17)
""")
+ self.check_tokenize("""\
+f'''__{
+ x:a
+}__'''""", """\
+ FSTRING_START "f'''" (1, 0) (1, 4)
+ FSTRING_MIDDLE '__' (1, 4) (1, 6)
+ LBRACE '{' (1, 6) (1, 7)
+ NAME 'x' (2, 4) (2, 5)
+ COLON ':' (2, 5) (2, 6)
+ FSTRING_MIDDLE 'a\\n' (2, 6) (3, 0)
+ RBRACE '}' (3, 0) (3, 1)
+ FSTRING_MIDDLE '__' (3, 1) (3, 3)
+ FSTRING_END "'''" (3, 3) (3, 6)
+ """)
+
+ self.check_tokenize("""\
+f'''__{
+ x:a
+ b
+ c
+ d
+}__'''""", """\
+ FSTRING_START "f'''" (1, 0) (1, 4)
+ FSTRING_MIDDLE '__' (1, 4) (1, 6)
+ LBRACE '{' (1, 6) (1, 7)
+ NAME 'x' (2, 4) (2, 5)
+ COLON ':' (2, 5) (2, 6)
+ FSTRING_MIDDLE 'a\\n b\\n c\\n d\\n' (2, 6) (6, 0)
+ RBRACE '}' (6, 0) (6, 1)
+ FSTRING_MIDDLE '__' (6, 1) (6, 3)
+ FSTRING_END "'''" (6, 3) (6, 6)
+ """)
+
+ self.check_tokenize("""\
+f'__{
+ x:d
+}__'""", """\
+ FSTRING_START "f'" (1, 0) (1, 2)
+ FSTRING_MIDDLE '__' (1, 2) (1, 4)
+ LBRACE '{' (1, 4) (1, 5)
+ NAME 'x' (2, 4) (2, 5)
+ COLON ':' (2, 5) (2, 6)
+ FSTRING_MIDDLE 'd' (2, 6) (2, 7)
+ RBRACE '}' (3, 0) (3, 1)
+ FSTRING_MIDDLE '__' (3, 1) (3, 3)
+ FSTRING_END "'" (3, 3) (3, 4)
+ """)
+
def test_function(self):
self.check_tokenize('def d22(a, b, c=2, d=2, *k): pass', """\
diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py
index bdf7b05..6f698a8 100644
--- a/Lib/test/test_unparse.py
+++ b/Lib/test/test_unparse.py
@@ -730,7 +730,8 @@ class DirectoryTestCase(ASTTestCase):
test_directories = (lib_dir, lib_dir / "test")
run_always_files = {"test_grammar.py", "test_syntax.py", "test_compile.py",
"test_ast.py", "test_asdl_parser.py", "test_fstring.py",
- "test_patma.py", "test_type_alias.py", "test_type_params.py"}
+ "test_patma.py", "test_type_alias.py", "test_type_params.py",
+ "test_tokenize.py"}
_files_to_test = None
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst b/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst
new file mode 100644
index 0000000..55c743d
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst
@@ -0,0 +1,3 @@
+Correctly identify the format spec in f-strings (with single or triple
+quotes) that have multiple lines in the expression part and include a
+formatting spec. Patch by Pablo Galindo.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 41d0d16..5e3816f 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -2690,11 +2690,28 @@ f_string_middle:
if (tok->done == E_ERROR) {
return MAKE_TOKEN(ERRORTOKEN);
}
- if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
+ int in_format_spec = (
+ current_tok->last_expr_end != -1
+ &&
+ INSIDE_FSTRING_EXPR(current_tok)
+ );
+
+ if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
if (tok->decoding_erred) {
return MAKE_TOKEN(ERRORTOKEN);
}
+ // If we are in a format spec and we found a newline,
+ // it means that the format spec ends here and we should
+ // return to the regular mode.
+ if (in_format_spec && c == '\n') {
+ tok_backup(tok, c);
+ TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
+ p_start = tok->start;
+ p_end = tok->cur;
+ return MAKE_TOKEN(FSTRING_MIDDLE);
+ }
+
assert(tok->multi_line_start != NULL);
// shift the tok_state's location into
// the start of string, and report the error
@@ -2726,11 +2743,6 @@ f_string_middle:
end_quote_size = 0;
}
- int in_format_spec = (
- current_tok->last_expr_end != -1
- &&
- INSIDE_FSTRING_EXPR(current_tok)
- );
if (c == '{') {
int peek = tok_nextc(tok);
if (peek != '{' || in_format_spec) {