Issue #24965: Implement PEP 498 "Literal String Interpolation". Documentation is still needed, I'll open an issue for that.

author: Eric V. Smith <eric@trueblade.com> 2015-09-19 18:51:32 (GMT)
committer: Eric V. Smith <eric@trueblade.com> 2015-09-19 18:51:32 (GMT)
commit: 235a6f09847ad554d8bf073d4e1d58d1e398ae8c (patch)
tree: 36ff217247cfcd108914065cea8ddf3ad056d192
parent: aed8830af3bb5a79878cf0f603ebbd8a37f5b36e (diff)
download: cpython-235a6f09847ad554d8bf073d4e1d58d1e398ae8c.zip
cpython-235a6f09847ad554d8bf073d4e1d58d1e398ae8c.tar.gz
cpython-235a6f09847ad554d8bf073d4e1d58d1e398ae8c.tar.bz2
9 files changed, 1965 insertions, 63 deletions
diff --git a/Include/Python-ast.h b/Include/Python-ast.h
index 3bc015f..ea6679c 100644
--- a/Include/Python-ast.h
+++ b/Include/Python-ast.h
@@ -201,9 +201,10 @@ enum _expr_kind {BoolOp_kind=1, BinOp_kind=2, UnaryOp_kind=3, Lambda_kind=4,
                   SetComp_kind=9, DictComp_kind=10, GeneratorExp_kind=11,
                   Await_kind=12, Yield_kind=13, YieldFrom_kind=14,
                   Compare_kind=15, Call_kind=16, Num_kind=17, Str_kind=18,
-                  Bytes_kind=19, NameConstant_kind=20, Ellipsis_kind=21,
-                  Attribute_kind=22, Subscript_kind=23, Starred_kind=24,
-                  Name_kind=25, List_kind=26, Tuple_kind=27};
+                  FormattedValue_kind=19, JoinedStr_kind=20, Bytes_kind=21,
+                  NameConstant_kind=22, Ellipsis_kind=23, Attribute_kind=24,
+                  Subscript_kind=25, Starred_kind=26, Name_kind=27,
+                  List_kind=28, Tuple_kind=29};
 struct _expr {
     enum _expr_kind kind;
     union {
@@ -297,6 +298,16 @@ struct _expr {
         } Str;
         
         struct {
+            expr_ty value;
+            int conversion;
+            expr_ty format_spec;
+        } FormattedValue;
+        
+        struct {
+            asdl_seq *values;
+        } JoinedStr;
+        
+        struct {
             bytes s;
         } Bytes;
         
@@ -543,6 +554,12 @@ expr_ty _Py_Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, int
 expr_ty _Py_Num(object n, int lineno, int col_offset, PyArena *arena);
 #define Str(a0, a1, a2, a3) _Py_Str(a0, a1, a2, a3)
 expr_ty _Py_Str(string s, int lineno, int col_offset, PyArena *arena);
+#define FormattedValue(a0, a1, a2, a3, a4, a5) _Py_FormattedValue(a0, a1, a2, a3, a4, a5)
+expr_ty _Py_FormattedValue(expr_ty value, int conversion, expr_ty format_spec,
+                           int lineno, int col_offset, PyArena *arena);
+#define JoinedStr(a0, a1, a2, a3) _Py_JoinedStr(a0, a1, a2, a3)
+expr_ty _Py_JoinedStr(asdl_seq * values, int lineno, int col_offset, PyArena
+                      *arena);
 #define Bytes(a0, a1, a2, a3) _Py_Bytes(a0, a1, a2, a3)
 expr_ty _Py_Bytes(bytes s, int lineno, int col_offset, PyArena *arena);
 #define NameConstant(a0, a1, a2, a3) _Py_NameConstant(a0, a1, a2, a3)
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
new file mode 100644
index 0000000..a6ff9cf
--- /dev/null
+++ b/Lib/test/test_fstring.py
@@ -0,0 +1,715 @@
+import ast
+import types
+import decimal
+import unittest
+
+a_global = 'global variable'
+
+# You could argue that I'm too strict in looking for specific error
+#  values with assertRaisesRegex, but without it it's way too easy to
+#  make a syntax error in the test strings. Especially with all of the
+#  triple quotes, raw strings, backslashes, etc. I think it's a
+#  worthwhile tradeoff. When I switched to this method, I found many
+#  examples where I wasn't testing what I thought I was.
+
+class TestCase(unittest.TestCase):
+    def assertAllRaise(self, exception_type, regex, error_strings):
+        for str in error_strings:
+            with self.subTest(str=str):
+                with self.assertRaisesRegex(exception_type, regex):
+                    eval(str)
+
+    def test__format__lookup(self):
+        # Make sure __format__ is looked up on the type, not the instance.
+        class X:
+            def __format__(self, spec):
+                return 'class'
+
+        x = X()
+
+        # Add a bound __format__ method to the 'y' instance, but not
+        #  the 'x' instance.
+        y = X()
+        y.__format__ = types.MethodType(lambda self, spec: 'instance', y)
+
+        self.assertEqual(f'{y}', format(y))
+        self.assertEqual(f'{y}', 'class')
+        self.assertEqual(format(x), format(y))
+
+        # __format__ is not called this way, but still make sure it
+        #  returns what we expect (so we can make sure we're bypassing
+        #  it).
+        self.assertEqual(x.__format__(''), 'class')
+        self.assertEqual(y.__format__(''), 'instance')
+
+        # This is how __format__ is actually called.
+        self.assertEqual(type(x).__format__(x, ''), 'class')
+        self.assertEqual(type(y).__format__(y, ''), 'class')
+
+    def test_ast(self):
+        # Inspired by http://bugs.python.org/issue24975
+        class X:
+            def __init__(self):
+                self.called = False
+            def __call__(self):
+                self.called = True
+                return 4
+        x = X()
+        expr = """
+a = 10
+f'{a * x()}'"""
+        t = ast.parse(expr)
+        c = compile(t, '', 'exec')
+
+        # Make sure x was not called.
+        self.assertFalse(x.called)
+
+        # Actually run the code.
+        exec(c)
+
+        # Make sure x was called.
+        self.assertTrue(x.called)
+
+    def test_literal_eval(self):
+        # With no expressions, an f-string is okay.
+        self.assertEqual(ast.literal_eval("f'x'"), 'x')
+        self.assertEqual(ast.literal_eval("f'x' 'y'"), 'xy')
+
+        # But this should raise an error.
+        with self.assertRaisesRegex(ValueError, 'malformed node or string'):
+            ast.literal_eval("f'x{3}'")
+
+        # As should this, which uses a different ast node
+        with self.assertRaisesRegex(ValueError, 'malformed node or string'):
+            ast.literal_eval("f'{3}'")
+
+    def test_ast_compile_time_concat(self):
+        x = ['']
+
+        expr = """x[0] = 'foo' f'{3}'"""
+        t = ast.parse(expr)
+        c = compile(t, '', 'exec')
+        exec(c)
+        self.assertEqual(x[0], 'foo3')
+
+    def test_literal(self):
+        self.assertEqual(f'', '')
+        self.assertEqual(f'a', 'a')
+        self.assertEqual(f' ', ' ')
+        self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}',
+                         '\N{GREEK CAPITAL LETTER DELTA}')
+        self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}',
+                         '\u0394')
+        self.assertEqual(f'\N{True}', '\u22a8')
+        self.assertEqual(rf'\N{True}', r'\NTrue')
+
+    def test_escape_order(self):
+        # note that hex(ord('{')) == 0x7b, so this
+        #  string becomes f'a{4*10}b'
+        self.assertEqual(f'a\u007b4*10}b', 'a40b')
+        self.assertEqual(f'a\x7b4*10}b', 'a40b')
+        self.assertEqual(f'a\x7b4*10\N{RIGHT CURLY BRACKET}b', 'a40b')
+        self.assertEqual(f'{"a"!\N{LATIN SMALL LETTER R}}', "'a'")
+        self.assertEqual(f'{10\x3a02X}', '0A')
+        self.assertEqual(f'{10:02\N{LATIN CAPITAL LETTER X}}', '0A')
+
+        self.assertAllRaise(SyntaxError, "f-string: single '}' is not allowed",
+                            [r"""f'a{\u007b4*10}b'""",    # mis-matched brackets
+                             ])
+        self.assertAllRaise(SyntaxError, 'unexpected character after line continuation character',
+                            [r"""f'{"a"\!r}'""",
+                             r"""f'{a\!r}'""",
+                             ])
+
+    def test_unterminated_string(self):
+        self.assertAllRaise(SyntaxError, 'f-string: unterminated string',
+                            [r"""f'{"x'""",
+                             r"""f'{"x}'""",
+                             r"""f'{("x'""",
+                             r"""f'{("x}'""",
+                             ])
+
+    def test_mismatched_parens(self):
+        self.assertAllRaise(SyntaxError, 'f-string: mismatched',
+                            ["f'{((}'",
+                             ])
+
+    def test_double_braces(self):
+        self.assertEqual(f'{{', '{')
+        self.assertEqual(f'a{{', 'a{')
+        self.assertEqual(f'{{b', '{b')
+        self.assertEqual(f'a{{b', 'a{b')
+        self.assertEqual(f'}}', '}')
+        self.assertEqual(f'a}}', 'a}')
+        self.assertEqual(f'}}b', '}b')
+        self.assertEqual(f'a}}b', 'a}b')
+
+        self.assertEqual(f'{{{10}', '{10')
+        self.assertEqual(f'}}{10}', '}10')
+        self.assertEqual(f'}}{{{10}', '}{10')
+        self.assertEqual(f'}}a{{{10}', '}a{10')
+
+        self.assertEqual(f'{10}{{', '10{')
+        self.assertEqual(f'{10}}}', '10}')
+        self.assertEqual(f'{10}}}{{', '10}{')
+        self.assertEqual(f'{10}}}a{{' '}', '10}a{}')
+
+        # Inside of strings, don't interpret doubled brackets.
+        self.assertEqual(f'{"{{}}"}', '{{}}')
+
+        self.assertAllRaise(TypeError, 'unhashable type',
+                            ["f'{ {{}} }'", # dict in a set
+                             ])
+
+    def test_compile_time_concat(self):
+        x = 'def'
+        self.assertEqual('abc' f'## {x}ghi', 'abc## defghi')
+        self.assertEqual('abc' f'{x}' 'ghi', 'abcdefghi')
+        self.assertEqual('abc' f'{x}' 'gh' f'i{x:4}', 'abcdefghidef ')
+        self.assertEqual('{x}' f'{x}', '{x}def')
+        self.assertEqual('{x' f'{x}', '{xdef')
+        self.assertEqual('{x}' f'{x}', '{x}def')
+        self.assertEqual('{{x}}' f'{x}', '{{x}}def')
+        self.assertEqual('{{x' f'{x}', '{{xdef')
+        self.assertEqual('x}}' f'{x}', 'x}}def')
+        self.assertEqual(f'{x}' 'x}}', 'defx}}')
+        self.assertEqual(f'{x}' '', 'def')
+        self.assertEqual('' f'{x}' '', 'def')
+        self.assertEqual('' f'{x}', 'def')
+        self.assertEqual(f'{x}' '2', 'def2')
+        self.assertEqual('1' f'{x}' '2', '1def2')
+        self.assertEqual('1' f'{x}', '1def')
+        self.assertEqual(f'{x}' f'-{x}', 'def-def')
+        self.assertEqual('' f'', '')
+        self.assertEqual('' f'' '', '')
+        self.assertEqual('' f'' '' f'', '')
+        self.assertEqual(f'', '')
+        self.assertEqual(f'' '', '')
+        self.assertEqual(f'' '' f'', '')
+        self.assertEqual(f'' '' f'' '', '')
+
+        self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
+                            ["f'{3' f'}'",  # can't concat to get a valid f-string
+                             ])
+
+    def test_comments(self):
+        # These aren't comments, since they're in strings.
+        d = {'#': 'hash'}
+        self.assertEqual(f'{"#"}', '#')
+        self.assertEqual(f'{d["#"]}', 'hash')
+
+        self.assertAllRaise(SyntaxError, "f-string cannot include '#'",
+                            ["f'{1#}'",   # error because the expression becomes "(1#)"
+                             "f'{3(#)}'",
+                             ])
+
+    def test_many_expressions(self):
+        # Create a string with many expressions in it. Note that
+        #  because we have a space in here as a literal, we're actually
+        #  going to use twice as many ast nodes: one for each literal
+        #  plus one for each expression.
+        def build_fstr(n, extra=''):
+            return "f'" + ('{x} ' * n) + extra + "'"
+
+        x = 'X'
+        width = 1
+
+        # Test around 256.
+        for i in range(250, 260):
+            self.assertEqual(eval(build_fstr(i)), (x+' ')*i)
+
+        # Test concatenating 2 large fstrings.
+        self.assertEqual(eval(build_fstr(255)*256), (x+' ')*(255*256))
+
+        s = build_fstr(253, '{x:{width}} ')
+        self.assertEqual(eval(s), (x+' ')*254)
+
+        # Test lots of expressions and constants, concatenated.
+        s = "f'{1}' 'x' 'y'" * 1024
+        self.assertEqual(eval(s), '1xy' * 1024)
+
+    def test_format_specifier_expressions(self):
+        width = 10
+        precision = 4
+        value = decimal.Decimal('12.34567')
+        self.assertEqual(f'result: {value:{width}.{precision}}', 'result:      12.35')
+        self.assertEqual(f'result: {value:{width!r}.{precision}}', 'result:      12.35')
+        self.assertEqual(f'result: {value:{width:0}.{precision:1}}', 'result:      12.35')
+        self.assertEqual(f'result: {value:{1}{0:0}.{precision:1}}', 'result:      12.35')
+        self.assertEqual(f'result: {value:{ 1}{ 0:0}.{ precision:1}}', 'result:      12.35')
+        self.assertEqual(f'{10:#{1}0x}', '       0xa')
+        self.assertEqual(f'{10:{"#"}1{0}{"x"}}', '       0xa')
+        self.assertEqual(f'{-10:-{"#"}1{0}x}', '      -0xa')
+        self.assertEqual(f'{-10:{"-"}#{1}0{"x"}}', '      -0xa')
+        self.assertEqual(f'{10:#{3 != {4:5} and width}x}', '       0xa')
+
+        self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
+                            ["""f'{"s"!r{":10"}}'""",
+
+                             # This looks like a nested format spec.
+                             ])
+
+        self.assertAllRaise(SyntaxError, "invalid syntax",
+                            [# Invalid syntax inside a nested spec.
+                             "f'{4:{/5}}'",
+                             ])
+
+        self.assertAllRaise(SyntaxError, "f-string: expressions nested too deeply",
+                            [# Can't nest format specifiers.
+                             "f'result: {value:{width:{0}}.{precision:1}}'",
+                             ])
+
+        self.assertAllRaise(SyntaxError, 'f-string: invalid conversion character',
+                            [# No expansion inside conversion or for
+                             #  the : or ! itself.
+                             """f'{"s"!{"r"}}'""",
+                             ])
+
+    def test_side_effect_order(self):
+        class X:
+            def __init__(self):
+                self.i = 0
+            def __format__(self, spec):
+                self.i += 1
+                return str(self.i)
+
+        x = X()
+        self.assertEqual(f'{x} {x}', '1 2')
+
+    def test_missing_expression(self):
+        self.assertAllRaise(SyntaxError, 'f-string: empty expression not allowed',
+                            ["f'{}'",
+                             "f'{ }'",  # comma needed: adjacent literals would merge into one case
+                             "f' {} '",
+                             "f'{!r}'",
+                             "f'{ !r}'",
+                             "f'{10:{ }}'",
+                             "f' { } '",
+                             r"f'{\n}'",
+                             r"f'{\n \n}'",
+                             ])
+
+    def test_parens_in_expressions(self):
+        self.assertEqual(f'{3,}', '(3,)')
+
+        # Add these because when an expression is evaluated, parens
+        #  are added around it. But we shouldn't go from an invalid
+        #  expression to a valid one. The added parens are just
+        #  supposed to allow whitespace (including newlines).
+        self.assertAllRaise(SyntaxError, 'invalid syntax',
+                            ["f'{,}'",
+                             "f'{,}'",  # this is (,), which is an error
+                             ])
+
+        self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
+                            ["f'{3)+(4}'",
+                             ])
+
+        self.assertAllRaise(SyntaxError, 'EOL while scanning string literal',
+                            ["f'{\n}'",
+                             ])
+
+    def test_newlines_in_expressions(self):
+        self.assertEqual(f'{0}', '0')
+        self.assertEqual(f'{0\n}', '0')
+        self.assertEqual(f'{0\r}', '0')
+        self.assertEqual(f'{\n0\n}', '0')
+        self.assertEqual(f'{\r0\r}', '0')
+        self.assertEqual(f'{\n0\r}', '0')
+        self.assertEqual(f'{\n0}', '0')
+        self.assertEqual(f'{3+\n4}', '7')
+        self.assertEqual(f'{3+\\\n4}', '7')
+        self.assertEqual(rf'''{3+
+4}''', '7')
+        self.assertEqual(f'''{3+\
+4}''', '7')
+
+        self.assertAllRaise(SyntaxError, 'f-string: empty expression not allowed',
+                            [r"f'{\n}'",
+                             ])
+
+    def test_lambda(self):
+        x = 5
+        self.assertEqual(f'{(lambda y:x*y)("8")!r}', "'88888'")
+        self.assertEqual(f'{(lambda y:x*y)("8")!r:10}', "'88888'   ")
+        self.assertEqual(f'{(lambda y:x*y)("8"):10}', "88888     ")
+
+        # lambda doesn't work without parens, because the colon
+        #  makes the parser think it's a format_spec
+        self.assertAllRaise(SyntaxError, 'unexpected EOF while parsing',
+                            ["f'{lambda x:x}'",
+                             ])
+
+    def test_yield(self):
+        # Not terribly useful, but make sure the yield turns
+        #  a function into a generator
+        def fn(y):
+            f'y:{yield y*2}'
+
+        g = fn(4)
+        self.assertEqual(next(g), 8)
+
+    def test_yield_send(self):
+        def fn(x):
+            yield f'x:{yield (lambda i: x * i)}'
+
+        g = fn(10)
+        the_lambda = next(g)
+        self.assertEqual(the_lambda(4), 40)
+        self.assertEqual(g.send('string'), 'x:string')
+
+    def test_expressions_with_triple_quoted_strings(self):
+        self.assertEqual(f"{'''x'''}", 'x')
+        self.assertEqual(f"{'''eric's'''}", "eric's")
+        self.assertEqual(f'{"""eric\'s"""}', "eric's")
+        self.assertEqual(f"{'''eric\"s'''}", 'eric"s')
+        self.assertEqual(f'{"""eric"s"""}', 'eric"s')
+
+        # Test concatenation within an expression
+        self.assertEqual(f'{"x" """eric"s""" "y"}', 'xeric"sy')
+        self.assertEqual(f'{"x" """eric"s"""}', 'xeric"s')
+        self.assertEqual(f'{"""eric"s""" "y"}', 'eric"sy')
+        self.assertEqual(f'{"""x""" """eric"s""" "y"}', 'xeric"sy')
+        self.assertEqual(f'{"""x""" """eric"s""" """y"""}', 'xeric"sy')
+        self.assertEqual(f'{r"""x""" """eric"s""" """y"""}', 'xeric"sy')
+
+    def test_multiple_vars(self):
+        x = 98
+        y = 'abc'
+        self.assertEqual(f'{x}{y}', '98abc')
+
+        self.assertEqual(f'X{x}{y}', 'X98abc')
+        self.assertEqual(f'{x}X{y}', '98Xabc')
+        self.assertEqual(f'{x}{y}X', '98abcX')
+
+        self.assertEqual(f'X{x}Y{y}', 'X98Yabc')
+        self.assertEqual(f'X{x}{y}Y', 'X98abcY')
+        self.assertEqual(f'{x}X{y}Y', '98XabcY')
+
+        self.assertEqual(f'X{x}Y{y}Z', 'X98YabcZ')
+
+    def test_closure(self):
+        def outer(x):
+            def inner():
+                return f'x:{x}'
+            return inner
+
+        self.assertEqual(outer('987')(), 'x:987')
+        self.assertEqual(outer(7)(), 'x:7')
+
+    def test_arguments(self):
+        y = 2
+        def f(x, width):
+            return f'x={x*y:{width}}'
+
+        self.assertEqual(f('foo', 10), 'x=foofoo    ')
+        x = 'bar'
+        self.assertEqual(f(10, 10), 'x=        20')
+
+    def test_locals(self):
+        value = 123
+        self.assertEqual(f'v:{value}', 'v:123')
+
+    def test_missing_variable(self):
+        with self.assertRaises(NameError):
+            f'v:{value}'
+
+    def test_missing_format_spec(self):
+        class O:
+            def __format__(self, spec):
+                if not spec:
+                    return '*'
+                return spec
+
+        self.assertEqual(f'{O():x}', 'x')
+        self.assertEqual(f'{O()}', '*')
+        self.assertEqual(f'{O():}', '*')
+
+        self.assertEqual(f'{3:}', '3')
+        self.assertEqual(f'{3!s:}', '3')
+
+    def test_global(self):
+        self.assertEqual(f'g:{a_global}', 'g:global variable')
+        self.assertEqual(f'g:{a_global!r}', "g:'global variable'")
+
+        a_local = 'local variable'
+        self.assertEqual(f'g:{a_global} l:{a_local}',
+                         'g:global variable l:local variable')
+        self.assertEqual(f'g:{a_global!r}',
+                         "g:'global variable'")
+        self.assertEqual(f'g:{a_global} l:{a_local!r}',
+                         "g:global variable l:'local variable'")
+
+        self.assertIn("module 'unittest' from", f'{unittest}')
+
+    def test_shadowed_global(self):
+        a_global = 'really a local'
+        self.assertEqual(f'g:{a_global}', 'g:really a local')
+        self.assertEqual(f'g:{a_global!r}', "g:'really a local'")
+
+        a_local = 'local variable'
+        self.assertEqual(f'g:{a_global} l:{a_local}',
+                         'g:really a local l:local variable')
+        self.assertEqual(f'g:{a_global!r}',
+                         "g:'really a local'")
+        self.assertEqual(f'g:{a_global} l:{a_local!r}',
+                         "g:really a local l:'local variable'")
+
+    def test_call(self):
+        def foo(x):
+            return 'x=' + str(x)
+
+        self.assertEqual(f'{foo(10)}', 'x=10')
+
+    def test_nested_fstrings(self):
+        y = 5
+        self.assertEqual(f'{f"{0}"*3}', '000')
+        self.assertEqual(f'{f"{y}"*3}', '555')
+        self.assertEqual(f'{f"{\'x\'}"*3}', 'xxx')
+
+        self.assertEqual(f"{r'x' f'{\"s\"}'}", 'xs')
+        self.assertEqual(f"{r'x'rf'{\"s\"}'}", 'xs')
+
+    def test_invalid_string_prefixes(self):
+        self.assertAllRaise(SyntaxError, 'unexpected EOF while parsing',
+                            ["fu''",
+                             "uf''",
+                             "Fu''",
+                             "fU''",
+                             "Uf''",
+                             "uF''",
+                             "ufr''",
+                             "urf''",
+                             "fur''",
+                             "fru''",
+                             "rfu''",
+                             "ruf''",
+                             "FUR''",
+                             "Fur''",
+                             ])
+
+    def test_leading_trailing_spaces(self):
+        self.assertEqual(f'{ 3}', '3')
+        self.assertEqual(f'{  3}', '3')
+        self.assertEqual(f'{\t3}', '3')
+        self.assertEqual(f'{\t\t3}', '3')
+        self.assertEqual(f'{3 }', '3')
+        self.assertEqual(f'{3  }', '3')
+        self.assertEqual(f'{3\t}', '3')
+        self.assertEqual(f'{3\t\t}', '3')
+
+        self.assertEqual(f'expr={ {x: y for x, y in [(1, 2), ]}}',
+                         'expr={1: 2}')
+        self.assertEqual(f'expr={ {x: y for x, y in [(1, 2), ]} }',
+                         'expr={1: 2}')
+
+    def test_character_name(self):
+        self.assertEqual(f'{4}\N{GREEK CAPITAL LETTER DELTA}{3}',
+                         '4\N{GREEK CAPITAL LETTER DELTA}3')
+        self.assertEqual(f'{{}}\N{GREEK CAPITAL LETTER DELTA}{3}',
+                         '{}\N{GREEK CAPITAL LETTER DELTA}3')
+
+    def test_not_equal(self):
+        # There's a special test for this because there's a special
+        #  case in the f-string parser to look for != as not ending an
+        #  expression. Normally it would, while looking for !s or !r.
+
+        self.assertEqual(f'{3!=4}', 'True')
+        self.assertEqual(f'{3!=4:}', 'True')
+        self.assertEqual(f'{3!=4!s}', 'True')
+        self.assertEqual(f'{3!=4!s:.3}', 'Tru')
+
+    def test_conversions(self):
+        self.assertEqual(f'{3.14:10.10}', '      3.14')
+        self.assertEqual(f'{3.14!s:10.10}', '3.14      ')
+        self.assertEqual(f'{3.14!r:10.10}', '3.14      ')
+        self.assertEqual(f'{3.14!a:10.10}', '3.14      ')
+
+        self.assertEqual(f'{"a"}', 'a')
+        self.assertEqual(f'{"a"!r}', "'a'")
+        self.assertEqual(f'{"a"!a}', "'a'")
+
+        # Not a conversion.
+        self.assertEqual(f'{"a!r"}', "a!r")
+
+        # Not a conversion, but show that ! is allowed in a format spec.
+        self.assertEqual(f'{3.14:!<10.10}', '3.14!!!!!!')
+
+        self.assertEqual(f'{"\N{GREEK CAPITAL LETTER DELTA}"}', '\u0394')
+        self.assertEqual(f'{"\N{GREEK CAPITAL LETTER DELTA}"!r}', "'\u0394'")
+        self.assertEqual(f'{"\N{GREEK CAPITAL LETTER DELTA}"!a}', "'\\u0394'")
+
+        self.assertAllRaise(SyntaxError, 'f-string: invalid conversion character',
+                            ["f'{3!g}'",
+                             "f'{3!A}'",
+                             "f'{3!A}'",
+                             "f'{3!A}'",
+                             "f'{3!!}'",
+                             "f'{3!:}'",
+                             "f'{3!\N{GREEK CAPITAL LETTER DELTA}}'",
+                             "f'{3! s}'",  # no space before conversion char
+                             "f'{x!\\x00:.<10}'",
+                             ])
+
+        self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
+                            ["f'{x!s{y}}'",
+                             "f'{3!ss}'",
+                             "f'{3!ss:}'",
+                             "f'{3!ss:s}'",
+                             ])
+
+    def test_assignment(self):
+        self.assertAllRaise(SyntaxError, 'invalid syntax',
+                            ["f'' = 3",
+                             "f'{0}' = x",
+                             "f'{x}' = x",
+                             ])
+
+    def test_del(self):
+        self.assertAllRaise(SyntaxError, 'invalid syntax',
+                            ["del f''",
+                             "del '' f''",
+                             ])
+
+    def test_mismatched_braces(self):
+        self.assertAllRaise(SyntaxError, "f-string: single '}' is not allowed",
+                            ["f'{{}'",
+                             "f'{{}}}'",
+                             "f'}'",
+                             "f'x}'",
+                             "f'x}x'",
+
+                             # Can't have { or } in a format spec.
+                             "f'{3:}>10}'",
+                             r"f'{3:\\}>10}'",
+                             "f'{3:}}>10}'",
+                             ])
+
+        self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
+                            ["f'{3:{{>10}'",
+                             "f'{3'",
+                             "f'{3!'",
+                             "f'{3:'",
+                             "f'{3!s'",
+                             "f'{3!s:'",
+                             "f'{3!s:3'",
+                             "f'x{'",
+                             "f'x{x'",
+                             "f'{3:s'",
+                             "f'{{{'",
+                             "f'{{}}{'",
+                             "f'{'",
+                             ])
+
+        self.assertAllRaise(SyntaxError, 'invalid syntax',
+                            [r"f'{3:\\{>10}'",
+                             ])
+
+        # But these are just normal strings.
+        self.assertEqual(f'{"{"}', '{')
+        self.assertEqual(f'{"}"}', '}')
+        self.assertEqual(f'{3:{"}"}>10}', '}}}}}}}}}3')
+        self.assertEqual(f'{2:{"{"}>10}', '{{{{{{{{{2')
+
+    def test_if_conditional(self):
+        # There's special logic in compile.c to test if the
+        #  conditional for an if (and while) are constants. Exercise
+        #  that code.
+
+        def test_fstring(x, expected):
+            flag = 0
+            if f'{x}':
+                flag = 1
+            else:
+                flag = 2
+            self.assertEqual(flag, expected)
+
+        def test_concat_empty(x, expected):
+            flag = 0
+            if '' f'{x}':
+                flag = 1
+            else:
+                flag = 2
+            self.assertEqual(flag, expected)
+
+        def test_concat_non_empty(x, expected):
+            flag = 0
+            if ' ' f'{x}':
+                flag = 1
+            else:
+                flag = 2
+            self.assertEqual(flag, expected)
+
+        test_fstring('', 2)
+        test_fstring(' ', 1)
+
+        test_concat_empty('', 2)
+        test_concat_empty(' ', 1)
+
+        test_concat_non_empty('', 1)
+        test_concat_non_empty(' ', 1)
+
+    def test_empty_format_specifier(self):
+        x = 'test'
+        self.assertEqual(f'{x}', 'test')
+        self.assertEqual(f'{x:}', 'test')
+        self.assertEqual(f'{x!s:}', 'test')
+        self.assertEqual(f'{x!r:}', "'test'")
+
+    def test_str_format_differences(self):
+        d = {'a': 'string',
+             0: 'integer',
+             }
+        a = 0
+        self.assertEqual(f'{d[0]}', 'integer')
+        self.assertEqual(f'{d["a"]}', 'string')
+        self.assertEqual(f'{d[a]}', 'integer')
+        self.assertEqual('{d[a]}'.format(d=d), 'string')
+        self.assertEqual('{d[0]}'.format(d=d), 'integer')
+
+    def test_invalid_expressions(self):
+        self.assertAllRaise(SyntaxError, 'invalid syntax',
+                            [r"f'{a[4)}'",
+                             r"f'{a(4]}'",
+                            ])
+
+    def test_loop(self):
+        for i in range(1000):
+            self.assertEqual(f'i:{i}', 'i:' + str(i))
+
+    def test_dict(self):
+        d = {'"': 'dquote',
+             "'": 'squote',
+             'foo': 'bar',
+             }
+        self.assertEqual(f'{d["\'"]}', 'squote')
+        self.assertEqual(f"{d['\"']}", 'dquote')
+
+        self.assertEqual(f'''{d["'"]}''', 'squote')
+        self.assertEqual(f"""{d['"']}""", 'dquote')
+
+        self.assertEqual(f'{d["foo"]}', 'bar')
+        self.assertEqual(f"{d['foo']}", 'bar')
+        self.assertEqual(f'{d[\'foo\']}', 'bar')
+        self.assertEqual(f"{d[\"foo\"]}", 'bar')
+
+    def test_escaped_quotes(self):
+        d = {'"': 'a',
+             "'": 'b'}
+
+        self.assertEqual(fr"{d['\"']}", 'a')
+        self.assertEqual(fr'{d["\'"]}', 'b')
+        self.assertEqual(fr"{'\"'}", '"')
+        self.assertEqual(fr'{"\'"}', "'")
+        self.assertEqual(f'{"\\"3"}', '"3')
+
+        self.assertAllRaise(SyntaxError, 'f-string: unterminated string',
+                            [r'''f'{"""\\}' ''',  # Backslash at end of expression
+                             ])
+        self.assertAllRaise(SyntaxError, 'unexpected character after line continuation',
+                            [r"rf'{3\}'",
+                             ])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/Misc/NEWS b/Misc/NEWS
index a2c76f5..46f7a3e 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -19,6 +19,11 @@ Core and Builtins
   argument list of a function declaration.  For example, "def f(*, a =
   3,): pass" is now legal. Patch from Mark Dickinson.
 
+- Issue #24965: Implement PEP 498 "Literal String Interpolation". This
+  allows you to embed expressions inside f-strings, which are
+  converted to normal strings at run time. Given x=3, then
+  f'value={x}' == 'value=3'. Patch by Eric V. Smith.
+
 Library
 -------
 
diff --git a/Parser/Python.asdl b/Parser/Python.asdl
index cd0832d..22775c6 100644
--- a/Parser/Python.asdl
+++ b/Parser/Python.asdl
@@ -71,6 +71,8 @@ module Python
          | Call(expr func, expr* args, keyword* keywords)
          | Num(object n) -- a number as a PyObject.
          | Str(string s) -- need to specify raw, unicode, etc?
+         | FormattedValue(expr value, int? conversion, expr? format_spec)
+         | JoinedStr(expr* values)
          | Bytes(bytes s)
          | NameConstant(singleton value)
          | Ellipsis
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 5046fa5..2369be4 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1477,17 +1477,19 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
     nonascii = 0;
     if (is_potential_identifier_start(c)) {
         /* Process b"", r"", u"", br"" and rb"" */
-        int saw_b = 0, saw_r = 0, saw_u = 0;
+        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0;
         while (1) {
-            if (!(saw_b || saw_u) && (c == 'b' || c == 'B'))
+            if (!(saw_b || saw_u || saw_f) && (c == 'b' || c == 'B'))
                 saw_b = 1;
             /* Since this is a backwards compatibility support literal we don't
                want to support it in arbitrary order like byte literals. */
-            else if (!(saw_b || saw_u || saw_r) && (c == 'u' || c == 'U'))
+            else if (!(saw_b || saw_u || saw_r || saw_f) && (c == 'u' || c == 'U'))
                 saw_u = 1;
             /* ur"" and ru"" are not supported */
             else if (!(saw_r || saw_u) && (c == 'r' || c == 'R'))
                 saw_r = 1;
+            else if (!(saw_f || saw_b || saw_u) && (c == 'f' || c == 'F'))
+                saw_f = 1;
             else
                 break;
             c = tok_nextc(tok);
diff --git a/Python/Python-ast.c b/Python/Python-ast.c
index fd7f17e..a2e9816 100644
--- a/Python/Python-ast.c
+++ b/Python/Python-ast.c
@@ -285,6 +285,18 @@ _Py_IDENTIFIER(s);
 static char *Str_fields[]={
     "s",
 };
+static PyTypeObject *FormattedValue_type;
+_Py_IDENTIFIER(conversion);
+_Py_IDENTIFIER(format_spec);
+static char *FormattedValue_fields[]={
+    "value",
+    "conversion",
+    "format_spec",
+};
+static PyTypeObject *JoinedStr_type;
+static char *JoinedStr_fields[]={
+    "values",
+};
 static PyTypeObject *Bytes_type;
 static char *Bytes_fields[]={
     "s",
@@ -917,6 +929,11 @@ static int init_types(void)
     if (!Num_type) return 0;
     Str_type = make_type("Str", expr_type, Str_fields, 1);
     if (!Str_type) return 0;
+    FormattedValue_type = make_type("FormattedValue", expr_type,
+                                    FormattedValue_fields, 3);
+    if (!FormattedValue_type) return 0;
+    JoinedStr_type = make_type("JoinedStr", expr_type, JoinedStr_fields, 1);
+    if (!JoinedStr_type) return 0;
     Bytes_type = make_type("Bytes", expr_type, Bytes_fields, 1);
     if (!Bytes_type) return 0;
     NameConstant_type = make_type("NameConstant", expr_type,
@@ -2063,6 +2080,42 @@ Str(string s, int lineno, int col_offset, PyArena *arena)
 }
 
 expr_ty
+FormattedValue(expr_ty value, int conversion, expr_ty format_spec, int lineno,
+               int col_offset, PyArena *arena)
+{
+    expr_ty p;
+    if (!value) {  /* value is the only field checked for presence here */
+        PyErr_SetString(PyExc_ValueError,
+                        "field value is required for FormattedValue");
+        return NULL;
+    }
+    p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));  /* node storage comes from arena */
+    if (!p)
+        return NULL;
+    p->kind = FormattedValue_kind;
+    p->v.FormattedValue.value = value;
+    p->v.FormattedValue.conversion = conversion;
+    p->v.FormattedValue.format_spec = format_spec;  /* stored as given, NULL included */
+    p->lineno = lineno;
+    p->col_offset = col_offset;
+    return p;
+}
+
+expr_ty
+JoinedStr(asdl_seq * values, int lineno, int col_offset, PyArena *arena)
+{
+    expr_ty p;
+    p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));  /* node storage comes from arena */
+    if (!p)
+        return NULL;
+    p->kind = JoinedStr_kind;
+    p->v.JoinedStr.values = values;  /* stored as given; not checked for NULL here */
+    p->lineno = lineno;
+    p->col_offset = col_offset;
+    return p;
+}
+
+expr_ty
 Bytes(bytes s, int lineno, int col_offset, PyArena *arena)
 {
     expr_ty p;
@@ -3161,6 +3214,34 @@ ast2obj_expr(void* _o)
             goto failed;
         Py_DECREF(value);
         break;
+    case FormattedValue_kind:
+        result = PyType_GenericNew(FormattedValue_type, NULL, NULL);
+        if (!result) goto failed;
+        value = ast2obj_expr(o->v.FormattedValue.value);
+        if (!value) goto failed;
+        if (_PyObject_SetAttrId(result, &PyId_value, value) == -1)
+            goto failed;
+        Py_DECREF(value);
+        value = ast2obj_int(o->v.FormattedValue.conversion);
+        if (!value) goto failed;
+        if (_PyObject_SetAttrId(result, &PyId_conversion, value) == -1)
+            goto failed;
+        Py_DECREF(value);
+        value = ast2obj_expr(o->v.FormattedValue.format_spec);
+        if (!value) goto failed;
+        if (_PyObject_SetAttrId(result, &PyId_format_spec, value) == -1)
+            goto failed;
+        Py_DECREF(value);
+        break;
+    case JoinedStr_kind:
+        result = PyType_GenericNew(JoinedStr_type, NULL, NULL);
+        if (!result) goto failed;
+        value = ast2obj_list(o->v.JoinedStr.values, ast2obj_expr);
+        if (!value) goto failed;
+        if (_PyObject_SetAttrId(result, &PyId_values, value) == -1)
+            goto failed;
+        Py_DECREF(value);
+        break;
     case Bytes_kind:
         result = PyType_GenericNew(Bytes_type, NULL, NULL);
         if (!result) goto failed;
@@ -6022,6 +6103,86 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena)
         if (*out == NULL) goto failed;
         return 0;
     }
+    isinstance = PyObject_IsInstance(obj, (PyObject*)FormattedValue_type);
+    if (isinstance == -1) {
+        return 1;
+    }
+    if (isinstance) {
+        expr_ty value;
+        int conversion;
+        expr_ty format_spec;
+
+        if (_PyObject_HasAttrId(obj, &PyId_value)) {
+            int res;
+            tmp = _PyObject_GetAttrId(obj, &PyId_value);
+            if (tmp == NULL) goto failed;
+            res = obj2ast_expr(tmp, &value, arena);
+            if (res != 0) goto failed;
+            Py_CLEAR(tmp);
+        } else {
+            PyErr_SetString(PyExc_TypeError, "required field \"value\" missing from FormattedValue");
+            return 1;
+        }
+        if (exists_not_none(obj, &PyId_conversion)) {
+            int res;
+            tmp = _PyObject_GetAttrId(obj, &PyId_conversion);
+            if (tmp == NULL) goto failed;
+            res = obj2ast_int(tmp, &conversion, arena);
+            if (res != 0) goto failed;
+            Py_CLEAR(tmp);
+        } else {
+            conversion = 0;
+        }
+        if (exists_not_none(obj, &PyId_format_spec)) {
+            int res;
+            tmp = _PyObject_GetAttrId(obj, &PyId_format_spec);
+            if (tmp == NULL) goto failed;
+            res = obj2ast_expr(tmp, &format_spec, arena);
+            if (res != 0) goto failed;
+            Py_CLEAR(tmp);
+        } else {
+            format_spec = NULL;
+        }
+        *out = FormattedValue(value, conversion, format_spec, lineno,
+                              col_offset, arena);
+        if (*out == NULL) goto failed;
+        return 0;
+    }
+    isinstance = PyObject_IsInstance(obj, (PyObject*)JoinedStr_type);
+    if (isinstance == -1) {
+        return 1;
+    }
+    if (isinstance) {
+        asdl_seq* values;
+
+        if (_PyObject_HasAttrId(obj, &PyId_values)) {
+            int res;
+            Py_ssize_t len;
+            Py_ssize_t i;
+            tmp = _PyObject_GetAttrId(obj, &PyId_values);
+            if (tmp == NULL) goto failed;
+            if (!PyList_Check(tmp)) {
+                PyErr_Format(PyExc_TypeError, "JoinedStr field \"values\" must be a list, not a %.200s", tmp->ob_type->tp_name);
+                goto failed;
+            }
+            len = PyList_GET_SIZE(tmp);
+            values = _Py_asdl_seq_new(len, arena);
+            if (values == NULL) goto failed;
+            for (i = 0; i < len; i++) {
+                expr_ty value;
+                res = obj2ast_expr(PyList_GET_ITEM(tmp, i), &value, arena);
+                if (res != 0) goto failed;
+                asdl_seq_SET(values, i, value);
+            }
+            Py_CLEAR(tmp);
+        } else {
+            PyErr_SetString(PyExc_TypeError, "required field \"values\" missing from JoinedStr");
+            return 1;
+        }
+        *out = JoinedStr(values, lineno, col_offset, arena);
+        if (*out == NULL) goto failed;
+        return 0;
+    }
     isinstance = PyObject_IsInstance(obj, (PyObject*)Bytes_type);
     if (isinstance == -1) {
         return 1;
@@ -7319,6 +7480,10 @@ PyInit__ast(void)
     if (PyDict_SetItemString(d, "Call", (PyObject*)Call_type) < 0) return NULL;
     if (PyDict_SetItemString(d, "Num", (PyObject*)Num_type) < 0) return NULL;
     if (PyDict_SetItemString(d, "Str", (PyObject*)Str_type) < 0) return NULL;
+    if (PyDict_SetItemString(d, "FormattedValue",
+        (PyObject*)FormattedValue_type) < 0) return NULL;
+    if (PyDict_SetItemString(d, "JoinedStr", (PyObject*)JoinedStr_type) < 0)
+        return NULL;
     if (PyDict_SetItemString(d, "Bytes", (PyObject*)Bytes_type) < 0) return
         NULL;
     if (PyDict_SetItemString(d, "NameConstant", (PyObject*)NameConstant_type) <
diff --git a/Python/ast.c b/Python/ast.c
index 1f7ddfc..735424b 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -257,6 +257,14 @@ validate_expr(expr_ty exp, expr_context_ty ctx)
         }
         return 1;
     }
+    case JoinedStr_kind:
+        return validate_exprs(exp->v.JoinedStr.values, Load, 0);
+    case FormattedValue_kind:
+        if (validate_expr(exp->v.FormattedValue.value, Load) == 0)
+            return 0;
+        if (exp->v.FormattedValue.format_spec)
+            return validate_expr(exp->v.FormattedValue.format_spec, Load);
+        return 1;
     case Bytes_kind: {
         PyObject *b = exp->v.Bytes.s;
         if (!PyBytes_CheckExact(b)) {
@@ -535,9 +543,7 @@ static stmt_ty ast_for_for_stmt(struct compiling *, const node *, int);
 static expr_ty ast_for_call(struct compiling *, const node *, expr_ty);
 
 static PyObject *parsenumber(struct compiling *, const char *);
-static PyObject *parsestr(struct compiling *, const node *n, int *bytesmode);
-static PyObject *parsestrplus(struct compiling *, const node *n,
-                              int *bytesmode);
+static expr_ty parsestrplus(struct compiling *, const node *n);
 
 #define COMP_GENEXP   0
 #define COMP_LISTCOMP 1
@@ -986,6 +992,8 @@ set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
         case Num_kind:
         case Str_kind:
         case Bytes_kind:
+        case JoinedStr_kind:
+        case FormattedValue_kind:
             expr_name = "literal";
             break;
         case NameConstant_kind:
@@ -2001,7 +2009,6 @@ ast_for_atom(struct compiling *c, const node *n)
        | '...' | 'None' | 'True' | 'False'
     */
     node *ch = CHILD(n, 0);
-    int bytesmode = 0;
 
     switch (TYPE(ch)) {
     case NAME: {
@@ -2023,7 +2030,7 @@ ast_for_atom(struct compiling *c, const node *n)
         return Name(name, Load, LINENO(n), n->n_col_offset, c->c_arena);
     }
     case STRING: {
-        PyObject *str = parsestrplus(c, n, &bytesmode);
+        expr_ty str = parsestrplus(c, n);
         if (!str) {
             const char *errtype = NULL;
             if (PyErr_ExceptionMatches(PyExc_UnicodeError))
@@ -2050,14 +2057,7 @@ ast_for_atom(struct compiling *c, const node *n)
             }
             return NULL;
         }
-        if (PyArena_AddPyObject(c->c_arena, str) < 0) {
-            Py_DECREF(str);
-            return NULL;
-        }
-        if (bytesmode)
-            return Bytes(str, LINENO(n), n->n_col_offset, c->c_arena);
-        else
-            return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
+        return str;
     }
     case NUMBER: {
         PyObject *pynum = parsenumber(c, STR(ch));
@@ -4002,12 +4002,838 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
     return v;
 }
 
-/* s is a Python string literal, including the bracketing quote characters,
- * and r &/or b prefixes (if any), and embedded escape sequences (if any).
- * parsestr parses it, and returns the decoded Python string object.
- */
+/* Compile the expression text str[expr_start:expr_end] into an expr_ty.
+
+   The text is wrapped in parentheses before it is handed to the
+   parser. This is to allow strings with embedded newlines, for
+   example. The parenthesized source is built as a separate string
+   object: str itself is never written to, so it is left intact on
+   every path (errors included) and works no matter how many
+   references to it exist.
+
+   Returns the body of the parsed Expression (allocated in arena), or
+   NULL with an exception set. */
+static expr_ty
+fstring_expression_compile(PyObject *str, Py_ssize_t expr_start,
+                           Py_ssize_t expr_end, PyArena *arena)
+{
+    PyCompilerFlags cf;
+    mod_ty mod;
+    char *utf_expr;
+    Py_ssize_t i;
+    int all_whitespace;
+    PyObject *expr_text = NULL;  /* str[expr_start:expr_end] */
+    PyObject *wrapped = NULL;    /* "(" + expr_text + ")" */
+    expr_ty result = NULL;
+
+    assert(str);
+
+    /* If the substring is all whitespace, it's an error. We need to
+        catch this here, and not when we call PyParser_ASTFromString,
+        because turning the expression '' in to '()' would go from
+        being invalid to valid. */
+    /* Note that this code says an empty string is all
+        whitespace. That's important. There's a test for it: f'{}'. */
+    all_whitespace = 1;
+    for (i = expr_start; i < expr_end; i++) {
+        if (!Py_UNICODE_ISSPACE(PyUnicode_READ_CHAR(str, i))) {
+            all_whitespace = 0;
+            break;
+        }
+    }
+    if (all_whitespace) {
+        PyErr_SetString(PyExc_SyntaxError, "f-string: empty expression "
+                                           "not allowed");
+        goto done;
+    }
+
+    /* Build the parenthesized source as a new object instead of
+       patching the braces around the expression in place. */
+    expr_text = PyUnicode_Substring(str, expr_start, expr_end);
+    if (!expr_text)
+        goto done;
+    wrapped = PyUnicode_FromFormat("(%U)", expr_text);
+    if (!wrapped)
+        goto done;
+
+    cf.cf_flags = PyCF_ONLY_AST;
+
+    /* No need to free the memory returned here: it's managed by the
+       string (wrapped), which we keep alive until parsing is done. */
+    utf_expr = PyUnicode_AsUTF8(wrapped);
+    if (!utf_expr)
+        goto done;
+    mod = PyParser_ASTFromString(utf_expr, "<fstring>",
+                                 Py_eval_input, &cf, arena);
+    if (mod)
+        result = mod->v.Expression.body;
+
+done:
+    /* Both temporaries are ours alone; result is NULL on any failure. */
+    Py_XDECREF(wrapped);
+    Py_XDECREF(expr_text);
+    return result;
+}
+
+/* Scan the literal (non-expression) text of str that begins at *ofs.
+
+   Scanning stops at an un-doubled brace or at the end of the string,
+   and *ofs is advanced past everything that was consumed. If the
+   literal found is non-empty, *literal (which must be NULL on entry)
+   is set to the PyUnicode_Substring() result for it; otherwise
+   *literal is left NULL. Doubled braces are collapsed, and a lone
+   '}' rejected, only when recurse_lvl is 0. c and n are used only to
+   report errors through ast_error.
+
+   Return -1 on error.
+
+   Return 0 if we reached the end of the literal.
+
+   Return 1 if we haven't reached the end of the literal, but we want
+   the caller to process the literal up to this point. Used for
+   doubled braces.
+*/
+static int
+fstring_find_literal(PyObject *str, Py_ssize_t *ofs, PyObject **literal,
+                     int recurse_lvl, struct compiling *c, const node *n)
+{
+    /* Get any literal string. It ends when we hit an un-doubled brace, or the
+       end of the string. */
+
+    Py_ssize_t literal_start, literal_end;
+    int result = 0;
+
+    enum PyUnicode_Kind kind = PyUnicode_KIND(str);
+    void *data = PyUnicode_DATA(str);
+
+    assert(*literal == NULL);
+
+    literal_start = *ofs;
+    for (; *ofs < PyUnicode_GET_LENGTH(str); *ofs += 1) {
+        Py_UCS4 ch = PyUnicode_READ(kind, data, *ofs);
+        if (ch == '{' || ch == '}') {
+            /* Check for doubled braces, but only at the top level. If
+               we checked at every level, then f'{0:{3}}' would fail
+               with the two closing braces. */
+            if (recurse_lvl == 0) {
+                if (*ofs + 1 < PyUnicode_GET_LENGTH(str) &&
+                    PyUnicode_READ(kind, data, *ofs + 1) == ch) {
+                    /* We're going to tell the caller that the literal ends
+                       here, but that they should continue scanning. But also
+                       skip over the second brace when we resume scanning.
+                       The literal keeps the first brace of the pair, which
+                       is why literal_end is one past *ofs. */
+                    literal_end = *ofs + 1;
+                    *ofs += 2;
+                    result = 1;
+                    goto done;
+                }
+
+                /* Where a single '{' is the start of a new expression, a
+                   single '}' is not allowed. */
+                if (ch == '}') {
+                    ast_error(c, n, "f-string: single '}' is not allowed");
+                    return -1;
+                }
+            }
+
+            /* We're either at a '{', which means we're starting another
+               expression; or a '}', which means we're at the end of this
+               f-string (for a nested format_spec). */
+            break;
+        }
+    }
+    literal_end = *ofs;
+
+    assert(*ofs == PyUnicode_GET_LENGTH(str) ||
+           PyUnicode_READ(kind, data, *ofs) == '{' ||
+           PyUnicode_READ(kind, data, *ofs) == '}');
+done:
+    if (literal_start != literal_end) {  /* an empty literal leaves *literal NULL */
+        *literal = PyUnicode_Substring(str, literal_start, literal_end);
+        if (!*literal)
+            return -1;
+    }
+
+    return result;
+}
+
+/* Forward declaration because parsing is recursive. */
+static expr_ty
+fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
+              struct compiling *c, const node *n);
+
+/* Parse the f-string str, starting at *ofs. We know *ofs starts an
+   expression (so it must be a '{'). On success, store in *expression
+   the FormattedValue node, which includes the expression, conversion
+   character, and format_spec expression; leave *ofs just past the
+   closing '}'; and return 0. On error, return -1.
+
+   Note that I don't do a perfect job here: I don't make sure that a
+   closing brace doesn't match an opening paren, for example. It
+   doesn't need to error on all invalid expressions, just correctly
+   find the end of all valid ones. Any errors inside the expression
+   will be caught when we parse it later. */
+static int
+fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
+                  expr_ty *expression, struct compiling *c, const node *n)
+{
+    /* Return -1 on error, else 0. */
+
+    Py_ssize_t expr_start;
+    Py_ssize_t expr_end;
+    expr_ty simple_expression;
+    expr_ty format_spec = NULL; /* Optional format specifier. */
+    Py_UCS4 conversion = -1; /* The conversion char. -1 if not specified.
+                                NOTE(review): Py_UCS4 is presumably an
+                                unsigned type, so this sentinel only reads
+                                as -1 again after the (int) cast made when
+                                the node is built - confirm. */
+
+    enum PyUnicode_Kind kind = PyUnicode_KIND(str);
+    void *data = PyUnicode_DATA(str);
+
+    /* 0 if we're not in a string, else the quote char we're trying to
+       match (single or double quote). */
+    Py_UCS4 quote_char = 0;
+
+    /* If we're inside a string, 1=normal, 3=triple-quoted. */
+    int string_type = 0;
+
+    /* Keep track of nesting level for braces/parens/brackets in
+       expressions. */
+    Py_ssize_t nested_depth = 0;
+
+    /* Can only nest one level deep. */
+    if (recurse_lvl >= 2) {
+        ast_error(c, n, "f-string: expressions nested too deeply");
+        return -1;
+    }
+
+    /* The first char must be a left brace, or we wouldn't have gotten
+       here. Skip over it. */
+    assert(PyUnicode_READ(kind, data, *ofs) == '{');
+    *ofs += 1;
+
+    expr_start = *ofs;
+    for (; *ofs < PyUnicode_GET_LENGTH(str); *ofs += 1) {
+        Py_UCS4 ch;
+
+        /* Loop invariants. */
+        assert(nested_depth >= 0);
+        assert(*ofs >= expr_start);
+        if (quote_char)
+            assert(string_type == 1 || string_type == 3);
+        else
+            assert(string_type == 0);
+
+        ch = PyUnicode_READ(kind, data, *ofs);
+        if (quote_char) {
+            /* We're inside a string. See if we're at the end. */
+            /* This code needs to implement the same non-error logic
+               as tok_get from tokenizer.c, at the letter_quote
+               label. To actually share that code would be a
+               nightmare. But, it's unlikely to change and is small,
+               so duplicate it here. Note we don't need to catch all
+               of the errors, since they'll be caught when parsing the
+               expression. We just need to match the non-error
+               cases. Thus we can ignore \n in single-quoted strings,
+               for example. Or non-terminated strings. */
+            if (ch == quote_char) {
+                /* Does this match the string_type (single or triple
+                   quoted)? */
+                if (string_type == 3) {
+                    if (*ofs+2 < PyUnicode_GET_LENGTH(str) &&
+                        PyUnicode_READ(kind, data, *ofs+1) == ch &&
+                        PyUnicode_READ(kind, data, *ofs+2) == ch) {
+                        /* We're at the end of a triple quoted string. */
+                        *ofs += 2;
+                        string_type = 0;
+                        quote_char = 0;
+                        continue;
+                    }
+                } else {
+                    /* We're at the end of a normal string. */
+                    quote_char = 0;
+                    string_type = 0;
+                    continue;
+                }
+            }
+            /* We're inside a string, and not finished with the
+               string. If this is a backslash, skip the next char (it
+               might be an end quote that needs skipping). Otherwise,
+               just consume this character normally. */
+            if (ch == '\\' && *ofs+1 < PyUnicode_GET_LENGTH(str)) {
+                /* Just skip the next char, whatever it is. */
+                *ofs += 1;
+            }
+        } else if (ch == '\'' || ch == '"') {
+            /* Is this a triple quoted string? */
+            if (*ofs+2 < PyUnicode_GET_LENGTH(str) &&
+                PyUnicode_READ(kind, data, *ofs+1) == ch &&
+                PyUnicode_READ(kind, data, *ofs+2) == ch) {
+                string_type = 3;
+                *ofs += 2;
+            } else {
+                /* Start of a normal string. */
+                string_type = 1;
+            }
+            /* Start looking for the end of the string. */
+            quote_char = ch;
+        } else if (ch == '[' || ch == '{' || ch == '(') {
+            nested_depth++;
+        } else if (nested_depth != 0 &&
+                   (ch == ']' || ch == '}' || ch == ')')) {
+            nested_depth--;
+        } else if (ch == '#') {
+            /* Error: can't include a comment character, inside parens
+               or not. */
+            ast_error(c, n, "f-string cannot include '#'");
+            return -1;
+        } else if (nested_depth == 0 &&
+                   (ch == '!' || ch == ':' || ch == '}')) {
+            /* First, test for the special case of "!=". Since '=' is
+               not an allowed conversion character, nothing is lost in
+               this test. */
+            if (ch == '!' && *ofs+1 < PyUnicode_GET_LENGTH(str) &&
+                  PyUnicode_READ(kind, data, *ofs+1) == '=')
+                /* This isn't a conversion character, just continue. */
+                continue;
+
+            /* Normal way out of this loop. */
+            break;
+        } else {
+            /* Just consume this char and loop around. */
+        }
+    }
+    expr_end = *ofs;
+    /* If we leave this loop in a string or with mismatched parens, we
+       don't care. We'll get a syntax error when compiling the
+       expression. But, we can produce a better error message, so
+       let's just do that.*/
+    if (quote_char) {
+        ast_error(c, n, "f-string: unterminated string");
+        return -1;
+    }
+    if (nested_depth) {
+        ast_error(c, n, "f-string: mismatched '(', '{', or '['");
+        return -1;
+    }
+
+    /* Check for a conversion char, if present. */
+    if (*ofs >= PyUnicode_GET_LENGTH(str))
+        goto unexpected_end_of_string;
+    if (PyUnicode_READ(kind, data, *ofs) == '!') {
+        *ofs += 1;
+        if (*ofs >= PyUnicode_GET_LENGTH(str))
+            goto unexpected_end_of_string;
+
+        conversion = PyUnicode_READ(kind, data, *ofs);
+        *ofs += 1;
+
+        /* Validate the conversion. */
+        if (!(conversion == 's' || conversion == 'r'
+              || conversion == 'a')) {
+            ast_error(c, n, "f-string: invalid conversion character: "
+                            "expected 's', 'r', or 'a'");
+            return -1;
+        }
+    }
+
+    /* Check for the format spec, if present. */
+    if (*ofs >= PyUnicode_GET_LENGTH(str))
+        goto unexpected_end_of_string;
+    if (PyUnicode_READ(kind, data, *ofs) == ':') {
+        *ofs += 1;
+        if (*ofs >= PyUnicode_GET_LENGTH(str))
+            goto unexpected_end_of_string;
+
+        /* Parse the format spec. It is handed to fstring_parse one
+           recursion level deeper than the current one. */
+        format_spec = fstring_parse(str, ofs, recurse_lvl+1, c, n);
+        if (!format_spec)
+            return -1;
+    }
+
+    if (*ofs >= PyUnicode_GET_LENGTH(str) ||
+          PyUnicode_READ(kind, data, *ofs) != '}')
+        goto unexpected_end_of_string;
+
+    /* We're at a right brace. Consume it. */
+    assert(*ofs < PyUnicode_GET_LENGTH(str));
+    assert(PyUnicode_READ(kind, data, *ofs) == '}');
+    *ofs += 1;
+
+    /* Compile the expression. */
+    simple_expression = fstring_expression_compile(str, expr_start, expr_end,
+                                                   c->c_arena);
+    if (!simple_expression)
+        return -1;
+
+    /* And now create the FormattedValue node that represents this entire
+       expression with the conversion and format spec. */
+    *expression = FormattedValue(simple_expression, (int)conversion,
+                                 format_spec, LINENO(n), n->n_col_offset,
+                                 c->c_arena);
+    if (!*expression)
+        return -1;
+
+    return 0;
+
+unexpected_end_of_string:
+    ast_error(c, n, "f-string: expecting '}'");
+    return -1;
+}
+
+/* Pull the next literal and (optionally) the expression that follows
+   it out of str, starting at *ofs. Both *literal and *expression must
+   be NULL on entry.
+
+   Return values:
+
+   -1: error. Any literal already found has been released and
+       *literal reset to NULL.
+
+    0: we have a literal (possibly zero length) and an expression
+       (none at all if we stopped at the end of the string, or at the
+       '}' that ends a nested f-string).
+
+    1: we have a literal but did not look for an expression, and the
+       caller should call us again. This is how doubled braces are
+       dealt with.
+
+   Called repeatedly on the string 'a{{b{0}c', this function yields:
+
+   1. literal 'a{', no expression, return value 1. There is no
+      expression, but the 1 says we're not finished yet.
+
+   2. literal 'b' and expression '0', return value 0. Having an
+      expression means we're not finished.
+
+   3. literal 'c', no expression, return value 0. A return value of 0
+      together with no expression means we're finished.
+*/
+static int
+fstring_find_literal_and_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
+                              PyObject **literal, expr_ty *expression,
+                              struct compiling *c, const node *n)
+{
+    int status;
+
+    assert(*literal == NULL && *expression == NULL);
+
+    /* The literal text always comes first. */
+    status = fstring_find_literal(str, ofs, literal, recurse_lvl, c, n);
+    if (status == 1)
+        /* Doubled brace: hand back the literal without looking for an
+           expression. */
+        return 1;
+    if (status < 0)
+        goto fail;
+    assert(status == 0);
+
+    assert(*ofs <= PyUnicode_GET_LENGTH(str));
+
+    /* Only a '{' introduces an expression. At the end of the string,
+       or at the '}' closing a nested f-string, there is none; the
+       top-level case of an unexpected '}' is handled later. */
+    if (*ofs < PyUnicode_GET_LENGTH(str) &&
+        PyUnicode_READ_CHAR(str, *ofs) != '}') {
+        assert(PyUnicode_READ_CHAR(str, *ofs) == '{');
+
+        if (fstring_find_expr(str, ofs, recurse_lvl, expression, c, n) < 0)
+            goto fail;
+    }
+    return 0;
+
+fail:
+    Py_CLEAR(*literal);
+    return -1;
+}
+
+#define EXPRLIST_N_CACHED  64  /* Length of ExprList's inline 'data' array. */
+
+typedef struct {
+    /* Incrementally build an array of expr_ty, to be used in an
+       asdl_seq. Cache some small but reasonably sized number of
+       expr_ty's, and then after that start dynamically allocating,
+       doubling the number allocated each time. Note that the f-string
+       f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one
+       Str for the literal 'a'. So you add expr_ty's about twice as
+       fast as you add expressions in an f-string. */
+
+    Py_ssize_t allocated;  /* Number we've allocated. */
+    Py_ssize_t size;       /* Number we've used. */
+    expr_ty    *p;         /* Pointer to the memory we're actually
+                              using. Will point to 'data' until we
+                              start dynamically allocating. */
+    expr_ty    data[EXPRLIST_N_CACHED];
+} ExprList;
+
+#ifdef NDEBUG
+#define ExprList_check_invariants(l)
+#else
+/* Debug-only sanity check: the list must be "live" (not yet
+   deallocated), and while it holds no more than EXPRLIST_N_CACHED
+   entries it must still be using its inline array. */
+static void
+ExprList_check_invariants(ExprList *l)
+{
+    assert(l->size >= 0);
+    assert(l->p != NULL);
+    assert(l->size > EXPRLIST_N_CACHED || l->p == l->data);
+}
+#endif
+
+/* Put l into its empty state: nothing used yet, and p aimed at the
+   inline data array, which is what gets used until we start
+   allocating dynamically. */
+static void
+ExprList_Init(ExprList *l)
+{
+    l->p = l->data;
+    l->size = 0;
+    l->allocated = EXPRLIST_N_CACHED;
+
+    ExprList_check_invariants(l);
+}
+
+/* Append exp to l, doubling the storage when it is full.
+
+   Returns 0 on success. On allocation failure, returns -1 with
+   MemoryError set; l is left exactly as it was (still valid, still
+   owning its memory), so the caller can pass it to ExprList_Dealloc
+   as usual. */
+static int
+ExprList_Append(ExprList *l, expr_ty exp)
+{
+    ExprList_check_invariants(l);
+    if (l->size >= l->allocated) {
+        /* We need to alloc (or realloc) the memory. */
+        Py_ssize_t new_size = l->allocated * 2;
+        expr_ty *new_p;
+
+        /* See if we've ever allocated anything dynamically. */
+        if (l->p == l->data) {
+            Py_ssize_t i;
+            /* We're still using the cached data. Switch to
+               alloc-ing. Allocate into a temporary so that a failure
+               doesn't clobber l->p. */
+            new_p = PyMem_RawMalloc(sizeof(expr_ty) * new_size);
+            if (!new_p) {
+                /* The raw allocator doesn't set an exception. */
+                PyErr_NoMemory();
+                return -1;
+            }
+            /* Copy the cached data into the new buffer. */
+            for (i = 0; i < l->size; i++)
+                new_p[i] = l->data[i];
+        } else {
+            /* Just realloc. If it fails the old block is still valid
+               and still owned by l; ExprList_Dealloc will free it. */
+            new_p = PyMem_RawRealloc(l->p, sizeof(expr_ty) * new_size);
+            if (!new_p) {
+                PyErr_NoMemory();
+                return -1;
+            }
+        }
+
+        l->p = new_p;
+        l->allocated = new_size;
+        assert(l->allocated == 2 * l->size);
+    }
+
+    l->p[l->size++] = exp;
+
+    ExprList_check_invariants(l);
+    return 0;
+}
+
+/* Release the dynamically allocated buffer, if there is one, and
+   mark the list as dead (p == NULL, size == -1). The embedded 'data'
+   array is never freed, and a NULL 'p' is skipped rather than freed
+   (with assertions enabled, the invariant check below would already
+   have rejected a NULL 'p'). */
+static void
+ExprList_Dealloc(ExprList *l)
+{
+    ExprList_check_invariants(l);
+
+    /* Only a pointer that is neither NULL nor the embedded array
+       refers to dynamically allocated memory. */
+    if (l->p != NULL && l->p != l->data) {
+        PyMem_RawFree(l->p);
+    }
+
+    l->size = -1;
+    l->p = NULL;
+}
+
+/* Copy the collected expressions into a new asdl_seq created with
+   _Py_asdl_seq_new(size, arena), then deallocate the list. The list
+   is deallocated whether or not the sequence could be created.
+   Returns the new sequence, or NULL if _Py_asdl_seq_new returned
+   NULL. */
+static asdl_seq *
+ExprList_Finish(ExprList *l, PyArena *arena)
+{
+    asdl_seq *result;
+    Py_ssize_t pos;
+
+    ExprList_check_invariants(l);
+
+    result = _Py_asdl_seq_new(l->size, arena);
+    if (result != NULL) {
+        /* Transfer every element, preserving order. */
+        pos = 0;
+        while (pos < l->size) {
+            asdl_seq_SET(result, pos, l->p[pos]);
+            pos++;
+        }
+    }
+
+    ExprList_Dealloc(l);
+    return result;
+}
+
+/* The FstringParser is designed to add a mix of strings and
+   f-strings, and concat them together as needed. Ultimately, it
+   generates an expr_ty. */
+typedef struct {
+    PyObject *last_str;  /* Literal text accumulated but not yet
+                            turned into a Str node, or NULL if there
+                            is none. An owned reference: it is
+                            released by FstringParser_Dealloc. */
+    ExprList expr_list;  /* The nodes collected so far, in order. */
+} FstringParser;
+
+#ifndef NDEBUG
+/* Sanity check used when assertions are enabled: a pending literal,
+   if present, must be an exact unicode object, and the embedded
+   expression list must satisfy its own invariants. */
+static void
+FstringParser_check_invariants(FstringParser *state)
+{
+    assert(state->last_str == NULL ||
+           PyUnicode_CheckExact(state->last_str));
+    ExprList_check_invariants(&state->expr_list);
+}
+#else
+/* Assertions are compiled out: the check expands to nothing. */
+#define FstringParser_check_invariants(state)
+#endif
+
+/* Prepare a caller-provided FstringParser for use: an empty
+   expression list and no pending literal. */
+static void
+FstringParser_Init(FstringParser *state)
+{
+    ExprList_Init(&state->expr_list);
+    state->last_str = NULL;
+
+    FstringParser_check_invariants(state);
+}
+
+/* Release what the parser state still holds: the reference to the
+   pending literal (if any) and the expression list's storage. */
+static void
+FstringParser_Dealloc(FstringParser *state)
+{
+    FstringParser_check_invariants(state);
+
+    if (state->last_str != NULL) {
+        Py_DECREF(state->last_str);
+    }
+    ExprList_Dealloc(&state->expr_list);
+}
+
+/* Build a Str node from the unicode object in *str. The caller's
+   reference is taken over in every case: *str is set to NULL before
+   anything else happens. The object is added to the compiling arena;
+   if that fails it is decref'd here and NULL is returned. Otherwise
+   the result of Str() is returned, using n's line number and column
+   offset. */
+static expr_ty
+make_str_node_and_del(PyObject **str, struct compiling *c, const node* n)
+{
+    PyObject *text = *str;
+    int added;
+
+    *str = NULL;
+    assert(PyUnicode_CheckExact(text));
+
+    added = PyArena_AddPyObject(c->c_arena, text);
+    if (added >= 0)
+        return Str(text, LINENO(n), n->n_col_offset, c->c_arena);
+
+    /* The arena did not accept the object, so it is still ours. */
+    Py_DECREF(text);
+    return NULL;
+}
+
+/* Fold a regular (non-f-string) literal into the pending literal
+   text. The reference to str is consumed on every path. Returns 0 on
+   success, or -1 if the concatenation failed, in which case last_str
+   is left NULL. */
+static int
+FstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
+{
+    PyObject *joined;
+
+    FstringParser_check_invariants(state);
+
+    assert(PyUnicode_CheckExact(str));
+
+    /* An empty literal contributes nothing; just drop it. */
+    if (PyUnicode_GET_LENGTH(str) == 0) {
+        Py_DECREF(str);
+        return 0;
+    }
+
+    if (state->last_str == NULL) {
+        /* Nothing pending yet: adopt the reference as-is. */
+        state->last_str = str;
+        FstringParser_check_invariants(state);
+        return 0;
+    }
+
+    /* Append to the pending text. Both inputs are released whether
+       or not the concatenation worked. */
+    joined = PyUnicode_Concat(state->last_str, str);
+    Py_DECREF(state->last_str);
+    Py_DECREF(str);
+    state->last_str = joined;
+    if (joined == NULL)
+        return -1;
+
+    FstringParser_check_invariants(state);
+    return 0;
+}
+
+/* Parse an f-string. The f-string is in str, starting at ofs, with no 'f'
+   or quotes. str is not decref'd, since we don't know if it's used elsewhere.
+   And if we're only looking at a part of a string, then decref'ing is
+   definitely not the right thing to do!
+
+   Literal text is accumulated in state->last_str. Each expression that
+   is found is appended to state->expr_list, preceded by a Str node made
+   from whatever literal text was pending at that point.
+
+   Returns 0 on success and -1 on error. The state is not deallocated
+   here on error; that is left to the caller. */
+static int
+FstringParser_ConcatFstring(FstringParser *state, PyObject *str,
+                            Py_ssize_t *ofs, int recurse_lvl,
+                            struct compiling *c, const node *n)
+{
+    FstringParser_check_invariants(state);
+
+    /* Parse the f-string. */
+    while (1) {
+        PyObject *literal = NULL;
+        expr_ty expression = NULL;
+
+        /* If there's a zero length literal in front of the
+           expression, literal will be NULL. If we're at the end of
+           the f-string, expression will be NULL (unless result == 1,
+           see below). */
+        int result = fstring_find_literal_and_expr(str, ofs, recurse_lvl,
+                                                   &literal, &expression,
+                                                   c, n);
+        /* NOTE(review): this assumes fstring_find_literal_and_expr does
+           not hand back a literal when it fails -- confirm. */
+        if (result < 0)
+            return -1;
+
+        /* Add the literal, if any. */
+        if (!literal) {
+            /* Do nothing. Just leave last_str alone (and possibly
+               NULL). */
+        } else if (!state->last_str) {
+            state->last_str = literal;
+            literal = NULL;
+        } else {
+            /* We have a literal, concatenate it. */
+            assert(PyUnicode_GET_LENGTH(literal) != 0);
+            if (FstringParser_ConcatAndDel(state, literal) < 0)
+                return -1;
+            literal = NULL;
+        }
+        assert(!state->last_str ||
+               PyUnicode_GET_LENGTH(state->last_str) != 0);
+
+        /* We've dealt with the literal now. It can't be leaked on further
+           errors. */
+        assert(literal == NULL);
+
+        /* See if we should just loop around to get the next literal
+           and expression, while ignoring the expression this
+           time. This is used for un-doubling braces, as an
+           optimization. */
+        if (result == 1)
+            continue;
+
+        if (!expression)
+            /* We're done with this f-string. */
+            break;
+
+        /* We know we have an expression. Convert any existing string
+           to a Str node, so that it comes before the expression. */
+        if (state->last_str) {
+            /* Named str_node so it does not shadow the 'str'
+               parameter. */
+            expr_ty str_node = make_str_node_and_del(&state->last_str, c, n);
+            if (!str_node ||
+                ExprList_Append(&state->expr_list, str_node) < 0)
+                return -1;
+        }
+
+        if (ExprList_Append(&state->expr_list, expression) < 0)
+            return -1;
+    }
+
+    assert(*ofs <= PyUnicode_GET_LENGTH(str));
+
+    /* If recurse_lvl is zero, then we must be at the end of the
+       string. Otherwise, we must be at a right brace. */
+
+    if (recurse_lvl == 0 && *ofs < PyUnicode_GET_LENGTH(str)) {
+        ast_error(c, n, "f-string: unexpected end of string");
+        return -1;
+    }
+    /* *ofs may equal the length here, so check the bound before
+       reading the character. Running off the end is reported the
+       same way as finding some other character. */
+    if (recurse_lvl != 0 &&
+        (*ofs >= PyUnicode_GET_LENGTH(str) ||
+         PyUnicode_READ_CHAR(str, *ofs) != '}')) {
+        ast_error(c, n, "f-string: expecting '}'");
+        return -1;
+    }
+
+    FstringParser_check_invariants(state);
+    return 0;
+}
+
+/* Convert the partial state reflected in last_str and expr_list to an
+   expr_ty. The expr_ty can be a Str (no expressions were collected),
+   the single collected node (exactly one node in the list), or a
+   JoinedStr wrapping all of them.
+
+   Returns NULL on error. Whatever the state still owned has been
+   released by the time this function returns, so the caller must not
+   call FstringParser_Dealloc on it afterwards. */
+static expr_ty
+FstringParser_Finish(FstringParser *state, struct compiling *c,
+                     const node *n)
+{
+    asdl_seq *seq;
+
+    FstringParser_check_invariants(state);
+
+    /* If we're just a constant string with no expressions, return
+       that. */
+    if (state->expr_list.size == 0) {
+        if (!state->last_str) {
+            /* Create a zero length string. */
+            state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
+            if (!state->last_str)
+                goto error;
+        }
+        /* make_str_node_and_del clears last_str even when it fails. */
+        return make_str_node_and_del(&state->last_str, c, n);
+    }
+
+    /* Create a Str node out of last_str, if needed. It will be the
+       last node in our expression list. */
+    if (state->last_str) {
+        expr_ty str = make_str_node_and_del(&state->last_str, c, n);
+        if (!str || ExprList_Append(&state->expr_list, str) < 0)
+            goto error;
+    }
+    /* This has already been freed. */
+    assert(state->last_str == NULL);
+
+    seq = ExprList_Finish(&state->expr_list, c->c_arena);
+    if (!seq) {
+        /* ExprList_Finish has already deallocated expr_list, and
+           last_str is NULL, so there is nothing left to release.
+           Going through FstringParser_Dealloc here would deallocate
+           the list a second time and trip its invariant checks. */
+        return NULL;
+    }
+
+    /* If there's only one expression, return it. Otherwise, we need
+       to join them together. */
+    if (seq->size == 1)
+        return seq->elements[0];
+
+    return JoinedStr(seq, LINENO(n), n->n_col_offset, c->c_arena);
+
+error:
+    /* Only reached while expr_list has not yet been deallocated. */
+    FstringParser_Dealloc(state);
+    return NULL;
+}
+
+/* Parse the f-string text held in str (no 'f' prefix, no quotes),
+   beginning at *ofs, and return the resulting expr_ty, or NULL on
+   error. str is only borrowed: it is not decref'd here. */
+static expr_ty
+fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
+              struct compiling *c, const node *n)
+{
+    FstringParser parser;
+    int status;
+
+    FstringParser_Init(&parser);
+
+    status = FstringParser_ConcatFstring(&parser, str, ofs, recurse_lvl,
+                                         c, n);
+    if (status >= 0)
+        return FstringParser_Finish(&parser, c, n);
+
+    /* Parsing failed: release what was collected so far. */
+    FstringParser_Dealloc(&parser);
+    return NULL;
+}
+
+/* n is a Python string literal, including the bracketing quote
+   characters, and r, b, u, &/or f prefixes (if any), and embedded
+   escape sequences (if any). parsestr parses it, and returns the
+   decoded Python string object.  If the string is an f-string, set
+   *fmode and return the unparsed string object.
+*/
 static PyObject *
-parsestr(struct compiling *c, const node *n, int *bytesmode)
+parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
 {
     size_t len;
     const char *s = STR(n);
@@ -4027,15 +4853,24 @@ parsestr(struct compiling *c, const node *n, int *bytesmode)
                 quote = *++s;
                 rawmode = 1;
             }
+            else if (quote == 'f' || quote == 'F') {
+                quote = *++s;
+                *fmode = 1;
+            }
             else {
                 break;
             }
         }
     }
+    if (*fmode && *bytesmode) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
     if (quote != '\'' && quote != '\"') {
         PyErr_BadInternalCall();
         return NULL;
     }
+    /* Skip the leading quote char. */
     s++;
     len = strlen(s);
     if (len > INT_MAX) {
@@ -4044,12 +4879,17 @@ parsestr(struct compiling *c, const node *n, int *bytesmode)
         return NULL;
     }
     if (s[--len] != quote) {
+        /* Last quote char must match the first. */
         PyErr_BadInternalCall();
         return NULL;
     }
     if (len >= 4 && s[0] == quote && s[1] == quote) {
+        /* A triple quoted string. We've already skipped one quote at
+           the start and one at the end of the string. Now skip the
+           two at the start. */
         s += 2;
         len -= 2;
+        /* And check that the last two match. */
         if (s[--len] != quote || s[--len] != quote) {
             PyErr_BadInternalCall();
             return NULL;
@@ -4088,51 +4928,84 @@ parsestr(struct compiling *c, const node *n, int *bytesmode)
         }
     }
     return PyBytes_DecodeEscape(s, len, NULL, 1,
-                                 need_encoding ? c->c_encoding : NULL);
+                                need_encoding ? c->c_encoding : NULL);
 }
 
-/* Build a Python string object out of a STRING+ atom.  This takes care of
- * compile-time literal catenation, calling parsestr() on each piece, and
- * pasting the intermediate results together.
- */
-static PyObject *
-parsestrplus(struct compiling *c, const node *n, int *bytesmode)
+/* Accepts a STRING+ atom, and produces an expr_ty node. Run through
+   each STRING atom, and process it as needed. For bytes, just
+   concatenate them together, and the result will be a Bytes node. For
+   normal strings and f-strings, concatenate them together. The result
+   will be a Str node if there were no f-strings; a FormattedValue
+   node if there's just an f-string (with no leading or trailing
+   literals), or a JoinedStr node if there are multiple f-strings or
+   any literals involved.
+
+   Returns NULL on error. Mixing bytes and non-bytes literals is
+   reported through ast_error. */
+static expr_ty
+parsestrplus(struct compiling *c, const node *n)
 {
-    PyObject *v;
+    int bytesmode = 0;
+    PyObject *bytes_str = NULL;
     int i;
-    REQ(CHILD(n, 0), STRING);
-    v = parsestr(c, CHILD(n, 0), bytesmode);
-    if (v != NULL) {
-        /* String literal concatenation */
-        for (i = 1; i < NCH(n); i++) {
-            PyObject *s;
-            int subbm = 0;
-            s = parsestr(c, CHILD(n, i), &subbm);
-            if (s == NULL)
-                goto onError;
-            if (*bytesmode != subbm) {
-                ast_error(c, n, "cannot mix bytes and nonbytes literals");
-                Py_DECREF(s);
-                goto onError;
-            }
-            if (PyBytes_Check(v) && PyBytes_Check(s)) {
-                PyBytes_ConcatAndDel(&v, s);
-                if (v == NULL)
-                    goto onError;
-            }
-            else {
-                PyObject *temp = PyUnicode_Concat(v, s);
-                Py_DECREF(s);
-                Py_DECREF(v);
-                v = temp;
-                if (v == NULL)
-                    goto onError;
+
+    FstringParser state;
+    FstringParser_Init(&state);
+
+    for (i = 0; i < NCH(n); i++) {
+        int this_bytesmode = 0;
+        int this_fmode = 0;
+        PyObject *s;
+
+        REQ(CHILD(n, i), STRING);
+        s = parsestr(c, CHILD(n, i), &this_bytesmode, &this_fmode);
+        if (!s)
+            goto error;
+
+        /* Check that we're not mixing bytes with unicode. The first
+           piece decides the mode. s has not been handed to anyone
+           yet, so it is released here before bailing out. */
+        if (i != 0 && bytesmode != this_bytesmode) {
+            ast_error(c, n, "cannot mix bytes and nonbytes literals");
+            Py_DECREF(s);
+            goto error;
+        }
+        bytesmode = this_bytesmode;
+
+        assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s));
+
+        if (bytesmode) {
+            /* For bytes, concat as we go. */
+            if (i == 0) {
+                /* First time, just remember this value. */
+                bytes_str = s;
+            } else {
+                PyBytes_ConcatAndDel(&bytes_str, s);
+                if (!bytes_str)
+                    goto error;
             }
+        } else if (this_fmode) {
+            /* This is an f-string. Concatenate and decref it.
+               FstringParser_ConcatFstring does not decref s, so that
+               is done here whether or not parsing succeeded. */
+            Py_ssize_t ofs = 0;
+            int result = FstringParser_ConcatFstring(&state, s, &ofs, 0, c, n);
+            Py_DECREF(s);
+            if (result < 0)
+                goto error;
+        } else {
+            /* This is a regular string. Concatenate it.
+               FstringParser_ConcatAndDel decrefs s itself. */
+            if (FstringParser_ConcatAndDel(&state, s) < 0)
+                goto error;
         }
     }
-    return v;
+    if (bytesmode) {
+        /* Just return the bytes object and we're done. If it cannot
+           be added to the arena, it is released at the error
+           label. */
+        if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0)
+            goto error;
+        return Bytes(bytes_str, LINENO(n), n->n_col_offset, c->c_arena);
+    }
+
+    /* We're not a bytes string, bytes_str should never have been set. */
+    assert(bytes_str == NULL);
+
+    return FstringParser_Finish(&state, c, n);
 
-  onError:
-    Py_XDECREF(v);
+error:
+    Py_XDECREF(bytes_str);
+    FstringParser_Dealloc(&state);
     return NULL;
 }
diff --git a/Python/compile.c b/Python/compile.c
index a6884ec..3a49ece 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -731,6 +731,7 @@ compiler_set_qualname(struct compiler *c)
     return 1;
 }
 
+
 /* Allocate a new block and return a pointer to it.
    Returns NULL on error.
 */
@@ -3209,6 +3210,117 @@ compiler_call(struct compiler *c, expr_ty e)
                                 e->v.Call.keywords);
 }
 
+/* Emit bytecode for a JoinedStr node, such as the one produced for
+   "'x=' f'{42}'". The generated code loads the 'join' attribute of
+   an empty string constant, evaluates every element of
+   e->v.JoinedStr.values, packs the results into a list, and calls
+   join with that list: the equivalent of ''.join([...]) at run
+   time. There are probably better ways of doing this.
+
+   Returns 1 on success, and 0 if one of the cached strings could not
+   be created. NOTE(review): the ADDOP_* and VISIT_SEQ macros
+   presumably also return 0 from this function on failure -- confirm
+   against their definitions. */
+static int
+compiler_joined_str(struct compiler *c, expr_ty e)
+{
+    /* Created on first use and kept for later calls. */
+    static PyObject *empty_string;
+    static PyObject *join_string;
+
+    if (empty_string == NULL) {
+        empty_string = PyUnicode_FromString("");
+        if (empty_string == NULL)
+            return 0;
+    }
+    if (join_string == NULL) {
+        join_string = PyUnicode_FromString("join");
+        if (join_string == NULL)
+            return 0;
+    }
+
+    /* ''.join */
+    ADDOP_O(c, LOAD_CONST, empty_string, consts);
+    ADDOP_NAME(c, LOAD_ATTR, join_string, names);
+
+    /* [part0, part1, ...] */
+    VISIT_SEQ(c, expr, e->v.JoinedStr.values);
+    ADDOP_I(c, BUILD_LIST, asdl_seq_LEN(e->v.JoinedStr.values));
+
+    /* Call join with the list as its only argument. */
+    ADDOP_I(c, CALL_FUNCTION, 1);
+    return 1;
+}
+
+/* Emit bytecode for a FormattedValue node.
+
+   The generated code looks up format() as a global, optionally looks
+   up and applies str(), repr() or ascii() to the value (conversion
+   's', 'r' or 'a'; -1 means no conversion), and then calls
+   format(value) or format(value, format_spec).
+
+   Because these functions are looked up by name as globals, you can
+   break this code, or make it do odd things, by redefining them.
+
+   Returns 1 on success. Returns 0 if a cached name could not be
+   created, or with SystemError set if the conversion character is
+   not recognized. */
+static int
+compiler_formatted_value(struct compiler *c, expr_ty e)
+{
+    /* Interned names, created on first use and kept for later calls. */
+    static PyObject *format_string;
+    static PyObject *str_string;
+    static PyObject *repr_string;
+    static PyObject *ascii_string;
+
+    PyObject *converter = NULL;
+    int conv = e->v.FormattedValue.conversion;
+    int nargs = 1;
+
+    if (format_string == NULL) {
+        format_string = PyUnicode_InternFromString("format");
+        if (format_string == NULL)
+            return 0;
+    }
+    if (str_string == NULL) {
+        str_string = PyUnicode_InternFromString("str");
+        if (str_string == NULL)
+            return 0;
+    }
+    if (repr_string == NULL) {
+        repr_string = PyUnicode_InternFromString("repr");
+        if (repr_string == NULL)
+            return 0;
+    }
+    if (ascii_string == NULL) {
+        ascii_string = PyUnicode_InternFromString("ascii");
+        if (ascii_string == NULL)
+            return 0;
+    }
+
+    /* format goes on the stack first; it is called last. */
+    ADDOP_NAME(c, LOAD_GLOBAL, format_string, names);
+
+    /* Pick the conversion function, if one was requested, and push
+       it so that it sits just below the value. */
+    if (conv != -1) {
+        if (conv == 's') {
+            converter = str_string;
+        }
+        else if (conv == 'r') {
+            converter = repr_string;
+        }
+        else if (conv == 'a') {
+            converter = ascii_string;
+        }
+        else {
+            PyErr_SetString(PyExc_SystemError,
+                            "Unrecognized conversion character");
+            return 0;
+        }
+        ADDOP_NAME(c, LOAD_GLOBAL, converter, names);
+    }
+
+    /* The value being formatted. */
+    VISIT(c, expr, e->v.FormattedValue.value);
+
+    /* Apply the conversion function pushed above, if any. */
+    if (converter != NULL) {
+        ADDOP_I(c, CALL_FUNCTION, 1);
+    }
+
+    /* With a format spec, format() receives it as a second argument;
+       without one, the single argument form is used. */
+    if (e->v.FormattedValue.format_spec) {
+        VISIT(c, expr, e->v.FormattedValue.format_spec);
+        nargs = 2;
+    }
+    ADDOP_I(c, CALL_FUNCTION, nargs);
+
+    return 1;
+}
+
 /* shared code between compiler_call and compiler_class */
 static int
 compiler_call_helper(struct compiler *c,
@@ -3878,6 +3990,10 @@ compiler_visit_expr(struct compiler *c, expr_ty e)
     case Str_kind:
         ADDOP_O(c, LOAD_CONST, e->v.Str.s, consts);
         break;
+    case JoinedStr_kind:
+        return compiler_joined_str(c, e);
+    case FormattedValue_kind:
+        return compiler_formatted_value(c, e);
     case Bytes_kind:
         ADDOP_O(c, LOAD_CONST, e->v.Bytes.s, consts);
         break;
@@ -4784,4 +4900,3 @@ PyAST_Compile(mod_ty mod, const char *filename, PyCompilerFlags *flags,
 {
     return PyAST_CompileEx(mod, filename, flags, -1, arena);
 }
-
diff --git a/Python/symtable.c b/Python/symtable.c
index 64910d8..8431d51 100644
--- a/Python/symtable.c
+++ b/Python/symtable.c
@@ -1439,6 +1439,14 @@ symtable_visit_expr(struct symtable *st, expr_ty e)
         VISIT_SEQ(st, expr, e->v.Call.args);
         VISIT_SEQ_WITH_NULL(st, keyword, e->v.Call.keywords);
         break;
+    case FormattedValue_kind:
+        VISIT(st, expr, e->v.FormattedValue.value);
+        if (e->v.FormattedValue.format_spec)
+            VISIT(st, expr, e->v.FormattedValue.format_spec);
+        break;
+    case JoinedStr_kind:
+        VISIT_SEQ(st, expr, e->v.JoinedStr.values);
+        break;
     case Num_kind:
     case Str_kind:
     case Bytes_kind:
author	Eric V. Smith <eric@trueblade.com>	2015-09-19 18:51:32 (GMT)
committer	Eric V. Smith <eric@trueblade.com>	2015-09-19 18:51:32 (GMT)
commit	235a6f09847ad554d8bf073d4e1d58d1e398ae8c (patch)
tree	36ff217247cfcd108914065cea8ddf3ad056d192
parent	aed8830af3bb5a79878cf0f603ebbd8a37f5b36e (diff)
download	cpython-235a6f09847ad554d8bf073d4e1d58d1e398ae8c.zip cpython-235a6f09847ad554d8bf073d4e1d58d1e398ae8c.tar.gz cpython-235a6f09847ad554d8bf073d4e1d58d1e398ae8c.tar.bz2