From c314e3e829943b186e1c894071f00c613433cfe5 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
 <31488909+miss-islington@users.noreply.github.com>
Date: Mon, 24 Jan 2022 19:08:42 -0800
Subject: bpo-46503: Prevent an assert from firing when parsing some invalid \N
 sequences in f-strings. (GH-30865) (30867)

* bpo-46503: Prevent an assert from firing.  Also fix one nearby tiny PEP-7 nit.

* Added blurb.
(cherry picked from commit 0daf72194bd4e31de7f12020685bb39a14d6f45e)

Co-authored-by: Eric V. Smith <ericvsmith@users.noreply.github.com>

Co-authored-by: Eric V. Smith <ericvsmith@users.noreply.github.com>
---
 Lib/test/test_fstring.py                                 |  4 ++++
 .../2022-01-24-21-24-41.bpo-46503.4UrPsE.rst             |  1 +
 Parser/pegen/parse_string.c                              | 16 ++++++++++++++--
 3 files changed, 19 insertions(+), 2 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-01-24-21-24-41.bpo-46503.4UrPsE.rst

diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index 518ebdf..92a4d22 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -747,12 +747,16 @@ x = (
         # differently inside f-strings.
         self.assertAllRaise(SyntaxError, r"\(unicode error\) 'unicodeescape' codec can't decode bytes in position .*: malformed \\N character escape",
                             [r"f'\N'",
+                             r"f'\N '",
+                             r"f'\N  '",  # See bpo-46503.
                              r"f'\N{'",
                              r"f'\N{GREEK CAPITAL LETTER DELTA'",
 
                              # Here are the non-f-string versions,
                              #  which should give the same errors.
                              r"'\N'",
+                             r"'\N '",
+                             r"'\N  '",
                              r"'\N{'",
                              r"'\N{GREEK CAPITAL LETTER DELTA'",
                              ])
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-01-24-21-24-41.bpo-46503.4UrPsE.rst b/Misc/NEWS.d/next/Core and Builtins/2022-01-24-21-24-41.bpo-46503.4UrPsE.rst
new file mode 100644
index 0000000..e48028d
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-01-24-21-24-41.bpo-46503.4UrPsE.rst	
@@ -0,0 +1 @@
+Fix an assert when parsing some invalid \N escape sequences in f-strings.
diff --git a/Parser/pegen/parse_string.c b/Parser/pegen/parse_string.c
index f1df2c4..af350b3 100644
--- a/Parser/pegen/parse_string.c
+++ b/Parser/pegen/parse_string.c
@@ -444,12 +444,23 @@ fstring_find_literal(Parser *p, const char **str, const char *end, int raw,
         if (!raw && ch == '\\' && s < end) {
             ch = *s++;
             if (ch == 'N') {
+                /* We need to look at and skip matching braces for "\N{name}"
+                   sequences because otherwise we'll think the opening '{'
+                   starts an expression, which is not the case with "\N".
+                   Keep looking for either a matched '{' '}' pair, or the end
+                   of the string. */
+
                 if (s < end && *s++ == '{') {
                     while (s < end && *s++ != '}') {
                     }
                     continue;
                 }
-                break;
+
+                /* This is an invalid "\N" sequence, since it's a "\N" not
+                   followed by a "{".  Just keep parsing this literal.  This
+                   error will be caught later by
+                   decode_unicode_with_escapes(). */
+                continue;
             }
             if (ch == '{' && warn_invalid_escape_sequence(p, ch, t) < 0) {
                 return -1;
@@ -493,7 +504,8 @@ done:
             *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
                                                     s - literal_start,
                                                     NULL, NULL);
-        } else {
+        }
+        else {
             *literal = decode_unicode_with_escapes(p, literal_start,
                                                    s - literal_start, t);
         }
-- 
cgit v0.12