bpo-29104: Fixed parsing backslashes in f-strings. (#490)

author: Serhiy Storchaka <storchaka@gmail.com> 2017-05-25 10:33:55 (GMT)
committer: GitHub <noreply@github.com> 2017-05-25 10:33:55 (GMT)
commit: 0cd7a3f196cf34d9bb0a52e61327f7fe289d9750 (patch)
tree: 8e1758a3b7a031a3c52545f8b5d462ddb08ad18e /Python/ast.c
parent: d1c3c13fedaf62b71445ccd048e395aa4a7d510f (diff)
download: cpython-0cd7a3f196cf34d9bb0a52e61327f7fe289d9750.zip cpython-0cd7a3f196cf34d9bb0a52e61327f7fe289d9750.tar.gz cpython-0cd7a3f196cf34d9bb0a52e61327f7fe289d9750.tar.bz2
1 file changed, 32 insertions, 21 deletions
diff --git a/Python/ast.c b/Python/ast.c
index d8941f0..205c711 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -4197,9 +4197,11 @@ decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
     while (s < end) {
         if (*s == '\\') {
             *p++ = *s++;
-            if (*s & 0x80) {
+            if (s >= end || *s & 0x80) {
                 strcpy(p, "u005c");
                 p += 5;
+                if (s >= end)
+                    break;
             }
         }
         if (*s & 0x80) { /* XXX inefficient */
@@ -4352,30 +4354,37 @@ fstring_find_literal(const char **str, const char *end, int raw,
        brace (which isn't part of a unicode name escape such as
        "\N{EULER CONSTANT}"), or the end of the string. */
 
-    const char *literal_start = *str;
-    const char *literal_end;
-    int in_named_escape = 0;
+    const char *s = *str;
+    const char *literal_start = s;
     int result = 0;
 
     assert(*literal == NULL);
-    for (; *str < end; (*str)++) {
-        char ch = **str;
-        if (!in_named_escape && ch == '{' && (*str)-literal_start >= 2 &&
-            *(*str-2) == '\\' && *(*str-1) == 'N') {
-            in_named_escape = 1;
-        } else if (in_named_escape && ch == '}') {
-            in_named_escape = 0;
-        } else if (ch == '{' || ch == '}') {
+    while (s < end) {
+        char ch = *s++;
+        if (!raw && ch == '\\' && s < end) {
+            ch = *s++;
+            if (ch == 'N') {
+                if (s < end && *s++ == '{') {
+                    while (s < end && *s++ != '}') {
+                    }
+                    continue;
+                }
+                break;
+            }
+            if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
+                return -1;
+            }
+        }
+        if (ch == '{' || ch == '}') {
             /* Check for doubled braces, but only at the top level. If
                we checked at every level, then f'{0:{3}}' would fail
                with the two closing braces. */
             if (recurse_lvl == 0) {
-                if (*str+1 < end && *(*str+1) == ch) {
+                if (s < end && *s == ch) {
                     /* We're going to tell the caller that the literal ends
                        here, but that they should continue scanning. But also
                        skip over the second brace when we resume scanning. */
-                    literal_end = *str+1;
-                    *str += 2;
+                    *str = s + 1;
                     result = 1;
                     goto done;
                 }
@@ -4383,6 +4392,7 @@ fstring_find_literal(const char **str, const char *end, int raw,
                 /* Where a single '{' is the start of a new expression, a
                    single '}' is not allowed. */
                 if (ch == '}') {
+                    *str = s - 1;
                     ast_error(c, n, "f-string: single '}' is not allowed");
                     return -1;
                 }
@@ -4390,21 +4400,22 @@ fstring_find_literal(const char **str, const char *end, int raw,
             /* We're either at a '{', which means we're starting another
                expression; or a '}', which means we're at the end of this
                f-string (for a nested format_spec). */
+            s--;
             break;
         }
     }
-    literal_end = *str;
-    assert(*str <= end);
-    assert(*str == end || **str == '{' || **str == '}');
+    *str = s;
+    assert(s <= end);
+    assert(s == end || *s == '{' || *s == '}');
 done:
-    if (literal_start != literal_end) {
+    if (literal_start != s) {
         if (raw)
             *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
-                                                    literal_end-literal_start,
+                                                    s - literal_start,
                                                     NULL, NULL);
         else
             *literal = decode_unicode_with_escapes(c, n, literal_start,
-                                                   literal_end-literal_start);
+                                                   s - literal_start);
         if (!*literal)
             return -1;
     }
author	Serhiy Storchaka <storchaka@gmail.com>	2017-05-25 10:33:55 (GMT)
committer	GitHub <noreply@github.com>	2017-05-25 10:33:55 (GMT)
commit	0cd7a3f196cf34d9bb0a52e61327f7fe289d9750 (patch)
tree	8e1758a3b7a031a3c52545f8b5d462ddb08ad18e /Python/ast.c
parent	d1c3c13fedaf62b71445ccd048e395aa4a7d510f (diff)
download	cpython-0cd7a3f196cf34d9bb0a52e61327f7fe289d9750.zip cpython-0cd7a3f196cf34d9bb0a52e61327f7fe289d9750.tar.gz cpython-0cd7a3f196cf34d9bb0a52e61327f7fe289d9750.tar.bz2