diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2017-05-25 10:33:55 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-05-25 10:33:55 (GMT) |
commit | 0cd7a3f196cf34d9bb0a52e61327f7fe289d9750 (patch) | |
tree | 8e1758a3b7a031a3c52545f8b5d462ddb08ad18e | |
parent | d1c3c13fedaf62b71445ccd048e395aa4a7d510f (diff) | |
download | cpython-0cd7a3f196cf34d9bb0a52e61327f7fe289d9750.zip cpython-0cd7a3f196cf34d9bb0a52e61327f7fe289d9750.tar.gz cpython-0cd7a3f196cf34d9bb0a52e61327f7fe289d9750.tar.bz2 |
bpo-29104: Fixed parsing backslashes in f-strings. (#490)
-rw-r--r-- | Lib/test/test_fstring.py | 14 | ||||
-rw-r--r-- | Misc/NEWS | 2 | ||||
-rw-r--r-- | Python/ast.c | 53 |
3 files changed, 48 insertions, 21 deletions
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index 708ed25..2573002 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -361,6 +361,20 @@ f'{a * x()}'""" self.assertEqual(f'2\x203', '2 3') self.assertEqual(f'\x203', ' 3') + with self.assertWarns(DeprecationWarning): # invalid escape sequence + value = eval(r"f'\{6*7}'") + self.assertEqual(value, '\\42') + self.assertEqual(f'\\{6*7}', '\\42') + self.assertEqual(fr'\{6*7}', '\\42') + + AMPERSAND = 'spam' + # Get the right unicode character (&), or pick up local variable + # depending on the number of backslashes. + self.assertEqual(f'\N{AMPERSAND}', '&') + self.assertEqual(f'\\N{AMPERSAND}', '\\Nspam') + self.assertEqual(fr'\N{AMPERSAND}', '\\Nspam') + self.assertEqual(f'\\\N{AMPERSAND}', '\\&') + def test_misformed_unicode_character_name(self): # These test are needed because unicode names are parsed # differently inside f-strings. @@ -10,6 +10,8 @@ What's New in Python 3.7.0 alpha 1? Core and Builtins ----------------- +- bpo-29104: Fixed parsing backslashes in f-strings. + - bpo-27945: Fixed various segfaults with dict when input collections are mutated during searching, inserting or comparing. Based on patches by Duane Griffin and Tim Mitchell. diff --git a/Python/ast.c b/Python/ast.c index d8941f0..205c711 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -4197,9 +4197,11 @@ decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s, while (s < end) { if (*s == '\\') { *p++ = *s++; - if (*s & 0x80) { + if (s >= end || *s & 0x80) { strcpy(p, "u005c"); p += 5; + if (s >= end) + break; } } if (*s & 0x80) { /* XXX inefficient */ @@ -4352,30 +4354,37 @@ fstring_find_literal(const char **str, const char *end, int raw, brace (which isn't part of a unicode name escape such as "\N{EULER CONSTANT}"), or the end of the string. */ - const char *literal_start = *str; - const char *literal_end; - int in_named_escape = 0; + const char *s = *str; + const char *literal_start = s; int result = 0; assert(*literal == NULL); - for (; *str < end; (*str)++) { - char ch = **str; - if (!in_named_escape && ch == '{' && (*str)-literal_start >= 2 && - *(*str-2) == '\\' && *(*str-1) == 'N') { - in_named_escape = 1; - } else if (in_named_escape && ch == '}') { - in_named_escape = 0; - } else if (ch == '{' || ch == '}') { + while (s < end) { + char ch = *s++; + if (!raw && ch == '\\' && s < end) { + ch = *s++; + if (ch == 'N') { + if (s < end && *s++ == '{') { + while (s < end && *s++ != '}') { + } + continue; + } + break; + } + if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) { + return -1; + } + } + if (ch == '{' || ch == '}') { /* Check for doubled braces, but only at the top level. If we checked at every level, then f'{0:{3}}' would fail with the two closing braces. */ if (recurse_lvl == 0) { - if (*str+1 < end && *(*str+1) == ch) { + if (s < end && *s == ch) { /* We're going to tell the caller that the literal ends here, but that they should continue scanning. But also skip over the second brace when we resume scanning. */ - literal_end = *str+1; - *str += 2; + *str = s + 1; result = 1; goto done; } @@ -4383,6 +4392,7 @@ fstring_find_literal(const char **str, const char *end, int raw, /* Where a single '{' is the start of a new expression, a single '}' is not allowed. */ if (ch == '}') { + *str = s - 1; ast_error(c, n, "f-string: single '}' is not allowed"); return -1; } @@ -4390,21 +4400,22 @@ fstring_find_literal(const char **str, const char *end, int raw, /* We're either at a '{', which means we're starting another expression; or a '}', which means we're at the end of this f-string (for a nested format_spec). */ + s--; break; } } - literal_end = *str; - assert(*str <= end); - assert(*str == end || **str == '{' || **str == '}'); + *str = s; + assert(s <= end); + assert(s == end || *s == '{' || *s == '}'); done: - if (literal_start != literal_end) { + if (literal_start != s) { if (raw) *literal = PyUnicode_DecodeUTF8Stateful(literal_start, - literal_end-literal_start, + s - literal_start, NULL, NULL); else *literal = decode_unicode_with_escapes(c, n, literal_start, - literal_end-literal_start); + s - literal_start); if (!*literal) return -1; } |