summary refs log tree commit diff stats
path: root/Python/ast.c
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2017-05-25 10:33:55 (GMT)
committer GitHub <noreply@github.com> 2017-05-25 10:33:55 (GMT)
commit 0cd7a3f196cf34d9bb0a52e61327f7fe289d9750 (patch)
tree 8e1758a3b7a031a3c52545f8b5d462ddb08ad18e /Python/ast.c
parent d1c3c13fedaf62b71445ccd048e395aa4a7d510f (diff)
download cpython-0cd7a3f196cf34d9bb0a52e61327f7fe289d9750.zip
cpython-0cd7a3f196cf34d9bb0a52e61327f7fe289d9750.tar.gz
cpython-0cd7a3f196cf34d9bb0a52e61327f7fe289d9750.tar.bz2
bpo-29104: Fixed parsing backslashes in f-strings. (#490)
Diffstat (limited to 'Python/ast.c')
-rw-r--r-- Python/ast.c 53
1 files changed, 32 insertions, 21 deletions
diff --git a/Python/ast.c b/Python/ast.c
index d8941f0..205c711 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -4197,9 +4197,11 @@ decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
while (s < end) {
if (*s == '\\') {
*p++ = *s++;
- if (*s & 0x80) {
+ if (s >= end || *s & 0x80) {
strcpy(p, "u005c");
p += 5;
+ if (s >= end)
+ break;
}
}
if (*s & 0x80) { /* XXX inefficient */
@@ -4352,30 +4354,37 @@ fstring_find_literal(const char **str, const char *end, int raw,
brace (which isn't part of a unicode name escape such as
"\N{EULER CONSTANT}"), or the end of the string. */
- const char *literal_start = *str;
- const char *literal_end;
- int in_named_escape = 0;
+ const char *s = *str;
+ const char *literal_start = s;
int result = 0;
assert(*literal == NULL);
- for (; *str < end; (*str)++) {
- char ch = **str;
- if (!in_named_escape && ch == '{' && (*str)-literal_start >= 2 &&
- *(*str-2) == '\\' && *(*str-1) == 'N') {
- in_named_escape = 1;
- } else if (in_named_escape && ch == '}') {
- in_named_escape = 0;
- } else if (ch == '{' || ch == '}') {
+ while (s < end) {
+ char ch = *s++;
+ if (!raw && ch == '\\' && s < end) {
+ ch = *s++;
+ if (ch == 'N') {
+ if (s < end && *s++ == '{') {
+ while (s < end && *s++ != '}') {
+ }
+ continue;
+ }
+ break;
+ }
+ if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
+ return -1;
+ }
+ }
+ if (ch == '{' || ch == '}') {
/* Check for doubled braces, but only at the top level. If
we checked at every level, then f'{0:{3}}' would fail
with the two closing braces. */
if (recurse_lvl == 0) {
- if (*str+1 < end && *(*str+1) == ch) {
+ if (s < end && *s == ch) {
/* We're going to tell the caller that the literal ends
here, but that they should continue scanning. But also
skip over the second brace when we resume scanning. */
- literal_end = *str+1;
- *str += 2;
+ *str = s + 1;
result = 1;
goto done;
}
@@ -4383,6 +4392,7 @@ fstring_find_literal(const char **str, const char *end, int raw,
/* Where a single '{' is the start of a new expression, a
single '}' is not allowed. */
if (ch == '}') {
+ *str = s - 1;
ast_error(c, n, "f-string: single '}' is not allowed");
return -1;
}
@@ -4390,21 +4400,22 @@ fstring_find_literal(const char **str, const char *end, int raw,
/* We're either at a '{', which means we're starting another
expression; or a '}', which means we're at the end of this
f-string (for a nested format_spec). */
+ s--;
break;
}
}
- literal_end = *str;
- assert(*str <= end);
- assert(*str == end || **str == '{' || **str == '}');
+ *str = s;
+ assert(s <= end);
+ assert(s == end || *s == '{' || *s == '}');
done:
- if (literal_start != literal_end) {
+ if (literal_start != s) {
if (raw)
*literal = PyUnicode_DecodeUTF8Stateful(literal_start,
- literal_end-literal_start,
+ s - literal_start,
NULL, NULL);
else
*literal = decode_unicode_with_escapes(c, n, literal_start,
- literal_end-literal_start);
+ s - literal_start);
if (!*literal)
return -1;
}