author    Pablo Galindo Salgado <Pablogsal@gmail.com>  2024-07-16 18:57:22 (GMT)
committer GitHub <noreply@github.com>  2024-07-16 18:57:22 (GMT)
commit    c46d64e0ef8e92a6b4ab4805d813d7e4d6663380 (patch)
tree      69bf2c0e394c896e05576baa94c7d750f0d27501 /Parser
parent    69c68de43aef03dd52fabd21f99cb3b0f9329201 (diff)
gh-121130: Fix f-string format specifiers with debug expressions (#121150)
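For context, the bug concerns f-strings that combine a debug expression (a
trailing '=') with a format specifier. A minimal illustration of the kind of
construct involved (a hypothetical example, not taken from the commit's tests):

    width = 10
    x = 42
    # A debug expression ('x=') whose format spec itself contains a
    # nested replacement field; tokenizing this combination is what
    # this commit fixes.
    print(f"{x=:{width}}")   # prints: x=        42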
Diffstat (limited to 'Parser')
-rw-r--r--  Parser/action_helpers.c | 63
-rw-r--r--  Parser/lexer/lexer.c    | 24
-rw-r--r--  Parser/lexer/state.c    |  1
-rw-r--r--  Parser/lexer/state.h    |  1
4 files changed, 62 insertions(+), 27 deletions(-)
diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c
index 44bf87d..0307a08 100644
--- a/Parser/action_helpers.c
+++ b/Parser/action_helpers.c
@@ -969,6 +969,8 @@ _PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv)
return result_token_with_metadata(p, conv, conv_token->metadata);
}
+static asdl_expr_seq *
+unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions);
ResultTokenWithMetadata *
_PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, int lineno, int col_offset,
int end_lineno, int end_col_offset, PyArena *arena)
@@ -1007,8 +1009,15 @@ _PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, in
assert(j == non_empty_count);
spec = resized_spec;
}
- expr_ty res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno,
- end_col_offset, p->arena);
+ expr_ty res;
+ if (asdl_seq_LEN(spec) == 0) {
+ res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno,
+ end_col_offset, p->arena);
+ } else {
+ res = _PyPegen_concatenate_strings(p, spec,
+ lineno, col_offset, end_lineno,
+ end_col_offset, arena);
+ }
if (!res) {
return NULL;
}
@@ -1308,6 +1317,7 @@ unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions)
expr_ty
_PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* raw_expressions, Token*b) {
+
asdl_expr_seq *expr = unpack_top_level_joined_strs(p, raw_expressions);
Py_ssize_t n_items = asdl_seq_LEN(expr);
@@ -1472,7 +1482,6 @@ expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, Re
debug_end_offset = end_col_offset;
debug_metadata = closing_brace->metadata;
}
-
expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line,
debug_end_offset - 1, p->arena);
if (!debug_text) {
@@ -1505,16 +1514,23 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
Py_ssize_t n_flattened_elements = 0;
for (i = 0; i < len; i++) {
expr_ty elem = asdl_seq_GET(strings, i);
- if (elem->kind == Constant_kind) {
- if (PyBytes_CheckExact(elem->v.Constant.value)) {
- bytes_found = 1;
- } else {
- unicode_string_found = 1;
- }
- n_flattened_elements++;
- } else {
- n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values);
- f_string_found = 1;
+ switch(elem->kind) {
+ case Constant_kind:
+ if (PyBytes_CheckExact(elem->v.Constant.value)) {
+ bytes_found = 1;
+ } else {
+ unicode_string_found = 1;
+ }
+ n_flattened_elements++;
+ break;
+ case JoinedStr_kind:
+ n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values);
+ f_string_found = 1;
+ break;
+ default:
+ n_flattened_elements++;
+ f_string_found = 1;
+ break;
}
}
@@ -1556,16 +1572,19 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
Py_ssize_t j = 0;
for (i = 0; i < len; i++) {
expr_ty elem = asdl_seq_GET(strings, i);
- if (elem->kind == Constant_kind) {
- asdl_seq_SET(flattened, current_pos++, elem);
- } else {
- for (j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) {
- expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j);
- if (subvalue == NULL) {
- return NULL;
+ switch(elem->kind) {
+ case JoinedStr_kind:
+ for (j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) {
+ expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j);
+ if (subvalue == NULL) {
+ return NULL;
+ }
+ asdl_seq_SET(flattened, current_pos++, subvalue);
}
- asdl_seq_SET(flattened, current_pos++, subvalue);
- }
+ break;
+ default:
+ asdl_seq_SET(flattened, current_pos++, elem);
+ break;
}
}
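Note on the action_helpers.c changes above: any non-empty format spec is now
routed through _PyPegen_concatenate_strings, so a spec assembled from literal
text and nested replacement fields is flattened like ordinary implicit string
concatenation, and the new switch statements tolerate elements that are
neither Constant_kind nor JoinedStr_kind. A sketch of the kind of spec this
covers (illustrative values only):

    fill, width = "*", 12
    value = 3.14159
    # The spec '{fill}>{width}.3f' mixes two replacement fields with
    # literal text; the parser must join those pieces into a single
    # JoinedStr node for the spec.
    print(f"{value=:{fill}>{width}.3f}")   # value=*******3.142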
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index 82b0e4e..93b5fbd 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -989,6 +989,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
the_current_tok->last_expr_buffer = NULL;
the_current_tok->last_expr_size = 0;
the_current_tok->last_expr_end = -1;
+ the_current_tok->in_format_spec = 0;
the_current_tok->f_string_debug = 0;
switch (*tok->start) {
@@ -1137,15 +1138,20 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
* by the `{` case, so for ensuring that we are on the 0th level, we need
* to adjust it manually */
int cursor = current_tok->curly_bracket_depth - (c != '{');
- if (cursor == 0 && !_PyLexer_update_fstring_expr(tok, c)) {
+ int in_format_spec = current_tok->in_format_spec;
+ int cursor_in_format_with_debug =
+ cursor == 1 && (current_tok->f_string_debug || in_format_spec);
+ int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
+ if ((cursor_valid) && !_PyLexer_update_fstring_expr(tok, c)) {
return MAKE_TOKEN(ENDMARKER);
}
- if (cursor == 0 && c != '{' && set_fstring_expr(tok, token, c)) {
+ if ((cursor_valid) && c != '{' && set_fstring_expr(tok, token, c)) {
return MAKE_TOKEN(ERRORTOKEN);
}
if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
current_tok->kind = TOK_FSTRING_MODE;
+ current_tok->in_format_spec = 1;
p_start = tok->start;
p_end = tok->cur;
return MAKE_TOKEN(_PyToken_OneChar(c));
@@ -1235,6 +1241,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
current_tok->curly_bracket_expr_start_depth--;
current_tok->kind = TOK_FSTRING_MODE;
+ current_tok->in_format_spec = 0;
current_tok->f_string_debug = 0;
}
}
@@ -1317,11 +1324,11 @@ f_string_middle:
tok->multi_line_start = tok->line_start;
while (end_quote_size != current_tok->f_string_quote_size) {
int c = tok_nextc(tok);
- if (tok->done == E_ERROR) {
+ if (tok->done == E_ERROR || tok->done == E_DECODE) {
return MAKE_TOKEN(ERRORTOKEN);
}
int in_format_spec = (
- current_tok->last_expr_end != -1
+ current_tok->in_format_spec
&&
INSIDE_FSTRING_EXPR(current_tok)
);
@@ -1337,6 +1344,7 @@ f_string_middle:
if (in_format_spec && c == '\n') {
tok_backup(tok, c);
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
+ current_tok->in_format_spec = 0;
p_start = tok->start;
p_end = tok->cur;
return MAKE_TOKEN(FSTRING_MIDDLE);
@@ -1378,6 +1386,9 @@ f_string_middle:
}
if (c == '{') {
+ if (!_PyLexer_update_fstring_expr(tok, c)) {
+ return MAKE_TOKEN(ENDMARKER);
+ }
int peek = tok_nextc(tok);
if (peek != '{' || in_format_spec) {
tok_backup(tok, peek);
@@ -1387,6 +1398,7 @@ f_string_middle:
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: expressions nested too deeply"));
}
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
+ current_tok->in_format_spec = 0;
p_start = tok->start;
p_end = tok->cur;
} else {
@@ -1406,13 +1418,15 @@ f_string_middle:
// scanning (indicated by the end of the expression being set) and we are not at the top level
// of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
// brackets, we can bypass it here.
- if (peek == '}' && !in_format_spec) {
+ int cursor = current_tok->curly_bracket_depth;
+ if (peek == '}' && !in_format_spec && cursor == 0) {
p_start = tok->start;
p_end = tok->cur - 1;
} else {
tok_backup(tok, peek);
tok_backup(tok, c);
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
+ current_tok->in_format_spec = 0;
p_start = tok->start;
p_end = tok->cur;
}
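Note on the lexer.c changes above: the tokenizer now records "inside a format
spec" explicitly in in_format_spec rather than inferring it from last_expr_end,
which matters once a debug expression re-enters regular mode inside a spec.
One way to observe the resulting token stream (assumes Python 3.12+, where
PEP 701 exposes the FSTRING_* tokens; illustrative only):

    import io
    import tokenize

    src = 'f"{x=:{width}}"\n'
    # FSTRING_START / FSTRING_MIDDLE / OP tokens show where the lexer
    # switches between f-string mode and regular mode.
    for tok in tokenize.generate_tokens(io.StringIO(src).readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))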
diff --git a/Parser/lexer/state.c b/Parser/lexer/state.c
index 653ddaf..647f291 100644
--- a/Parser/lexer/state.c
+++ b/Parser/lexer/state.c
@@ -74,6 +74,7 @@ free_fstring_expressions(struct tok_state *tok)
mode->last_expr_buffer = NULL;
mode->last_expr_size = 0;
mode->last_expr_end = -1;
+ mode->in_format_spec = 0;
}
}
}
diff --git a/Parser/lexer/state.h b/Parser/lexer/state.h
index 61d090d..9ed3bab 100644
--- a/Parser/lexer/state.h
+++ b/Parser/lexer/state.h
@@ -58,6 +58,7 @@ typedef struct _tokenizer_mode {
Py_ssize_t last_expr_end;
char* last_expr_buffer;
int f_string_debug;
+ int in_format_spec;
} tokenizer_mode;
/* Tokenizer state */
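With the new tokenizer_mode field in place, the lexer resets in_format_spec
explicitly at every point where it leaves a spec (see the in_format_spec = 0
assignments above) instead of deducing that state. The net effect, sketched
with a hypothetical nested example:

    x = 7
    # A format spec built entirely from nested replacement fields,
    # combined with the debug '='; constructs like this previously
    # confused the tokenizer's mode tracking.
    print(f"{x=:{'>'}{4}}")   # x=   7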