diff options
author | Pablo Galindo Salgado <Pablogsal@gmail.com> | 2024-07-16 18:57:22 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-07-16 18:57:22 (GMT) |
commit | c46d64e0ef8e92a6b4ab4805d813d7e4d6663380 (patch) | |
tree | 69bf2c0e394c896e05576baa94c7d750f0d27501 /Parser | |
parent | 69c68de43aef03dd52fabd21f99cb3b0f9329201 (diff) | |
download | cpython-c46d64e0ef8e92a6b4ab4805d813d7e4d6663380.zip cpython-c46d64e0ef8e92a6b4ab4805d813d7e4d6663380.tar.gz cpython-c46d64e0ef8e92a6b4ab4805d813d7e4d6663380.tar.bz2 |
gh-121130: Fix f-string format specifiers with debug expressions (#121150)
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/action_helpers.c | 63 | ||||
-rw-r--r-- | Parser/lexer/lexer.c | 24 | ||||
-rw-r--r-- | Parser/lexer/state.c | 1 | ||||
-rw-r--r-- | Parser/lexer/state.h | 1 |
4 files changed, 62 insertions, 27 deletions
diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 44bf87d..0307a08 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -969,6 +969,8 @@ _PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv) return result_token_with_metadata(p, conv, conv_token->metadata); } +static asdl_expr_seq * +unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions); ResultTokenWithMetadata * _PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, int lineno, int col_offset, int end_lineno, int end_col_offset, PyArena *arena) @@ -1007,8 +1009,15 @@ _PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, in assert(j == non_empty_count); spec = resized_spec; } - expr_ty res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno, - end_col_offset, p->arena); + expr_ty res; + if (asdl_seq_LEN(spec) == 0) { + res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno, + end_col_offset, p->arena); + } else { + res = _PyPegen_concatenate_strings(p, spec, + lineno, col_offset, end_lineno, + end_col_offset, arena); + } if (!res) { return NULL; } @@ -1308,6 +1317,7 @@ unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions) expr_ty _PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* raw_expressions, Token*b) { + asdl_expr_seq *expr = unpack_top_level_joined_strs(p, raw_expressions); Py_ssize_t n_items = asdl_seq_LEN(expr); @@ -1472,7 +1482,6 @@ expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, Re debug_end_offset = end_col_offset; debug_metadata = closing_brace->metadata; } - expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line, debug_end_offset - 1, p->arena); if (!debug_text) { @@ -1505,16 +1514,23 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings, Py_ssize_t n_flattened_elements = 0; for (i = 0; i < len; i++) { expr_ty elem = asdl_seq_GET(strings, i); - if (elem->kind == Constant_kind) { - if (PyBytes_CheckExact(elem->v.Constant.value)) { - bytes_found = 1; - } else { - unicode_string_found = 1; - } - n_flattened_elements++; - } else { - n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values); - f_string_found = 1; + switch(elem->kind) { + case Constant_kind: + if (PyBytes_CheckExact(elem->v.Constant.value)) { + bytes_found = 1; + } else { + unicode_string_found = 1; + } + n_flattened_elements++; + break; + case JoinedStr_kind: + n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values); + f_string_found = 1; + break; + default: + n_flattened_elements++; + f_string_found = 1; + break; } } @@ -1556,16 +1572,19 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings, Py_ssize_t j = 0; for (i = 0; i < len; i++) { expr_ty elem = asdl_seq_GET(strings, i); - if (elem->kind == Constant_kind) { - asdl_seq_SET(flattened, current_pos++, elem); - } else { - for (j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) { - expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j); - if (subvalue == NULL) { - return NULL; + switch(elem->kind) { + case JoinedStr_kind: + for (j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) { + expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j); + if (subvalue == NULL) { + return NULL; + } + asdl_seq_SET(flattened, current_pos++, subvalue); } - asdl_seq_SET(flattened, current_pos++, subvalue); - } + break; + default: + asdl_seq_SET(flattened, current_pos++, elem); + break; } } diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index 82b0e4e..93b5fbd 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -989,6 +989,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t the_current_tok->last_expr_buffer = NULL; the_current_tok->last_expr_size = 0; the_current_tok->last_expr_end = -1; + the_current_tok->in_format_spec = 0; the_current_tok->f_string_debug = 0; switch (*tok->start) { @@ -1137,15 +1138,20 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t * by the `{` case, so for ensuring that we are on the 0th level, we need * to adjust it manually */ int cursor = current_tok->curly_bracket_depth - (c != '{'); - if (cursor == 0 && !_PyLexer_update_fstring_expr(tok, c)) { + int in_format_spec = current_tok->in_format_spec; + int cursor_in_format_with_debug = + cursor == 1 && (current_tok->f_string_debug || in_format_spec); + int cursor_valid = cursor == 0 || cursor_in_format_with_debug; + if ((cursor_valid) && !_PyLexer_update_fstring_expr(tok, c)) { return MAKE_TOKEN(ENDMARKER); } - if (cursor == 0 && c != '{' && set_fstring_expr(tok, token, c)) { + if ((cursor_valid) && c != '{' && set_fstring_expr(tok, token, c)) { return MAKE_TOKEN(ERRORTOKEN); } if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) { current_tok->kind = TOK_FSTRING_MODE; + current_tok->in_format_spec = 1; p_start = tok->start; p_end = tok->cur; return MAKE_TOKEN(_PyToken_OneChar(c)); @@ -1235,6 +1241,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) { current_tok->curly_bracket_expr_start_depth--; current_tok->kind = TOK_FSTRING_MODE; + current_tok->in_format_spec = 0; current_tok->f_string_debug = 0; } } @@ -1317,11 +1324,11 @@ f_string_middle: tok->multi_line_start = tok->line_start; while (end_quote_size != current_tok->f_string_quote_size) { int c = tok_nextc(tok); - if (tok->done == E_ERROR) { + if (tok->done == E_ERROR || tok->done == E_DECODE) { return MAKE_TOKEN(ERRORTOKEN); } int in_format_spec = ( - current_tok->last_expr_end != -1 + current_tok->in_format_spec && INSIDE_FSTRING_EXPR(current_tok) ); @@ -1337,6 +1344,7 @@ f_string_middle: if (in_format_spec && c == '\n') { tok_backup(tok, c); TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; + current_tok->in_format_spec = 0; p_start = tok->start; p_end = tok->cur; return MAKE_TOKEN(FSTRING_MIDDLE); @@ -1378,6 +1386,9 @@ f_string_middle: } if (c == '{') { + if (!_PyLexer_update_fstring_expr(tok, c)) { + return MAKE_TOKEN(ENDMARKER); + } int peek = tok_nextc(tok); if (peek != '{' || in_format_spec) { tok_backup(tok, peek); @@ -1387,6 +1398,7 @@ f_string_middle: return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: expressions nested too deeply")); } TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; + current_tok->in_format_spec = 0; p_start = tok->start; p_end = tok->cur; } else { @@ -1406,13 +1418,15 @@ f_string_middle: // scanning (indicated by the end of the expression being set) and we are not at the top level // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double // brackets, we can bypass it here. - if (peek == '}' && !in_format_spec) { + int cursor = current_tok->curly_bracket_depth; + if (peek == '}' && !in_format_spec && cursor == 0) { p_start = tok->start; p_end = tok->cur - 1; } else { tok_backup(tok, peek); tok_backup(tok, c); TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; + current_tok->in_format_spec = 0; p_start = tok->start; p_end = tok->cur; } diff --git a/Parser/lexer/state.c b/Parser/lexer/state.c index 653ddaf..647f291 100644 --- a/Parser/lexer/state.c +++ b/Parser/lexer/state.c @@ -74,6 +74,7 @@ free_fstring_expressions(struct tok_state *tok) mode->last_expr_buffer = NULL; mode->last_expr_size = 0; mode->last_expr_end = -1; + mode->in_format_spec = 0; } } } diff --git a/Parser/lexer/state.h b/Parser/lexer/state.h index 61d090d..9ed3bab 100644 --- a/Parser/lexer/state.h +++ b/Parser/lexer/state.h @@ -58,6 +58,7 @@ typedef struct _tokenizer_mode { Py_ssize_t last_expr_end; char* last_expr_buffer; int f_string_debug; + int in_format_spec; } tokenizer_mode; /* Tokenizer state */ |