diff options
author | Pablo Galindo <Pablogsal@gmail.com> | 2021-04-09 00:32:25 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-09 00:32:25 (GMT) |
commit | d00a449d6d421391557393cce695795b4b66c212 (patch) | |
tree | 62c6cc9b17a08a6a3343ee9dc99fe403d1582450 /Parser/pegen.c | |
parent | 58bafe42ab161473ba36c9231c3bf2e64ac8db82 (diff) | |
download | cpython-d00a449d6d421391557393cce695795b4b66c212.zip cpython-d00a449d6d421391557393cce695795b4b66c212.tar.gz cpython-d00a449d6d421391557393cce695795b4b66c212.tar.bz2 |
Simplify _PyPegen_fill_token in pegen.c (GH-25295)
Diffstat (limited to 'Parser/pegen.c')
-rw-r--r-- | Parser/pegen.c | 122 |
1 files changed, 64 insertions, 58 deletions
```diff
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 0aa55cf..57759f7 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -625,6 +625,64 @@ growable_comment_array_deallocate(growable_comment_array *arr) {
     PyMem_Free(arr->items);
 }

+static int
+initialize_token(Parser *p, Token *token, const char *start, const char *end, int token_type) {
+    assert(token != NULL);
+
+    token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : token_type;
+    token->bytes = PyBytes_FromStringAndSize(start, end - start);
+    if (token->bytes == NULL) {
+        return -1;
+    }
+
+    if (_PyArena_AddPyObject(p->arena, token->bytes) < 0) {
+        Py_DECREF(token->bytes);
+        return -1;
+    }
+
+    const char *line_start = token_type == STRING ? p->tok->multi_line_start : p->tok->line_start;
+    int lineno = token_type == STRING ? p->tok->first_lineno : p->tok->lineno;
+    int end_lineno = p->tok->lineno;
+
+    int col_offset = (start != NULL && start >= line_start) ? (int)(start - line_start) : -1;
+    int end_col_offset = (end != NULL && end >= p->tok->line_start) ? (int)(end - p->tok->line_start) : -1;
+
+    token->lineno = p->starting_lineno + lineno;
+    token->col_offset = p->tok->lineno == 1 ? p->starting_col_offset + col_offset : col_offset;
+    token->end_lineno = p->starting_lineno + end_lineno;
+    token->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;
+
+    p->fill += 1;
+
+    if (token_type == ERRORTOKEN && p->tok->done == E_DECODE) {
+        return raise_decode_error(p);
+    }
+
+    return (token_type == ERRORTOKEN ? tokenizer_error(p) : 0);
+}
+
+static int
+_resize_tokens_array(Parser *p) {
+    int newsize = p->size * 2;
+    Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
+    if (new_tokens == NULL) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    p->tokens = new_tokens;
+
+    for (int i = p->size; i < newsize; i++) {
+        p->tokens[i] = PyMem_Calloc(1, sizeof(Token));
+        if (p->tokens[i] == NULL) {
+            p->size = i; // Needed, in order to cleanup correctly after parser fails
+            PyErr_NoMemory();
+            return -1;
+        }
+    }
+    p->size = newsize;
+    return 0;
+}
+
 int
 _PyPegen_fill_token(Parser *p)
 {
@@ -650,7 +708,8 @@ _PyPegen_fill_token(Parser *p)
         type = PyTokenizer_Get(p->tok, &start, &end);
     }

-    if (type == ENDMARKER && p->start_rule == Py_single_input && p->parsing_started) {
+    // If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing
+    if (p->start_rule == Py_single_input && type == ENDMARKER && p->parsing_started) {
         type = NEWLINE; /* Add an extra newline */
         p->parsing_started = 0;

@@ -663,66 +722,13 @@ _PyPegen_fill_token(Parser *p)
         p->parsing_started = 1;
     }

-    if (p->fill == p->size) {
-        int newsize = p->size * 2;
-        Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
-        if (new_tokens == NULL) {
-            PyErr_NoMemory();
-            return -1;
-        }
-        p->tokens = new_tokens;
-
-        for (int i = p->size; i < newsize; i++) {
-            p->tokens[i] = PyMem_Malloc(sizeof(Token));
-            if (p->tokens[i] == NULL) {
-                p->size = i; // Needed, in order to cleanup correctly after parser fails
-                PyErr_NoMemory();
-                return -1;
-            }
-            memset(p->tokens[i], '\0', sizeof(Token));
-        }
-        p->size = newsize;
-    }
-
-    Token *t = p->tokens[p->fill];
-    t->type = (type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : type;
-    t->bytes = PyBytes_FromStringAndSize(start, end - start);
-    if (t->bytes == NULL) {
-        return -1;
-    }
-    if (_PyArena_AddPyObject(p->arena, t->bytes) < 0) {
-        Py_DECREF(t->bytes);
+    // Check if we are at the limit of the token array capacity and resize if needed
+    if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
         return -1;
     }

-    int lineno = type == STRING ? p->tok->first_lineno : p->tok->lineno;
-    const char *line_start = type == STRING ? p->tok->multi_line_start : p->tok->line_start;
-    int end_lineno = p->tok->lineno;
-    int col_offset = -1;
-    int end_col_offset = -1;
-    if (start != NULL && start >= line_start) {
-        col_offset = (int)(start - line_start);
-    }
-    if (end != NULL && end >= p->tok->line_start) {
-        end_col_offset = (int)(end - p->tok->line_start);
-    }
-
-    t->lineno = p->starting_lineno + lineno;
-    t->col_offset = p->tok->lineno == 1 ? p->starting_col_offset + col_offset : col_offset;
-    t->end_lineno = p->starting_lineno + end_lineno;
-    t->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;
-
-    p->fill += 1;
-
-    if (type == ERRORTOKEN) {
-        if (p->tok->done == E_DECODE) {
-            return raise_decode_error(p);
-        }
-        return tokenizer_error(p);
-
-    }
-
-    return 0;
+    Token *t = p->tokens[p->fill];
+    return initialize_token(p, t, start, end, type);
 }


```