summaryrefslogtreecommitdiffstats
path: root/Parser/pegen.c
diff options
context:
space:
mode:
authorPablo Galindo <Pablogsal@gmail.com>2021-04-09 00:32:25 (GMT)
committerGitHub <noreply@github.com>2021-04-09 00:32:25 (GMT)
commitd00a449d6d421391557393cce695795b4b66c212 (patch)
tree62c6cc9b17a08a6a3343ee9dc99fe403d1582450 /Parser/pegen.c
parent58bafe42ab161473ba36c9231c3bf2e64ac8db82 (diff)
downloadcpython-d00a449d6d421391557393cce695795b4b66c212.zip
cpython-d00a449d6d421391557393cce695795b4b66c212.tar.gz
cpython-d00a449d6d421391557393cce695795b4b66c212.tar.bz2
Simplify _PyPegen_fill_token in pegen.c (GH-25295)
Diffstat (limited to 'Parser/pegen.c')
-rw-r--r--Parser/pegen.c122
1 file changed, 64 insertions, 58 deletions
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 0aa55cf..57759f7 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -625,6 +625,64 @@ growable_comment_array_deallocate(growable_comment_array *arr) {
PyMem_Free(arr->items);
}
+static int
+initialize_token(Parser *p, Token *token, const char *start, const char *end, int token_type) {
+ assert(token != NULL);
+
+ token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : token_type;
+ token->bytes = PyBytes_FromStringAndSize(start, end - start);
+ if (token->bytes == NULL) {
+ return -1;
+ }
+
+ if (_PyArena_AddPyObject(p->arena, token->bytes) < 0) {
+ Py_DECREF(token->bytes);
+ return -1;
+ }
+
+ const char *line_start = token_type == STRING ? p->tok->multi_line_start : p->tok->line_start;
+ int lineno = token_type == STRING ? p->tok->first_lineno : p->tok->lineno;
+ int end_lineno = p->tok->lineno;
+
+ int col_offset = (start != NULL && start >= line_start) ? (int)(start - line_start) : -1;
+ int end_col_offset = (end != NULL && end >= p->tok->line_start) ? (int)(end - p->tok->line_start) : -1;
+
+ token->lineno = p->starting_lineno + lineno;
+ token->col_offset = p->tok->lineno == 1 ? p->starting_col_offset + col_offset : col_offset;
+ token->end_lineno = p->starting_lineno + end_lineno;
+ token->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;
+
+ p->fill += 1;
+
+ if (token_type == ERRORTOKEN && p->tok->done == E_DECODE) {
+ return raise_decode_error(p);
+ }
+
+ return (token_type == ERRORTOKEN ? tokenizer_error(p) : 0);
+}
+
+static int
+_resize_tokens_array(Parser *p) {
+ int newsize = p->size * 2;
+ Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
+ if (new_tokens == NULL) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ p->tokens = new_tokens;
+
+ for (int i = p->size; i < newsize; i++) {
+ p->tokens[i] = PyMem_Calloc(1, sizeof(Token));
+ if (p->tokens[i] == NULL) {
+ p->size = i; // Needed, in order to cleanup correctly after parser fails
+ PyErr_NoMemory();
+ return -1;
+ }
+ }
+ p->size = newsize;
+ return 0;
+}
+
int
_PyPegen_fill_token(Parser *p)
{
@@ -650,7 +708,8 @@ _PyPegen_fill_token(Parser *p)
type = PyTokenizer_Get(p->tok, &start, &end);
}
- if (type == ENDMARKER && p->start_rule == Py_single_input && p->parsing_started) {
+ // If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing
+ if (p->start_rule == Py_single_input && type == ENDMARKER && p->parsing_started) {
type = NEWLINE; /* Add an extra newline */
p->parsing_started = 0;
@@ -663,66 +722,13 @@ _PyPegen_fill_token(Parser *p)
p->parsing_started = 1;
}
- if (p->fill == p->size) {
- int newsize = p->size * 2;
- Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
- if (new_tokens == NULL) {
- PyErr_NoMemory();
- return -1;
- }
- p->tokens = new_tokens;
-
- for (int i = p->size; i < newsize; i++) {
- p->tokens[i] = PyMem_Malloc(sizeof(Token));
- if (p->tokens[i] == NULL) {
- p->size = i; // Needed, in order to cleanup correctly after parser fails
- PyErr_NoMemory();
- return -1;
- }
- memset(p->tokens[i], '\0', sizeof(Token));
- }
- p->size = newsize;
- }
-
- Token *t = p->tokens[p->fill];
- t->type = (type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : type;
- t->bytes = PyBytes_FromStringAndSize(start, end - start);
- if (t->bytes == NULL) {
- return -1;
- }
- if (_PyArena_AddPyObject(p->arena, t->bytes) < 0) {
- Py_DECREF(t->bytes);
+ // Check if we are at the limit of the token array capacity and resize if needed
+ if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
return -1;
}
- int lineno = type == STRING ? p->tok->first_lineno : p->tok->lineno;
- const char *line_start = type == STRING ? p->tok->multi_line_start : p->tok->line_start;
- int end_lineno = p->tok->lineno;
- int col_offset = -1;
- int end_col_offset = -1;
- if (start != NULL && start >= line_start) {
- col_offset = (int)(start - line_start);
- }
- if (end != NULL && end >= p->tok->line_start) {
- end_col_offset = (int)(end - p->tok->line_start);
- }
-
- t->lineno = p->starting_lineno + lineno;
- t->col_offset = p->tok->lineno == 1 ? p->starting_col_offset + col_offset : col_offset;
- t->end_lineno = p->starting_lineno + end_lineno;
- t->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;
-
- p->fill += 1;
-
- if (type == ERRORTOKEN) {
- if (p->tok->done == E_DECODE) {
- return raise_decode_error(p);
- }
- return tokenizer_error(p);
-
- }
-
- return 0;
+ Token *t = p->tokens[p->fill];
+ return initialize_token(p, t, start, end, type);
}