diff options
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/pegen.c | 73 | ||||
-rw-r--r-- | Parser/tokenizer.c | 5 | ||||
-rw-r--r-- | Parser/tokenizer.h | 1 |
3 files changed, 75 insertions, 4 deletions
diff --git a/Parser/pegen.c b/Parser/pegen.c index a6f9792..6c27980 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -265,6 +265,16 @@ raise_decode_error(Parser *p) return -1; } +static inline void +raise_unclosed_parentheses_error(Parser *p) { + int error_lineno = p->tok->parenlinenostack[p->tok->level-1]; + int error_col = p->tok->parencolstack[p->tok->level-1]; + RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, + error_lineno, error_col, + "'%c' was never closed", + p->tok->parenstack[p->tok->level-1]); +} + static void raise_tokenizer_init_error(PyObject *filename) { @@ -324,7 +334,11 @@ tokenizer_error(Parser *p) RAISE_SYNTAX_ERROR("EOL while scanning string literal"); return -1; case E_EOF: - RAISE_SYNTAX_ERROR("unexpected EOF while parsing"); + if (p->tok->level) { + raise_unclosed_parentheses_error(p); + } else { + RAISE_SYNTAX_ERROR("unexpected EOF while parsing"); + } return -1; case E_DEDENT: RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level"); @@ -1151,6 +1165,52 @@ reset_parser_state(Parser *p) p->call_invalid_rules = 1; } +static int +_PyPegen_check_tokenizer_errors(Parser *p) { + // Tokenize the whole input to see if there are any tokenization + // errors such as mismatching parentheses. These will get priority + // over generic syntax errors only if the line number of the error is + // before the one that we had for the generic error. + + // We don't want to tokenize to the end for interactive input + if (p->tok->prompt != NULL) { + return 0; + } + + + Token *current_token = p->known_err_token != NULL ? 
p->known_err_token : p->tokens[p->fill - 1]; + Py_ssize_t current_err_line = current_token->lineno; + + // Save the tokenizer state to restore them later in case we found nothing + struct tok_state saved_tok; + memcpy(&saved_tok, p->tok, sizeof(struct tok_state)); + + for (;;) { + const char *start; + const char *end; + switch (PyTokenizer_Get(p->tok, &start, &end)) { + case ERRORTOKEN: + if (p->tok->level != 0) { + int error_lineno = p->tok->parenlinenostack[p->tok->level-1]; + if (current_err_line > error_lineno) { + raise_unclosed_parentheses_error(p); + return -1; + } + } + break; + case ENDMARKER: + break; + default: + continue; + } + break; + } + + // Restore the tokenizer state + memcpy(p->tok, &saved_tok, sizeof(struct tok_state)); + return 0; +} + void * _PyPegen_run_parser(Parser *p) { @@ -1164,8 +1224,12 @@ _PyPegen_run_parser(Parser *p) if (p->fill == 0) { RAISE_SYNTAX_ERROR("error at start before reading any input"); } - else if (p->tok->done == E_EOF) { - RAISE_SYNTAX_ERROR("unexpected EOF while parsing"); + else if (p->tok->done == E_EOF) { + if (p->tok->level) { + raise_unclosed_parentheses_error(p); + } else { + RAISE_SYNTAX_ERROR("unexpected EOF while parsing"); + } } else { if (p->tokens[p->fill-1]->type == INDENT) { @@ -1175,6 +1239,9 @@ _PyPegen_run_parser(Parser *p) RAISE_INDENTATION_ERROR("unexpected unindent"); } else { + if (_PyPegen_check_tokenizer_errors(p)) { + return NULL; + } RAISE_SYNTAX_ERROR("invalid syntax"); } } diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 62cd296..f9c8bf6 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -64,7 +64,6 @@ tok_new(void) tok->tabsize = TABSIZE; tok->indent = 0; tok->indstack[0] = 0; - tok->atbol = 1; tok->pendin = 0; tok->prompt = tok->nextprompt = NULL; @@ -1396,6 +1395,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) /* Check for EOF and errors now */ if (c == EOF) { + if (tok->level) { + return ERRORTOKEN; + } return tok->done == E_EOF ? 
ENDMARKER : ERRORTOKEN; } @@ -1818,6 +1820,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) } tok->parenstack[tok->level] = c; tok->parenlinenostack[tok->level] = tok->lineno; + tok->parencolstack[tok->level] = tok->start - tok->line_start; tok->level++; break; case ')': diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index b659f34..56074b6 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -45,6 +45,7 @@ struct tok_state { /* Used to allow free continuations inside them */ char parenstack[MAXLEVEL]; int parenlinenostack[MAXLEVEL]; + int parencolstack[MAXLEVEL]; PyObject *filename; /* Stuff for checking on different tab sizes */ int altindstack[MAXINDENT]; /* Stack of alternate indents */ |