Diffstat (limited to 'Parser/pegen_errors.c')
-rw-r--r-- | Parser/pegen_errors.c | 425 |
1 file changed, 0 insertions, 425 deletions
diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c
deleted file mode 100644
index 93057d1..0000000
--- a/Parser/pegen_errors.c
+++ /dev/null
@@ -1,425 +0,0 @@
-#include <Python.h>
-#include <errcode.h>
-
-#include "tokenizer.h"
-#include "pegen.h"
-
-// TOKENIZER ERRORS
-
-void
-_PyPegen_raise_tokenizer_init_error(PyObject *filename)
-{
-    if (!(PyErr_ExceptionMatches(PyExc_LookupError)
-          || PyErr_ExceptionMatches(PyExc_SyntaxError)
-          || PyErr_ExceptionMatches(PyExc_ValueError)
-          || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {
-        return;
-    }
-    PyObject *errstr = NULL;
-    PyObject *tuple = NULL;
-    PyObject *type;
-    PyObject *value;
-    PyObject *tback;
-    PyErr_Fetch(&type, &value, &tback);
-    errstr = PyObject_Str(value);
-    if (!errstr) {
-        goto error;
-    }
-
-    PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None);
-    if (!tmp) {
-        goto error;
-    }
-
-    tuple = PyTuple_Pack(2, errstr, tmp);
-    Py_DECREF(tmp);
-    if (!value) {
-        goto error;
-    }
-    PyErr_SetObject(PyExc_SyntaxError, tuple);
-
-error:
-    Py_XDECREF(type);
-    Py_XDECREF(value);
-    Py_XDECREF(tback);
-    Py_XDECREF(errstr);
-    Py_XDECREF(tuple);
-}
-
-static inline void
-raise_unclosed_parentheses_error(Parser *p) {
-    int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
-    int error_col = p->tok->parencolstack[p->tok->level-1];
-    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError,
-                               error_lineno, error_col, error_lineno, -1,
-                               "'%c' was never closed",
-                               p->tok->parenstack[p->tok->level-1]);
-}
-
-int
-_Pypegen_tokenizer_error(Parser *p)
-{
-    if (PyErr_Occurred()) {
-        return -1;
-    }
-
-    const char *msg = NULL;
-    PyObject* errtype = PyExc_SyntaxError;
-    Py_ssize_t col_offset = -1;
-    switch (p->tok->done) {
-        case E_TOKEN:
-            msg = "invalid token";
-            break;
-        case E_EOF:
-            if (p->tok->level) {
-                raise_unclosed_parentheses_error(p);
-            } else {
-                RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
-            }
-            return -1;
-        case E_DEDENT:
-            RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level");
-            return -1;
-        case E_INTR:
-            if (!PyErr_Occurred()) {
-                PyErr_SetNone(PyExc_KeyboardInterrupt);
-            }
-            return -1;
-        case E_NOMEM:
-            PyErr_NoMemory();
-            return -1;
-        case E_TABSPACE:
-            errtype = PyExc_TabError;
-            msg = "inconsistent use of tabs and spaces in indentation";
-            break;
-        case E_TOODEEP:
-            errtype = PyExc_IndentationError;
-            msg = "too many levels of indentation";
-            break;
-        case E_LINECONT: {
-            col_offset = p->tok->cur - p->tok->buf - 1;
-            msg = "unexpected character after line continuation character";
-            break;
-        }
-        default:
-            msg = "unknown parsing error";
-    }
-
-    RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno,
-                               col_offset >= 0 ? col_offset : 0,
-                               p->tok->lineno, -1, msg);
-    return -1;
-}
-
-int
-_Pypegen_raise_decode_error(Parser *p)
-{
-    assert(PyErr_Occurred());
-    const char *errtype = NULL;
-    if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
-        errtype = "unicode error";
-    }
-    else if (PyErr_ExceptionMatches(PyExc_ValueError)) {
-        errtype = "value error";
-    }
-    if (errtype) {
-        PyObject *type;
-        PyObject *value;
-        PyObject *tback;
-        PyObject *errstr;
-        PyErr_Fetch(&type, &value, &tback);
-        errstr = PyObject_Str(value);
-        if (errstr) {
-            RAISE_SYNTAX_ERROR("(%s) %U", errtype, errstr);
-            Py_DECREF(errstr);
-        }
-        else {
-            PyErr_Clear();
-            RAISE_SYNTAX_ERROR("(%s) unknown error", errtype);
-        }
-        Py_XDECREF(type);
-        Py_XDECREF(value);
-        Py_XDECREF(tback);
-    }
-
-    return -1;
-}
-
-static int
-_PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) {
-    // Tokenize the whole input to see if there are any tokenization
-    // errors such as mismatching parentheses. These will get priority
-    // over generic syntax errors only if the line number of the error is
-    // before the one that we had for the generic error.
-
-    // We don't want to tokenize to the end for interactive input
-    if (p->tok->prompt != NULL) {
-        return 0;
-    }
-
-    PyObject *type, *value, *traceback;
-    PyErr_Fetch(&type, &value, &traceback);
-
-    Token *current_token = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
-    Py_ssize_t current_err_line = current_token->lineno;
-
-    int ret = 0;
-
-    for (;;) {
-        const char *start;
-        const char *end;
-        switch (_PyTokenizer_Get(p->tok, &start, &end)) {
-            case ERRORTOKEN:
-                if (p->tok->level != 0) {
-                    int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
-                    if (current_err_line > error_lineno) {
-                        raise_unclosed_parentheses_error(p);
-                        ret = -1;
-                        goto exit;
-                    }
-                }
-                break;
-            case ENDMARKER:
-                break;
-            default:
-                continue;
-        }
-        break;
-    }
-
-
-exit:
-    if (PyErr_Occurred()) {
-        Py_XDECREF(value);
-        Py_XDECREF(type);
-        Py_XDECREF(traceback);
-    } else {
-        PyErr_Restore(type, value, traceback);
-    }
-    return ret;
-}
-
-// PARSER ERRORS
-
-void *
-_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
-{
-    if (p->fill == 0) {
-        va_list va;
-        va_start(va, errmsg);
-        _PyPegen_raise_error_known_location(p, errtype, 0, 0, 0, -1, errmsg, va);
-        va_end(va);
-        return NULL;
-    }
-
-    Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
-    Py_ssize_t col_offset;
-    Py_ssize_t end_col_offset = -1;
-    if (t->col_offset == -1) {
-        if (p->tok->cur == p->tok->buf) {
-            col_offset = 0;
-        } else {
-            const char* start = p->tok->buf ? p->tok->line_start : p->tok->buf;
-            col_offset = Py_SAFE_DOWNCAST(p->tok->cur - start, intptr_t, int);
-        }
-    } else {
-        col_offset = t->col_offset + 1;
-    }
-
-    if (t->end_col_offset != -1) {
-        end_col_offset = t->end_col_offset + 1;
-    }
-
-    va_list va;
-    va_start(va, errmsg);
-    _PyPegen_raise_error_known_location(p, errtype, t->lineno, col_offset, t->end_lineno, end_col_offset, errmsg, va);
-    va_end(va);
-
-    return NULL;
-}
-
-static PyObject *
-get_error_line_from_tokenizer_buffers(Parser *p, Py_ssize_t lineno)
-{
-    /* If the file descriptor is interactive, the source lines of the current
-     * (multi-line) statement are stored in p->tok->interactive_src_start.
-     * If not, we're parsing from a string, which means that the whole source
-     * is stored in p->tok->str. */
-    assert((p->tok->fp == NULL && p->tok->str != NULL) || p->tok->fp == stdin);
-
-    char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str;
-    assert(cur_line != NULL);
-
-    for (int i = 0; i < lineno - 1; i++) {
-        cur_line = strchr(cur_line, '\n') + 1;
-    }
-
-    char *next_newline;
-    if ((next_newline = strchr(cur_line, '\n')) == NULL) { // This is the last line
-        next_newline = cur_line + strlen(cur_line);
-    }
-    return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace");
-}
-
-void *
-_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
-                                    Py_ssize_t lineno, Py_ssize_t col_offset,
-                                    Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
-                                    const char *errmsg, va_list va)
-{
-    PyObject *value = NULL;
-    PyObject *errstr = NULL;
-    PyObject *error_line = NULL;
-    PyObject *tmp = NULL;
-    p->error_indicator = 1;
-
-    if (end_lineno == CURRENT_POS) {
-        end_lineno = p->tok->lineno;
-    }
-    if (end_col_offset == CURRENT_POS) {
-        end_col_offset = p->tok->cur - p->tok->line_start;
-    }
-
-    if (p->start_rule == Py_fstring_input) {
-        const char *fstring_msg = "f-string: ";
-        Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg);
-
-        char *new_errmsg = PyMem_Malloc(len + 1); // Lengths of both strings plus NULL character
-        if (!new_errmsg) {
-            return (void *) PyErr_NoMemory();
-        }
-
-        // Copy both strings into new buffer
-        memcpy(new_errmsg, fstring_msg, strlen(fstring_msg));
-        memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg));
-        new_errmsg[len] = 0;
-        errmsg = new_errmsg;
-    }
-    errstr = PyUnicode_FromFormatV(errmsg, va);
-    if (!errstr) {
-        goto error;
-    }
-
-    if (p->tok->fp_interactive) {
-        error_line = get_error_line_from_tokenizer_buffers(p, lineno);
-    }
-    else if (p->start_rule == Py_file_input) {
-        error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename,
-                                                     (int) lineno, p->tok->encoding);
-    }
-
-    if (!error_line) {
-        /* PyErr_ProgramTextObject was not called or returned NULL. If it was not called,
-           then we need to find the error line from some other source, because
-           p->start_rule != Py_file_input. If it returned NULL, then it either unexpectedly
-           failed or we're parsing from a string or the REPL. There's a third edge case where
-           we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
-           `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
-           does not physically exist */
-        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
-
-        if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
-            Py_ssize_t size = p->tok->inp - p->tok->buf;
-            error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
-        }
-        else if (p->tok->fp == NULL || p->tok->fp == stdin) {
-            error_line = get_error_line_from_tokenizer_buffers(p, lineno);
-        }
-        else {
-            error_line = PyUnicode_FromStringAndSize("", 0);
-        }
-        if (!error_line) {
-            goto error;
-        }
-    }
-
-    if (p->start_rule == Py_fstring_input) {
-        col_offset -= p->starting_col_offset;
-        end_col_offset -= p->starting_col_offset;
-    }
-
-    Py_ssize_t col_number = col_offset;
-    Py_ssize_t end_col_number = end_col_offset;
-
-    if (p->tok->encoding != NULL) {
-        col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset);
-        if (col_number < 0) {
-            goto error;
-        }
-        if (end_col_number > 0) {
-            Py_ssize_t end_col_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number);
-            if (end_col_offset < 0) {
-                goto error;
-            } else {
-                end_col_number = end_col_offset;
-            }
-        }
-    }
-    tmp = Py_BuildValue("(OiiNii)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number);
-    if (!tmp) {
-        goto error;
-    }
-    value = PyTuple_Pack(2, errstr, tmp);
-    Py_DECREF(tmp);
-    if (!value) {
-        goto error;
-    }
-    PyErr_SetObject(errtype, value);
-
-    Py_DECREF(errstr);
-    Py_DECREF(value);
-    if (p->start_rule == Py_fstring_input) {
-        PyMem_Free((void *)errmsg);
-    }
-    return NULL;
-
-error:
-    Py_XDECREF(errstr);
-    Py_XDECREF(error_line);
-    if (p->start_rule == Py_fstring_input) {
-        PyMem_Free((void *)errmsg);
-    }
-    return NULL;
-}
-
-void
-_Pypegen_set_syntax_error(Parser* p, Token* last_token) {
-    // Existing syntax error
-    if (PyErr_Occurred()) {
-        // Prioritize tokenizer errors over custom syntax errors raised
-        // on the second phase only if the errors come from the parser.
-        if (p->tok->done == E_DONE && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
-            _PyPegen_tokenize_full_source_to_check_for_errors(p);
-        }
-        // Propagate the existing syntax error.
-        return;
-    }
-    // Initialization error
-    if (p->fill == 0) {
-        RAISE_SYNTAX_ERROR("error at start before reading any input");
-    }
-    // Parser encountered EOF (End of File) unexpectedly
-    if (last_token->type == ERRORTOKEN && p->tok->done == E_EOF) {
-        if (p->tok->level) {
-            raise_unclosed_parentheses_error(p);
-        } else {
-            RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
-        }
-        return;
-    }
-    // Indentation error in the tokenizer
-    if (last_token->type == INDENT || last_token->type == DEDENT) {
-        RAISE_INDENTATION_ERROR(last_token->type == INDENT ? "unexpected indent" : "unexpected unindent");
-        return;
-    }
-    // Unknown error (generic case)
-
-    // Use the last token we found on the first pass to avoid reporting
-    // incorrect locations for generic syntax errors just because we reached
-    // further away when trying to find specific syntax errors in the second
-    // pass.
-    RAISE_SYNTAX_ERROR_KNOWN_LOCATION(last_token, "invalid syntax");
-    // _PyPegen_tokenize_full_source_to_check_for_errors will override the existing
-    // generic SyntaxError we just raised if errors are found.
-    _PyPegen_tokenize_full_source_to_check_for_errors(p);
-}
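Note on the error paths removed above: all of them eventually go through _PyPegen_raise_error_known_location, which sets a SyntaxError whose arguments are (msg, (filename, lineno, col, text, end_lineno, end_col)). A minimal embedding sketch that exercises the unclosed-parentheses branch (raise_unclosed_parentheses_error); this is an illustration, not part of the deleted file, and assumes a CPython 3.10+ build with Python.h available:

#include <Python.h>

int main(void)
{
    Py_Initialize();

    /* An unclosed '(' at end of input puts the tokenizer in the E_EOF state
     * with p->tok->level > 0, which reports "'(' was never closed" at the
     * location of the opening bracket. */
    PyObject *code = Py_CompileString("x = (1 +\n", "<example>", Py_file_input);
    if (code == NULL) {
        PyErr_Print();  /* prints the SyntaxError with filename, line and caret */
    }
    Py_XDECREF(code);

    Py_FinalizeEx();
    return 0;
}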