diff options
author | Pablo Galindo Salgado <Pablogsal@gmail.com> | 2021-11-21 01:08:50 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-11-21 01:08:50 (GMT) |
commit | c9c4444d9f11ae80c2c4cc7d40b6718419d81a97 (patch) | |
tree | 5feef111609a1ecfe4b714e9f3ef62f3014c3552 /Parser/pegen.c | |
parent | f7638dd0f90b2afd9295ee179119f4a29859953a (diff) | |
download | cpython-c9c4444d9f11ae80c2c4cc7d40b6718419d81a97.zip cpython-c9c4444d9f11ae80c2c4cc7d40b6718419d81a97.tar.gz cpython-c9c4444d9f11ae80c2c4cc7d40b6718419d81a97.tar.bz2 |
Refactor parser compilation units into specific components (GH-29676)
Diffstat (limited to 'Parser/pegen.c')
-rw-r--r-- | Parser/pegen.c | 1936 |
1 files changed, 122 insertions, 1814 deletions
diff --git a/Parser/pegen.c b/Parser/pegen.c index b760730..4f51c63 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -1,432 +1,21 @@ #include <Python.h> #include "pycore_ast.h" // _PyAST_Validate(), #include <errcode.h> -#include "tokenizer.h" +#include "tokenizer.h" #include "pegen.h" -#include "string_parser.h" - -PyObject * -_PyPegen_new_type_comment(Parser *p, const char *s) -{ - PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL); - if (res == NULL) { - return NULL; - } - if (_PyArena_AddPyObject(p->arena, res) < 0) { - Py_DECREF(res); - return NULL; - } - return res; -} - -arg_ty -_PyPegen_add_type_comment_to_arg(Parser *p, arg_ty a, Token *tc) -{ - if (tc == NULL) { - return a; - } - const char *bytes = PyBytes_AsString(tc->bytes); - if (bytes == NULL) { - return NULL; - } - PyObject *tco = _PyPegen_new_type_comment(p, bytes); - if (tco == NULL) { - return NULL; - } - return _PyAST_arg(a->arg, a->annotation, tco, - a->lineno, a->col_offset, a->end_lineno, a->end_col_offset, - p->arena); -} - -static int -init_normalization(Parser *p) -{ - if (p->normalize) { - return 1; - } - PyObject *m = PyImport_ImportModuleNoBlock("unicodedata"); - if (!m) - { - return 0; - } - p->normalize = PyObject_GetAttrString(m, "normalize"); - Py_DECREF(m); - if (!p->normalize) - { - return 0; - } - return 1; -} - -/* Checks if the NOTEQUAL token is valid given the current parser flags -0 indicates success and nonzero indicates failure (an exception may be set) */ -int -_PyPegen_check_barry_as_flufl(Parser *p, Token* t) { - assert(t->bytes != NULL); - assert(t->type == NOTEQUAL); - - const char* tok_str = PyBytes_AS_STRING(t->bytes); - if (p->flags & PyPARSE_BARRY_AS_BDFL && strcmp(tok_str, "<>") != 0) { - RAISE_SYNTAX_ERROR("with Barry as BDFL, use '<>' instead of '!='"); - return -1; - } - if (!(p->flags & PyPARSE_BARRY_AS_BDFL)) { - return strcmp(tok_str, "!="); - } - return 0; -} - -int -_PyPegen_check_legacy_stmt(Parser *p, expr_ty name) { - if (name->kind != Name_kind) { - return 0; - } - const char* candidates[2] = {"print", "exec"}; - for (int i=0; i<2; i++) { - if (PyUnicode_CompareWithASCIIString(name->v.Name.id, candidates[i]) == 0) { - return 1; - } - } - return 0; -} -PyObject * -_PyPegen_new_identifier(Parser *p, const char *n) -{ - PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL); - if (!id) { - goto error; - } - /* PyUnicode_DecodeUTF8 should always return a ready string. */ - assert(PyUnicode_IS_READY(id)); - /* Check whether there are non-ASCII characters in the - identifier; if so, normalize to NFKC. */ - if (!PyUnicode_IS_ASCII(id)) - { - PyObject *id2; - if (!init_normalization(p)) - { - Py_DECREF(id); - goto error; - } - PyObject *form = PyUnicode_InternFromString("NFKC"); - if (form == NULL) - { - Py_DECREF(id); - goto error; - } - PyObject *args[2] = {form, id}; - id2 = _PyObject_FastCall(p->normalize, args, 2); - Py_DECREF(id); - Py_DECREF(form); - if (!id2) { - goto error; - } - if (!PyUnicode_Check(id2)) - { - PyErr_Format(PyExc_TypeError, - "unicodedata.normalize() must return a string, not " - "%.200s", - _PyType_Name(Py_TYPE(id2))); - Py_DECREF(id2); - goto error; - } - id = id2; - } - PyUnicode_InternInPlace(&id); - if (_PyArena_AddPyObject(p->arena, id) < 0) - { - Py_DECREF(id); - goto error; - } - return id; +// Internal parser functions -error: - p->error_indicator = 1; - return NULL; -} - -static PyObject * -_create_dummy_identifier(Parser *p) -{ - return _PyPegen_new_identifier(p, ""); -} - -const char * -_PyPegen_get_expr_name(expr_ty e) -{ - assert(e != NULL); - switch (e->kind) { - case Attribute_kind: - return "attribute"; - case Subscript_kind: - return "subscript"; - case Starred_kind: - return "starred"; - case Name_kind: - return "name"; - case List_kind: - return "list"; - case Tuple_kind: - return "tuple"; - case Lambda_kind: - return "lambda"; - case Call_kind: - return "function call"; - case BoolOp_kind: - case BinOp_kind: - case UnaryOp_kind: - return "expression"; - case GeneratorExp_kind: - return "generator expression"; - case Yield_kind: - case YieldFrom_kind: - return "yield expression"; - case Await_kind: - return "await expression"; - case ListComp_kind: - return "list comprehension"; - case SetComp_kind: - return "set comprehension"; - case DictComp_kind: - return "dict comprehension"; - case Dict_kind: - return "dict literal"; - case Set_kind: - return "set display"; - case JoinedStr_kind: - case FormattedValue_kind: - return "f-string expression"; - case Constant_kind: { - PyObject *value = e->v.Constant.value; - if (value == Py_None) { - return "None"; - } - if (value == Py_False) { - return "False"; - } - if (value == Py_True) { - return "True"; - } - if (value == Py_Ellipsis) { - return "ellipsis"; - } - return "literal"; - } - case Compare_kind: - return "comparison"; - case IfExp_kind: - return "conditional expression"; - case NamedExpr_kind: - return "named expression"; - default: - PyErr_Format(PyExc_SystemError, - "unexpected expression in assignment %d (line %d)", - e->kind, e->lineno); - return NULL; - } -} - -static int -raise_decode_error(Parser *p) -{ - assert(PyErr_Occurred()); - const char *errtype = NULL; - if (PyErr_ExceptionMatches(PyExc_UnicodeError)) { - errtype = "unicode error"; - } - else if (PyErr_ExceptionMatches(PyExc_ValueError)) { - errtype = "value error"; - } - if (errtype) { - PyObject *type; - PyObject *value; - PyObject *tback; - PyObject *errstr; - PyErr_Fetch(&type, &value, &tback); - errstr = PyObject_Str(value); - if (errstr) { - RAISE_SYNTAX_ERROR("(%s) %U", errtype, errstr); - Py_DECREF(errstr); - } - else { - PyErr_Clear(); - RAISE_SYNTAX_ERROR("(%s) unknown error", errtype); - } - Py_XDECREF(type); - Py_XDECREF(value); - Py_XDECREF(tback); - } - - return -1; -} - -static inline void -raise_unclosed_parentheses_error(Parser *p) { - int error_lineno = p->tok->parenlinenostack[p->tok->level-1]; - int error_col = p->tok->parencolstack[p->tok->level-1]; - RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, - error_lineno, error_col, error_lineno, -1, - "'%c' was never closed", - p->tok->parenstack[p->tok->level-1]); -} - -static void -raise_tokenizer_init_error(PyObject *filename) -{ - if (!(PyErr_ExceptionMatches(PyExc_LookupError) - || PyErr_ExceptionMatches(PyExc_SyntaxError) - || PyErr_ExceptionMatches(PyExc_ValueError) - || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) { - return; - } - PyObject *errstr = NULL; - PyObject *tuple = NULL; - PyObject *type; - PyObject *value; - PyObject *tback; - PyErr_Fetch(&type, &value, &tback); - errstr = PyObject_Str(value); - if (!errstr) { - goto error; - } - - PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None); - if (!tmp) { - goto error; - } - - tuple = PyTuple_Pack(2, errstr, tmp); - Py_DECREF(tmp); - if (!value) { - goto error; - } - PyErr_SetObject(PyExc_SyntaxError, tuple); - -error: - Py_XDECREF(type); - Py_XDECREF(value); - Py_XDECREF(tback); - Py_XDECREF(errstr); - Py_XDECREF(tuple); -} - -static int -tokenizer_error(Parser *p) -{ - if (PyErr_Occurred()) { - return -1; - } - - const char *msg = NULL; - PyObject* errtype = PyExc_SyntaxError; - Py_ssize_t col_offset = -1; - switch (p->tok->done) { - case E_TOKEN: - msg = "invalid token"; - break; - case E_EOF: - if (p->tok->level) { - raise_unclosed_parentheses_error(p); - } else { - RAISE_SYNTAX_ERROR("unexpected EOF while parsing"); - } - return -1; - case E_DEDENT: - RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level"); - return -1; - case E_INTR: - if (!PyErr_Occurred()) { - PyErr_SetNone(PyExc_KeyboardInterrupt); - } - return -1; - case E_NOMEM: - PyErr_NoMemory(); - return -1; - case E_TABSPACE: - errtype = PyExc_TabError; - msg = "inconsistent use of tabs and spaces in indentation"; - break; - case E_TOODEEP: - errtype = PyExc_IndentationError; - msg = "too many levels of indentation"; - break; - case E_LINECONT: { - col_offset = p->tok->cur - p->tok->buf - 1; - msg = "unexpected character after line continuation character"; - break; - } - default: - msg = "unknown parsing error"; - } - - RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno, - col_offset >= 0 ? col_offset : 0, - p->tok->lineno, -1, msg); - return -1; -} - -void * -_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...) +asdl_stmt_seq* +_PyPegen_interactive_exit(Parser *p) { - if (p->fill == 0) { - va_list va; - va_start(va, errmsg); - _PyPegen_raise_error_known_location(p, errtype, 0, 0, 0, -1, errmsg, va); - va_end(va); - return NULL; - } - - Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1]; - Py_ssize_t col_offset; - Py_ssize_t end_col_offset = -1; - if (t->col_offset == -1) { - if (p->tok->cur == p->tok->buf) { - col_offset = 0; - } else { - const char* start = p->tok->buf ? p->tok->line_start : p->tok->buf; - col_offset = Py_SAFE_DOWNCAST(p->tok->cur - start, intptr_t, int); - } - } else { - col_offset = t->col_offset + 1; - } - - if (t->end_col_offset != -1) { - end_col_offset = t->end_col_offset + 1; + if (p->errcode) { + *(p->errcode) = E_EOF; } - - va_list va; - va_start(va, errmsg); - _PyPegen_raise_error_known_location(p, errtype, t->lineno, col_offset, t->end_lineno, end_col_offset, errmsg, va); - va_end(va); - return NULL; } -static PyObject * -get_error_line(Parser *p, Py_ssize_t lineno) -{ - /* If the file descriptor is interactive, the source lines of the current - * (multi-line) statement are stored in p->tok->interactive_src_start. - * If not, we're parsing from a string, which means that the whole source - * is stored in p->tok->str. */ - assert(p->tok->fp == NULL || p->tok->fp == stdin); - - char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str; - assert(cur_line != NULL); - - for (int i = 0; i < lineno - 1; i++) { - cur_line = strchr(cur_line, '\n') + 1; - } - - char *next_newline; - if ((next_newline = strchr(cur_line, '\n')) == NULL) { // This is the last line - next_newline = cur_line + strlen(cur_line); - } - return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace"); -} - Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset) { @@ -448,127 +37,6 @@ _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset) return size; } -void * -_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, - Py_ssize_t lineno, Py_ssize_t col_offset, - Py_ssize_t end_lineno, Py_ssize_t end_col_offset, - const char *errmsg, va_list va) -{ - PyObject *value = NULL; - PyObject *errstr = NULL; - PyObject *error_line = NULL; - PyObject *tmp = NULL; - p->error_indicator = 1; - - if (end_lineno == CURRENT_POS) { - end_lineno = p->tok->lineno; - } - if (end_col_offset == CURRENT_POS) { - end_col_offset = p->tok->cur - p->tok->line_start; - } - - if (p->start_rule == Py_fstring_input) { - const char *fstring_msg = "f-string: "; - Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg); - - char *new_errmsg = PyMem_Malloc(len + 1); // Lengths of both strings plus NULL character - if (!new_errmsg) { - return (void *) PyErr_NoMemory(); - } - - // Copy both strings into new buffer - memcpy(new_errmsg, fstring_msg, strlen(fstring_msg)); - memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg)); - new_errmsg[len] = 0; - errmsg = new_errmsg; - } - errstr = PyUnicode_FromFormatV(errmsg, va); - if (!errstr) { - goto error; - } - - if (p->tok->fp_interactive) { - error_line = get_error_line(p, lineno); - } - else if (p->start_rule == Py_file_input) { - error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename, - (int) lineno, p->tok->encoding); - } - - if (!error_line) { - /* PyErr_ProgramTextObject was not called or returned NULL. If it was not called, - then we need to find the error line from some other source, because - p->start_rule != Py_file_input. If it returned NULL, then it either unexpectedly - failed or we're parsing from a string or the REPL. There's a third edge case where - we're actually parsing from a file, which has an E_EOF SyntaxError and in that case - `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which - does not physically exist */ - assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF); - - if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) { - Py_ssize_t size = p->tok->inp - p->tok->buf; - error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace"); - } - else if (p->tok->fp == NULL || p->tok->fp == stdin) { - error_line = get_error_line(p, lineno); - } - else { - error_line = PyUnicode_FromStringAndSize("", 0); - } - if (!error_line) { - goto error; - } - } - - if (p->start_rule == Py_fstring_input) { - col_offset -= p->starting_col_offset; - end_col_offset -= p->starting_col_offset; - } - - Py_ssize_t col_number = col_offset; - Py_ssize_t end_col_number = end_col_offset; - - if (p->tok->encoding != NULL) { - col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset); - if (col_number < 0) { - goto error; - } - if (end_col_number > 0) { - Py_ssize_t end_col_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number); - if (end_col_offset < 0) { - goto error; - } else { - end_col_number = end_col_offset; - } - } - } - tmp = Py_BuildValue("(OiiNii)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number); - if (!tmp) { - goto error; - } - value = PyTuple_Pack(2, errstr, tmp); - Py_DECREF(tmp); - if (!value) { - goto error; - } - PyErr_SetObject(errtype, value); - - Py_DECREF(errstr); - Py_DECREF(value); - if (p->start_rule == Py_fstring_input) { - PyMem_Free((void *)errmsg); - } - return NULL; - -error: - Py_XDECREF(errstr); - Py_XDECREF(error_line); - if (p->start_rule == Py_fstring_input) { - PyMem_Free((void *)errmsg); - } - return NULL; -} - #if 0 static const char * token_name(int type) @@ -614,39 +82,24 @@ _PyPegen_update_memo(Parser *p, int mark, int type, void *node) return _PyPegen_insert_memo(p, mark, type, node); } -// Return dummy NAME. -void * -_PyPegen_dummy_name(Parser *p, ...) -{ - static void *cache = NULL; - - if (cache != NULL) { - return cache; - } - - PyObject *id = _create_dummy_identifier(p); - if (!id) { - return NULL; - } - cache = _PyAST_Name(id, Load, 1, 0, 1, 0, p->arena); - return cache; -} - static int -_get_keyword_or_name_type(Parser *p, const char *name, int name_len) +init_normalization(Parser *p) { - assert(name_len > 0); - if (name_len >= p->n_keyword_lists || - p->keywords[name_len] == NULL || - p->keywords[name_len]->type == -1) { - return NAME; + if (p->normalize) { + return 1; } - for (KeywordToken *k = p->keywords[name_len]; k != NULL && k->type != -1; k++) { - if (strncmp(k->str, name, name_len) == 0) { - return k->type; - } + PyObject *m = PyImport_ImportModuleNoBlock("unicodedata"); + if (!m) + { + return 0; } - return NAME; + p->normalize = PyObject_GetAttrString(m, "normalize"); + Py_DECREF(m); + if (!p->normalize) + { + return 0; + } + return 1; } static int @@ -686,6 +139,23 @@ growable_comment_array_deallocate(growable_comment_array *arr) { } static int +_get_keyword_or_name_type(Parser *p, const char *name, int name_len) +{ + assert(name_len > 0); + if (name_len >= p->n_keyword_lists || + p->keywords[name_len] == NULL || + p->keywords[name_len]->type == -1) { + return NAME; + } + for (KeywordToken *k = p->keywords[name_len]; k != NULL && k->type != -1; k++) { + if (strncmp(k->str, name, name_len) == 0) { + return k->type; + } + } + return NAME; +} + +static int initialize_token(Parser *p, Token *token, const char *start, const char *end, int token_type) { assert(token != NULL); @@ -715,10 +185,10 @@ initialize_token(Parser *p, Token *token, const char *start, const char *end, in p->fill += 1; if (token_type == ERRORTOKEN && p->tok->done == E_DECODE) { - return raise_decode_error(p); + return _Pypegen_raise_decode_error(p); } - return (token_type == ERRORTOKEN ? tokenizer_error(p) : 0); + return (token_type == ERRORTOKEN ? _Pypegen_tokenizer_error(p) : 0); } static int @@ -791,7 +261,6 @@ _PyPegen_fill_token(Parser *p) return initialize_token(p, t, start, end, type); } - #if defined(Py_DEBUG) // Instrumentation to count the effectiveness of memoization. // The array counts the number of tokens skipped by memoization, @@ -989,6 +458,62 @@ _PyPegen_get_last_nonnwhitespace_token(Parser *p) return token; } +PyObject * +_PyPegen_new_identifier(Parser *p, const char *n) +{ + PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL); + if (!id) { + goto error; + } + /* PyUnicode_DecodeUTF8 should always return a ready string. */ + assert(PyUnicode_IS_READY(id)); + /* Check whether there are non-ASCII characters in the + identifier; if so, normalize to NFKC. */ + if (!PyUnicode_IS_ASCII(id)) + { + PyObject *id2; + if (!init_normalization(p)) + { + Py_DECREF(id); + goto error; + } + PyObject *form = PyUnicode_InternFromString("NFKC"); + if (form == NULL) + { + Py_DECREF(id); + goto error; + } + PyObject *args[2] = {form, id}; + id2 = _PyObject_FastCall(p->normalize, args, 2); + Py_DECREF(id); + Py_DECREF(form); + if (!id2) { + goto error; + } + if (!PyUnicode_Check(id2)) + { + PyErr_Format(PyExc_TypeError, + "unicodedata.normalize() must return a string, not " + "%.200s", + _PyType_Name(Py_TYPE(id2))); + Py_DECREF(id2); + goto error; + } + id = id2; + } + PyUnicode_InternInPlace(&id); + if (_PyArena_AddPyObject(p->arena, id) < 0) + { + Py_DECREF(id); + goto error; + } + return id; + +error: + p->error_indicator = 1; + return NULL; +} + static expr_ty _PyPegen_name_from_token(Parser *p, Token* t) { @@ -1009,7 +534,6 @@ _PyPegen_name_from_token(Parser *p, Token* t) t->end_col_offset, p->arena); } - expr_ty _PyPegen_name_token(Parser *p) { @@ -1023,7 +547,6 @@ _PyPegen_string_token(Parser *p) return _PyPegen_expect_token(p, STRING); } - expr_ty _PyPegen_soft_keyword_token(Parser *p) { Token *t = _PyPegen_expect_token(p, NAME); if (t == NULL) { @@ -1197,18 +720,6 @@ bad_single_statement(Parser *p) } } -void -_PyPegen_Parser_Free(Parser *p) -{ - Py_XDECREF(p->normalize); - for (int i = 0; i < p->size; i++) { - PyMem_Free(p->tokens[i]); - } - PyMem_Free(p->tokens); - growable_comment_array_deallocate(&p->type_ignore_comments); - PyMem_Free(p); -} - static int compute_parser_flags(PyCompilerFlags *flags) { @@ -1234,6 +745,8 @@ compute_parser_flags(PyCompilerFlags *flags) return parser_flags; } +// Parser API + Parser * _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags, int feature_version, int *errcode, PyArena *arena) @@ -1289,8 +802,20 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags, return p; } +void +_PyPegen_Parser_Free(Parser *p) +{ + Py_XDECREF(p->normalize); + for (int i = 0; i < p->size; i++) { + PyMem_Free(p->tokens[i]); + } + PyMem_Free(p->tokens); + growable_comment_array_deallocate(&p->type_ignore_comments); + PyMem_Free(p); +} + static void -reset_parser_state(Parser *p) +reset_parser_state_for_error_pass(Parser *p) { for (int i = 0; i < p->fill; i++) { p->tokens[i]->memo = NULL; @@ -1302,60 +827,6 @@ reset_parser_state(Parser *p) p->tok->interactive_underflow = IUNDERFLOW_STOP; } -static int -_PyPegen_check_tokenizer_errors(Parser *p) { - // Tokenize the whole input to see if there are any tokenization - // errors such as mistmatching parentheses. These will get priority - // over generic syntax errors only if the line number of the error is - // before the one that we had for the generic error. - - // We don't want to tokenize to the end for interactive input - if (p->tok->prompt != NULL) { - return 0; - } - - PyObject *type, *value, *traceback; - PyErr_Fetch(&type, &value, &traceback); - - Token *current_token = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1]; - Py_ssize_t current_err_line = current_token->lineno; - - int ret = 0; - - for (;;) { - const char *start; - const char *end; - switch (_PyTokenizer_Get(p->tok, &start, &end)) { - case ERRORTOKEN: - if (p->tok->level != 0) { - int error_lineno = p->tok->parenlinenostack[p->tok->level-1]; - if (current_err_line > error_lineno) { - raise_unclosed_parentheses_error(p); - ret = -1; - goto exit; - } - } - break; - case ENDMARKER: - break; - default: - continue; - } - break; - } - - -exit: - if (PyErr_Occurred()) { - Py_XDECREF(value); - Py_XDECREF(type); - Py_XDECREF(traceback); - } else { - PyErr_Restore(type, value, traceback); - } - return ret; -} - void * _PyPegen_run_parser(Parser *p) { @@ -1364,46 +835,17 @@ _PyPegen_run_parser(Parser *p) if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) { return NULL; } + // Make a second parser pass. In this pass we activate heavier and slower checks + // to produce better error messages and more complete diagnostics. Extra "invalid_*" + // rules will be active during parsing. Token *last_token = p->tokens[p->fill - 1]; - reset_parser_state(p); + reset_parser_state_for_error_pass(p); _PyPegen_parse(p); - if (PyErr_Occurred()) { - // Prioritize tokenizer errors to custom syntax errors raised - // on the second phase only if the errors come from the parser. - if (p->tok->done == E_DONE && PyErr_ExceptionMatches(PyExc_SyntaxError)) { - _PyPegen_check_tokenizer_errors(p); - } - return NULL; - } - if (p->fill == 0) { - RAISE_SYNTAX_ERROR("error at start before reading any input"); - } - else if (p->tok->done == E_EOF) { - if (p->tok->level) { - raise_unclosed_parentheses_error(p); - } else { - RAISE_SYNTAX_ERROR("unexpected EOF while parsing"); - } - } - else { - if (p->tokens[p->fill-1]->type == INDENT) { - RAISE_INDENTATION_ERROR("unexpected indent"); - } - else if (p->tokens[p->fill-1]->type == DEDENT) { - RAISE_INDENTATION_ERROR("unexpected unindent"); - } - else { - // Use the last token we found on the first pass to avoid reporting - // incorrect locations for generic syntax errors just because we reached - // further away when trying to find specific syntax errors in the second - // pass. - RAISE_SYNTAX_ERROR_KNOWN_LOCATION(last_token, "invalid syntax"); - // _PyPegen_check_tokenizer_errors will override the existing - // generic SyntaxError we just raised if errors are found. - _PyPegen_check_tokenizer_errors(p); - } - } - return NULL; + + // Set SyntaxErrors accordingly depending on the parser/tokenizer status at the failure + // point. + _Pypegen_set_syntax_error(p, last_token); + return NULL; } if (p->start_rule == Py_single_input && bad_single_statement(p)) { @@ -1433,7 +875,7 @@ _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filena struct tok_state *tok = _PyTokenizer_FromFile(fp, enc, ps1, ps2); if (tok == NULL) { if (PyErr_Occurred()) { - raise_tokenizer_init_error(filename_ob); + _PyPegen_raise_tokenizer_init_error(filename_ob); return NULL; } return NULL; @@ -1478,7 +920,7 @@ _PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filen } if (tok == NULL) { if (PyErr_Occurred()) { - raise_tokenizer_init_error(filename_ob); + _PyPegen_raise_tokenizer_init_error(filename_ob); } return NULL; } @@ -1504,1138 +946,4 @@ _PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filen error: _PyTokenizer_Free(tok); return result; -} - -asdl_stmt_seq* -_PyPegen_interactive_exit(Parser *p) -{ - if (p->errcode) { - *(p->errcode) = E_EOF; - } - return NULL; -} - -/* Creates a single-element asdl_seq* that contains a */ -asdl_seq * -_PyPegen_singleton_seq(Parser *p, void *a) -{ - assert(a != NULL); - asdl_seq *seq = (asdl_seq*)_Py_asdl_generic_seq_new(1, p->arena); - if (!seq) { - return NULL; - } - asdl_seq_SET_UNTYPED(seq, 0, a); - return seq; -} - -/* Creates a copy of seq and prepends a to it */ -asdl_seq * -_PyPegen_seq_insert_in_front(Parser *p, void *a, asdl_seq *seq) -{ - assert(a != NULL); - if (!seq) { - return _PyPegen_singleton_seq(p, a); - } - - asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena); - if (!new_seq) { - return NULL; - } - - asdl_seq_SET_UNTYPED(new_seq, 0, a); - for (Py_ssize_t i = 1, l = asdl_seq_LEN(new_seq); i < l; i++) { - asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i - 1)); - } - return new_seq; -} - -/* Creates a copy of seq and appends a to it */ -asdl_seq * -_PyPegen_seq_append_to_end(Parser *p, asdl_seq *seq, void *a) -{ - assert(a != NULL); - if (!seq) { - return _PyPegen_singleton_seq(p, a); - } - - asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena); - if (!new_seq) { - return NULL; - } - - for (Py_ssize_t i = 0, l = asdl_seq_LEN(new_seq); i + 1 < l; i++) { - asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i)); - } - asdl_seq_SET_UNTYPED(new_seq, asdl_seq_LEN(new_seq) - 1, a); - return new_seq; -} - -static Py_ssize_t -_get_flattened_seq_size(asdl_seq *seqs) -{ - Py_ssize_t size = 0; - for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) { - asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i); - size += asdl_seq_LEN(inner_seq); - } - return size; -} - -/* Flattens an asdl_seq* of asdl_seq*s */ -asdl_seq * -_PyPegen_seq_flatten(Parser *p, asdl_seq *seqs) -{ - Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs); - assert(flattened_seq_size > 0); - - asdl_seq *flattened_seq = (asdl_seq*)_Py_asdl_generic_seq_new(flattened_seq_size, p->arena); - if (!flattened_seq) { - return NULL; - } - - int flattened_seq_idx = 0; - for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) { - asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i); - for (Py_ssize_t j = 0, li = asdl_seq_LEN(inner_seq); j < li; j++) { - asdl_seq_SET_UNTYPED(flattened_seq, flattened_seq_idx++, asdl_seq_GET_UNTYPED(inner_seq, j)); - } - } - assert(flattened_seq_idx == flattened_seq_size); - - return flattened_seq; -} - -void * -_PyPegen_seq_last_item(asdl_seq *seq) -{ - Py_ssize_t len = asdl_seq_LEN(seq); - return asdl_seq_GET_UNTYPED(seq, len - 1); -} - -void * -_PyPegen_seq_first_item(asdl_seq *seq) -{ - return asdl_seq_GET_UNTYPED(seq, 0); -} - - -/* Creates a new name of the form <first_name>.<second_name> */ -expr_ty -_PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name) -{ - assert(first_name != NULL && second_name != NULL); - PyObject *first_identifier = first_name->v.Name.id; - PyObject *second_identifier = second_name->v.Name.id; - - if (PyUnicode_READY(first_identifier) == -1) { - return NULL; - } - if (PyUnicode_READY(second_identifier) == -1) { - return NULL; - } - const char *first_str = PyUnicode_AsUTF8(first_identifier); - if (!first_str) { - return NULL; - } - const char *second_str = PyUnicode_AsUTF8(second_identifier); - if (!second_str) { - return NULL; - } - Py_ssize_t len = strlen(first_str) + strlen(second_str) + 1; // +1 for the dot - - PyObject *str = PyBytes_FromStringAndSize(NULL, len); - if (!str) { - return NULL; - } - - char *s = PyBytes_AS_STRING(str); - if (!s) { - return NULL; - } - - strcpy(s, first_str); - s += strlen(first_str); - *s++ = '.'; - strcpy(s, second_str); - s += strlen(second_str); - *s = '\0'; - - PyObject *uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str), PyBytes_GET_SIZE(str), NULL); - Py_DECREF(str); - if (!uni) { - return NULL; - } - PyUnicode_InternInPlace(&uni); - if (_PyArena_AddPyObject(p->arena, uni) < 0) { - Py_DECREF(uni); - return NULL; - } - - return _PyAST_Name(uni, Load, EXTRA_EXPR(first_name, second_name)); -} - -/* Counts the total number of dots in seq's tokens */ -int -_PyPegen_seq_count_dots(asdl_seq *seq) -{ - int number_of_dots = 0; - for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) { - Token *current_expr = asdl_seq_GET_UNTYPED(seq, i); - switch (current_expr->type) { - case ELLIPSIS: - number_of_dots += 3; - break; - case DOT: - number_of_dots += 1; - break; - default: - Py_UNREACHABLE(); - } - } - - return number_of_dots; -} - -/* Creates an alias with '*' as the identifier name */ -alias_ty -_PyPegen_alias_for_star(Parser *p, int lineno, int col_offset, int end_lineno, - int end_col_offset, PyArena *arena) { - PyObject *str = PyUnicode_InternFromString("*"); - if (!str) { - return NULL; - } - if (_PyArena_AddPyObject(p->arena, str) < 0) { - Py_DECREF(str); - return NULL; - } - return _PyAST_alias(str, NULL, lineno, col_offset, end_lineno, end_col_offset, arena); -} - -/* Creates a new asdl_seq* with the identifiers of all the names in seq */ -asdl_identifier_seq * -_PyPegen_map_names_to_ids(Parser *p, asdl_expr_seq *seq) -{ - Py_ssize_t len = asdl_seq_LEN(seq); - assert(len > 0); - - asdl_identifier_seq *new_seq = _Py_asdl_identifier_seq_new(len, p->arena); - if (!new_seq) { - return NULL; - } - for (Py_ssize_t i = 0; i < len; i++) { - expr_ty e = asdl_seq_GET(seq, i); - asdl_seq_SET(new_seq, i, e->v.Name.id); - } - return new_seq; -} - -/* Constructs a CmpopExprPair */ -CmpopExprPair * -_PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, expr_ty expr) -{ - assert(expr != NULL); - CmpopExprPair *a = _PyArena_Malloc(p->arena, sizeof(CmpopExprPair)); - if (!a) { - return NULL; - } - a->cmpop = cmpop; - a->expr = expr; - return a; -} - -asdl_int_seq * -_PyPegen_get_cmpops(Parser *p, asdl_seq *seq) -{ - Py_ssize_t len = asdl_seq_LEN(seq); - assert(len > 0); - - asdl_int_seq *new_seq = _Py_asdl_int_seq_new(len, p->arena); - if (!new_seq) { - return NULL; - } - for (Py_ssize_t i = 0; i < len; i++) { - CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i); - asdl_seq_SET(new_seq, i, pair->cmpop); - } - return new_seq; -} - -asdl_expr_seq * -_PyPegen_get_exprs(Parser *p, asdl_seq *seq) -{ - Py_ssize_t len = asdl_seq_LEN(seq); - assert(len > 0); - - asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); - if (!new_seq) { - return NULL; - } - for (Py_ssize_t i = 0; i < len; i++) { - CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i); - asdl_seq_SET(new_seq, i, pair->expr); - } - return new_seq; -} - -/* Creates an asdl_seq* where all the elements have been changed to have ctx as context */ -static asdl_expr_seq * -_set_seq_context(Parser *p, asdl_expr_seq *seq, expr_context_ty ctx) -{ - Py_ssize_t len = asdl_seq_LEN(seq); - if (len == 0) { - return NULL; - } - - asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); - if (!new_seq) { - return NULL; - } - for (Py_ssize_t i = 0; i < len; i++) { - expr_ty e = asdl_seq_GET(seq, i); - asdl_seq_SET(new_seq, i, _PyPegen_set_expr_context(p, e, ctx)); - } - return new_seq; -} - -static expr_ty -_set_name_context(Parser *p, expr_ty e, expr_context_ty ctx) -{ - return _PyAST_Name(e->v.Name.id, ctx, EXTRA_EXPR(e, e)); -} - -static expr_ty -_set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx) -{ - return _PyAST_Tuple( - _set_seq_context(p, e->v.Tuple.elts, ctx), - ctx, - EXTRA_EXPR(e, e)); -} - -static expr_ty -_set_list_context(Parser *p, expr_ty e, expr_context_ty ctx) -{ - return _PyAST_List( - _set_seq_context(p, e->v.List.elts, ctx), - ctx, - EXTRA_EXPR(e, e)); -} - -static expr_ty -_set_subscript_context(Parser *p, expr_ty e, expr_context_ty ctx) -{ - return _PyAST_Subscript(e->v.Subscript.value, e->v.Subscript.slice, - ctx, EXTRA_EXPR(e, e)); -} - -static expr_ty -_set_attribute_context(Parser *p, expr_ty e, expr_context_ty ctx) -{ - return _PyAST_Attribute(e->v.Attribute.value, e->v.Attribute.attr, - ctx, EXTRA_EXPR(e, e)); -} - -static expr_ty -_set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx) -{ - return _PyAST_Starred(_PyPegen_set_expr_context(p, e->v.Starred.value, ctx), - ctx, EXTRA_EXPR(e, e)); -} - -/* Creates an `expr_ty` equivalent to `expr` but with `ctx` as context */ -expr_ty -_PyPegen_set_expr_context(Parser *p, expr_ty expr, expr_context_ty ctx) -{ - assert(expr != NULL); - - expr_ty new = NULL; - switch (expr->kind) { - case Name_kind: - new = _set_name_context(p, expr, ctx); - break; - case Tuple_kind: - new = _set_tuple_context(p, expr, ctx); - break; - case List_kind: - new = _set_list_context(p, expr, ctx); - break; - case Subscript_kind: - new = _set_subscript_context(p, expr, ctx); - break; - case Attribute_kind: - new = _set_attribute_context(p, expr, ctx); - break; - case Starred_kind: - new = _set_starred_context(p, expr, ctx); - break; - default: - new = expr; - } - return new; -} - -/* Constructs a KeyValuePair that is used when parsing a dict's key value pairs */ -KeyValuePair * -_PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value) -{ - KeyValuePair *a = _PyArena_Malloc(p->arena, sizeof(KeyValuePair)); - if (!a) { - return NULL; - } - a->key = key; - a->value = value; - return a; -} - -/* Extracts all keys from an asdl_seq* of KeyValuePair*'s */ -asdl_expr_seq * -_PyPegen_get_keys(Parser *p, asdl_seq *seq) -{ - Py_ssize_t len = asdl_seq_LEN(seq); - asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); - if (!new_seq) { - return NULL; - } - for (Py_ssize_t i = 0; i < len; i++) { - KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i); - asdl_seq_SET(new_seq, i, pair->key); - } - return new_seq; -} - -/* Extracts all values from an asdl_seq* of KeyValuePair*'s */ -asdl_expr_seq * -_PyPegen_get_values(Parser *p, asdl_seq *seq) -{ - Py_ssize_t len = asdl_seq_LEN(seq); - asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); - if (!new_seq) { - return NULL; - } - for (Py_ssize_t i = 0; i < len; i++) { - KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i); - asdl_seq_SET(new_seq, i, pair->value); - } - return new_seq; -} - -/* Constructs a KeyPatternPair that is used when parsing mapping & class patterns */ -KeyPatternPair * -_PyPegen_key_pattern_pair(Parser *p, expr_ty key, pattern_ty pattern) -{ - KeyPatternPair *a = _PyArena_Malloc(p->arena, sizeof(KeyPatternPair)); - if (!a) { - return NULL; - } - a->key = key; - a->pattern = pattern; - return a; -} - -/* Extracts all keys from an asdl_seq* of KeyPatternPair*'s */ -asdl_expr_seq * -_PyPegen_get_pattern_keys(Parser *p, asdl_seq *seq) -{ - Py_ssize_t len = asdl_seq_LEN(seq); - asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena); - if (!new_seq) { - return NULL; - } - for (Py_ssize_t i = 0; i < len; i++) { - KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i); - asdl_seq_SET(new_seq, i, pair->key); - } - return new_seq; -} - -/* Extracts all patterns from an asdl_seq* of KeyPatternPair*'s */ -asdl_pattern_seq * -_PyPegen_get_patterns(Parser *p, asdl_seq *seq) -{ - Py_ssize_t len = asdl_seq_LEN(seq); - asdl_pattern_seq *new_seq = _Py_asdl_pattern_seq_new(len, p->arena); - if (!new_seq) { - return NULL; - } - for (Py_ssize_t i = 0; i < len; i++) { - KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i); - asdl_seq_SET(new_seq, i, pair->pattern); - } - return new_seq; -} - -/* Constructs a NameDefaultPair */ -NameDefaultPair * -_PyPegen_name_default_pair(Parser *p, arg_ty arg, expr_ty value, Token *tc) -{ - NameDefaultPair *a = _PyArena_Malloc(p->arena, sizeof(NameDefaultPair)); - if (!a) { - return NULL; - } - a->arg = _PyPegen_add_type_comment_to_arg(p, arg, tc); - a->value = value; - return a; -} - -/* Constructs a SlashWithDefault */ -SlashWithDefault * -_PyPegen_slash_with_default(Parser *p, asdl_arg_seq *plain_names, asdl_seq *names_with_defaults) -{ - SlashWithDefault *a = _PyArena_Malloc(p->arena, sizeof(SlashWithDefault)); - if (!a) { - return NULL; - } - a->plain_names = plain_names; - a->names_with_defaults = names_with_defaults; - return a; -} - -/* Constructs a StarEtc */ -StarEtc * -_PyPegen_star_etc(Parser *p, arg_ty vararg, asdl_seq *kwonlyargs, arg_ty kwarg) -{ - StarEtc *a = _PyArena_Malloc(p->arena, sizeof(StarEtc)); - if (!a) { - return NULL; - } - a->vararg = vararg; - a->kwonlyargs = kwonlyargs; - a->kwarg = kwarg; - return a; -} - -asdl_seq * -_PyPegen_join_sequences(Parser *p, asdl_seq *a, asdl_seq *b) -{ - Py_ssize_t first_len = asdl_seq_LEN(a); - Py_ssize_t second_len = asdl_seq_LEN(b); - asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(first_len + second_len, p->arena); - if (!new_seq) { - return NULL; - } - - int k = 0; - for (Py_ssize_t i = 0; i < first_len; i++) { - asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(a, i)); - } - for (Py_ssize_t i = 0; i < second_len; i++) { - asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(b, i)); - } - - return new_seq; -} - -static asdl_arg_seq* -_get_names(Parser *p, asdl_seq *names_with_defaults) -{ - Py_ssize_t len = asdl_seq_LEN(names_with_defaults); - asdl_arg_seq *seq = _Py_asdl_arg_seq_new(len, p->arena); - if (!seq) { - return NULL; - } - for (Py_ssize_t i = 0; i < len; i++) { - NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i); - asdl_seq_SET(seq, i, pair->arg); - } - return seq; -} - -static asdl_expr_seq * -_get_defaults(Parser *p, asdl_seq *names_with_defaults) -{ - Py_ssize_t len = asdl_seq_LEN(names_with_defaults); - asdl_expr_seq *seq = _Py_asdl_expr_seq_new(len, p->arena); - if (!seq) { - return NULL; - } - for (Py_ssize_t i = 0; i < len; i++) { - NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i); - asdl_seq_SET(seq, i, pair->value); - } - return seq; -} - -static int -_make_posonlyargs(Parser *p, - asdl_arg_seq *slash_without_default, - SlashWithDefault *slash_with_default, - asdl_arg_seq **posonlyargs) { - if (slash_without_default != NULL) { - *posonlyargs = slash_without_default; - } - else if (slash_with_default != NULL) { - asdl_arg_seq *slash_with_default_names = - _get_names(p, slash_with_default->names_with_defaults); - if (!slash_with_default_names) { - return -1; - } - *posonlyargs = (asdl_arg_seq*)_PyPegen_join_sequences( - p, - (asdl_seq*)slash_with_default->plain_names, - (asdl_seq*)slash_with_default_names); - } - else { - *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena); - } - return *posonlyargs == NULL ? -1 : 0; -} - -static int -_make_posargs(Parser *p, - asdl_arg_seq *plain_names, - asdl_seq *names_with_default, - asdl_arg_seq **posargs) { - if (plain_names != NULL && names_with_default != NULL) { - asdl_arg_seq *names_with_default_names = _get_names(p, names_with_default); - if (!names_with_default_names) { - return -1; - } - *posargs = (asdl_arg_seq*)_PyPegen_join_sequences( - p,(asdl_seq*)plain_names, (asdl_seq*)names_with_default_names); - } - else if (plain_names == NULL && names_with_default != NULL) { - *posargs = _get_names(p, names_with_default); - } - else if (plain_names != NULL && names_with_default == NULL) { - *posargs = plain_names; - } - else { - *posargs = _Py_asdl_arg_seq_new(0, p->arena); - } - return *posargs == NULL ? -1 : 0; -} - -static int -_make_posdefaults(Parser *p, - SlashWithDefault *slash_with_default, - asdl_seq *names_with_default, - asdl_expr_seq **posdefaults) { - if (slash_with_default != NULL && names_with_default != NULL) { - asdl_expr_seq *slash_with_default_values = - _get_defaults(p, slash_with_default->names_with_defaults); - if (!slash_with_default_values) { - return -1; - } - asdl_expr_seq *names_with_default_values = _get_defaults(p, names_with_default); - if (!names_with_default_values) { - return -1; - } - *posdefaults = (asdl_expr_seq*)_PyPegen_join_sequences( - p, - (asdl_seq*)slash_with_default_values, - (asdl_seq*)names_with_default_values); - } - else if (slash_with_default == NULL && names_with_default != NULL) { - *posdefaults = _get_defaults(p, names_with_default); - } - else if (slash_with_default != NULL && names_with_default == NULL) { - *posdefaults = _get_defaults(p, slash_with_default->names_with_defaults); - } - else { - *posdefaults = _Py_asdl_expr_seq_new(0, p->arena); - } - return *posdefaults == NULL ? -1 : 0; -} - -static int -_make_kwargs(Parser *p, StarEtc *star_etc, - asdl_arg_seq **kwonlyargs, - asdl_expr_seq **kwdefaults) { - if (star_etc != NULL && star_etc->kwonlyargs != NULL) { - *kwonlyargs = _get_names(p, star_etc->kwonlyargs); - } - else { - *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena); - } - - if (*kwonlyargs == NULL) { - return -1; - } - - if (star_etc != NULL && star_etc->kwonlyargs != NULL) { - *kwdefaults = _get_defaults(p, star_etc->kwonlyargs); - } - else { - *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena); - } - - if (*kwdefaults == NULL) { - return -1; - } - - return 0; -} - -/* Constructs an arguments_ty object out of all the parsed constructs in the parameters rule */ -arguments_ty -_PyPegen_make_arguments(Parser *p, asdl_arg_seq *slash_without_default, - SlashWithDefault *slash_with_default, asdl_arg_seq *plain_names, - asdl_seq *names_with_default, StarEtc *star_etc) -{ - asdl_arg_seq *posonlyargs; - if (_make_posonlyargs(p, slash_without_default, slash_with_default, &posonlyargs) == -1) { - return NULL; - } - - asdl_arg_seq *posargs; - if (_make_posargs(p, plain_names, names_with_default, &posargs) == -1) { - return NULL; - } - - asdl_expr_seq *posdefaults; - if (_make_posdefaults(p,slash_with_default, names_with_default, &posdefaults) == -1) { - return NULL; - } - - arg_ty vararg = NULL; - if (star_etc != NULL && star_etc->vararg != NULL) { - vararg = star_etc->vararg; - } - - asdl_arg_seq *kwonlyargs; - asdl_expr_seq *kwdefaults; - if (_make_kwargs(p, star_etc, &kwonlyargs, &kwdefaults) == -1) { - return NULL; - } - - arg_ty kwarg = NULL; - if (star_etc != NULL && star_etc->kwarg != NULL) { - kwarg = star_etc->kwarg; - } - - return _PyAST_arguments(posonlyargs, posargs, vararg, kwonlyargs, - kwdefaults, kwarg, posdefaults, p->arena); -} - - -/* Constructs an empty arguments_ty object, that gets used when a function accepts no - * arguments. */ -arguments_ty -_PyPegen_empty_arguments(Parser *p) -{ - asdl_arg_seq *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena); - if (!posonlyargs) { - return NULL; - } - asdl_arg_seq *posargs = _Py_asdl_arg_seq_new(0, p->arena); - if (!posargs) { - return NULL; - } - asdl_expr_seq *posdefaults = _Py_asdl_expr_seq_new(0, p->arena); - if (!posdefaults) { - return NULL; - } - asdl_arg_seq *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena); - if (!kwonlyargs) { - return NULL; - } - asdl_expr_seq *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena); - if (!kwdefaults) { - return NULL; - } - - return _PyAST_arguments(posonlyargs, posargs, NULL, kwonlyargs, - kwdefaults, NULL, posdefaults, p->arena); -} - -/* Encapsulates the value of an operator_ty into an AugOperator struct */ -AugOperator * -_PyPegen_augoperator(Parser *p, operator_ty kind) -{ - AugOperator *a = _PyArena_Malloc(p->arena, sizeof(AugOperator)); - if (!a) { - return NULL; - } - a->kind = kind; - return a; -} - -/* Construct a FunctionDef equivalent to function_def, but with decorators */ -stmt_ty -_PyPegen_function_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty function_def) -{ - assert(function_def != NULL); - if (function_def->kind == AsyncFunctionDef_kind) { - return _PyAST_AsyncFunctionDef( - function_def->v.FunctionDef.name, function_def->v.FunctionDef.args, - function_def->v.FunctionDef.body, decorators, function_def->v.FunctionDef.returns, - function_def->v.FunctionDef.type_comment, function_def->lineno, - function_def->col_offset, function_def->end_lineno, function_def->end_col_offset, - p->arena); - } - - return _PyAST_FunctionDef( - function_def->v.FunctionDef.name, function_def->v.FunctionDef.args, - function_def->v.FunctionDef.body, decorators, - function_def->v.FunctionDef.returns, - function_def->v.FunctionDef.type_comment, function_def->lineno, - function_def->col_offset, function_def->end_lineno, - function_def->end_col_offset, p->arena); -} - -/* Construct a ClassDef equivalent to class_def, but with decorators */ -stmt_ty -_PyPegen_class_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty class_def) -{ - assert(class_def != NULL); - return _PyAST_ClassDef( - class_def->v.ClassDef.name, class_def->v.ClassDef.bases, - class_def->v.ClassDef.keywords, class_def->v.ClassDef.body, decorators, - class_def->lineno, class_def->col_offset, class_def->end_lineno, - class_def->end_col_offset, p->arena); -} - -/* Construct a KeywordOrStarred */ -KeywordOrStarred * -_PyPegen_keyword_or_starred(Parser *p, void *element, int is_keyword) -{ - KeywordOrStarred *a = _PyArena_Malloc(p->arena, sizeof(KeywordOrStarred)); - if (!a) { - return NULL; - } - a->element = element; - a->is_keyword = is_keyword; - return a; -} - -/* Get the number of starred expressions in an asdl_seq* of KeywordOrStarred*s */ -static int -_seq_number_of_starred_exprs(asdl_seq *seq) -{ - int n = 0; - for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) { - KeywordOrStarred *k = asdl_seq_GET_UNTYPED(seq, i); - if (!k->is_keyword) { - n++; - } - } - return n; -} - -/* Extract the starred expressions of an asdl_seq* of KeywordOrStarred*s */ -asdl_expr_seq * -_PyPegen_seq_extract_starred_exprs(Parser *p, asdl_seq *kwargs) -{ - int new_len = _seq_number_of_starred_exprs(kwargs); - if (new_len == 0) { - return NULL; - } - asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(new_len, p->arena); - if (!new_seq) { - return NULL; - } - - int idx = 0; - for (Py_ssize_t i = 0, len = asdl_seq_LEN(kwargs); i < len; i++) { - KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i); - if (!k->is_keyword) { - asdl_seq_SET(new_seq, idx++, k->element); - } - } - return new_seq; -} - -/* Return a new asdl_seq* with only the keywords in kwargs */ -asdl_keyword_seq* -_PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs) -{ - Py_ssize_t len = asdl_seq_LEN(kwargs); - Py_ssize_t new_len = len - _seq_number_of_starred_exprs(kwargs); - if (new_len == 0) { - return NULL; - } - asdl_keyword_seq *new_seq = _Py_asdl_keyword_seq_new(new_len, p->arena); - if (!new_seq) { - return NULL; - } - - int idx = 0; - for (Py_ssize_t i = 0; i < len; i++) { - KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i); - if (k->is_keyword) { - asdl_seq_SET(new_seq, idx++, k->element); - } - } - return new_seq; -} - -expr_ty -_PyPegen_concatenate_strings(Parser *p, asdl_seq *strings) -{ - Py_ssize_t len = asdl_seq_LEN(strings); - assert(len > 0); - - Token *first = asdl_seq_GET_UNTYPED(strings, 0); - Token *last = asdl_seq_GET_UNTYPED(strings, len - 1); - - int bytesmode = 0; - PyObject *bytes_str = NULL; - - FstringParser state; - _PyPegen_FstringParser_Init(&state); - - for (Py_ssize_t i = 0; i < len; i++) { - Token *t = asdl_seq_GET_UNTYPED(strings, i); - - int this_bytesmode; - int this_rawmode; - PyObject *s; - const char *fstr; - Py_ssize_t fstrlen = -1; - - if (_PyPegen_parsestr(p, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen, t) != 0) { - goto error; - } - - /* Check that we are not mixing bytes with unicode. */ - if (i != 0 && bytesmode != this_bytesmode) { - RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals"); - Py_XDECREF(s); - goto error; - } - bytesmode = this_bytesmode; - - if (fstr != NULL) { - assert(s == NULL && !bytesmode); - - int result = _PyPegen_FstringParser_ConcatFstring(p, &state, &fstr, fstr + fstrlen, - this_rawmode, 0, first, t, last); - if (result < 0) { - goto error; - } - } - else { - /* String or byte string. */ - assert(s != NULL && fstr == NULL); - assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s)); - - if (bytesmode) { - if (i == 0) { - bytes_str = s; - } - else { - PyBytes_ConcatAndDel(&bytes_str, s); - if (!bytes_str) { - goto error; - } - } - } - else { - /* This is a regular string. Concatenate it. */ - if (_PyPegen_FstringParser_ConcatAndDel(&state, s) < 0) { - goto error; - } - } - } - } - - if (bytesmode) { - if (_PyArena_AddPyObject(p->arena, bytes_str) < 0) { - goto error; - } - return _PyAST_Constant(bytes_str, NULL, first->lineno, - first->col_offset, last->end_lineno, - last->end_col_offset, p->arena); - } - - return _PyPegen_FstringParser_Finish(p, &state, first, last); - -error: - Py_XDECREF(bytes_str); - _PyPegen_FstringParser_Dealloc(&state); - if (PyErr_Occurred()) { - raise_decode_error(p); - } - return NULL; -} - -expr_ty -_PyPegen_ensure_imaginary(Parser *p, expr_ty exp) -{ - if (exp->kind != Constant_kind || !PyComplex_CheckExact(exp->v.Constant.value)) { - RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "imaginary number required in complex literal"); - return NULL; - } - return exp; -} - -expr_ty -_PyPegen_ensure_real(Parser *p, expr_ty exp) -{ - if (exp->kind != Constant_kind || PyComplex_CheckExact(exp->v.Constant.value)) { - RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "real number required in complex literal"); - return NULL; - } - return exp; -} - -mod_ty -_PyPegen_make_module(Parser *p, asdl_stmt_seq *a) { - asdl_type_ignore_seq *type_ignores = NULL; - Py_ssize_t num = p->type_ignore_comments.num_items; - if (num > 0) { - // Turn the raw (comment, lineno) pairs into TypeIgnore objects in the arena - type_ignores = _Py_asdl_type_ignore_seq_new(num, p->arena); - if (type_ignores == NULL) { - return NULL; - } - for (int i = 0; i < num; i++) { - PyObject *tag = _PyPegen_new_type_comment(p, p->type_ignore_comments.items[i].comment); - if (tag == NULL) { - return NULL; - } - type_ignore_ty ti = _PyAST_TypeIgnore(p->type_ignore_comments.items[i].lineno, - tag, p->arena); - if (ti == NULL) { - return NULL; - } - asdl_seq_SET(type_ignores, i, ti); - } - } - return _PyAST_Module(a, type_ignores, p->arena); -} - -// Error reporting helpers - -expr_ty -_PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type) -{ - if (e == NULL) { - return NULL; - } - -#define VISIT_CONTAINER(CONTAINER, TYPE) do { \ - Py_ssize_t len = asdl_seq_LEN((CONTAINER)->v.TYPE.elts);\ - for (Py_ssize_t i = 0; i < len; i++) {\ - expr_ty other = asdl_seq_GET((CONTAINER)->v.TYPE.elts, i);\ - expr_ty child = _PyPegen_get_invalid_target(other, targets_type);\ - if (child != NULL) {\ - return child;\ - }\ - }\ - } while (0) - - // We only need to visit List and Tuple nodes recursively as those - // are the only ones that can contain valid names in targets when - // they are parsed as expressions. Any other kind of expression - // that is a container (like Sets or Dicts) is directly invalid and - // we don't need to visit it recursively. - - switch (e->kind) { - case List_kind: - VISIT_CONTAINER(e, List); - return NULL; - case Tuple_kind: - VISIT_CONTAINER(e, Tuple); - return NULL; - case Starred_kind: - if (targets_type == DEL_TARGETS) { - return e; - } - return _PyPegen_get_invalid_target(e->v.Starred.value, targets_type); - case Compare_kind: - // This is needed, because the `a in b` in `for a in b` gets parsed - // as a comparison, and so we need to search the left side of the comparison - // for invalid targets. - if (targets_type == FOR_TARGETS) { - cmpop_ty cmpop = (cmpop_ty) asdl_seq_GET(e->v.Compare.ops, 0); - if (cmpop == In) { - return _PyPegen_get_invalid_target(e->v.Compare.left, targets_type); - } - return NULL; - } - return e; - case Name_kind: - case Subscript_kind: - case Attribute_kind: - return NULL; - default: - return e; - } -} - -void *_PyPegen_arguments_parsing_error(Parser *p, expr_ty e) { - int kwarg_unpacking = 0; - for (Py_ssize_t i = 0, l = asdl_seq_LEN(e->v.Call.keywords); i < l; i++) { - keyword_ty keyword = asdl_seq_GET(e->v.Call.keywords, i); - if (!keyword->arg) { - kwarg_unpacking = 1; - } - } - - const char *msg = NULL; - if (kwarg_unpacking) { - msg = "positional argument follows keyword argument unpacking"; - } else { - msg = "positional argument follows keyword argument"; - } - - return RAISE_SYNTAX_ERROR(msg); -} - - -static inline expr_ty -_PyPegen_get_last_comprehension_item(comprehension_ty comprehension) { - if (comprehension->ifs == NULL || asdl_seq_LEN(comprehension->ifs) == 0) { - return comprehension->iter; - } - return PyPegen_last_item(comprehension->ifs, expr_ty); -} - -void * -_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq *comprehensions) -{ - /* The rule that calls this function is 'args for_if_clauses'. - For the input f(L, x for x in y), L and x are in args and - the for is parsed as a for_if_clause. We have to check if - len <= 1, so that input like dict((a, b) for a, b in x) - gets successfully parsed and then we pass the last - argument (x in the above example) as the location of the - error */ - Py_ssize_t len = asdl_seq_LEN(args->v.Call.args); - if (len <= 1) { - return NULL; - } - - comprehension_ty last_comprehension = PyPegen_last_item(comprehensions, comprehension_ty); - - return RAISE_SYNTAX_ERROR_KNOWN_RANGE( - (expr_ty) asdl_seq_GET(args->v.Call.args, len - 1), - _PyPegen_get_last_comprehension_item(last_comprehension), - "Generator expression must be parenthesized" - ); -} - - -expr_ty _PyPegen_collect_call_seqs(Parser *p, asdl_expr_seq *a, asdl_seq *b, - int lineno, int col_offset, int end_lineno, - int end_col_offset, PyArena *arena) { - Py_ssize_t args_len = asdl_seq_LEN(a); - Py_ssize_t total_len = args_len; - - if (b == NULL) { - return _PyAST_Call(_PyPegen_dummy_name(p), a, NULL, lineno, col_offset, - end_lineno, end_col_offset, arena); - - } - - asdl_expr_seq *starreds = _PyPegen_seq_extract_starred_exprs(p, b); - asdl_keyword_seq *keywords = _PyPegen_seq_delete_starred_exprs(p, b); - - if (starreds) { - total_len += asdl_seq_LEN(starreds); - } - - asdl_expr_seq *args = _Py_asdl_expr_seq_new(total_len, arena); - - Py_ssize_t i = 0; - for (i = 0; i < args_len; i++) { - asdl_seq_SET(args, i, asdl_seq_GET(a, i)); - } - for (; i < total_len; i++) { - asdl_seq_SET(args, i, asdl_seq_GET(starreds, i - args_len)); - } - - return _PyAST_Call(_PyPegen_dummy_name(p), args, keywords, lineno, - col_offset, end_lineno, end_col_offset, arena); -} +}
\ No newline at end of file |