1 files changed, 2163 insertions, 0 deletions
diff --git a/Parser/pegen.c b/Parser/pegen.c
new file mode 100644
index 0000000..e29910b
--- /dev/null
+++ b/Parser/pegen.c
@@ -0,0 +1,2163 @@
+#include <Python.h>
+#include <errcode.h>
+#include "tokenizer.h"
+
+#include "pegen.h"
+#include "string_parser.h"
+
+PyObject *
+_PyPegen_new_type_comment(Parser *p, char *s)
+{
+    PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
+    if (res == NULL) {
+        return NULL;
+    }
+    if (PyArena_AddPyObject(p->arena, res) < 0) {
+        Py_DECREF(res);
+        return NULL;
+    }
+    return res;
+}
+
+arg_ty
+_PyPegen_add_type_comment_to_arg(Parser *p, arg_ty a, Token *tc)
+{
+    if (tc == NULL) {
+        return a;
+    }
+    char *bytes = PyBytes_AsString(tc->bytes);
+    if (bytes == NULL) {
+        return NULL;
+    }
+    PyObject *tco = _PyPegen_new_type_comment(p, bytes);
+    if (tco == NULL) {
+        return NULL;
+    }
+    return arg(a->arg, a->annotation, tco,
+               a->lineno, a->col_offset, a->end_lineno, a->end_col_offset,
+               p->arena);
+}
+
+static int
+init_normalization(Parser *p)
+{
+    if (p->normalize) {
+        return 1;
+    }
+    PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
+    if (!m)
+    {
+        return 0;
+    }
+    p->normalize = PyObject_GetAttrString(m, "normalize");
+    Py_DECREF(m);
+    if (!p->normalize)
+    {
+        return 0;
+    }
+    return 1;
+}
+
+/* Checks if the NOTEQUAL token is valid given the current parser flags
+0 indicates success and nonzero indicates failure (an exception may be set) */
+int
+_PyPegen_check_barry_as_flufl(Parser *p) {
+    Token *t = p->tokens[p->fill - 1];
+    assert(t->bytes != NULL);
+    assert(t->type == NOTEQUAL);
+
+    char* tok_str = PyBytes_AS_STRING(t->bytes);
+    if (p->flags & PyPARSE_BARRY_AS_BDFL && strcmp(tok_str, "<>")){
+        RAISE_SYNTAX_ERROR("with Barry as BDFL, use '<>' instead of '!='");
+        return -1;
+    } else if (!(p->flags & PyPARSE_BARRY_AS_BDFL)) {
+        return strcmp(tok_str, "!=");
+    }
+    return 0;
+}
+
+PyObject *
+_PyPegen_new_identifier(Parser *p, char *n)
+{
+    PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
+    if (!id) {
+        goto error;
+    }
+    /* PyUnicode_DecodeUTF8 should always return a ready string. */
+    assert(PyUnicode_IS_READY(id));
+    /* Check whether there are non-ASCII characters in the
+       identifier; if so, normalize to NFKC. */
+    if (!PyUnicode_IS_ASCII(id))
+    {
+        PyObject *id2;
+        if (!init_normalization(p))
+        {
+            Py_DECREF(id);
+            goto error;
+        }
+        PyObject *form = PyUnicode_InternFromString("NFKC");
+        if (form == NULL)
+        {
+            Py_DECREF(id);
+            goto error;
+        }
+        PyObject *args[2] = {form, id};
+        id2 = _PyObject_FastCall(p->normalize, args, 2);
+        Py_DECREF(id);
+        Py_DECREF(form);
+        if (!id2) {
+            goto error;
+        }
+        if (!PyUnicode_Check(id2))
+        {
+            PyErr_Format(PyExc_TypeError,
+                         "unicodedata.normalize() must return a string, not "
+                         "%.200s",
+                         _PyType_Name(Py_TYPE(id2)));
+            Py_DECREF(id2);
+            goto error;
+        }
+        id = id2;
+    }
+    PyUnicode_InternInPlace(&id);
+    if (PyArena_AddPyObject(p->arena, id) < 0)
+    {
+        Py_DECREF(id);
+        goto error;
+    }
+    return id;
+
+error:
+    p->error_indicator = 1;
+    return NULL;
+}
+
+static PyObject *
+_create_dummy_identifier(Parser *p)
+{
+    return _PyPegen_new_identifier(p, "");
+}
+
+static inline Py_ssize_t
+byte_offset_to_character_offset(PyObject *line, int col_offset)
+{
+    const char *str = PyUnicode_AsUTF8(line);
+    if (!str) {
+        return 0;
+    }
+    PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
+    if (!text) {
+        return 0;
+    }
+    Py_ssize_t size = PyUnicode_GET_LENGTH(text);
+    str = PyUnicode_AsUTF8(text);
+    if (str != NULL && (int)strlen(str) == col_offset) {
+        size = strlen(str);
+    }
+    Py_DECREF(text);
+    return size;
+}
+
+const char *
+_PyPegen_get_expr_name(expr_ty e)
+{
+    assert(e != NULL);
+    switch (e->kind) {
+        case Attribute_kind:
+            return "attribute";
+        case Subscript_kind:
+            return "subscript";
+        case Starred_kind:
+            return "starred";
+        case Name_kind:
+            return "name";
+        case List_kind:
+            return "list";
+        case Tuple_kind:
+            return "tuple";
+        case Lambda_kind:
+            return "lambda";
+        case Call_kind:
+            return "function call";
+        case BoolOp_kind:
+        case BinOp_kind:
+        case UnaryOp_kind:
+            return "operator";
+        case GeneratorExp_kind:
+            return "generator expression";
+        case Yield_kind:
+        case YieldFrom_kind:
+            return "yield expression";
+        case Await_kind:
+            return "await expression";
+        case ListComp_kind:
+            return "list comprehension";
+        case SetComp_kind:
+            return "set comprehension";
+        case DictComp_kind:
+            return "dict comprehension";
+        case Dict_kind:
+            return "dict display";
+        case Set_kind:
+            return "set display";
+        case JoinedStr_kind:
+        case FormattedValue_kind:
+            return "f-string expression";
+        case Constant_kind: {
+            PyObject *value = e->v.Constant.value;
+            if (value == Py_None) {
+                return "None";
+            }
+            if (value == Py_False) {
+                return "False";
+            }
+            if (value == Py_True) {
+                return "True";
+            }
+            if (value == Py_Ellipsis) {
+                return "Ellipsis";
+            }
+            return "literal";
+        }
+        case Compare_kind:
+            return "comparison";
+        case IfExp_kind:
+            return "conditional expression";
+        case NamedExpr_kind:
+            return "named expression";
+        default:
+            PyErr_Format(PyExc_SystemError,
+                         "unexpected expression in assignment %d (line %d)",
+                         e->kind, e->lineno);
+            return NULL;
+    }
+}
+
+static int
+raise_decode_error(Parser *p)
+{
+    assert(PyErr_Occurred());
+    const char *errtype = NULL;
+    if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
+        errtype = "unicode error";
+    }
+    else if (PyErr_ExceptionMatches(PyExc_ValueError)) {
+        errtype = "value error";
+    }
+    if (errtype) {
+        PyObject *type, *value, *tback, *errstr;
+        PyErr_Fetch(&type, &value, &tback);
+        errstr = PyObject_Str(value);
+        if (errstr) {
+            RAISE_SYNTAX_ERROR("(%s) %U", errtype, errstr);
+            Py_DECREF(errstr);
+        }
+        else {
+            PyErr_Clear();
+            RAISE_SYNTAX_ERROR("(%s) unknown error", errtype);
+        }
+        Py_XDECREF(type);
+        Py_XDECREF(value);
+        Py_XDECREF(tback);
+    }
+
+    return -1;
+}
+
+static void
+raise_tokenizer_init_error(PyObject *filename)
+{
+    if (!(PyErr_ExceptionMatches(PyExc_LookupError)
+          || PyErr_ExceptionMatches(PyExc_ValueError)
+          || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {
+        return;
+    }
+    PyObject *errstr = NULL;
+    PyObject *tuple = NULL;
+    PyObject *type, *value, *tback;
+    PyErr_Fetch(&type, &value, &tback);
+    errstr = PyObject_Str(value);
+    if (!errstr) {
+        goto error;
+    }
+
+    PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None);
+    if (!tmp) {
+        goto error;
+    }
+
+    tuple = PyTuple_Pack(2, errstr, tmp);
+    Py_DECREF(tmp);
+    if (!value) {
+        goto error;
+    }
+    PyErr_SetObject(PyExc_SyntaxError, tuple);
+
+error:
+    Py_XDECREF(type);
+    Py_XDECREF(value);
+    Py_XDECREF(tback);
+    Py_XDECREF(errstr);
+    Py_XDECREF(tuple);
+}
+
+static int
+tokenizer_error(Parser *p)
+{
+    if (PyErr_Occurred()) {
+        return -1;
+    }
+
+    const char *msg = NULL;
+    PyObject* errtype = PyExc_SyntaxError;
+    switch (p->tok->done) {
+        case E_TOKEN:
+            msg = "invalid token";
+            break;
+        case E_EOFS:
+            RAISE_SYNTAX_ERROR("EOF while scanning triple-quoted string literal");
+            return -1;
+        case E_EOLS:
+            RAISE_SYNTAX_ERROR("EOL while scanning string literal");
+            return -1;
+        case E_EOF:
+            RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+            return -1;
+        case E_DEDENT:
+            RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level");
+            return -1;
+        case E_INTR:
+            if (!PyErr_Occurred()) {
+                PyErr_SetNone(PyExc_KeyboardInterrupt);
+            }
+            return -1;
+        case E_NOMEM:
+            PyErr_NoMemory();
+            return -1;
+        case E_TABSPACE:
+            errtype = PyExc_TabError;
+            msg = "inconsistent use of tabs and spaces in indentation";
+            break;
+        case E_TOODEEP:
+            errtype = PyExc_IndentationError;
+            msg = "too many levels of indentation";
+            break;
+        case E_LINECONT:
+            msg = "unexpected character after line continuation character";
+            break;
+        default:
+            msg = "unknown parsing error";
+    }
+
+    PyErr_Format(errtype, msg);
+    // There is no reliable column information for this error
+    PyErr_SyntaxLocationObject(p->tok->filename, p->tok->lineno, 0);
+
+    return -1;
+}
+
+void *
+_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
+{
+    Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
+    int col_offset;
+    if (t->col_offset == -1) {
+        col_offset = Py_SAFE_DOWNCAST(p->tok->cur - p->tok->buf,
+                                      intptr_t, int);
+    } else {
+        col_offset = t->col_offset + 1;
+    }
+
+    va_list va;
+    va_start(va, errmsg);
+    _PyPegen_raise_error_known_location(p, errtype, t->lineno,
+                                        col_offset, errmsg, va);
+    va_end(va);
+
+    return NULL;
+}
+
+
+void *
+_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
+                                    int lineno, int col_offset,
+                                    const char *errmsg, va_list va)
+{
+    PyObject *value = NULL;
+    PyObject *errstr = NULL;
+    PyObject *error_line = NULL;
+    PyObject *tmp = NULL;
+    p->error_indicator = 1;
+
+    errstr = PyUnicode_FromFormatV(errmsg, va);
+    if (!errstr) {
+        goto error;
+    }
+
+    if (p->start_rule == Py_file_input) {
+        error_line = PyErr_ProgramTextObject(p->tok->filename, lineno);
+    }
+
+    if (!error_line) {
+        Py_ssize_t size = p->tok->inp - p->tok->buf;
+        if (size && p->tok->buf[size-1] == '\n') {
+            size--;
+        }
+        error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
+        if (!error_line) {
+            goto error;
+        }
+    }
+
+    Py_ssize_t col_number = byte_offset_to_character_offset(error_line, col_offset);
+
+    tmp = Py_BuildValue("(OiiN)", p->tok->filename, lineno, col_number, error_line);
+    if (!tmp) {
+        goto error;
+    }
+    value = PyTuple_Pack(2, errstr, tmp);
+    Py_DECREF(tmp);
+    if (!value) {
+        goto error;
+    }
+    PyErr_SetObject(errtype, value);
+
+    Py_DECREF(errstr);
+    Py_DECREF(value);
+    return NULL;
+
+error:
+    Py_XDECREF(errstr);
+    Py_XDECREF(error_line);
+    return NULL;
+}
+
+#if 0
+static const char *
+token_name(int type)
+{
+    if (0 <= type && type <= N_TOKENS) {
+        return _PyParser_TokenNames[type];
+    }
+    return "<Huh?>";
+}
+#endif
+
+// Here, mark is the start of the node, while p->mark is the end.
+// If node==NULL, they should be the same.
+int
+_PyPegen_insert_memo(Parser *p, int mark, int type, void *node)
+{
+    // Insert in front
+    Memo *m = PyArena_Malloc(p->arena, sizeof(Memo));
+    if (m == NULL) {
+        return -1;
+    }
+    m->type = type;
+    m->node = node;
+    m->mark = p->mark;
+    m->next = p->tokens[mark]->memo;
+    p->tokens[mark]->memo = m;
+    return 0;
+}
+
+// Like _PyPegen_insert_memo(), but updates an existing node if found.
+int
+_PyPegen_update_memo(Parser *p, int mark, int type, void *node)
+{
+    for (Memo *m = p->tokens[mark]->memo; m != NULL; m = m->next) {
+        if (m->type == type) {
+            // Update existing node.
+            m->node = node;
+            m->mark = p->mark;
+            return 0;
+        }
+    }
+    // Insert new node.
+    return _PyPegen_insert_memo(p, mark, type, node);
+}
+
+// Return dummy NAME.
+void *
+_PyPegen_dummy_name(Parser *p, ...)
+{
+    static void *cache = NULL;
+
+    if (cache != NULL) {
+        return cache;
+    }
+
+    PyObject *id = _create_dummy_identifier(p);
+    if (!id) {
+        return NULL;
+    }
+    cache = Name(id, Load, 1, 0, 1, 0, p->arena);
+    return cache;
+}
+
+static int
+_get_keyword_or_name_type(Parser *p, const char *name, int name_len)
+{
+    if (name_len >= p->n_keyword_lists || p->keywords[name_len] == NULL) {
+        return NAME;
+    }
+    for (KeywordToken *k = p->keywords[name_len]; k->type != -1; k++) {
+        if (strncmp(k->str, name, name_len) == 0) {
+            return k->type;
+        }
+    }
+    return NAME;
+}
+
+static int
+growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
+    assert(initial_size > 0);
+    arr->items = PyMem_Malloc(initial_size * sizeof(*arr->items));
+    arr->size = initial_size;
+    arr->num_items = 0;
+
+    return arr->items != NULL;
+}
+
+static int
+growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
+    if (arr->num_items >= arr->size) {
+        size_t new_size = arr->size * 2;
+        void *new_items_array = PyMem_Realloc(arr->items, new_size * sizeof(*arr->items));
+        if (!new_items_array) {
+            return 0;
+        }
+        arr->items = new_items_array;
+        arr->size = new_size;
+    }
+
+    arr->items[arr->num_items].lineno = lineno;
+    arr->items[arr->num_items].comment = comment;  // Take ownership
+    arr->num_items++;
+    return 1;
+}
+
+static void
+growable_comment_array_deallocate(growable_comment_array *arr) {
+    for (unsigned i = 0; i < arr->num_items; i++) {
+        PyMem_Free(arr->items[i].comment);
+    }
+    PyMem_Free(arr->items);
+}
+
+int
+_PyPegen_fill_token(Parser *p)
+{
+    const char *start, *end;
+    int type = PyTokenizer_Get(p->tok, &start, &end);
+
+    // Record and skip '# type: ignore' comments
+    while (type == TYPE_IGNORE) {
+        Py_ssize_t len = end - start;
+        char *tag = PyMem_Malloc(len + 1);
+        if (tag == NULL) {
+            PyErr_NoMemory();
+            return -1;
+        }
+        strncpy(tag, start, len);
+        tag[len] = '\0';
+        // Ownership of tag passes to the growable array
+        if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) {
+            PyErr_NoMemory();
+            return -1;
+        }
+        type = PyTokenizer_Get(p->tok, &start, &end);
+    }
+
+    if (type == ENDMARKER && p->start_rule == Py_single_input && p->parsing_started) {
+        type = NEWLINE; /* Add an extra newline */
+        p->parsing_started = 0;
+
+        if (p->tok->indent && !(p->flags & PyPARSE_DONT_IMPLY_DEDENT)) {
+            p->tok->pendin = -p->tok->indent;
+            p->tok->indent = 0;
+        }
+    }
+    else {
+        p->parsing_started = 1;
+    }
+
+    if (p->fill == p->size) {
+        int newsize = p->size * 2;
+        Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
+        if (new_tokens == NULL) {
+            PyErr_NoMemory();
+            return -1;
+        }
+        else {
+            p->tokens = new_tokens;
+        }
+        for (int i = p->size; i < newsize; i++) {
+            p->tokens[i] = PyMem_Malloc(sizeof(Token));
+            if (p->tokens[i] == NULL) {
+                p->size = i; // Needed, in order to cleanup correctly after parser fails
+                PyErr_NoMemory();
+                return -1;
+            }
+            memset(p->tokens[i], '\0', sizeof(Token));
+        }
+        p->size = newsize;
+    }
+
+    Token *t = p->tokens[p->fill];
+    t->type = (type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : type;
+    t->bytes = PyBytes_FromStringAndSize(start, end - start);
+    if (t->bytes == NULL) {
+        return -1;
+    }
+    PyArena_AddPyObject(p->arena, t->bytes);
+
+    int lineno = type == STRING ? p->tok->first_lineno : p->tok->lineno;
+    const char *line_start = type == STRING ? p->tok->multi_line_start : p->tok->line_start;
+    int end_lineno = p->tok->lineno;
+    int col_offset = -1, end_col_offset = -1;
+    if (start != NULL && start >= line_start) {
+        col_offset = (int)(start - line_start);
+    }
+    if (end != NULL && end >= p->tok->line_start) {
+        end_col_offset = (int)(end - p->tok->line_start);
+    }
+
+    t->lineno = p->starting_lineno + lineno;
+    t->col_offset = p->tok->lineno == 1 ? p->starting_col_offset + col_offset : col_offset;
+    t->end_lineno = p->starting_lineno + end_lineno;
+    t->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;
+
+    p->fill += 1;
+
+    if (type == ERRORTOKEN) {
+        if (p->tok->done == E_DECODE) {
+            return raise_decode_error(p);
+        }
+        else {
+            return tokenizer_error(p);
+        }
+    }
+
+    return 0;
+}
+
+// Instrumentation to count the effectiveness of memoization.
+// The array counts the number of tokens skipped by memoization,
+// indexed by type.
+
+#define NSTATISTICS 2000
+static long memo_statistics[NSTATISTICS];
+
+void
+_PyPegen_clear_memo_statistics()
+{
+    for (int i = 0; i < NSTATISTICS; i++) {
+        memo_statistics[i] = 0;
+    }
+}
+
+PyObject *
+_PyPegen_get_memo_statistics()
+{
+    PyObject *ret = PyList_New(NSTATISTICS);
+    if (ret == NULL) {
+        return NULL;
+    }
+    for (int i = 0; i < NSTATISTICS; i++) {
+        PyObject *value = PyLong_FromLong(memo_statistics[i]);
+        if (value == NULL) {
+            Py_DECREF(ret);
+            return NULL;
+        }
+        // PyList_SetItem borrows a reference to value.
+        if (PyList_SetItem(ret, i, value) < 0) {
+            Py_DECREF(ret);
+            return NULL;
+        }
+    }
+    return ret;
+}
+
+int  // bool
+_PyPegen_is_memoized(Parser *p, int type, void *pres)
+{
+    if (p->mark == p->fill) {
+        if (_PyPegen_fill_token(p) < 0) {
+            p->error_indicator = 1;
+            return -1;
+        }
+    }
+
+    Token *t = p->tokens[p->mark];
+
+    for (Memo *m = t->memo; m != NULL; m = m->next) {
+        if (m->type == type) {
+            if (0 <= type && type < NSTATISTICS) {
+                long count = m->mark - p->mark;
+                // A memoized negative result counts for one.
+                if (count <= 0) {
+                    count = 1;
+                }
+                memo_statistics[type] += count;
+            }
+            p->mark = m->mark;
+            *(void **)(pres) = m->node;
+            return 1;
+        }
+    }
+    return 0;
+}
+
+
+int
+_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
+{
+    int mark = p->mark;
+    void *res = func(p);
+    p->mark = mark;
+    return (res != NULL) == positive;
+}
+
+int
+_PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char*), Parser *p, const char* arg)
+{
+    int mark = p->mark;
+    void *res = func(p, arg);
+    p->mark = mark;
+    return (res != NULL) == positive;
+}
+
+int
+_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
+{
+    int mark = p->mark;
+    void *res = func(p, arg);
+    p->mark = mark;
+    return (res != NULL) == positive;
+}
+
+int
+_PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p)
+{
+    int mark = p->mark;
+    void *res = (void*)func(p);
+    p->mark = mark;
+    return (res != NULL) == positive;
+}
+
+Token *
+_PyPegen_expect_token(Parser *p, int type)
+{
+    if (p->mark == p->fill) {
+        if (_PyPegen_fill_token(p) < 0) {
+            p->error_indicator = 1;
+            return NULL;
+        }
+    }
+    Token *t = p->tokens[p->mark];
+    if (t->type != type) {
+        return NULL;
+    }
+    p->mark += 1;
+    return t;
+}
+
+expr_ty
+_PyPegen_expect_soft_keyword(Parser *p, const char *keyword)
+{
+    if (p->mark == p->fill) {
+        if (_PyPegen_fill_token(p) < 0) {
+            p->error_indicator = 1;
+            return NULL;
+        }
+    }
+    Token *t = p->tokens[p->mark];
+    if (t->type != NAME) {
+        return NULL;
+    }
+    char *s = PyBytes_AsString(t->bytes);
+    if (!s) {
+        p->error_indicator = 1;
+        return NULL;
+    }
+    if (strcmp(s, keyword) != 0) {
+        return NULL;
+    }
+    return _PyPegen_name_token(p);
+}
+
+Token *
+_PyPegen_get_last_nonnwhitespace_token(Parser *p)
+{
+    assert(p->mark >= 0);
+    Token *token = NULL;
+    for (int m = p->mark - 1; m >= 0; m--) {
+        token = p->tokens[m];
+        if (token->type != ENDMARKER && (token->type < NEWLINE || token->type > DEDENT)) {
+            break;
+        }
+    }
+    return token;
+}
+
+expr_ty
+_PyPegen_name_token(Parser *p)
+{
+    Token *t = _PyPegen_expect_token(p, NAME);
+    if (t == NULL) {
+        return NULL;
+    }
+    char* s = PyBytes_AsString(t->bytes);
+    if (!s) {
+        p->error_indicator = 1;
+        return NULL;
+    }
+    PyObject *id = _PyPegen_new_identifier(p, s);
+    if (id == NULL) {
+        p->error_indicator = 1;
+        return NULL;
+    }
+    return Name(id, Load, t->lineno, t->col_offset, t->end_lineno, t->end_col_offset,
+                p->arena);
+}
+
+void *
+_PyPegen_string_token(Parser *p)
+{
+    return _PyPegen_expect_token(p, STRING);
+}
+
+static PyObject *
+parsenumber_raw(const char *s)
+{
+    const char *end;
+    long x;
+    double dx;
+    Py_complex compl;
+    int imflag;
+
+    assert(s != NULL);
+    errno = 0;
+    end = s + strlen(s) - 1;
+    imflag = *end == 'j' || *end == 'J';
+    if (s[0] == '0') {
+        x = (long)PyOS_strtoul(s, (char **)&end, 0);
+        if (x < 0 && errno == 0) {
+            return PyLong_FromString(s, (char **)0, 0);
+        }
+    }
+    else
+        x = PyOS_strtol(s, (char **)&end, 0);
+    if (*end == '\0') {
+        if (errno != 0)
+            return PyLong_FromString(s, (char **)0, 0);
+        return PyLong_FromLong(x);
+    }
+    /* XXX Huge floats may silently fail */
+    if (imflag) {
+        compl.real = 0.;
+        compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
+        if (compl.imag == -1.0 && PyErr_Occurred())
+            return NULL;
+        return PyComplex_FromCComplex(compl);
+    }
+    else {
+        dx = PyOS_string_to_double(s, NULL, NULL);
+        if (dx == -1.0 && PyErr_Occurred())
+            return NULL;
+        return PyFloat_FromDouble(dx);
+    }
+}
+
+static PyObject *
+parsenumber(const char *s)
+{
+    char *dup, *end;
+    PyObject *res = NULL;
+
+    assert(s != NULL);
+
+    if (strchr(s, '_') == NULL) {
+        return parsenumber_raw(s);
+    }
+    /* Create a duplicate without underscores. */
+    dup = PyMem_Malloc(strlen(s) + 1);
+    if (dup == NULL) {
+        return PyErr_NoMemory();
+    }
+    end = dup;
+    for (; *s; s++) {
+        if (*s != '_') {
+            *end++ = *s;
+        }
+    }
+    *end = '\0';
+    res = parsenumber_raw(dup);
+    PyMem_Free(dup);
+    return res;
+}
+
+expr_ty
+_PyPegen_number_token(Parser *p)
+{
+    Token *t = _PyPegen_expect_token(p, NUMBER);
+    if (t == NULL) {
+        return NULL;
+    }
+
+    char *num_raw = PyBytes_AsString(t->bytes);
+    if (num_raw == NULL) {
+        p->error_indicator = 1;
+        return NULL;
+    }
+
+    if (p->feature_version < 6 && strchr(num_raw, '_') != NULL) {
+        p->error_indicator = 1;
+        return RAISE_SYNTAX_ERROR("Underscores in numeric literals are only supported "
+                                  "in Python 3.6 and greater");
+    }
+
+    PyObject *c = parsenumber(num_raw);
+
+    if (c == NULL) {
+        p->error_indicator = 1;
+        return NULL;
+    }
+
+    if (PyArena_AddPyObject(p->arena, c) < 0) {
+        Py_DECREF(c);
+        p->error_indicator = 1;
+        return NULL;
+    }
+
+    return Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno, t->end_col_offset,
+                    p->arena);
+}
+
+static int // bool
+newline_in_string(Parser *p, const char *cur)
+{
+    for (const char *c = cur; c >= p->tok->buf; c--) {
+        if (*c == '\'' || *c == '"') {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/* Check that the source for a single input statement really is a single
+   statement by looking at what is left in the buffer after parsing.
+   Trailing whitespace and comments are OK. */
+static int // bool
+bad_single_statement(Parser *p)
+{
+    const char *cur = strchr(p->tok->buf, '\n');
+
+    /* Newlines are allowed if preceded by a line continuation character
+       or if they appear inside a string. */
+    if (!cur || *(cur - 1) == '\\' || newline_in_string(p, cur)) {
+        return 0;
+    }
+    char c = *cur;
+
+    for (;;) {
+        while (c == ' ' || c == '\t' || c == '\n' || c == '\014') {
+            c = *++cur;
+        }
+
+        if (!c) {
+            return 0;
+        }
+
+        if (c != '#') {
+            return 1;
+        }
+
+        /* Suck up comment. */
+        while (c && c != '\n') {
+            c = *++cur;
+        }
+    }
+}
+
+void
+_PyPegen_Parser_Free(Parser *p)
+{
+    Py_XDECREF(p->normalize);
+    for (int i = 0; i < p->size; i++) {
+        PyMem_Free(p->tokens[i]);
+    }
+    PyMem_Free(p->tokens);
+    growable_comment_array_deallocate(&p->type_ignore_comments);
+    PyMem_Free(p);
+}
+
+static int
+compute_parser_flags(PyCompilerFlags *flags)
+{
+    int parser_flags = 0;
+    if (!flags) {
+        return 0;
+    }
+    if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT) {
+        parser_flags |= PyPARSE_DONT_IMPLY_DEDENT;
+    }
+    if (flags->cf_flags & PyCF_IGNORE_COOKIE) {
+        parser_flags |= PyPARSE_IGNORE_COOKIE;
+    }
+    if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL) {
+        parser_flags |= PyPARSE_BARRY_AS_BDFL;
+    }
+    if (flags->cf_flags & PyCF_TYPE_COMMENTS) {
+        parser_flags |= PyPARSE_TYPE_COMMENTS;
+    }
+    if (flags->cf_feature_version < 7) {
+        parser_flags |= PyPARSE_ASYNC_HACKS;
+    }
+    return parser_flags;
+}
+
+Parser *
+_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
+                    int feature_version, int *errcode, PyArena *arena)
+{
+    Parser *p = PyMem_Malloc(sizeof(Parser));
+    if (p == NULL) {
+        return (Parser *) PyErr_NoMemory();
+    }
+    assert(tok != NULL);
+    tok->type_comments = (flags & PyPARSE_TYPE_COMMENTS) > 0;
+    tok->async_hacks = (flags & PyPARSE_ASYNC_HACKS) > 0;
+    p->tok = tok;
+    p->keywords = NULL;
+    p->n_keyword_lists = -1;
+    p->tokens = PyMem_Malloc(sizeof(Token *));
+    if (!p->tokens) {
+        PyMem_Free(p);
+        return (Parser *) PyErr_NoMemory();
+    }
+    p->tokens[0] = PyMem_Calloc(1, sizeof(Token));
+    if (!p->tokens) {
+        PyMem_Free(p->tokens);
+        PyMem_Free(p);
+        return (Parser *) PyErr_NoMemory();
+    }
+    if (!growable_comment_array_init(&p->type_ignore_comments, 10)) {
+        PyMem_Free(p->tokens[0]);
+        PyMem_Free(p->tokens);
+        PyMem_Free(p);
+        return (Parser *) PyErr_NoMemory();
+    }
+
+    p->mark = 0;
+    p->fill = 0;
+    p->size = 1;
+
+    p->errcode = errcode;
+    p->arena = arena;
+    p->start_rule = start_rule;
+    p->parsing_started = 0;
+    p->normalize = NULL;
+    p->error_indicator = 0;
+
+    p->starting_lineno = 0;
+    p->starting_col_offset = 0;
+    p->flags = flags;
+    p->feature_version = feature_version;
+    p->known_err_token = NULL;
+    p->level = 0;
+
+    return p;
+}
+
+void *
+_PyPegen_run_parser(Parser *p)
+{
+    void *res = _PyPegen_parse(p);
+    if (res == NULL) {
+        if (PyErr_Occurred()) {
+            return NULL;
+        }
+        if (p->fill == 0) {
+            RAISE_SYNTAX_ERROR("error at start before reading any input");
+        }
+        else if (p->tok->done == E_EOF) {
+            RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+        }
+        else {
+            if (p->tokens[p->fill-1]->type == INDENT) {
+                RAISE_INDENTATION_ERROR("unexpected indent");
+            }
+            else if (p->tokens[p->fill-1]->type == DEDENT) {
+                RAISE_INDENTATION_ERROR("unexpected unindent");
+            }
+            else {
+                RAISE_SYNTAX_ERROR("invalid syntax");
+            }
+        }
+        return NULL;
+    }
+
+    if (p->start_rule == Py_single_input && bad_single_statement(p)) {
+        p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future
+        return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement");
+    }
+
+    return res;
+}
+
+mod_ty
+_PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob,
+                             const char *enc, const char *ps1, const char *ps2,
+                             PyCompilerFlags *flags, int *errcode, PyArena *arena)
+{
+    struct tok_state *tok = PyTokenizer_FromFile(fp, enc, ps1, ps2);
+    if (tok == NULL) {
+        if (PyErr_Occurred()) {
+            raise_tokenizer_init_error(filename_ob);
+            return NULL;
+        }
+        return NULL;
+    }
+    // This transfers the ownership to the tokenizer
+    tok->filename = filename_ob;
+    Py_INCREF(filename_ob);
+
+    // From here on we need to clean up even if there's an error
+    mod_ty result = NULL;
+
+    int parser_flags = compute_parser_flags(flags);
+    Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, PY_MINOR_VERSION,
+                                    errcode, arena);
+    if (p == NULL) {
+        goto error;
+    }
+
+    result = _PyPegen_run_parser(p);
+    _PyPegen_Parser_Free(p);
+
+error:
+    PyTokenizer_Free(tok);
+    return result;
+}
+
+mod_ty
+_PyPegen_run_parser_from_file(const char *filename, int start_rule,
+                     PyObject *filename_ob, PyCompilerFlags *flags, PyArena *arena)
+{
+    FILE *fp = fopen(filename, "rb");
+    if (fp == NULL) {
+        PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
+        return NULL;
+    }
+
+    mod_ty result = _PyPegen_run_parser_from_file_pointer(fp, start_rule, filename_ob,
+                                                 NULL, NULL, NULL, flags, NULL, arena);
+
+    fclose(fp);
+    return result;
+}
+
+mod_ty
+_PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob,
+                       PyCompilerFlags *flags, PyArena *arena)
+{
+    int exec_input = start_rule == Py_file_input;
+
+    struct tok_state *tok;
+    if (flags == NULL || flags->cf_flags & PyCF_IGNORE_COOKIE) {
+        tok = PyTokenizer_FromUTF8(str, exec_input);
+    } else {
+        tok = PyTokenizer_FromString(str, exec_input);
+    }
+    if (tok == NULL) {
+        if (PyErr_Occurred()) {
+            raise_tokenizer_init_error(filename_ob);
+        }
+        return NULL;
+    }
+    // This transfers the ownership to the tokenizer
+    tok->filename = filename_ob;
+    Py_INCREF(filename_ob);
+
+    // We need to clear up from here on
+    mod_ty result = NULL;
+
+    int parser_flags = compute_parser_flags(flags);
+    int feature_version = flags ? flags->cf_feature_version : PY_MINOR_VERSION;
+    Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, feature_version,
+                                    NULL, arena);
+    if (p == NULL) {
+        goto error;
+    }
+
+    result = _PyPegen_run_parser(p);
+    _PyPegen_Parser_Free(p);
+
+error:
+    PyTokenizer_Free(tok);
+    return result;
+}
+
+void *
+_PyPegen_interactive_exit(Parser *p)
+{
+    if (p->errcode) {
+        *(p->errcode) = E_EOF;
+    }
+    return NULL;
+}
+
+/* Creates a single-element asdl_seq* that contains a */
+asdl_seq *
+_PyPegen_singleton_seq(Parser *p, void *a)
+{
+    assert(a != NULL);
+    asdl_seq *seq = _Py_asdl_seq_new(1, p->arena);
+    if (!seq) {
+        return NULL;
+    }
+    asdl_seq_SET(seq, 0, a);
+    return seq;
+}
+
+/* Creates a copy of seq and prepends a to it */
+asdl_seq *
+_PyPegen_seq_insert_in_front(Parser *p, void *a, asdl_seq *seq)
+{
+    assert(a != NULL);
+    if (!seq) {
+        return _PyPegen_singleton_seq(p, a);
+    }
+
+    asdl_seq *new_seq = _Py_asdl_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
+    if (!new_seq) {
+        return NULL;
+    }
+
+    asdl_seq_SET(new_seq, 0, a);
+    for (Py_ssize_t i = 1, l = asdl_seq_LEN(new_seq); i < l; i++) {
+        asdl_seq_SET(new_seq, i, asdl_seq_GET(seq, i - 1));
+    }
+    return new_seq;
+}
+
+/* Creates a copy of seq and appends a to it */
+asdl_seq *
+_PyPegen_seq_append_to_end(Parser *p, asdl_seq *seq, void *a)
+{
+    assert(a != NULL);
+    if (!seq) {
+        return _PyPegen_singleton_seq(p, a);
+    }
+
+    asdl_seq *new_seq = _Py_asdl_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
+    if (!new_seq) {
+        return NULL;
+    }
+
+    for (Py_ssize_t i = 0, l = asdl_seq_LEN(new_seq); i + 1 < l; i++) {
+        asdl_seq_SET(new_seq, i, asdl_seq_GET(seq, i));
+    }
+    asdl_seq_SET(new_seq, asdl_seq_LEN(new_seq) - 1, a);
+    return new_seq;
+}
+
+static Py_ssize_t
+_get_flattened_seq_size(asdl_seq *seqs)
+{
+    Py_ssize_t size = 0;
+    for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
+        asdl_seq *inner_seq = asdl_seq_GET(seqs, i);
+        size += asdl_seq_LEN(inner_seq);
+    }
+    return size;
+}
+
+/* Flattens an asdl_seq* of asdl_seq*s */
+asdl_seq *
+_PyPegen_seq_flatten(Parser *p, asdl_seq *seqs)
+{
+    Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs);
+    assert(flattened_seq_size > 0);
+
+    asdl_seq *flattened_seq = _Py_asdl_seq_new(flattened_seq_size, p->arena);
+    if (!flattened_seq) {
+        return NULL;
+    }
+
+    int flattened_seq_idx = 0;
+    for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
+        asdl_seq *inner_seq = asdl_seq_GET(seqs, i);
+        for (Py_ssize_t j = 0, li = asdl_seq_LEN(inner_seq); j < li; j++) {
+            asdl_seq_SET(flattened_seq, flattened_seq_idx++, asdl_seq_GET(inner_seq, j));
+        }
+    }
+    assert(flattened_seq_idx == flattened_seq_size);
+
+    return flattened_seq;
+}
+
+/* Creates a new name of the form <first_name>.<second_name> */
+expr_ty
+_PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name)
+{
+    assert(first_name != NULL && second_name != NULL);
+    PyObject *first_identifier = first_name->v.Name.id;
+    PyObject *second_identifier = second_name->v.Name.id;
+
+    if (PyUnicode_READY(first_identifier) == -1) {
+        return NULL;
+    }
+    if (PyUnicode_READY(second_identifier) == -1) {
+        return NULL;
+    }
+    const char *first_str = PyUnicode_AsUTF8(first_identifier);
+    if (!first_str) {
+        return NULL;
+    }
+    const char *second_str = PyUnicode_AsUTF8(second_identifier);
+    if (!second_str) {
+        return NULL;
+    }
+    Py_ssize_t len = strlen(first_str) + strlen(second_str) + 1;  // +1 for the dot
+
+    PyObject *str = PyBytes_FromStringAndSize(NULL, len);
+    if (!str) {
+        return NULL;
+    }
+
+    char *s = PyBytes_AS_STRING(str);
+    if (!s) {
+        return NULL;
+    }
+
+    strcpy(s, first_str);
+    s += strlen(first_str);
+    *s++ = '.';
+    strcpy(s, second_str);
+    s += strlen(second_str);
+    *s = '\0';
+
+    PyObject *uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str), PyBytes_GET_SIZE(str), NULL);
+    Py_DECREF(str);
+    if (!uni) {
+        return NULL;
+    }
+    PyUnicode_InternInPlace(&uni);
+    if (PyArena_AddPyObject(p->arena, uni) < 0) {
+        Py_DECREF(uni);
+        return NULL;
+    }
+
+    return _Py_Name(uni, Load, EXTRA_EXPR(first_name, second_name));
+}
+
+/* Counts the total number of dots in seq's tokens */
+int
+_PyPegen_seq_count_dots(asdl_seq *seq)
+{
+    int number_of_dots = 0;
+    for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
+        Token *current_expr = asdl_seq_GET(seq, i);
+        switch (current_expr->type) {
+            case ELLIPSIS:
+                number_of_dots += 3;
+                break;
+            case DOT:
+                number_of_dots += 1;
+                break;
+            default:
+                Py_UNREACHABLE();
+        }
+    }
+
+    return number_of_dots;
+}
+
+/* Creates an alias with '*' as the identifier name */
+alias_ty
+_PyPegen_alias_for_star(Parser *p)
+{
+    PyObject *str = PyUnicode_InternFromString("*");
+    if (!str) {
+        return NULL;
+    }
+    if (PyArena_AddPyObject(p->arena, str) < 0) {
+        Py_DECREF(str);
+        return NULL;
+    }
+    return alias(str, NULL, p->arena);
+}
+
+/* Creates a new asdl_seq* with the identifiers of all the names in seq */
+asdl_seq *
+_PyPegen_map_names_to_ids(Parser *p, asdl_seq *seq)
+{
+    Py_ssize_t len = asdl_seq_LEN(seq);
+    assert(len > 0);
+
+    asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
+    if (!new_seq) {
+        return NULL;
+    }
+    for (Py_ssize_t i = 0; i < len; i++) {
+        expr_ty e = asdl_seq_GET(seq, i);
+        asdl_seq_SET(new_seq, i, e->v.Name.id);
+    }
+    return new_seq;
+}
+
+/* Constructs a CmpopExprPair */
+CmpopExprPair *
+_PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, expr_ty expr)
+{
+    assert(expr != NULL);
+    CmpopExprPair *a = PyArena_Malloc(p->arena, sizeof(CmpopExprPair));
+    if (!a) {
+        return NULL;
+    }
+    a->cmpop = cmpop;
+    a->expr = expr;
+    return a;
+}
+
+asdl_int_seq *
+_PyPegen_get_cmpops(Parser *p, asdl_seq *seq)
+{
+    Py_ssize_t len = asdl_seq_LEN(seq);
+    assert(len > 0);
+
+    asdl_int_seq *new_seq = _Py_asdl_int_seq_new(len, p->arena);
+    if (!new_seq) {
+        return NULL;
+    }
+    for (Py_ssize_t i = 0; i < len; i++) {
+        CmpopExprPair *pair = asdl_seq_GET(seq, i);
+        asdl_seq_SET(new_seq, i, pair->cmpop);
+    }
+    return new_seq;
+}
+
+asdl_seq *
+_PyPegen_get_exprs(Parser *p, asdl_seq *seq)
+{
+    Py_ssize_t len = asdl_seq_LEN(seq);
+    assert(len > 0);
+
+    asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
+    if (!new_seq) {
+        return NULL;
+    }
+    for (Py_ssize_t i = 0; i < len; i++) {
+        CmpopExprPair *pair = asdl_seq_GET(seq, i);
+        asdl_seq_SET(new_seq, i, pair->expr);
+    }
+    return new_seq;
+}
+
+/* Creates an asdl_seq* where all the elements have been changed to have ctx as context */
+static asdl_seq *
+_set_seq_context(Parser *p, asdl_seq *seq, expr_context_ty ctx)
+{
+    Py_ssize_t len = asdl_seq_LEN(seq);
+    if (len == 0) {
+        return NULL;
+    }
+
+    asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
+    if (!new_seq) {
+        return NULL;
+    }
+    for (Py_ssize_t i = 0; i < len; i++) {
+        expr_ty e = asdl_seq_GET(seq, i);
+        asdl_seq_SET(new_seq, i, _PyPegen_set_expr_context(p, e, ctx));
+    }
+    return new_seq;
+}
+
+static expr_ty
+_set_name_context(Parser *p, expr_ty e, expr_context_ty ctx)
+{
+    return _Py_Name(e->v.Name.id, ctx, EXTRA_EXPR(e, e));
+}
+
+static expr_ty
+_set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx)
+{
+    return _Py_Tuple(_set_seq_context(p, e->v.Tuple.elts, ctx), ctx, EXTRA_EXPR(e, e));
+}
+
+static expr_ty
+_set_list_context(Parser *p, expr_ty e, expr_context_ty ctx)
+{
+    return _Py_List(_set_seq_context(p, e->v.List.elts, ctx), ctx, EXTRA_EXPR(e, e));
+}
+
+static expr_ty
+_set_subscript_context(Parser *p, expr_ty e, expr_context_ty ctx)
+{
+    return _Py_Subscript(e->v.Subscript.value, e->v.Subscript.slice, ctx, EXTRA_EXPR(e, e));
+}
+
+static expr_ty
+_set_attribute_context(Parser *p, expr_ty e, expr_context_ty ctx)
+{
+    return _Py_Attribute(e->v.Attribute.value, e->v.Attribute.attr, ctx, EXTRA_EXPR(e, e));
+}
+
+static expr_ty
+_set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx)
+{
+    return _Py_Starred(_PyPegen_set_expr_context(p, e->v.Starred.value, ctx), ctx, EXTRA_EXPR(e, e));
+}
+
+/* Creates an `expr_ty` equivalent to `expr` but with `ctx` as context */
+expr_ty
+_PyPegen_set_expr_context(Parser *p, expr_ty expr, expr_context_ty ctx)
+{
+    assert(expr != NULL);
+
+    expr_ty new = NULL;
+    switch (expr->kind) {
+        case Name_kind:
+            new = _set_name_context(p, expr, ctx);
+            break;
+        case Tuple_kind:
+            new = _set_tuple_context(p, expr, ctx);
+            break;
+        case List_kind:
+            new = _set_list_context(p, expr, ctx);
+            break;
+        case Subscript_kind:
+            new = _set_subscript_context(p, expr, ctx);
+            break;
+        case Attribute_kind:
+            new = _set_attribute_context(p, expr, ctx);
+            break;
+        case Starred_kind:
+            new = _set_starred_context(p, expr, ctx);
+            break;
+        default:
+            new = expr;
+    }
+    return new;
+}
+
+/* Constructs a KeyValuePair that is used when parsing a dict's key value pairs */
+KeyValuePair *
+_PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value)
+{
+    KeyValuePair *a = PyArena_Malloc(p->arena, sizeof(KeyValuePair));
+    if (!a) {
+        return NULL;
+    }
+    a->key = key;
+    a->value = value;
+    return a;
+}
+
+/* Extracts all keys from an asdl_seq* of KeyValuePair*'s */
+asdl_seq *
+_PyPegen_get_keys(Parser *p, asdl_seq *seq)
+{
+    Py_ssize_t len = asdl_seq_LEN(seq);
+    asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
+    if (!new_seq) {
+        return NULL;
+    }
+    for (Py_ssize_t i = 0; i < len; i++) {
+        KeyValuePair *pair = asdl_seq_GET(seq, i);
+        asdl_seq_SET(new_seq, i, pair->key);
+    }
+    return new_seq;
+}
+
+/* Extracts all values from an asdl_seq* of KeyValuePair*'s */
+asdl_seq *
+_PyPegen_get_values(Parser *p, asdl_seq *seq)
+{
+    Py_ssize_t len = asdl_seq_LEN(seq);
+    asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
+    if (!new_seq) {
+        return NULL;
+    }
+    for (Py_ssize_t i = 0; i < len; i++) {
+        KeyValuePair *pair = asdl_seq_GET(seq, i);
+        asdl_seq_SET(new_seq, i, pair->value);
+    }
+    return new_seq;
+}
+
+/* Constructs a NameDefaultPair */
+NameDefaultPair *
+_PyPegen_name_default_pair(Parser *p, arg_ty arg, expr_ty value, Token *tc)
+{
+    NameDefaultPair *a = PyArena_Malloc(p->arena, sizeof(NameDefaultPair));
+    if (!a) {
+        return NULL;
+    }
+    a->arg = _PyPegen_add_type_comment_to_arg(p, arg, tc);
+    a->value = value;
+    return a;
+}
+
+/* Constructs a SlashWithDefault */
+SlashWithDefault *
+_PyPegen_slash_with_default(Parser *p, asdl_seq *plain_names, asdl_seq *names_with_defaults)
+{
+    SlashWithDefault *a = PyArena_Malloc(p->arena, sizeof(SlashWithDefault));
+    if (!a) {
+        return NULL;
+    }
+    a->plain_names = plain_names;
+    a->names_with_defaults = names_with_defaults;
+    return a;
+}
+
+/* Constructs a StarEtc */
+StarEtc *
+_PyPegen_star_etc(Parser *p, arg_ty vararg, asdl_seq *kwonlyargs, arg_ty kwarg)
+{
+    StarEtc *a = PyArena_Malloc(p->arena, sizeof(StarEtc));
+    if (!a) {
+        return NULL;
+    }
+    a->vararg = vararg;
+    a->kwonlyargs = kwonlyargs;
+    a->kwarg = kwarg;
+    return a;
+}
+
+asdl_seq *
+_PyPegen_join_sequences(Parser *p, asdl_seq *a, asdl_seq *b)
+{
+    Py_ssize_t first_len = asdl_seq_LEN(a);
+    Py_ssize_t second_len = asdl_seq_LEN(b);
+    asdl_seq *new_seq = _Py_asdl_seq_new(first_len + second_len, p->arena);
+    if (!new_seq) {
+        return NULL;
+    }
+
+    int k = 0;
+    for (Py_ssize_t i = 0; i < first_len; i++) {
+        asdl_seq_SET(new_seq, k++, asdl_seq_GET(a, i));
+    }
+    for (Py_ssize_t i = 0; i < second_len; i++) {
+        asdl_seq_SET(new_seq, k++, asdl_seq_GET(b, i));
+    }
+
+    return new_seq;
+}
+
+static asdl_seq *
+_get_names(Parser *p, asdl_seq *names_with_defaults)
+{
+    Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
+    asdl_seq *seq = _Py_asdl_seq_new(len, p->arena);
+    if (!seq) {
+        return NULL;
+    }
+    for (Py_ssize_t i = 0; i < len; i++) {
+        NameDefaultPair *pair = asdl_seq_GET(names_with_defaults, i);
+        asdl_seq_SET(seq, i, pair->arg);
+    }
+    return seq;
+}
+
+static asdl_seq *
+_get_defaults(Parser *p, asdl_seq *names_with_defaults)
+{
+    Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
+    asdl_seq *seq = _Py_asdl_seq_new(len, p->arena);
+    if (!seq) {
+        return NULL;
+    }
+    for (Py_ssize_t i = 0; i < len; i++) {
+        NameDefaultPair *pair = asdl_seq_GET(names_with_defaults, i);
+        asdl_seq_SET(seq, i, pair->value);
+    }
+    return seq;
+}
+
+/* Constructs an arguments_ty object out of all the parsed constructs in the parameters rule */
+arguments_ty
+_PyPegen_make_arguments(Parser *p, asdl_seq *slash_without_default,
+                        SlashWithDefault *slash_with_default, asdl_seq *plain_names,
+                        asdl_seq *names_with_default, StarEtc *star_etc)
+{
+    asdl_seq *posonlyargs;
+    if (slash_without_default != NULL) {
+        posonlyargs = slash_without_default;
+    }
+    else if (slash_with_default != NULL) {
+        asdl_seq *slash_with_default_names =
+            _get_names(p, slash_with_default->names_with_defaults);
+        if (!slash_with_default_names) {
+            return NULL;
+        }
+        posonlyargs = _PyPegen_join_sequences(p, slash_with_default->plain_names, slash_with_default_names);
+        if (!posonlyargs) {
+            return NULL;
+        }
+    }
+    else {
+        posonlyargs = _Py_asdl_seq_new(0, p->arena);
+        if (!posonlyargs) {
+            return NULL;
+        }
+    }
+
+    asdl_seq *posargs;
+    if (plain_names != NULL && names_with_default != NULL) {
+        asdl_seq *names_with_default_names = _get_names(p, names_with_default);
+        if (!names_with_default_names) {
+            return NULL;
+        }
+        posargs = _PyPegen_join_sequences(p, plain_names, names_with_default_names);
+        if (!posargs) {
+            return NULL;
+        }
+    }
+    else if (plain_names == NULL && names_with_default != NULL) {
+        posargs = _get_names(p, names_with_default);
+        if (!posargs) {
+            return NULL;
+        }
+    }
+    else if (plain_names != NULL && names_with_default == NULL) {
+        posargs = plain_names;
+    }
+    else {
+        posargs = _Py_asdl_seq_new(0, p->arena);
+        if (!posargs) {
+            return NULL;
+        }
+    }
+
+    asdl_seq *posdefaults;
+    if (slash_with_default != NULL && names_with_default != NULL) {
+        asdl_seq *slash_with_default_values =
+            _get_defaults(p, slash_with_default->names_with_defaults);
+        if (!slash_with_default_values) {
+            return NULL;
+        }
+        asdl_seq *names_with_default_values = _get_defaults(p, names_with_default);
+        if (!names_with_default_values) {
+            return NULL;
+        }
+        posdefaults = _PyPegen_join_sequences(p, slash_with_default_values, names_with_default_values);
+        if (!posdefaults) {
+            return NULL;
+        }
+    }
+    else if (slash_with_default == NULL && names_with_default != NULL) {
+        posdefaults = _get_defaults(p, names_with_default);
+        if (!posdefaults) {
+            return NULL;
+        }
+    }
+    else if (slash_with_default != NULL && names_with_default == NULL) {
+        posdefaults = _get_defaults(p, slash_with_default->names_with_defaults);
+        if (!posdefaults) {
+            return NULL;
+        }
+    }
+    else {
+        posdefaults = _Py_asdl_seq_new(0, p->arena);
+        if (!posdefaults) {
+            return NULL;
+        }
+    }
+
+    arg_ty vararg = NULL;
+    if (star_etc != NULL && star_etc->vararg != NULL) {
+        vararg = star_etc->vararg;
+    }
+
+    asdl_seq *kwonlyargs;
+    if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
+        kwonlyargs = _get_names(p, star_etc->kwonlyargs);
+        if (!kwonlyargs) {
+            return NULL;
+        }
+    }
+    else {
+        kwonlyargs = _Py_asdl_seq_new(0, p->arena);
+        if (!kwonlyargs) {
+            return NULL;
+        }
+    }
+
+    asdl_seq *kwdefaults;
+    if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
+        kwdefaults = _get_defaults(p, star_etc->kwonlyargs);
+        if (!kwdefaults) {
+            return NULL;
+        }
+    }
+    else {
+        kwdefaults = _Py_asdl_seq_new(0, p->arena);
+        if (!kwdefaults) {
+            return NULL;
+        }
+    }
+
+    arg_ty kwarg = NULL;
+    if (star_etc != NULL && star_etc->kwarg != NULL) {
+        kwarg = star_etc->kwarg;
+    }
+
+    return _Py_arguments(posonlyargs, posargs, vararg, kwonlyargs, kwdefaults, kwarg,
+                         posdefaults, p->arena);
+}
+
+/* Constructs an empty arguments_ty object, that gets used when a function accepts no
+ * arguments. */
+arguments_ty
+_PyPegen_empty_arguments(Parser *p)
+{
+    asdl_seq *posonlyargs = _Py_asdl_seq_new(0, p->arena);
+    if (!posonlyargs) {
+        return NULL;
+    }
+    asdl_seq *posargs = _Py_asdl_seq_new(0, p->arena);
+    if (!posargs) {
+        return NULL;
+    }
+    asdl_seq *posdefaults = _Py_asdl_seq_new(0, p->arena);
+    if (!posdefaults) {
+        return NULL;
+    }
+    asdl_seq *kwonlyargs = _Py_asdl_seq_new(0, p->arena);
+    if (!kwonlyargs) {
+        return NULL;
+    }
+    asdl_seq *kwdefaults = _Py_asdl_seq_new(0, p->arena);
+    if (!kwdefaults) {
+        return NULL;
+    }
+
+    return _Py_arguments(posonlyargs, posargs, NULL, kwonlyargs, kwdefaults, NULL, kwdefaults,
+                         p->arena);
+}
+
+/* Encapsulates the value of an operator_ty into an AugOperator struct */
+AugOperator *
+_PyPegen_augoperator(Parser *p, operator_ty kind)
+{
+    AugOperator *a = PyArena_Malloc(p->arena, sizeof(AugOperator));
+    if (!a) {
+        return NULL;
+    }
+    a->kind = kind;
+    return a;
+}
+
+/* Construct a FunctionDef equivalent to function_def, but with decorators */
+stmt_ty
+_PyPegen_function_def_decorators(Parser *p, asdl_seq *decorators, stmt_ty function_def)
+{
+    assert(function_def != NULL);
+    if (function_def->kind == AsyncFunctionDef_kind) {
+        return _Py_AsyncFunctionDef(
+            function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
+            function_def->v.FunctionDef.body, decorators, function_def->v.FunctionDef.returns,
+            function_def->v.FunctionDef.type_comment, function_def->lineno,
+            function_def->col_offset, function_def->end_lineno, function_def->end_col_offset,
+            p->arena);
+    }
+
+    return _Py_FunctionDef(function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
+                           function_def->v.FunctionDef.body, decorators,
+                           function_def->v.FunctionDef.returns,
+                           function_def->v.FunctionDef.type_comment, function_def->lineno,
+                           function_def->col_offset, function_def->end_lineno,
+                           function_def->end_col_offset, p->arena);
+}
+
+/* Construct a ClassDef equivalent to class_def, but with decorators */
+stmt_ty
+_PyPegen_class_def_decorators(Parser *p, asdl_seq *decorators, stmt_ty class_def)
+{
+    assert(class_def != NULL);
+    return _Py_ClassDef(class_def->v.ClassDef.name, class_def->v.ClassDef.bases,
+                        class_def->v.ClassDef.keywords, class_def->v.ClassDef.body, decorators,
+                        class_def->lineno, class_def->col_offset, class_def->end_lineno,
+                        class_def->end_col_offset, p->arena);
+}
+
+/* Construct a KeywordOrStarred */
+KeywordOrStarred *
+_PyPegen_keyword_or_starred(Parser *p, void *element, int is_keyword)
+{
+    KeywordOrStarred *a = PyArena_Malloc(p->arena, sizeof(KeywordOrStarred));
+    if (!a) {
+        return NULL;
+    }
+    a->element = element;
+    a->is_keyword = is_keyword;
+    return a;
+}
+
+/* Get the number of starred expressions in an asdl_seq* of KeywordOrStarred*s */
+static int
+_seq_number_of_starred_exprs(asdl_seq *seq)
+{
+    int n = 0;
+    for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
+        KeywordOrStarred *k = asdl_seq_GET(seq, i);
+        if (!k->is_keyword) {
+            n++;
+        }
+    }
+    return n;
+}
+
+/* Extract the starred expressions of an asdl_seq* of KeywordOrStarred*s */
+asdl_seq *
+_PyPegen_seq_extract_starred_exprs(Parser *p, asdl_seq *kwargs)
+{
+    int new_len = _seq_number_of_starred_exprs(kwargs);
+    if (new_len == 0) {
+        return NULL;
+    }
+    asdl_seq *new_seq = _Py_asdl_seq_new(new_len, p->arena);
+    if (!new_seq) {
+        return NULL;
+    }
+
+    int idx = 0;
+    for (Py_ssize_t i = 0, len = asdl_seq_LEN(kwargs); i < len; i++) {
+        KeywordOrStarred *k = asdl_seq_GET(kwargs, i);
+        if (!k->is_keyword) {
+            asdl_seq_SET(new_seq, idx++, k->element);
+        }
+    }
+    return new_seq;
+}
+
+/* Return a new asdl_seq* with only the keywords in kwargs */
+asdl_seq *
+_PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs)
+{
+    Py_ssize_t len = asdl_seq_LEN(kwargs);
+    Py_ssize_t new_len = len - _seq_number_of_starred_exprs(kwargs);
+    if (new_len == 0) {
+        return NULL;
+    }
+    asdl_seq *new_seq = _Py_asdl_seq_new(new_len, p->arena);
+    if (!new_seq) {
+        return NULL;
+    }
+
+    int idx = 0;
+    for (Py_ssize_t i = 0; i < len; i++) {
+        KeywordOrStarred *k = asdl_seq_GET(kwargs, i);
+        if (k->is_keyword) {
+            asdl_seq_SET(new_seq, idx++, k->element);
+        }
+    }
+    return new_seq;
+}
+
+expr_ty
+_PyPegen_concatenate_strings(Parser *p, asdl_seq *strings)
+{
+    Py_ssize_t len = asdl_seq_LEN(strings);
+    assert(len > 0);
+
+    Token *first = asdl_seq_GET(strings, 0);
+    Token *last = asdl_seq_GET(strings, len - 1);
+
+    int bytesmode = 0;
+    PyObject *bytes_str = NULL;
+
+    FstringParser state;
+    _PyPegen_FstringParser_Init(&state);
+
+    for (Py_ssize_t i = 0; i < len; i++) {
+        Token *t = asdl_seq_GET(strings, i);
+
+        int this_bytesmode;
+        int this_rawmode;
+        PyObject *s;
+        const char *fstr;
+        Py_ssize_t fstrlen = -1;
+
+        if (_PyPegen_parsestr(p, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen, t) != 0) {
+            goto error;
+        }
+
+        /* Check that we are not mixing bytes with unicode. */
+        if (i != 0 && bytesmode != this_bytesmode) {
+            RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
+            Py_XDECREF(s);
+            goto error;
+        }
+        bytesmode = this_bytesmode;
+
+        if (fstr != NULL) {
+            assert(s == NULL && !bytesmode);
+
+            int result = _PyPegen_FstringParser_ConcatFstring(p, &state, &fstr, fstr + fstrlen,
+                                                     this_rawmode, 0, first, t, last);
+            if (result < 0) {
+                goto error;
+            }
+        }
+        else {
+            /* String or byte string. */
+            assert(s != NULL && fstr == NULL);
+            assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s));
+
+            if (bytesmode) {
+                if (i == 0) {
+                    bytes_str = s;
+                }
+                else {
+                    PyBytes_ConcatAndDel(&bytes_str, s);
+                    if (!bytes_str) {
+                        goto error;
+                    }
+                }
+            }
+            else {
+                /* This is a regular string. Concatenate it. */
+                if (_PyPegen_FstringParser_ConcatAndDel(&state, s) < 0) {
+                    goto error;
+                }
+            }
+        }
+    }
+
+    if (bytesmode) {
+        if (PyArena_AddPyObject(p->arena, bytes_str) < 0) {
+            goto error;
+        }
+        return Constant(bytes_str, NULL, first->lineno, first->col_offset, last->end_lineno,
+                        last->end_col_offset, p->arena);
+    }
+
+    return _PyPegen_FstringParser_Finish(p, &state, first, last);
+
+error:
+    Py_XDECREF(bytes_str);
+    _PyPegen_FstringParser_Dealloc(&state);
+    if (PyErr_Occurred()) {
+        raise_decode_error(p);
+    }
+    return NULL;
+}
+
+mod_ty
+_PyPegen_make_module(Parser *p, asdl_seq *a) {
+    asdl_seq *type_ignores = NULL;
+    Py_ssize_t num = p->type_ignore_comments.num_items;
+    if (num > 0) {
+        // Turn the raw (comment, lineno) pairs into TypeIgnore objects in the arena
+        type_ignores = _Py_asdl_seq_new(num, p->arena);
+        if (type_ignores == NULL) {
+            return NULL;
+        }
+        for (int i = 0; i < num; i++) {
+            PyObject *tag = _PyPegen_new_type_comment(p, p->type_ignore_comments.items[i].comment);
+            if (tag == NULL) {
+                return NULL;
+            }
+            type_ignore_ty ti = TypeIgnore(p->type_ignore_comments.items[i].lineno, tag, p->arena);
+            if (ti == NULL) {
+                return NULL;
+            }
+            asdl_seq_SET(type_ignores, i, ti);
+        }
+    }
+    return Module(a, type_ignores, p->arena);
+}
+
+// Error reporting helpers
+
+expr_ty
+_PyPegen_get_invalid_target(expr_ty e)
+{
+    if (e == NULL) {
+        return NULL;
+    }
+
+#define VISIT_CONTAINER(CONTAINER, TYPE) do { \
+        Py_ssize_t len = asdl_seq_LEN(CONTAINER->v.TYPE.elts);\
+        for (Py_ssize_t i = 0; i < len; i++) {\
+            expr_ty other = asdl_seq_GET(CONTAINER->v.TYPE.elts, i);\
+            expr_ty child = _PyPegen_get_invalid_target(other);\
+            if (child != NULL) {\
+                return child;\
+            }\
+        }\
+    } while (0)
+
+    // We only need to visit List and Tuple nodes recursively as those
+    // are the only ones that can contain valid names in targets when
+    // they are parsed as expressions. Any other kind of expression
+    // that is a container (like Sets or Dicts) is directly invalid and
+    // we don't need to visit it recursively.
+
+    switch (e->kind) {
+        case List_kind: {
+            VISIT_CONTAINER(e, List);
+            return NULL;
+        }
+        case Tuple_kind: {
+            VISIT_CONTAINER(e, Tuple);
+            return NULL;
+        }
+        case Starred_kind:
+            return _PyPegen_get_invalid_target(e->v.Starred.value);
+        case Name_kind:
+        case Subscript_kind:
+        case Attribute_kind:
+            return NULL;
+        default:
+            return e;
+    }
+}
+
+void *_PyPegen_arguments_parsing_error(Parser *p, expr_ty e) {
+    int kwarg_unpacking = 0;
+    for (Py_ssize_t i = 0, l = asdl_seq_LEN(e->v.Call.keywords); i < l; i++) {
+        keyword_ty keyword = asdl_seq_GET(e->v.Call.keywords, i);
+        if (!keyword->arg) {
+            kwarg_unpacking = 1;
+        }
+    }
+
+    const char *msg = NULL;
+    if (kwarg_unpacking) {
+        msg = "positional argument follows keyword argument unpacking";
+    } else {
+        msg = "positional argument follows keyword argument";
+    }
+
+    return RAISE_SYNTAX_ERROR(msg);
+}
+
+void *
+_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args)
+{
+    /* The rule that calls this function is 'args for_if_clauses'.
+       For the input f(L, x for x in y), L and x are in args and
+       the for is parsed as a for_if_clause. We have to check if
+       len <= 1, so that input like dict((a, b) for a, b in x)
+       gets successfully parsed and then we pass the last
+       argument (x in the above example) as the location of the
+       error */
+    Py_ssize_t len = asdl_seq_LEN(args->v.Call.args);
+    if (len <= 1) {
+        return NULL;
+    }
+
+    return RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
+        (expr_ty) asdl_seq_GET(args->v.Call.args, len - 1),
+        "Generator expression must be parenthesized"
+    );
+}