bpo-35808: Retire pgen and use pgen2 to generate the parser (GH-11814)

Pgen is the oldest piece of technology in the CPython repository; building it requires various #if[n]def PGEN hacks in other parts of the code, and it also depends more and more on CPython internals. This commit removes the old pgen C code and replaces it with a new version implemented in pure Python. This is a modified and adapted version of lib2to3/pgen2 that can generate grammar files compatible with the current parser. This commit also eliminates all the #ifdef and code branches related to pgen, simplifying the code and making it more maintainable. The regen-grammar step now uses $(PYTHON_FOR_REGEN), which can be any version of the interpreter, so the new pgen code maintains compatibility with older versions of the interpreter (this also allows regenerating the grammar with the current CI solution, which uses Python 3.5). The new pgen Python module also makes use of the Grammar/Tokens file that holds the token specification, so it is always kept in sync and avoids having to maintain duplicate token definitions.
author: Pablo Galindo <Pablogsal@gmail.com> 2019-03-01 23:34:44 (GMT)
committer: GitHub <noreply@github.com> 2019-03-01 23:34:44 (GMT)
commit: 1f24a719e7be5e49b876a5dc7daf21d01ee69faa (patch)
tree: 8f8f56cab78ef671a8cb7f54b8ec2495d9a435e6 /Parser/grammar.c
parent: 7eebbbd5b3907447eddadf5cb7cb1cc9230d15b2 (diff)
download: cpython-1f24a719e7be5e49b876a5dc7daf21d01ee69faa.zip
cpython-1f24a719e7be5e49b876a5dc7daf21d01ee69faa.tar.gz
cpython-1f24a719e7be5e49b876a5dc7daf21d01ee69faa.tar.bz2
1 files changed, 0 insertions, 273 deletions
diff --git a/Parser/grammar.c b/Parser/grammar.c
deleted file mode 100644
index 75fd5b9..0000000
--- a/Parser/grammar.c
+++ /dev/null
@@ -1,273 +0,0 @@
-
-/* Grammar implementation */
-
-#include "Python.h"
-#include "pgenheaders.h"
-
-#include <ctype.h>
-
-#include "token.h"
-#include "grammar.h"
-
-extern int Py_DebugFlag;
-
-grammar *
-newgrammar(int start)
-{
-    grammar *g;
-
-    g = (grammar *)PyObject_MALLOC(sizeof(grammar));
-    if (g == NULL)
-        Py_FatalError("no mem for new grammar");
-    g->g_ndfas = 0;
-    g->g_dfa = NULL;
-    g->g_start = start;
-    g->g_ll.ll_nlabels = 0;
-    g->g_ll.ll_label = NULL;
-    g->g_accel = 0;
-    return g;
-}
-
-void
-freegrammar(grammar *g)
-{
-    int i;
-    for (i = 0; i < g->g_ndfas; i++) {
-        free(g->g_dfa[i].d_name);
-        for (int j = 0; j < g->g_dfa[i].d_nstates; j++)
-            PyObject_FREE(g->g_dfa[i].d_state[j].s_arc);
-        PyObject_FREE(g->g_dfa[i].d_state);
-    }
-    PyObject_FREE(g->g_dfa);
-    for (i = 0; i < g->g_ll.ll_nlabels; i++)
-        free(g->g_ll.ll_label[i].lb_str);
-    PyObject_FREE(g->g_ll.ll_label);
-    PyObject_FREE(g);
-}
-
-dfa *
-adddfa(grammar *g, int type, const char *name)
-{
-    dfa *d;
-
-    g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa,
-                                        sizeof(dfa) * (g->g_ndfas + 1));
-    if (g->g_dfa == NULL)
-        Py_FatalError("no mem to resize dfa in adddfa");
-    d = &g->g_dfa[g->g_ndfas++];
-    d->d_type = type;
-    d->d_name = strdup(name);
-    d->d_nstates = 0;
-    d->d_state = NULL;
-    d->d_initial = -1;
-    d->d_first = NULL;
-    return d; /* Only use while fresh! */
-}
-
-int
-addstate(dfa *d)
-{
-    state *s;
-
-    d->d_state = (state *)PyObject_REALLOC(d->d_state,
-                                  sizeof(state) * (d->d_nstates + 1));
-    if (d->d_state == NULL)
-        Py_FatalError("no mem to resize state in addstate");
-    s = &d->d_state[d->d_nstates++];
-    s->s_narcs = 0;
-    s->s_arc = NULL;
-    s->s_lower = 0;
-    s->s_upper = 0;
-    s->s_accel = NULL;
-    s->s_accept = 0;
-    return Py_SAFE_DOWNCAST(s - d->d_state, intptr_t, int);
-}
-
-void
-addarc(dfa *d, int from, int to, int lbl)
-{
-    state *s;
-    arc *a;
-
-    assert(0 <= from && from < d->d_nstates);
-    assert(0 <= to && to < d->d_nstates);
-
-    s = &d->d_state[from];
-    s->s_arc = (arc *)PyObject_REALLOC(s->s_arc, sizeof(arc) * (s->s_narcs + 1));
-    if (s->s_arc == NULL)
-        Py_FatalError("no mem to resize arc list in addarc");
-    a = &s->s_arc[s->s_narcs++];
-    a->a_lbl = lbl;
-    a->a_arrow = to;
-}
-
-int
-addlabel(labellist *ll, int type, const char *str)
-{
-    int i;
-    label *lb;
-
-    for (i = 0; i < ll->ll_nlabels; i++) {
-        if (ll->ll_label[i].lb_type == type &&
-            strcmp(ll->ll_label[i].lb_str, str) == 0)
-            return i;
-    }
-    ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label,
-                                    sizeof(label) * (ll->ll_nlabels + 1));
-    if (ll->ll_label == NULL)
-        Py_FatalError("no mem to resize labellist in addlabel");
-    lb = &ll->ll_label[ll->ll_nlabels++];
-    lb->lb_type = type;
-    lb->lb_str = strdup(str);
-    if (Py_DebugFlag)
-        printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels,
-               PyGrammar_LabelRepr(lb));
-    return Py_SAFE_DOWNCAST(lb - ll->ll_label, intptr_t, int);
-}
-
-/* Same, but rather dies than adds */
-
-int
-findlabel(labellist *ll, int type, const char *str)
-{
-    int i;
-
-    for (i = 0; i < ll->ll_nlabels; i++) {
-        if (ll->ll_label[i].lb_type == type /*&&
-            strcmp(ll->ll_label[i].lb_str, str) == 0*/)
-            return i;
-    }
-    fprintf(stderr, "Label %d/'%s' not found\n", type, str);
-    Py_FatalError("grammar.c:findlabel()");
-
-    /* Py_FatalError() is declared with __attribute__((__noreturn__)).
-       GCC emits a warning without "return 0;" (compiler bug!), but Clang is
-       smarter and emits a warning on the return... */
-#ifndef __clang__
-    return 0; /* Make gcc -Wall happy */
-#endif
-}
-
-/* Forward */
-static void translabel(grammar *, label *);
-
-void
-translatelabels(grammar *g)
-{
-    int i;
-
-#ifdef Py_DEBUG
-    printf("Translating labels ...\n");
-#endif
-    /* Don't translate EMPTY */
-    for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++)
-        translabel(g, &g->g_ll.ll_label[i]);
-}
-
-static void
-translabel(grammar *g, label *lb)
-{
-    int i;
-
-    if (Py_DebugFlag)
-        printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb));
-
-    if (lb->lb_type == NAME) {
-        for (i = 0; i < g->g_ndfas; i++) {
-            if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) {
-                if (Py_DebugFlag)
-                    printf(
-                        "Label %s is non-terminal %d.\n",
-                        lb->lb_str,
-                        g->g_dfa[i].d_type);
-                lb->lb_type = g->g_dfa[i].d_type;
-                free(lb->lb_str);
-                lb->lb_str = NULL;
-                return;
-            }
-        }
-        for (i = 0; i < (int)N_TOKENS; i++) {
-            if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) {
-                if (Py_DebugFlag)
-                    printf("Label %s is terminal %d.\n",
-                        lb->lb_str, i);
-                lb->lb_type = i;
-                free(lb->lb_str);
-                lb->lb_str = NULL;
-                return;
-            }
-        }
-        printf("Can't translate NAME label '%s'\n", lb->lb_str);
-        return;
-    }
-
-    if (lb->lb_type == STRING) {
-        if (isalpha(Py_CHARMASK(lb->lb_str[1])) ||
-            lb->lb_str[1] == '_') {
-            char *p;
-            char *src;
-            char *dest;
-            size_t name_len;
-            if (Py_DebugFlag)
-                printf("Label %s is a keyword\n", lb->lb_str);
-            lb->lb_type = NAME;
-            src = lb->lb_str + 1;
-            p = strchr(src, '\'');
-            if (p)
-                name_len = p - src;
-            else
-                name_len = strlen(src);
-            dest = (char *)malloc(name_len + 1);
-            if (!dest) {
-                printf("Can't alloc dest '%s'\n", src);
-                return;
-            }
-            strncpy(dest, src, name_len);
-            dest[name_len] = '\0';
-            free(lb->lb_str);
-            lb->lb_str = dest;
-        }
-        else if (lb->lb_str[2] == lb->lb_str[0]) {
-            int type = (int) PyToken_OneChar(lb->lb_str[1]);
-            if (type != OP) {
-                lb->lb_type = type;
-                free(lb->lb_str);
-                lb->lb_str = NULL;
-            }
-            else
-                printf("Unknown OP label %s\n",
-                    lb->lb_str);
-        }
-        else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) {
-            int type = (int) PyToken_TwoChars(lb->lb_str[1],
-                                       lb->lb_str[2]);
-            if (type != OP) {
-                lb->lb_type = type;
-                free(lb->lb_str);
-                lb->lb_str = NULL;
-            }
-            else
-                printf("Unknown OP label %s\n",
-                    lb->lb_str);
-        }
-        else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) {
-            int type = (int) PyToken_ThreeChars(lb->lb_str[1],
-                                                lb->lb_str[2],
-                                                lb->lb_str[3]);
-            if (type != OP) {
-                lb->lb_type = type;
-                free(lb->lb_str);
-                lb->lb_str = NULL;
-            }
-            else
-                printf("Unknown OP label %s\n",
-                    lb->lb_str);
-        }
-        else
-            printf("Can't translate STRING label %s\n",
-                lb->lb_str);
-    }
-    else
-        printf("Can't translate label '%s'\n",
-               PyGrammar_LabelRepr(lb));
-}
author	Pablo Galindo <Pablogsal@gmail.com>	2019-03-01 23:34:44 (GMT)
committer	GitHub <noreply@github.com>	2019-03-01 23:34:44 (GMT)
commit	1f24a719e7be5e49b876a5dc7daf21d01ee69faa (patch)
tree	8f8f56cab78ef671a8cb7f54b8ec2495d9a435e6 /Parser/grammar.c
parent	7eebbbd5b3907447eddadf5cb7cb1cc9230d15b2 (diff)
download	cpython-1f24a719e7be5e49b876a5dc7daf21d01ee69faa.zip cpython-1f24a719e7be5e49b876a5dc7daf21d01ee69faa.tar.gz cpython-1f24a719e7be5e49b876a5dc7daf21d01ee69faa.tar.bz2