summaryrefslogtreecommitdiffstats
path: root/Parser
diff options
context:
space:
mode:
Diffstat (limited to 'Parser')
-rw-r--r--Parser/acceler.c168
-rw-r--r--Parser/bitset.c58
-rw-r--r--Parser/firstsets.c180
-rw-r--r--Parser/grammar.c380
-rw-r--r--Parser/grammar1.c70
-rw-r--r--Parser/intrcheck.c70
-rw-r--r--Parser/listnode.c78
-rw-r--r--Parser/metagrammar.c170
-rw-r--r--Parser/myreadline.c234
-rw-r--r--Parser/node.c122
-rw-r--r--Parser/parser.c540
-rw-r--r--Parser/parsetok.c372
-rw-r--r--Parser/pgen.c964
-rw-r--r--Parser/pgenmain.c194
-rw-r--r--Parser/printgrammar.c152
-rw-r--r--Parser/tokenizer.c2768
-rw-r--r--Parser/tokenizer.h90
17 files changed, 3305 insertions, 3305 deletions
diff --git a/Parser/acceler.c b/Parser/acceler.c
index b41b265..9b14263 100644
--- a/Parser/acceler.c
+++ b/Parser/acceler.c
@@ -23,103 +23,103 @@ static void fixstate(grammar *, state *);
void
PyGrammar_AddAccelerators(grammar *g)
{
- dfa *d;
- int i;
- d = g->g_dfa;
- for (i = g->g_ndfas; --i >= 0; d++)
- fixdfa(g, d);
- g->g_accel = 1;
+ dfa *d;
+ int i;
+ d = g->g_dfa;
+ for (i = g->g_ndfas; --i >= 0; d++)
+ fixdfa(g, d);
+ g->g_accel = 1;
}
void
PyGrammar_RemoveAccelerators(grammar *g)
{
- dfa *d;
- int i;
- g->g_accel = 0;
- d = g->g_dfa;
- for (i = g->g_ndfas; --i >= 0; d++) {
- state *s;
- int j;
- s = d->d_state;
- for (j = 0; j < d->d_nstates; j++, s++) {
- if (s->s_accel)
- PyObject_FREE(s->s_accel);
- s->s_accel = NULL;
- }
- }
+ dfa *d;
+ int i;
+ g->g_accel = 0;
+ d = g->g_dfa;
+ for (i = g->g_ndfas; --i >= 0; d++) {
+ state *s;
+ int j;
+ s = d->d_state;
+ for (j = 0; j < d->d_nstates; j++, s++) {
+ if (s->s_accel)
+ PyObject_FREE(s->s_accel);
+ s->s_accel = NULL;
+ }
+ }
}
static void
fixdfa(grammar *g, dfa *d)
{
- state *s;
- int j;
- s = d->d_state;
- for (j = 0; j < d->d_nstates; j++, s++)
- fixstate(g, s);
+ state *s;
+ int j;
+ s = d->d_state;
+ for (j = 0; j < d->d_nstates; j++, s++)
+ fixstate(g, s);
}
static void
fixstate(grammar *g, state *s)
{
- arc *a;
- int k;
- int *accel;
- int nl = g->g_ll.ll_nlabels;
- s->s_accept = 0;
- accel = (int *) PyObject_MALLOC(nl * sizeof(int));
- if (accel == NULL) {
- fprintf(stderr, "no mem to build parser accelerators\n");
- exit(1);
- }
- for (k = 0; k < nl; k++)
- accel[k] = -1;
- a = s->s_arc;
- for (k = s->s_narcs; --k >= 0; a++) {
- int lbl = a->a_lbl;
- label *l = &g->g_ll.ll_label[lbl];
- int type = l->lb_type;
- if (a->a_arrow >= (1 << 7)) {
- printf("XXX too many states!\n");
- continue;
- }
- if (ISNONTERMINAL(type)) {
- dfa *d1 = PyGrammar_FindDFA(g, type);
- int ibit;
- if (type - NT_OFFSET >= (1 << 7)) {
- printf("XXX too high nonterminal number!\n");
- continue;
- }
- for (ibit = 0; ibit < g->g_ll.ll_nlabels; ibit++) {
- if (testbit(d1->d_first, ibit)) {
- if (accel[ibit] != -1)
- printf("XXX ambiguity!\n");
- accel[ibit] = a->a_arrow | (1 << 7) |
- ((type - NT_OFFSET) << 8);
- }
- }
- }
- else if (lbl == EMPTY)
- s->s_accept = 1;
- else if (lbl >= 0 && lbl < nl)
- accel[lbl] = a->a_arrow;
- }
- while (nl > 0 && accel[nl-1] == -1)
- nl--;
- for (k = 0; k < nl && accel[k] == -1;)
- k++;
- if (k < nl) {
- int i;
- s->s_accel = (int *) PyObject_MALLOC((nl-k) * sizeof(int));
- if (s->s_accel == NULL) {
- fprintf(stderr, "no mem to add parser accelerators\n");
- exit(1);
- }
- s->s_lower = k;
- s->s_upper = nl;
- for (i = 0; k < nl; i++, k++)
- s->s_accel[i] = accel[k];
- }
- PyObject_FREE(accel);
+ arc *a;
+ int k;
+ int *accel;
+ int nl = g->g_ll.ll_nlabels;
+ s->s_accept = 0;
+ accel = (int *) PyObject_MALLOC(nl * sizeof(int));
+ if (accel == NULL) {
+ fprintf(stderr, "no mem to build parser accelerators\n");
+ exit(1);
+ }
+ for (k = 0; k < nl; k++)
+ accel[k] = -1;
+ a = s->s_arc;
+ for (k = s->s_narcs; --k >= 0; a++) {
+ int lbl = a->a_lbl;
+ label *l = &g->g_ll.ll_label[lbl];
+ int type = l->lb_type;
+ if (a->a_arrow >= (1 << 7)) {
+ printf("XXX too many states!\n");
+ continue;
+ }
+ if (ISNONTERMINAL(type)) {
+ dfa *d1 = PyGrammar_FindDFA(g, type);
+ int ibit;
+ if (type - NT_OFFSET >= (1 << 7)) {
+ printf("XXX too high nonterminal number!\n");
+ continue;
+ }
+ for (ibit = 0; ibit < g->g_ll.ll_nlabels; ibit++) {
+ if (testbit(d1->d_first, ibit)) {
+ if (accel[ibit] != -1)
+ printf("XXX ambiguity!\n");
+ accel[ibit] = a->a_arrow | (1 << 7) |
+ ((type - NT_OFFSET) << 8);
+ }
+ }
+ }
+ else if (lbl == EMPTY)
+ s->s_accept = 1;
+ else if (lbl >= 0 && lbl < nl)
+ accel[lbl] = a->a_arrow;
+ }
+ while (nl > 0 && accel[nl-1] == -1)
+ nl--;
+ for (k = 0; k < nl && accel[k] == -1;)
+ k++;
+ if (k < nl) {
+ int i;
+ s->s_accel = (int *) PyObject_MALLOC((nl-k) * sizeof(int));
+ if (s->s_accel == NULL) {
+ fprintf(stderr, "no mem to add parser accelerators\n");
+ exit(1);
+ }
+ s->s_lower = k;
+ s->s_upper = nl;
+ for (i = 0; k < nl; i++, k++)
+ s->s_accel[i] = accel[k];
+ }
+ PyObject_FREE(accel);
}
diff --git a/Parser/bitset.c b/Parser/bitset.c
index b5543b8..f5bfd41 100644
--- a/Parser/bitset.c
+++ b/Parser/bitset.c
@@ -7,60 +7,60 @@
bitset
newbitset(int nbits)
{
- int nbytes = NBYTES(nbits);
- bitset ss = (char *)PyObject_MALLOC(sizeof(BYTE) * nbytes);
-
- if (ss == NULL)
- Py_FatalError("no mem for bitset");
-
- ss += nbytes;
- while (--nbytes >= 0)
- *--ss = 0;
- return ss;
+ int nbytes = NBYTES(nbits);
+ bitset ss = (char *)PyObject_MALLOC(sizeof(BYTE) * nbytes);
+
+ if (ss == NULL)
+ Py_FatalError("no mem for bitset");
+
+ ss += nbytes;
+ while (--nbytes >= 0)
+ *--ss = 0;
+ return ss;
}
void
delbitset(bitset ss)
{
- PyObject_FREE(ss);
+ PyObject_FREE(ss);
}
int
addbit(bitset ss, int ibit)
{
- int ibyte = BIT2BYTE(ibit);
- BYTE mask = BIT2MASK(ibit);
-
- if (ss[ibyte] & mask)
- return 0; /* Bit already set */
- ss[ibyte] |= mask;
- return 1;
+ int ibyte = BIT2BYTE(ibit);
+ BYTE mask = BIT2MASK(ibit);
+
+ if (ss[ibyte] & mask)
+ return 0; /* Bit already set */
+ ss[ibyte] |= mask;
+ return 1;
}
#if 0 /* Now a macro */
int
testbit(bitset ss, int ibit)
{
- return (ss[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0;
+ return (ss[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0;
}
#endif
int
samebitset(bitset ss1, bitset ss2, int nbits)
{
- int i;
-
- for (i = NBYTES(nbits); --i >= 0; )
- if (*ss1++ != *ss2++)
- return 0;
- return 1;
+ int i;
+
+ for (i = NBYTES(nbits); --i >= 0; )
+ if (*ss1++ != *ss2++)
+ return 0;
+ return 1;
}
void
mergebitset(bitset ss1, bitset ss2, int nbits)
{
- int i;
-
- for (i = NBYTES(nbits); --i >= 0; )
- *ss1++ |= *ss2++;
+ int i;
+
+ for (i = NBYTES(nbits); --i >= 0; )
+ *ss1++ |= *ss2++;
}
diff --git a/Parser/firstsets.c b/Parser/firstsets.c
index 00467b3..ee75d1b 100644
--- a/Parser/firstsets.c
+++ b/Parser/firstsets.c
@@ -13,101 +13,101 @@ static void calcfirstset(grammar *, dfa *);
void
addfirstsets(grammar *g)
{
- int i;
- dfa *d;
+ int i;
+ dfa *d;
- if (Py_DebugFlag)
- printf("Adding FIRST sets ...\n");
- for (i = 0; i < g->g_ndfas; i++) {
- d = &g->g_dfa[i];
- if (d->d_first == NULL)
- calcfirstset(g, d);
- }
+ if (Py_DebugFlag)
+ printf("Adding FIRST sets ...\n");
+ for (i = 0; i < g->g_ndfas; i++) {
+ d = &g->g_dfa[i];
+ if (d->d_first == NULL)
+ calcfirstset(g, d);
+ }
}
static void
calcfirstset(grammar *g, dfa *d)
{
- int i, j;
- state *s;
- arc *a;
- int nsyms;
- int *sym;
- int nbits;
- static bitset dummy;
- bitset result;
- int type;
- dfa *d1;
- label *l0;
-
- if (Py_DebugFlag)
- printf("Calculate FIRST set for '%s'\n", d->d_name);
-
- if (dummy == NULL)
- dummy = newbitset(1);
- if (d->d_first == dummy) {
- fprintf(stderr, "Left-recursion for '%s'\n", d->d_name);
- return;
- }
- if (d->d_first != NULL) {
- fprintf(stderr, "Re-calculating FIRST set for '%s' ???\n",
- d->d_name);
- }
- d->d_first = dummy;
-
- l0 = g->g_ll.ll_label;
- nbits = g->g_ll.ll_nlabels;
- result = newbitset(nbits);
-
- sym = (int *)PyObject_MALLOC(sizeof(int));
- if (sym == NULL)
- Py_FatalError("no mem for new sym in calcfirstset");
- nsyms = 1;
- sym[0] = findlabel(&g->g_ll, d->d_type, (char *)NULL);
-
- s = &d->d_state[d->d_initial];
- for (i = 0; i < s->s_narcs; i++) {
- a = &s->s_arc[i];
- for (j = 0; j < nsyms; j++) {
- if (sym[j] == a->a_lbl)
- break;
- }
- if (j >= nsyms) { /* New label */
- sym = (int *)PyObject_REALLOC(sym,
- sizeof(int) * (nsyms + 1));
- if (sym == NULL)
- Py_FatalError(
- "no mem to resize sym in calcfirstset");
- sym[nsyms++] = a->a_lbl;
- type = l0[a->a_lbl].lb_type;
- if (ISNONTERMINAL(type)) {
- d1 = PyGrammar_FindDFA(g, type);
- if (d1->d_first == dummy) {
- fprintf(stderr,
- "Left-recursion below '%s'\n",
- d->d_name);
- }
- else {
- if (d1->d_first == NULL)
- calcfirstset(g, d1);
- mergebitset(result,
- d1->d_first, nbits);
- }
- }
- else if (ISTERMINAL(type)) {
- addbit(result, a->a_lbl);
- }
- }
- }
- d->d_first = result;
- if (Py_DebugFlag) {
- printf("FIRST set for '%s': {", d->d_name);
- for (i = 0; i < nbits; i++) {
- if (testbit(result, i))
- printf(" %s", PyGrammar_LabelRepr(&l0[i]));
- }
- printf(" }\n");
- }
+ int i, j;
+ state *s;
+ arc *a;
+ int nsyms;
+ int *sym;
+ int nbits;
+ static bitset dummy;
+ bitset result;
+ int type;
+ dfa *d1;
+ label *l0;
- PyObject_FREE(sym);
+ if (Py_DebugFlag)
+ printf("Calculate FIRST set for '%s'\n", d->d_name);
+
+ if (dummy == NULL)
+ dummy = newbitset(1);
+ if (d->d_first == dummy) {
+ fprintf(stderr, "Left-recursion for '%s'\n", d->d_name);
+ return;
+ }
+ if (d->d_first != NULL) {
+ fprintf(stderr, "Re-calculating FIRST set for '%s' ???\n",
+ d->d_name);
+ }
+ d->d_first = dummy;
+
+ l0 = g->g_ll.ll_label;
+ nbits = g->g_ll.ll_nlabels;
+ result = newbitset(nbits);
+
+ sym = (int *)PyObject_MALLOC(sizeof(int));
+ if (sym == NULL)
+ Py_FatalError("no mem for new sym in calcfirstset");
+ nsyms = 1;
+ sym[0] = findlabel(&g->g_ll, d->d_type, (char *)NULL);
+
+ s = &d->d_state[d->d_initial];
+ for (i = 0; i < s->s_narcs; i++) {
+ a = &s->s_arc[i];
+ for (j = 0; j < nsyms; j++) {
+ if (sym[j] == a->a_lbl)
+ break;
+ }
+ if (j >= nsyms) { /* New label */
+ sym = (int *)PyObject_REALLOC(sym,
+ sizeof(int) * (nsyms + 1));
+ if (sym == NULL)
+ Py_FatalError(
+ "no mem to resize sym in calcfirstset");
+ sym[nsyms++] = a->a_lbl;
+ type = l0[a->a_lbl].lb_type;
+ if (ISNONTERMINAL(type)) {
+ d1 = PyGrammar_FindDFA(g, type);
+ if (d1->d_first == dummy) {
+ fprintf(stderr,
+ "Left-recursion below '%s'\n",
+ d->d_name);
+ }
+ else {
+ if (d1->d_first == NULL)
+ calcfirstset(g, d1);
+ mergebitset(result,
+ d1->d_first, nbits);
+ }
+ }
+ else if (ISTERMINAL(type)) {
+ addbit(result, a->a_lbl);
+ }
+ }
+ }
+ d->d_first = result;
+ if (Py_DebugFlag) {
+ printf("FIRST set for '%s': {", d->d_name);
+ for (i = 0; i < nbits; i++) {
+ if (testbit(result, i))
+ printf(" %s", PyGrammar_LabelRepr(&l0[i]));
+ }
+ printf(" }\n");
+ }
+
+ PyObject_FREE(sym);
}
diff --git a/Parser/grammar.c b/Parser/grammar.c
index fa27300..f2a25ca 100644
--- a/Parser/grammar.c
+++ b/Parser/grammar.c
@@ -14,98 +14,98 @@ extern int Py_DebugFlag;
grammar *
newgrammar(int start)
{
- grammar *g;
-
- g = (grammar *)PyObject_MALLOC(sizeof(grammar));
- if (g == NULL)
- Py_FatalError("no mem for new grammar");
- g->g_ndfas = 0;
- g->g_dfa = NULL;
- g->g_start = start;
- g->g_ll.ll_nlabels = 0;
- g->g_ll.ll_label = NULL;
- g->g_accel = 0;
- return g;
+ grammar *g;
+
+ g = (grammar *)PyObject_MALLOC(sizeof(grammar));
+ if (g == NULL)
+ Py_FatalError("no mem for new grammar");
+ g->g_ndfas = 0;
+ g->g_dfa = NULL;
+ g->g_start = start;
+ g->g_ll.ll_nlabels = 0;
+ g->g_ll.ll_label = NULL;
+ g->g_accel = 0;
+ return g;
}
dfa *
adddfa(grammar *g, int type, char *name)
{
- dfa *d;
-
- g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa,
- sizeof(dfa) * (g->g_ndfas + 1));
- if (g->g_dfa == NULL)
- Py_FatalError("no mem to resize dfa in adddfa");
- d = &g->g_dfa[g->g_ndfas++];
- d->d_type = type;
- d->d_name = strdup(name);
- d->d_nstates = 0;
- d->d_state = NULL;
- d->d_initial = -1;
- d->d_first = NULL;
- return d; /* Only use while fresh! */
+ dfa *d;
+
+ g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa,
+ sizeof(dfa) * (g->g_ndfas + 1));
+ if (g->g_dfa == NULL)
+ Py_FatalError("no mem to resize dfa in adddfa");
+ d = &g->g_dfa[g->g_ndfas++];
+ d->d_type = type;
+ d->d_name = strdup(name);
+ d->d_nstates = 0;
+ d->d_state = NULL;
+ d->d_initial = -1;
+ d->d_first = NULL;
+ return d; /* Only use while fresh! */
}
int
addstate(dfa *d)
{
- state *s;
-
- d->d_state = (state *)PyObject_REALLOC(d->d_state,
- sizeof(state) * (d->d_nstates + 1));
- if (d->d_state == NULL)
- Py_FatalError("no mem to resize state in addstate");
- s = &d->d_state[d->d_nstates++];
- s->s_narcs = 0;
- s->s_arc = NULL;
- s->s_lower = 0;
- s->s_upper = 0;
- s->s_accel = NULL;
- s->s_accept = 0;
- return s - d->d_state;
+ state *s;
+
+ d->d_state = (state *)PyObject_REALLOC(d->d_state,
+ sizeof(state) * (d->d_nstates + 1));
+ if (d->d_state == NULL)
+ Py_FatalError("no mem to resize state in addstate");
+ s = &d->d_state[d->d_nstates++];
+ s->s_narcs = 0;
+ s->s_arc = NULL;
+ s->s_lower = 0;
+ s->s_upper = 0;
+ s->s_accel = NULL;
+ s->s_accept = 0;
+ return s - d->d_state;
}
void
addarc(dfa *d, int from, int to, int lbl)
{
- state *s;
- arc *a;
-
- assert(0 <= from && from < d->d_nstates);
- assert(0 <= to && to < d->d_nstates);
-
- s = &d->d_state[from];
- s->s_arc = (arc *)PyObject_REALLOC(s->s_arc, sizeof(arc) * (s->s_narcs + 1));
- if (s->s_arc == NULL)
- Py_FatalError("no mem to resize arc list in addarc");
- a = &s->s_arc[s->s_narcs++];
- a->a_lbl = lbl;
- a->a_arrow = to;
+ state *s;
+ arc *a;
+
+ assert(0 <= from && from < d->d_nstates);
+ assert(0 <= to && to < d->d_nstates);
+
+ s = &d->d_state[from];
+ s->s_arc = (arc *)PyObject_REALLOC(s->s_arc, sizeof(arc) * (s->s_narcs + 1));
+ if (s->s_arc == NULL)
+ Py_FatalError("no mem to resize arc list in addarc");
+ a = &s->s_arc[s->s_narcs++];
+ a->a_lbl = lbl;
+ a->a_arrow = to;
}
int
addlabel(labellist *ll, int type, char *str)
{
- int i;
- label *lb;
-
- for (i = 0; i < ll->ll_nlabels; i++) {
- if (ll->ll_label[i].lb_type == type &&
- strcmp(ll->ll_label[i].lb_str, str) == 0)
- return i;
- }
- ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label,
- sizeof(label) * (ll->ll_nlabels + 1));
- if (ll->ll_label == NULL)
- Py_FatalError("no mem to resize labellist in addlabel");
- lb = &ll->ll_label[ll->ll_nlabels++];
- lb->lb_type = type;
- lb->lb_str = strdup(str);
- if (Py_DebugFlag)
- printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels,
- PyGrammar_LabelRepr(lb));
- return lb - ll->ll_label;
+ int i;
+ label *lb;
+
+ for (i = 0; i < ll->ll_nlabels; i++) {
+ if (ll->ll_label[i].lb_type == type &&
+ strcmp(ll->ll_label[i].lb_str, str) == 0)
+ return i;
+ }
+ ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label,
+ sizeof(label) * (ll->ll_nlabels + 1));
+ if (ll->ll_label == NULL)
+ Py_FatalError("no mem to resize labellist in addlabel");
+ lb = &ll->ll_label[ll->ll_nlabels++];
+ lb->lb_type = type;
+ lb->lb_str = strdup(str);
+ if (Py_DebugFlag)
+ printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels,
+ PyGrammar_LabelRepr(lb));
+ return lb - ll->ll_label;
}
/* Same, but rather dies than adds */
@@ -113,16 +113,16 @@ addlabel(labellist *ll, int type, char *str)
int
findlabel(labellist *ll, int type, char *str)
{
- int i;
-
- for (i = 0; i < ll->ll_nlabels; i++) {
- if (ll->ll_label[i].lb_type == type /*&&
- strcmp(ll->ll_label[i].lb_str, str) == 0*/)
- return i;
- }
- fprintf(stderr, "Label %d/'%s' not found\n", type, str);
- Py_FatalError("grammar.c:findlabel()");
- return 0; /* Make gcc -Wall happy */
+ int i;
+
+ for (i = 0; i < ll->ll_nlabels; i++) {
+ if (ll->ll_label[i].lb_type == type /*&&
+ strcmp(ll->ll_label[i].lb_str, str) == 0*/)
+ return i;
+ }
+ fprintf(stderr, "Label %d/'%s' not found\n", type, str);
+ Py_FatalError("grammar.c:findlabel()");
+ return 0; /* Make gcc -Wall happy */
}
/* Forward */
@@ -131,120 +131,120 @@ static void translabel(grammar *, label *);
void
translatelabels(grammar *g)
{
- int i;
+ int i;
#ifdef Py_DEBUG
- printf("Translating labels ...\n");
+ printf("Translating labels ...\n");
#endif
- /* Don't translate EMPTY */
- for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++)
- translabel(g, &g->g_ll.ll_label[i]);
+ /* Don't translate EMPTY */
+ for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++)
+ translabel(g, &g->g_ll.ll_label[i]);
}
static void
translabel(grammar *g, label *lb)
{
- int i;
-
- if (Py_DebugFlag)
- printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb));
-
- if (lb->lb_type == NAME) {
- for (i = 0; i < g->g_ndfas; i++) {
- if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) {
- if (Py_DebugFlag)
- printf(
- "Label %s is non-terminal %d.\n",
- lb->lb_str,
- g->g_dfa[i].d_type);
- lb->lb_type = g->g_dfa[i].d_type;
- free(lb->lb_str);
- lb->lb_str = NULL;
- return;
- }
- }
- for (i = 0; i < (int)N_TOKENS; i++) {
- if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) {
- if (Py_DebugFlag)
- printf("Label %s is terminal %d.\n",
- lb->lb_str, i);
- lb->lb_type = i;
- free(lb->lb_str);
- lb->lb_str = NULL;
- return;
- }
- }
- printf("Can't translate NAME label '%s'\n", lb->lb_str);
- return;
- }
-
- if (lb->lb_type == STRING) {
- if (isalpha(Py_CHARMASK(lb->lb_str[1])) ||
- lb->lb_str[1] == '_') {
- char *p;
- char *src;
- char *dest;
- size_t name_len;
- if (Py_DebugFlag)
- printf("Label %s is a keyword\n", lb->lb_str);
- lb->lb_type = NAME;
- src = lb->lb_str + 1;
- p = strchr(src, '\'');
- if (p)
- name_len = p - src;
- else
- name_len = strlen(src);
- dest = (char *)malloc(name_len + 1);
- if (!dest) {
- printf("Can't alloc dest '%s'\n", src);
- return;
- }
- strncpy(dest, src, name_len);
- dest[name_len] = '\0';
- free(lb->lb_str);
- lb->lb_str = dest;
- }
- else if (lb->lb_str[2] == lb->lb_str[0]) {
- int type = (int) PyToken_OneChar(lb->lb_str[1]);
- if (type != OP) {
- lb->lb_type = type;
- free(lb->lb_str);
- lb->lb_str = NULL;
- }
- else
- printf("Unknown OP label %s\n",
- lb->lb_str);
- }
- else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) {
- int type = (int) PyToken_TwoChars(lb->lb_str[1],
- lb->lb_str[2]);
- if (type != OP) {
- lb->lb_type = type;
- free(lb->lb_str);
- lb->lb_str = NULL;
- }
- else
- printf("Unknown OP label %s\n",
- lb->lb_str);
- }
- else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) {
- int type = (int) PyToken_ThreeChars(lb->lb_str[1],
- lb->lb_str[2],
- lb->lb_str[3]);
- if (type != OP) {
- lb->lb_type = type;
- free(lb->lb_str);
- lb->lb_str = NULL;
- }
- else
- printf("Unknown OP label %s\n",
- lb->lb_str);
- }
- else
- printf("Can't translate STRING label %s\n",
- lb->lb_str);
- }
- else
- printf("Can't translate label '%s'\n",
- PyGrammar_LabelRepr(lb));
+ int i;
+
+ if (Py_DebugFlag)
+ printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb));
+
+ if (lb->lb_type == NAME) {
+ for (i = 0; i < g->g_ndfas; i++) {
+ if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) {
+ if (Py_DebugFlag)
+ printf(
+ "Label %s is non-terminal %d.\n",
+ lb->lb_str,
+ g->g_dfa[i].d_type);
+ lb->lb_type = g->g_dfa[i].d_type;
+ free(lb->lb_str);
+ lb->lb_str = NULL;
+ return;
+ }
+ }
+ for (i = 0; i < (int)N_TOKENS; i++) {
+ if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) {
+ if (Py_DebugFlag)
+ printf("Label %s is terminal %d.\n",
+ lb->lb_str, i);
+ lb->lb_type = i;
+ free(lb->lb_str);
+ lb->lb_str = NULL;
+ return;
+ }
+ }
+ printf("Can't translate NAME label '%s'\n", lb->lb_str);
+ return;
+ }
+
+ if (lb->lb_type == STRING) {
+ if (isalpha(Py_CHARMASK(lb->lb_str[1])) ||
+ lb->lb_str[1] == '_') {
+ char *p;
+ char *src;
+ char *dest;
+ size_t name_len;
+ if (Py_DebugFlag)
+ printf("Label %s is a keyword\n", lb->lb_str);
+ lb->lb_type = NAME;
+ src = lb->lb_str + 1;
+ p = strchr(src, '\'');
+ if (p)
+ name_len = p - src;
+ else
+ name_len = strlen(src);
+ dest = (char *)malloc(name_len + 1);
+ if (!dest) {
+ printf("Can't alloc dest '%s'\n", src);
+ return;
+ }
+ strncpy(dest, src, name_len);
+ dest[name_len] = '\0';
+ free(lb->lb_str);
+ lb->lb_str = dest;
+ }
+ else if (lb->lb_str[2] == lb->lb_str[0]) {
+ int type = (int) PyToken_OneChar(lb->lb_str[1]);
+ if (type != OP) {
+ lb->lb_type = type;
+ free(lb->lb_str);
+ lb->lb_str = NULL;
+ }
+ else
+ printf("Unknown OP label %s\n",
+ lb->lb_str);
+ }
+ else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) {
+ int type = (int) PyToken_TwoChars(lb->lb_str[1],
+ lb->lb_str[2]);
+ if (type != OP) {
+ lb->lb_type = type;
+ free(lb->lb_str);
+ lb->lb_str = NULL;
+ }
+ else
+ printf("Unknown OP label %s\n",
+ lb->lb_str);
+ }
+ else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) {
+ int type = (int) PyToken_ThreeChars(lb->lb_str[1],
+ lb->lb_str[2],
+ lb->lb_str[3]);
+ if (type != OP) {
+ lb->lb_type = type;
+ free(lb->lb_str);
+ lb->lb_str = NULL;
+ }
+ else
+ printf("Unknown OP label %s\n",
+ lb->lb_str);
+ }
+ else
+ printf("Can't translate STRING label %s\n",
+ lb->lb_str);
+ }
+ else
+ printf("Can't translate label '%s'\n",
+ PyGrammar_LabelRepr(lb));
}
diff --git a/Parser/grammar1.c b/Parser/grammar1.c
index b76719a..1f7d264 100644
--- a/Parser/grammar1.c
+++ b/Parser/grammar1.c
@@ -11,47 +11,47 @@
dfa *
PyGrammar_FindDFA(grammar *g, register int type)
{
- register dfa *d;
+ register dfa *d;
#if 1
- /* Massive speed-up */
- d = &g->g_dfa[type - NT_OFFSET];
- assert(d->d_type == type);
- return d;
+ /* Massive speed-up */
+ d = &g->g_dfa[type - NT_OFFSET];
+ assert(d->d_type == type);
+ return d;
#else
- /* Old, slow version */
- register int i;
-
- for (i = g->g_ndfas, d = g->g_dfa; --i >= 0; d++) {
- if (d->d_type == type)
- return d;
- }
- assert(0);
- /* NOTREACHED */
+ /* Old, slow version */
+ register int i;
+
+ for (i = g->g_ndfas, d = g->g_dfa; --i >= 0; d++) {
+ if (d->d_type == type)
+ return d;
+ }
+ assert(0);
+ /* NOTREACHED */
#endif
}
char *
PyGrammar_LabelRepr(label *lb)
{
- static char buf[100];
-
- if (lb->lb_type == ENDMARKER)
- return "EMPTY";
- else if (ISNONTERMINAL(lb->lb_type)) {
- if (lb->lb_str == NULL) {
- PyOS_snprintf(buf, sizeof(buf), "NT%d", lb->lb_type);
- return buf;
- }
- else
- return lb->lb_str;
- }
- else {
- if (lb->lb_str == NULL)
- return _PyParser_TokenNames[lb->lb_type];
- else {
- PyOS_snprintf(buf, sizeof(buf), "%.32s(%.32s)",
- _PyParser_TokenNames[lb->lb_type], lb->lb_str);
- return buf;
- }
- }
+ static char buf[100];
+
+ if (lb->lb_type == ENDMARKER)
+ return "EMPTY";
+ else if (ISNONTERMINAL(lb->lb_type)) {
+ if (lb->lb_str == NULL) {
+ PyOS_snprintf(buf, sizeof(buf), "NT%d", lb->lb_type);
+ return buf;
+ }
+ else
+ return lb->lb_str;
+ }
+ else {
+ if (lb->lb_str == NULL)
+ return _PyParser_TokenNames[lb->lb_type];
+ else {
+ PyOS_snprintf(buf, sizeof(buf), "%.32s(%.32s)",
+ _PyParser_TokenNames[lb->lb_type], lb->lb_str);
+ return buf;
+ }
+ }
}
diff --git a/Parser/intrcheck.c b/Parser/intrcheck.c
index 06b5840..4439864 100644
--- a/Parser/intrcheck.c
+++ b/Parser/intrcheck.c
@@ -21,7 +21,7 @@ PyOS_FiniInterrupts(void)
int
PyOS_InterruptOccurred(void)
{
- _wyield();
+ _wyield();
}
#define OK
@@ -47,7 +47,7 @@ PyOS_InterruptOccurred(void)
void
PyOS_InitInterrupts(void)
{
- _go32_want_ctrl_break(1 /* TRUE */);
+ _go32_want_ctrl_break(1 /* TRUE */);
}
void
@@ -58,7 +58,7 @@ PyOS_FiniInterrupts(void)
int
PyOS_InterruptOccurred(void)
{
- return _go32_was_ctrl_break_hit();
+ return _go32_was_ctrl_break_hit();
}
#else /* !__GNUC__ */
@@ -78,12 +78,12 @@ PyOS_FiniInterrupts(void)
int
PyOS_InterruptOccurred(void)
{
- int interrupted = 0;
- while (kbhit()) {
- if (getch() == '\003')
- interrupted = 1;
- }
- return interrupted;
+ int interrupted = 0;
+ while (kbhit()) {
+ if (getch() == '\003')
+ interrupted = 1;
+ }
+ return interrupted;
}
#endif /* __GNUC__ */
@@ -106,7 +106,7 @@ static int interrupted;
void
PyErr_SetInterrupt(void)
{
- interrupted = 1;
+ interrupted = 1;
}
extern int PyErr_CheckSignals(void);
@@ -114,28 +114,28 @@ extern int PyErr_CheckSignals(void);
static int
checksignals_witharg(void * arg)
{
- return PyErr_CheckSignals();
+ return PyErr_CheckSignals();
}
static void
intcatcher(int sig)
{
- extern void Py_Exit(int);
- static char message[] =
+ extern void Py_Exit(int);
+ static char message[] =
"python: to interrupt a truly hanging Python program, interrupt once more.\n";
- switch (interrupted++) {
- case 0:
- break;
- case 1:
- write(2, message, strlen(message));
- break;
- case 2:
- interrupted = 0;
- Py_Exit(1);
- break;
- }
- PyOS_setsig(SIGINT, intcatcher);
- Py_AddPendingCall(checksignals_witharg, NULL);
+ switch (interrupted++) {
+ case 0:
+ break;
+ case 1:
+ write(2, message, strlen(message));
+ break;
+ case 2:
+ interrupted = 0;
+ Py_Exit(1);
+ break;
+ }
+ PyOS_setsig(SIGINT, intcatcher);
+ Py_AddPendingCall(checksignals_witharg, NULL);
}
static void (*old_siginthandler)(int) = SIG_DFL;
@@ -143,23 +143,23 @@ static void (*old_siginthandler)(int) = SIG_DFL;
void
PyOS_InitInterrupts(void)
{
- if ((old_siginthandler = PyOS_setsig(SIGINT, SIG_IGN)) != SIG_IGN)
- PyOS_setsig(SIGINT, intcatcher);
+ if ((old_siginthandler = PyOS_setsig(SIGINT, SIG_IGN)) != SIG_IGN)
+ PyOS_setsig(SIGINT, intcatcher);
}
void
PyOS_FiniInterrupts(void)
{
- PyOS_setsig(SIGINT, old_siginthandler);
+ PyOS_setsig(SIGINT, old_siginthandler);
}
int
PyOS_InterruptOccurred(void)
{
- if (!interrupted)
- return 0;
- interrupted = 0;
- return 1;
+ if (!interrupted)
+ return 0;
+ interrupted = 0;
+ return 1;
}
#endif /* !OK */
@@ -168,7 +168,7 @@ void
PyOS_AfterFork(void)
{
#ifdef WITH_THREAD
- PyEval_ReInitThreads();
- PyThread_ReInitTLS();
+ PyEval_ReInitThreads();
+ PyThread_ReInitTLS();
#endif
}
diff --git a/Parser/listnode.c b/Parser/listnode.c
index c0b3b66..b5f8ad2 100644
--- a/Parser/listnode.c
+++ b/Parser/listnode.c
@@ -12,7 +12,7 @@ static void listnode(FILE *, node *);
void
PyNode_ListTree(node *n)
{
- listnode(stdout, n);
+ listnode(stdout, n);
}
static int level, atbol;
@@ -20,47 +20,47 @@ static int level, atbol;
static void
listnode(FILE *fp, node *n)
{
- level = 0;
- atbol = 1;
- list1node(fp, n);
+ level = 0;
+ atbol = 1;
+ list1node(fp, n);
}
static void
list1node(FILE *fp, node *n)
{
- if (n == 0)
- return;
- if (ISNONTERMINAL(TYPE(n))) {
- int i;
- for (i = 0; i < NCH(n); i++)
- list1node(fp, CHILD(n, i));
- }
- else if (ISTERMINAL(TYPE(n))) {
- switch (TYPE(n)) {
- case INDENT:
- ++level;
- break;
- case DEDENT:
- --level;
- break;
- default:
- if (atbol) {
- int i;
- for (i = 0; i < level; ++i)
- fprintf(fp, "\t");
- atbol = 0;
- }
- if (TYPE(n) == NEWLINE) {
- if (STR(n) != NULL)
- fprintf(fp, "%s", STR(n));
- fprintf(fp, "\n");
- atbol = 1;
- }
- else
- fprintf(fp, "%s ", STR(n));
- break;
- }
- }
- else
- fprintf(fp, "? ");
+ if (n == 0)
+ return;
+ if (ISNONTERMINAL(TYPE(n))) {
+ int i;
+ for (i = 0; i < NCH(n); i++)
+ list1node(fp, CHILD(n, i));
+ }
+ else if (ISTERMINAL(TYPE(n))) {
+ switch (TYPE(n)) {
+ case INDENT:
+ ++level;
+ break;
+ case DEDENT:
+ --level;
+ break;
+ default:
+ if (atbol) {
+ int i;
+ for (i = 0; i < level; ++i)
+ fprintf(fp, "\t");
+ atbol = 0;
+ }
+ if (TYPE(n) == NEWLINE) {
+ if (STR(n) != NULL)
+ fprintf(fp, "%s", STR(n));
+ fprintf(fp, "\n");
+ atbol = 1;
+ }
+ else
+ fprintf(fp, "%s ", STR(n));
+ break;
+ }
+ }
+ else
+ fprintf(fp, "? ");
}
diff --git a/Parser/metagrammar.c b/Parser/metagrammar.c
index b61bc6d..53810b8 100644
--- a/Parser/metagrammar.c
+++ b/Parser/metagrammar.c
@@ -4,152 +4,152 @@
#include "grammar.h"
#include "pgen.h"
static arc arcs_0_0[3] = {
- {2, 0},
- {3, 0},
- {4, 1},
+ {2, 0},
+ {3, 0},
+ {4, 1},
};
static arc arcs_0_1[1] = {
- {0, 1},
+ {0, 1},
};
static state states_0[2] = {
- {3, arcs_0_0},
- {1, arcs_0_1},
+ {3, arcs_0_0},
+ {1, arcs_0_1},
};
static arc arcs_1_0[1] = {
- {5, 1},
+ {5, 1},
};
static arc arcs_1_1[1] = {
- {6, 2},
+ {6, 2},
};
static arc arcs_1_2[1] = {
- {7, 3},
+ {7, 3},
};
static arc arcs_1_3[1] = {
- {3, 4},
+ {3, 4},
};
static arc arcs_1_4[1] = {
- {0, 4},
+ {0, 4},
};
static state states_1[5] = {
- {1, arcs_1_0},
- {1, arcs_1_1},
- {1, arcs_1_2},
- {1, arcs_1_3},
- {1, arcs_1_4},
+ {1, arcs_1_0},
+ {1, arcs_1_1},
+ {1, arcs_1_2},
+ {1, arcs_1_3},
+ {1, arcs_1_4},
};
static arc arcs_2_0[1] = {
- {8, 1},
+ {8, 1},
};
static arc arcs_2_1[2] = {
- {9, 0},
- {0, 1},
+ {9, 0},
+ {0, 1},
};
static state states_2[2] = {
- {1, arcs_2_0},
- {2, arcs_2_1},
+ {1, arcs_2_0},
+ {2, arcs_2_1},
};
static arc arcs_3_0[1] = {
- {10, 1},
+ {10, 1},
};
static arc arcs_3_1[2] = {
- {10, 1},
- {0, 1},
+ {10, 1},
+ {0, 1},
};
static state states_3[2] = {
- {1, arcs_3_0},
- {2, arcs_3_1},
+ {1, arcs_3_0},
+ {2, arcs_3_1},
};
static arc arcs_4_0[2] = {
- {11, 1},
- {13, 2},
+ {11, 1},
+ {13, 2},
};
static arc arcs_4_1[1] = {
- {7, 3},
+ {7, 3},
};
static arc arcs_4_2[3] = {
- {14, 4},
- {15, 4},
- {0, 2},
+ {14, 4},
+ {15, 4},
+ {0, 2},
};
static arc arcs_4_3[1] = {
- {12, 4},
+ {12, 4},
};
static arc arcs_4_4[1] = {
- {0, 4},
+ {0, 4},
};
static state states_4[5] = {
- {2, arcs_4_0},
- {1, arcs_4_1},
- {3, arcs_4_2},
- {1, arcs_4_3},
- {1, arcs_4_4},
+ {2, arcs_4_0},
+ {1, arcs_4_1},
+ {3, arcs_4_2},
+ {1, arcs_4_3},
+ {1, arcs_4_4},
};
static arc arcs_5_0[3] = {
- {5, 1},
- {16, 1},
- {17, 2},
+ {5, 1},
+ {16, 1},
+ {17, 2},
};
static arc arcs_5_1[1] = {
- {0, 1},
+ {0, 1},
};
static arc arcs_5_2[1] = {
- {7, 3},
+ {7, 3},
};
static arc arcs_5_3[1] = {
- {18, 1},
+ {18, 1},
};
static state states_5[4] = {
- {3, arcs_5_0},
- {1, arcs_5_1},
- {1, arcs_5_2},
- {1, arcs_5_3},
+ {3, arcs_5_0},
+ {1, arcs_5_1},
+ {1, arcs_5_2},
+ {1, arcs_5_3},
};
static dfa dfas[6] = {
- {256, "MSTART", 0, 2, states_0,
- "\070\000\000"},
- {257, "RULE", 0, 5, states_1,
- "\040\000\000"},
- {258, "RHS", 0, 2, states_2,
- "\040\010\003"},
- {259, "ALT", 0, 2, states_3,
- "\040\010\003"},
- {260, "ITEM", 0, 5, states_4,
- "\040\010\003"},
- {261, "ATOM", 0, 4, states_5,
- "\040\000\003"},
+ {256, "MSTART", 0, 2, states_0,
+ "\070\000\000"},
+ {257, "RULE", 0, 5, states_1,
+ "\040\000\000"},
+ {258, "RHS", 0, 2, states_2,
+ "\040\010\003"},
+ {259, "ALT", 0, 2, states_3,
+ "\040\010\003"},
+ {260, "ITEM", 0, 5, states_4,
+ "\040\010\003"},
+ {261, "ATOM", 0, 4, states_5,
+ "\040\000\003"},
};
static label labels[19] = {
- {0, "EMPTY"},
- {256, 0},
- {257, 0},
- {4, 0},
- {0, 0},
- {1, 0},
- {11, 0},
- {258, 0},
- {259, 0},
- {18, 0},
- {260, 0},
- {9, 0},
- {10, 0},
- {261, 0},
- {16, 0},
- {14, 0},
- {3, 0},
- {7, 0},
- {8, 0},
+ {0, "EMPTY"},
+ {256, 0},
+ {257, 0},
+ {4, 0},
+ {0, 0},
+ {1, 0},
+ {11, 0},
+ {258, 0},
+ {259, 0},
+ {18, 0},
+ {260, 0},
+ {9, 0},
+ {10, 0},
+ {261, 0},
+ {16, 0},
+ {14, 0},
+ {3, 0},
+ {7, 0},
+ {8, 0},
};
static grammar _PyParser_Grammar = {
- 6,
- dfas,
- {19, labels},
- 256
+ 6,
+ dfas,
+ {19, labels},
+ 256
};
grammar *
meta_grammar(void)
{
- return &_PyParser_Grammar;
+ return &_PyParser_Grammar;
}
grammar *
diff --git a/Parser/myreadline.c b/Parser/myreadline.c
index 0d8394b..ecb4701 100644
--- a/Parser/myreadline.c
+++ b/Parser/myreadline.c
@@ -35,64 +35,64 @@ int (*PyOS_InputHook)(void) = NULL;
static int
my_fgets(char *buf, int len, FILE *fp)
{
- char *p;
- if (PyOS_InputHook != NULL)
- (void)(PyOS_InputHook)();
- errno = 0;
- p = fgets(buf, len, fp);
- if (p != NULL)
- return 0; /* No error */
+ char *p;
+ if (PyOS_InputHook != NULL)
+ (void)(PyOS_InputHook)();
+ errno = 0;
+ p = fgets(buf, len, fp);
+ if (p != NULL)
+ return 0; /* No error */
#ifdef MS_WINDOWS
- /* In the case of a Ctrl+C or some other external event
- interrupting the operation:
- Win2k/NT: ERROR_OPERATION_ABORTED is the most recent Win32
- error code (and feof() returns TRUE).
- Win9x: Ctrl+C seems to have no effect on fgets() returning
- early - the signal handler is called, but the fgets()
- only returns "normally" (ie, when Enter hit or feof())
- */
- if (GetLastError()==ERROR_OPERATION_ABORTED) {
- /* Signals come asynchronously, so we sleep a brief
- moment before checking if the handler has been
- triggered (we cant just return 1 before the
- signal handler has been called, as the later
- signal may be treated as a separate interrupt).
- */
- Sleep(1);
- if (PyOS_InterruptOccurred()) {
- return 1; /* Interrupt */
- }
- /* Either the sleep wasn't long enough (need a
- short loop retrying?) or not interrupted at all
- (in which case we should revisit the whole thing!)
- Logging some warning would be nice. assert is not
- viable as under the debugger, the various dialogs
- mean the condition is not true.
- */
- }
+ /* In the case of a Ctrl+C or some other external event
+ interrupting the operation:
+ Win2k/NT: ERROR_OPERATION_ABORTED is the most recent Win32
+ error code (and feof() returns TRUE).
+ Win9x: Ctrl+C seems to have no effect on fgets() returning
+ early - the signal handler is called, but the fgets()
+ only returns "normally" (ie, when Enter hit or feof())
+ */
+ if (GetLastError()==ERROR_OPERATION_ABORTED) {
+ /* Signals come asynchronously, so we sleep a brief
+ moment before checking if the handler has been
+ triggered (we cant just return 1 before the
+ signal handler has been called, as the later
+ signal may be treated as a separate interrupt).
+ */
+ Sleep(1);
+ if (PyOS_InterruptOccurred()) {
+ return 1; /* Interrupt */
+ }
+ /* Either the sleep wasn't long enough (need a
+ short loop retrying?) or not interrupted at all
+ (in which case we should revisit the whole thing!)
+ Logging some warning would be nice. assert is not
+ viable as under the debugger, the various dialogs
+ mean the condition is not true.
+ */
+ }
#endif /* MS_WINDOWS */
- if (feof(fp)) {
- return -1; /* EOF */
- }
+ if (feof(fp)) {
+ return -1; /* EOF */
+ }
#ifdef EINTR
- if (errno == EINTR) {
- int s;
+ if (errno == EINTR) {
+ int s;
#ifdef WITH_THREAD
- PyEval_RestoreThread(_PyOS_ReadlineTState);
+ PyEval_RestoreThread(_PyOS_ReadlineTState);
#endif
- s = PyErr_CheckSignals();
+ s = PyErr_CheckSignals();
#ifdef WITH_THREAD
- PyEval_SaveThread();
+ PyEval_SaveThread();
#endif
- if (s < 0) {
- return 1;
- }
- }
+ if (s < 0) {
+ return 1;
+ }
+ }
#endif
- if (PyOS_InterruptOccurred()) {
- return 1; /* Interrupt */
- }
- return -2; /* Error */
+ if (PyOS_InterruptOccurred()) {
+ return 1; /* Interrupt */
+ }
+ return -2; /* Error */
}
@@ -101,41 +101,41 @@ my_fgets(char *buf, int len, FILE *fp)
char *
PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, char *prompt)
{
- size_t n;
- char *p;
- n = 100;
- if ((p = (char *)PyMem_MALLOC(n)) == NULL)
- return NULL;
- fflush(sys_stdout);
- if (prompt)
- fprintf(stderr, "%s", prompt);
- fflush(stderr);
- switch (my_fgets(p, (int)n, sys_stdin)) {
- case 0: /* Normal case */
- break;
- case 1: /* Interrupt */
- PyMem_FREE(p);
- return NULL;
- case -1: /* EOF */
- case -2: /* Error */
- default: /* Shouldn't happen */
- *p = '\0';
- break;
- }
- n = strlen(p);
- while (n > 0 && p[n-1] != '\n') {
- size_t incr = n+2;
- p = (char *)PyMem_REALLOC(p, n + incr);
- if (p == NULL)
- return NULL;
- if (incr > INT_MAX) {
- PyErr_SetString(PyExc_OverflowError, "input line too long");
- }
- if (my_fgets(p+n, (int)incr, sys_stdin) != 0)
- break;
- n += strlen(p+n);
- }
- return (char *)PyMem_REALLOC(p, n+1);
+ size_t n;
+ char *p;
+ n = 100;
+ if ((p = (char *)PyMem_MALLOC(n)) == NULL)
+ return NULL;
+ fflush(sys_stdout);
+ if (prompt)
+ fprintf(stderr, "%s", prompt);
+ fflush(stderr);
+ switch (my_fgets(p, (int)n, sys_stdin)) {
+ case 0: /* Normal case */
+ break;
+ case 1: /* Interrupt */
+ PyMem_FREE(p);
+ return NULL;
+ case -1: /* EOF */
+ case -2: /* Error */
+ default: /* Shouldn't happen */
+ *p = '\0';
+ break;
+ }
+ n = strlen(p);
+ while (n > 0 && p[n-1] != '\n') {
+ size_t incr = n+2;
+ p = (char *)PyMem_REALLOC(p, n + incr);
+ if (p == NULL)
+ return NULL;
+ if (incr > INT_MAX) {
+ PyErr_SetString(PyExc_OverflowError, "input line too long");
+ }
+ if (my_fgets(p+n, (int)incr, sys_stdin) != 0)
+ break;
+ n += strlen(p+n);
+ }
+ return (char *)PyMem_REALLOC(p, n+1);
}
@@ -152,52 +152,52 @@ char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, char *);
char *
PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, char *prompt)
{
- char *rv;
+ char *rv;
- if (_PyOS_ReadlineTState == PyThreadState_GET()) {
- PyErr_SetString(PyExc_RuntimeError,
- "can't re-enter readline");
- return NULL;
- }
-
+ if (_PyOS_ReadlineTState == PyThreadState_GET()) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "can't re-enter readline");
+ return NULL;
+ }
- if (PyOS_ReadlineFunctionPointer == NULL) {
+
+ if (PyOS_ReadlineFunctionPointer == NULL) {
#ifdef __VMS
- PyOS_ReadlineFunctionPointer = vms__StdioReadline;
+ PyOS_ReadlineFunctionPointer = vms__StdioReadline;
#else
- PyOS_ReadlineFunctionPointer = PyOS_StdioReadline;
+ PyOS_ReadlineFunctionPointer = PyOS_StdioReadline;
#endif
- }
-
+ }
+
#ifdef WITH_THREAD
- if (_PyOS_ReadlineLock == NULL) {
- _PyOS_ReadlineLock = PyThread_allocate_lock();
- }
+ if (_PyOS_ReadlineLock == NULL) {
+ _PyOS_ReadlineLock = PyThread_allocate_lock();
+ }
#endif
- _PyOS_ReadlineTState = PyThreadState_GET();
- Py_BEGIN_ALLOW_THREADS
+ _PyOS_ReadlineTState = PyThreadState_GET();
+ Py_BEGIN_ALLOW_THREADS
#ifdef WITH_THREAD
- PyThread_acquire_lock(_PyOS_ReadlineLock, 1);
+ PyThread_acquire_lock(_PyOS_ReadlineLock, 1);
#endif
- /* This is needed to handle the unlikely case that the
- * interpreter is in interactive mode *and* stdin/out are not
- * a tty. This can happen, for example if python is run like
- * this: python -i < test1.py
- */
- if (!isatty (fileno (sys_stdin)) || !isatty (fileno (sys_stdout)))
- rv = PyOS_StdioReadline (sys_stdin, sys_stdout, prompt);
- else
- rv = (*PyOS_ReadlineFunctionPointer)(sys_stdin, sys_stdout,
- prompt);
- Py_END_ALLOW_THREADS
+ /* This is needed to handle the unlikely case that the
+ * interpreter is in interactive mode *and* stdin/out are not
+ * a tty. This can happen, for example if python is run like
+ * this: python -i < test1.py
+ */
+ if (!isatty (fileno (sys_stdin)) || !isatty (fileno (sys_stdout)))
+ rv = PyOS_StdioReadline (sys_stdin, sys_stdout, prompt);
+ else
+ rv = (*PyOS_ReadlineFunctionPointer)(sys_stdin, sys_stdout,
+ prompt);
+ Py_END_ALLOW_THREADS
#ifdef WITH_THREAD
- PyThread_release_lock(_PyOS_ReadlineLock);
+ PyThread_release_lock(_PyOS_ReadlineLock);
#endif
- _PyOS_ReadlineTState = NULL;
+ _PyOS_ReadlineTState = NULL;
- return rv;
+ return rv;
}
diff --git a/Parser/node.c b/Parser/node.c
index f4c86cb..9eba76b 100644
--- a/Parser/node.c
+++ b/Parser/node.c
@@ -7,30 +7,30 @@
node *
PyNode_New(int type)
{
- node *n = (node *) PyObject_MALLOC(1 * sizeof(node));
- if (n == NULL)
- return NULL;
- n->n_type = type;
- n->n_str = NULL;
- n->n_lineno = 0;
- n->n_nchildren = 0;
- n->n_child = NULL;
- return n;
+ node *n = (node *) PyObject_MALLOC(1 * sizeof(node));
+ if (n == NULL)
+ return NULL;
+ n->n_type = type;
+ n->n_str = NULL;
+ n->n_lineno = 0;
+ n->n_nchildren = 0;
+ n->n_child = NULL;
+ return n;
}
/* See comments at XXXROUNDUP below. Returns -1 on overflow. */
static int
fancy_roundup(int n)
{
- /* Round up to the closest power of 2 >= n. */
- int result = 256;
- assert(n > 128);
- while (result < n) {
- result <<= 1;
- if (result <= 0)
- return -1;
- }
- return result;
+ /* Round up to the closest power of 2 >= n. */
+ int result = 256;
+ assert(n > 128);
+ while (result < n) {
+ result <<= 1;
+ if (result <= 0)
+ return -1;
+ }
+ return result;
}
/* A gimmick to make massive numbers of reallocs quicker. The result is
@@ -70,46 +70,46 @@ fancy_roundup(int n)
* Note that this would be straightforward if a node stored its current
* capacity. The code is tricky to avoid that.
*/
-#define XXXROUNDUP(n) ((n) <= 1 ? (n) : \
- (n) <= 128 ? (((n) + 3) & ~3) : \
- fancy_roundup(n))
+#define XXXROUNDUP(n) ((n) <= 1 ? (n) : \
+ (n) <= 128 ? (((n) + 3) & ~3) : \
+ fancy_roundup(n))
int
PyNode_AddChild(register node *n1, int type, char *str, int lineno, int col_offset)
{
- const int nch = n1->n_nchildren;
- int current_capacity;
- int required_capacity;
- node *n;
+ const int nch = n1->n_nchildren;
+ int current_capacity;
+ int required_capacity;
+ node *n;
- if (nch == INT_MAX || nch < 0)
- return E_OVERFLOW;
+ if (nch == INT_MAX || nch < 0)
+ return E_OVERFLOW;
- current_capacity = XXXROUNDUP(nch);
- required_capacity = XXXROUNDUP(nch + 1);
- if (current_capacity < 0 || required_capacity < 0)
- return E_OVERFLOW;
- if (current_capacity < required_capacity) {
- if (required_capacity > PY_SIZE_MAX / sizeof(node)) {
- return E_NOMEM;
- }
- n = n1->n_child;
- n = (node *) PyObject_REALLOC(n,
- required_capacity * sizeof(node));
- if (n == NULL)
- return E_NOMEM;
- n1->n_child = n;
- }
+ current_capacity = XXXROUNDUP(nch);
+ required_capacity = XXXROUNDUP(nch + 1);
+ if (current_capacity < 0 || required_capacity < 0)
+ return E_OVERFLOW;
+ if (current_capacity < required_capacity) {
+ if (required_capacity > PY_SIZE_MAX / sizeof(node)) {
+ return E_NOMEM;
+ }
+ n = n1->n_child;
+ n = (node *) PyObject_REALLOC(n,
+ required_capacity * sizeof(node));
+ if (n == NULL)
+ return E_NOMEM;
+ n1->n_child = n;
+ }
- n = &n1->n_child[n1->n_nchildren++];
- n->n_type = type;
- n->n_str = str;
- n->n_lineno = lineno;
- n->n_col_offset = col_offset;
- n->n_nchildren = 0;
- n->n_child = NULL;
- return 0;
+ n = &n1->n_child[n1->n_nchildren++];
+ n->n_type = type;
+ n->n_str = str;
+ n->n_lineno = lineno;
+ n->n_col_offset = col_offset;
+ n->n_nchildren = 0;
+ n->n_child = NULL;
+ return 0;
}
/* Forward */
@@ -119,20 +119,20 @@ static void freechildren(node *);
void
PyNode_Free(node *n)
{
- if (n != NULL) {
- freechildren(n);
- PyObject_FREE(n);
- }
+ if (n != NULL) {
+ freechildren(n);
+ PyObject_FREE(n);
+ }
}
static void
freechildren(node *n)
{
- int i;
- for (i = NCH(n); --i >= 0; )
- freechildren(CHILD(n, i));
- if (n->n_child != NULL)
- PyObject_FREE(n->n_child);
- if (STR(n) != NULL)
- PyObject_FREE(STR(n));
+ int i;
+ for (i = NCH(n); --i >= 0; )
+ freechildren(CHILD(n, i));
+ if (n->n_child != NULL)
+ PyObject_FREE(n->n_child);
+ if (STR(n) != NULL)
+ PyObject_FREE(STR(n));
}
diff --git a/Parser/parser.c b/Parser/parser.c
index 83e5e6d..b505fe0 100644
--- a/Parser/parser.c
+++ b/Parser/parser.c
@@ -29,7 +29,7 @@ static void s_reset(stack *);
static void
s_reset(stack *s)
{
- s->s_top = &s->s_base[MAXSTACK];
+ s->s_top = &s->s_base[MAXSTACK];
}
#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
@@ -37,16 +37,16 @@ s_reset(stack *s)
static int
s_push(register stack *s, dfa *d, node *parent)
{
- register stackentry *top;
- if (s->s_top == s->s_base) {
- fprintf(stderr, "s_push: parser stack overflow\n");
- return E_NOMEM;
- }
- top = --s->s_top;
- top->s_dfa = d;
- top->s_parent = parent;
- top->s_state = 0;
- return 0;
+ register stackentry *top;
+ if (s->s_top == s->s_base) {
+ fprintf(stderr, "s_push: parser stack overflow\n");
+ return E_NOMEM;
+ }
+ top = --s->s_top;
+ top->s_dfa = d;
+ top->s_parent = parent;
+ top->s_state = 0;
+ return 0;
}
#ifdef Py_DEBUG
@@ -54,9 +54,9 @@ s_push(register stack *s, dfa *d, node *parent)
static void
s_pop(register stack *s)
{
- if (s_empty(s))
- Py_FatalError("s_pop: parser stack underflow -- FATAL");
- s->s_top++;
+ if (s_empty(s))
+ Py_FatalError("s_pop: parser stack underflow -- FATAL");
+ s->s_top++;
}
#else /* !Py_DEBUG */
@@ -71,34 +71,34 @@ s_pop(register stack *s)
parser_state *
PyParser_New(grammar *g, int start)
{
- parser_state *ps;
-
- if (!g->g_accel)
- PyGrammar_AddAccelerators(g);
- ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
- if (ps == NULL)
- return NULL;
- ps->p_grammar = g;
+ parser_state *ps;
+
+ if (!g->g_accel)
+ PyGrammar_AddAccelerators(g);
+ ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
+ if (ps == NULL)
+ return NULL;
+ ps->p_grammar = g;
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
- ps->p_flags = 0;
+ ps->p_flags = 0;
#endif
- ps->p_tree = PyNode_New(start);
- if (ps->p_tree == NULL) {
- PyMem_FREE(ps);
- return NULL;
- }
- s_reset(&ps->p_stack);
- (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
- return ps;
+ ps->p_tree = PyNode_New(start);
+ if (ps->p_tree == NULL) {
+ PyMem_FREE(ps);
+ return NULL;
+ }
+ s_reset(&ps->p_stack);
+ (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
+ return ps;
}
void
PyParser_Delete(parser_state *ps)
{
- /* NB If you want to save the parse tree,
- you must set p_tree to NULL before calling delparser! */
- PyNode_Free(ps->p_tree);
- PyMem_FREE(ps);
+ /* NB If you want to save the parse tree,
+ you must set p_tree to NULL before calling delparser! */
+ PyNode_Free(ps->p_tree);
+ PyMem_FREE(ps);
}
@@ -107,27 +107,27 @@ PyParser_Delete(parser_state *ps)
static int
shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset)
{
- int err;
- assert(!s_empty(s));
- err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
- if (err)
- return err;
- s->s_top->s_state = newstate;
- return 0;
+ int err;
+ assert(!s_empty(s));
+ err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
+ if (err)
+ return err;
+ s->s_top->s_state = newstate;
+ return 0;
}
static int
push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
{
- int err;
- register node *n;
- n = s->s_top->s_parent;
- assert(!s_empty(s));
- err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
- if (err)
- return err;
- s->s_top->s_state = newstate;
- return s_push(s, d, CHILD(n, NCH(n)-1));
+ int err;
+ register node *n;
+ n = s->s_top->s_parent;
+ assert(!s_empty(s));
+ err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
+ if (err)
+ return err;
+ s->s_top->s_state = newstate;
+ return s_push(s, d, CHILD(n, NCH(n)-1));
}
@@ -136,47 +136,47 @@ push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offs
static int
classify(parser_state *ps, int type, char *str)
{
- grammar *g = ps->p_grammar;
- register int n = g->g_ll.ll_nlabels;
-
- if (type == NAME) {
- register char *s = str;
- register label *l = g->g_ll.ll_label;
- register int i;
- for (i = n; i > 0; i--, l++) {
- if (l->lb_type != NAME || l->lb_str == NULL ||
- l->lb_str[0] != s[0] ||
- strcmp(l->lb_str, s) != 0)
- continue;
+ grammar *g = ps->p_grammar;
+ register int n = g->g_ll.ll_nlabels;
+
+ if (type == NAME) {
+ register char *s = str;
+ register label *l = g->g_ll.ll_label;
+ register int i;
+ for (i = n; i > 0; i--, l++) {
+ if (l->lb_type != NAME || l->lb_str == NULL ||
+ l->lb_str[0] != s[0] ||
+ strcmp(l->lb_str, s) != 0)
+ continue;
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
- /* Leaving this in as an example */
- if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) {
- if (s[0] == 'w' && strcmp(s, "with") == 0)
- break; /* not a keyword yet */
- else if (s[0] == 'a' && strcmp(s, "as") == 0)
- break; /* not a keyword yet */
- }
+ /* Leaving this in as an example */
+ if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) {
+ if (s[0] == 'w' && strcmp(s, "with") == 0)
+ break; /* not a keyword yet */
+ else if (s[0] == 'a' && strcmp(s, "as") == 0)
+ break; /* not a keyword yet */
+ }
#endif
#endif
- D(printf("It's a keyword\n"));
- return n - i;
- }
- }
-
- {
- register label *l = g->g_ll.ll_label;
- register int i;
- for (i = n; i > 0; i--, l++) {
- if (l->lb_type == type && l->lb_str == NULL) {
- D(printf("It's a token we know\n"));
- return n - i;
- }
- }
- }
-
- D(printf("Illegal token\n"));
- return -1;
+ D(printf("It's a keyword\n"));
+ return n - i;
+ }
+ }
+
+ {
+ register label *l = g->g_ll.ll_label;
+ register int i;
+ for (i = n; i > 0; i--, l++) {
+ if (l->lb_type == type && l->lb_str == NULL) {
+ D(printf("It's a token we know\n"));
+ return n - i;
+ }
+ }
+ }
+
+ D(printf("Illegal token\n"));
+ return -1;
}
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
@@ -185,152 +185,152 @@ classify(parser_state *ps, int type, char *str)
static void
future_hack(parser_state *ps)
{
- node *n = ps->p_stack.s_top->s_parent;
- node *ch, *cch;
- int i;
-
- /* from __future__ import ..., must have at least 4 children */
- n = CHILD(n, 0);
- if (NCH(n) < 4)
- return;
- ch = CHILD(n, 0);
- if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
- return;
- ch = CHILD(n, 1);
- if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
- strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
- return;
- ch = CHILD(n, 3);
- /* ch can be a star, a parenthesis or import_as_names */
- if (TYPE(ch) == STAR)
- return;
- if (TYPE(ch) == LPAR)
- ch = CHILD(n, 4);
-
- for (i = 0; i < NCH(ch); i += 2) {
- cch = CHILD(ch, i);
- if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
- char *str_ch = STR(CHILD(cch, 0));
- if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
- ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
- } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
- ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
- } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
- ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
- }
- }
- }
+ node *n = ps->p_stack.s_top->s_parent;
+ node *ch, *cch;
+ int i;
+
+ /* from __future__ import ..., must have at least 4 children */
+ n = CHILD(n, 0);
+ if (NCH(n) < 4)
+ return;
+ ch = CHILD(n, 0);
+ if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
+ return;
+ ch = CHILD(n, 1);
+ if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
+ strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
+ return;
+ ch = CHILD(n, 3);
+ /* ch can be a star, a parenthesis or import_as_names */
+ if (TYPE(ch) == STAR)
+ return;
+ if (TYPE(ch) == LPAR)
+ ch = CHILD(n, 4);
+
+ for (i = 0; i < NCH(ch); i += 2) {
+ cch = CHILD(ch, i);
+ if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
+ char *str_ch = STR(CHILD(cch, 0));
+ if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
+ ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
+ } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
+ ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
+ } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
+ ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
+ }
+ }
+ }
}
#endif
#endif /* future keyword */
int
PyParser_AddToken(register parser_state *ps, register int type, char *str,
- int lineno, int col_offset, int *expected_ret)
+ int lineno, int col_offset, int *expected_ret)
{
- register int ilabel;
- int err;
-
- D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
-
- /* Find out which label this token is */
- ilabel = classify(ps, type, str);
- if (ilabel < 0)
- return E_SYNTAX;
-
- /* Loop until the token is shifted or an error occurred */
- for (;;) {
- /* Fetch the current dfa and state */
- register dfa *d = ps->p_stack.s_top->s_dfa;
- register state *s = &d->d_state[ps->p_stack.s_top->s_state];
-
- D(printf(" DFA '%s', state %d:",
- d->d_name, ps->p_stack.s_top->s_state));
-
- /* Check accelerator */
- if (s->s_lower <= ilabel && ilabel < s->s_upper) {
- register int x = s->s_accel[ilabel - s->s_lower];
- if (x != -1) {
- if (x & (1<<7)) {
- /* Push non-terminal */
- int nt = (x >> 8) + NT_OFFSET;
- int arrow = x & ((1<<7)-1);
- dfa *d1 = PyGrammar_FindDFA(
- ps->p_grammar, nt);
- if ((err = push(&ps->p_stack, nt, d1,
- arrow, lineno, col_offset)) > 0) {
- D(printf(" MemError: push\n"));
- return err;
- }
- D(printf(" Push ...\n"));
- continue;
- }
-
- /* Shift the token */
- if ((err = shift(&ps->p_stack, type, str,
- x, lineno, col_offset)) > 0) {
- D(printf(" MemError: shift.\n"));
- return err;
- }
- D(printf(" Shift.\n"));
- /* Pop while we are in an accept-only state */
- while (s = &d->d_state
- [ps->p_stack.s_top->s_state],
- s->s_accept && s->s_narcs == 1) {
- D(printf(" DFA '%s', state %d: "
- "Direct pop.\n",
- d->d_name,
- ps->p_stack.s_top->s_state));
+ register int ilabel;
+ int err;
+
+ D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
+
+ /* Find out which label this token is */
+ ilabel = classify(ps, type, str);
+ if (ilabel < 0)
+ return E_SYNTAX;
+
+ /* Loop until the token is shifted or an error occurred */
+ for (;;) {
+ /* Fetch the current dfa and state */
+ register dfa *d = ps->p_stack.s_top->s_dfa;
+ register state *s = &d->d_state[ps->p_stack.s_top->s_state];
+
+ D(printf(" DFA '%s', state %d:",
+ d->d_name, ps->p_stack.s_top->s_state));
+
+ /* Check accelerator */
+ if (s->s_lower <= ilabel && ilabel < s->s_upper) {
+ register int x = s->s_accel[ilabel - s->s_lower];
+ if (x != -1) {
+ if (x & (1<<7)) {
+ /* Push non-terminal */
+ int nt = (x >> 8) + NT_OFFSET;
+ int arrow = x & ((1<<7)-1);
+ dfa *d1 = PyGrammar_FindDFA(
+ ps->p_grammar, nt);
+ if ((err = push(&ps->p_stack, nt, d1,
+ arrow, lineno, col_offset)) > 0) {
+ D(printf(" MemError: push\n"));
+ return err;
+ }
+ D(printf(" Push ...\n"));
+ continue;
+ }
+
+ /* Shift the token */
+ if ((err = shift(&ps->p_stack, type, str,
+ x, lineno, col_offset)) > 0) {
+ D(printf(" MemError: shift.\n"));
+ return err;
+ }
+ D(printf(" Shift.\n"));
+ /* Pop while we are in an accept-only state */
+ while (s = &d->d_state
+ [ps->p_stack.s_top->s_state],
+ s->s_accept && s->s_narcs == 1) {
+ D(printf(" DFA '%s', state %d: "
+ "Direct pop.\n",
+ d->d_name,
+ ps->p_stack.s_top->s_state));
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
- if (d->d_name[0] == 'i' &&
- strcmp(d->d_name,
- "import_stmt") == 0)
- future_hack(ps);
+ if (d->d_name[0] == 'i' &&
+ strcmp(d->d_name,
+ "import_stmt") == 0)
+ future_hack(ps);
#endif
#endif
- s_pop(&ps->p_stack);
- if (s_empty(&ps->p_stack)) {
- D(printf(" ACCEPT.\n"));
- return E_DONE;
- }
- d = ps->p_stack.s_top->s_dfa;
- }
- return E_OK;
- }
- }
-
- if (s->s_accept) {
+ s_pop(&ps->p_stack);
+ if (s_empty(&ps->p_stack)) {
+ D(printf(" ACCEPT.\n"));
+ return E_DONE;
+ }
+ d = ps->p_stack.s_top->s_dfa;
+ }
+ return E_OK;
+ }
+ }
+
+ if (s->s_accept) {
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
- if (d->d_name[0] == 'i' &&
- strcmp(d->d_name, "import_stmt") == 0)
- future_hack(ps);
+ if (d->d_name[0] == 'i' &&
+ strcmp(d->d_name, "import_stmt") == 0)
+ future_hack(ps);
#endif
#endif
- /* Pop this dfa and try again */
- s_pop(&ps->p_stack);
- D(printf(" Pop ...\n"));
- if (s_empty(&ps->p_stack)) {
- D(printf(" Error: bottom of stack.\n"));
- return E_SYNTAX;
- }
- continue;
- }
-
- /* Stuck, report syntax error */
- D(printf(" Error.\n"));
- if (expected_ret) {
- if (s->s_lower == s->s_upper - 1) {
- /* Only one possible expected token */
- *expected_ret = ps->p_grammar->
- g_ll.ll_label[s->s_lower].lb_type;
- }
- else
- *expected_ret = -1;
- }
- return E_SYNTAX;
- }
+ /* Pop this dfa and try again */
+ s_pop(&ps->p_stack);
+ D(printf(" Pop ...\n"));
+ if (s_empty(&ps->p_stack)) {
+ D(printf(" Error: bottom of stack.\n"));
+ return E_SYNTAX;
+ }
+ continue;
+ }
+
+ /* Stuck, report syntax error */
+ D(printf(" Error.\n"));
+ if (expected_ret) {
+ if (s->s_lower == s->s_upper - 1) {
+ /* Only one possible expected token */
+ *expected_ret = ps->p_grammar->
+ g_ll.ll_label[s->s_lower].lb_type;
+ }
+ else
+ *expected_ret = -1;
+ }
+ return E_SYNTAX;
+ }
}
@@ -341,62 +341,62 @@ PyParser_AddToken(register parser_state *ps, register int type, char *str,
void
dumptree(grammar *g, node *n)
{
- int i;
-
- if (n == NULL)
- printf("NIL");
- else {
- label l;
- l.lb_type = TYPE(n);
- l.lb_str = STR(n);
- printf("%s", PyGrammar_LabelRepr(&l));
- if (ISNONTERMINAL(TYPE(n))) {
- printf("(");
- for (i = 0; i < NCH(n); i++) {
- if (i > 0)
- printf(",");
- dumptree(g, CHILD(n, i));
- }
- printf(")");
- }
- }
+ int i;
+
+ if (n == NULL)
+ printf("NIL");
+ else {
+ label l;
+ l.lb_type = TYPE(n);
+ l.lb_str = STR(n);
+ printf("%s", PyGrammar_LabelRepr(&l));
+ if (ISNONTERMINAL(TYPE(n))) {
+ printf("(");
+ for (i = 0; i < NCH(n); i++) {
+ if (i > 0)
+ printf(",");
+ dumptree(g, CHILD(n, i));
+ }
+ printf(")");
+ }
+ }
}
void
showtree(grammar *g, node *n)
{
- int i;
-
- if (n == NULL)
- return;
- if (ISNONTERMINAL(TYPE(n))) {
- for (i = 0; i < NCH(n); i++)
- showtree(g, CHILD(n, i));
- }
- else if (ISTERMINAL(TYPE(n))) {
- printf("%s", _PyParser_TokenNames[TYPE(n)]);
- if (TYPE(n) == NUMBER || TYPE(n) == NAME)
- printf("(%s)", STR(n));
- printf(" ");
- }
- else
- printf("? ");
+ int i;
+
+ if (n == NULL)
+ return;
+ if (ISNONTERMINAL(TYPE(n))) {
+ for (i = 0; i < NCH(n); i++)
+ showtree(g, CHILD(n, i));
+ }
+ else if (ISTERMINAL(TYPE(n))) {
+ printf("%s", _PyParser_TokenNames[TYPE(n)]);
+ if (TYPE(n) == NUMBER || TYPE(n) == NAME)
+ printf("(%s)", STR(n));
+ printf(" ");
+ }
+ else
+ printf("? ");
}
void
printtree(parser_state *ps)
{
- if (Py_DebugFlag) {
- printf("Parse tree:\n");
- dumptree(ps->p_grammar, ps->p_tree);
- printf("\n");
- printf("Tokens:\n");
- showtree(ps->p_grammar, ps->p_tree);
- printf("\n");
- }
- printf("Listing:\n");
- PyNode_ListTree(ps->p_tree);
- printf("\n");
+ if (Py_DebugFlag) {
+ printf("Parse tree:\n");
+ dumptree(ps->p_grammar, ps->p_tree);
+ printf("\n");
+ printf("Tokens:\n");
+ showtree(ps->p_grammar, ps->p_tree);
+ printf("\n");
+ }
+ printf("Listing:\n");
+ PyNode_ListTree(ps->p_tree);
+ printf("\n");
}
#endif /* Py_DEBUG */
@@ -431,15 +431,15 @@ symbol.
As an example, consider this grammar:
-expr: term (OP term)*
-term: CONSTANT | '(' expr ')'
+expr: term (OP term)*
+term: CONSTANT | '(' expr ')'
The DFA corresponding to the rule for expr is:
------->.---term-->.------->
- ^ |
- | |
- \----OP----/
+ ^ |
+ | |
+ \----OP----/
The parse tree generated for the input a+b is:
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index ff4ca70..7636a54 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -19,85 +19,85 @@ static void initerr(perrdetail *err_ret, const char* filename);
node *
PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
{
- return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
+ return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
}
node *
PyParser_ParseStringFlags(const char *s, grammar *g, int start,
- perrdetail *err_ret, int flags)
+ perrdetail *err_ret, int flags)
{
- return PyParser_ParseStringFlagsFilename(s, NULL,
- g, start, err_ret, flags);
+ return PyParser_ParseStringFlagsFilename(s, NULL,
+ g, start, err_ret, flags);
}
node *
PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
- grammar *g, int start,
- perrdetail *err_ret, int flags)
+ grammar *g, int start,
+ perrdetail *err_ret, int flags)
{
- int iflags = flags;
- return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
- err_ret, &iflags);
+ int iflags = flags;
+ return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
+ err_ret, &iflags);
}
node *
PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
- grammar *g, int start,
- perrdetail *err_ret, int *flags)
+ grammar *g, int start,
+ perrdetail *err_ret, int *flags)
{
- struct tok_state *tok;
- int exec_input = start == file_input;
-
- initerr(err_ret, filename);
-
- if (*flags & PyPARSE_IGNORE_COOKIE)
- tok = PyTokenizer_FromUTF8(s, exec_input);
- else
- tok = PyTokenizer_FromString(s, exec_input);
- if (tok == NULL) {
- err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
- return NULL;
- }
-
- tok->filename = filename ? filename : "<string>";
- return parsetok(tok, g, start, err_ret, flags);
+ struct tok_state *tok;
+ int exec_input = start == file_input;
+
+ initerr(err_ret, filename);
+
+ if (*flags & PyPARSE_IGNORE_COOKIE)
+ tok = PyTokenizer_FromUTF8(s, exec_input);
+ else
+ tok = PyTokenizer_FromString(s, exec_input);
+ if (tok == NULL) {
+ err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
+ return NULL;
+ }
+
+ tok->filename = filename ? filename : "<string>";
+ return parsetok(tok, g, start, err_ret, flags);
}
/* Parse input coming from a file. Return error code, print some errors. */
node *
PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
- char *ps1, char *ps2, perrdetail *err_ret)
+ char *ps1, char *ps2, perrdetail *err_ret)
{
- return PyParser_ParseFileFlags(fp, filename, NULL,
- g, start, ps1, ps2, err_ret, 0);
+ return PyParser_ParseFileFlags(fp, filename, NULL,
+ g, start, ps1, ps2, err_ret, 0);
}
node *
PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc,
- grammar *g, int start,
- char *ps1, char *ps2, perrdetail *err_ret, int flags)
+ grammar *g, int start,
+ char *ps1, char *ps2, perrdetail *err_ret, int flags)
{
- int iflags = flags;
- return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1,
- ps2, err_ret, &iflags);
+ int iflags = flags;
+ return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1,
+ ps2, err_ret, &iflags);
}
node *
-PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
- const char *enc, grammar *g, int start,
- char *ps1, char *ps2, perrdetail *err_ret, int *flags)
+PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
+ const char *enc, grammar *g, int start,
+ char *ps1, char *ps2, perrdetail *err_ret, int *flags)
{
- struct tok_state *tok;
+ struct tok_state *tok;
- initerr(err_ret, filename);
+ initerr(err_ret, filename);
- if ((tok = PyTokenizer_FromFile(fp, (char *)enc, ps1, ps2)) == NULL) {
- err_ret->error = E_NOMEM;
- return NULL;
- }
- tok->filename = filename;
- return parsetok(tok, g, start, err_ret, flags);
+ if ((tok = PyTokenizer_FromFile(fp, (char *)enc, ps1, ps2)) == NULL) {
+ err_ret->error = E_NOMEM;
+ return NULL;
+ }
+ tok->filename = filename;
+ return parsetok(tok, g, start, err_ret, flags);
}
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
@@ -111,9 +111,9 @@ static char as_msg[] =
static void
warn(const char *msg, const char *filename, int lineno)
{
- if (filename == NULL)
- filename = "<string>";
- PySys_WriteStderr(msg, filename, lineno);
+ if (filename == NULL)
+ filename = "<string>";
+ PySys_WriteStderr(msg, filename, lineno);
}
#endif
#endif
@@ -123,159 +123,159 @@ warn(const char *msg, const char *filename, int lineno)
static node *
parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
- int *flags)
+ int *flags)
{
- parser_state *ps;
- node *n;
- int started = 0, handling_import = 0, handling_with = 0;
-
- if ((ps = PyParser_New(g, start)) == NULL) {
- fprintf(stderr, "no mem for new parser\n");
- err_ret->error = E_NOMEM;
- PyTokenizer_Free(tok);
- return NULL;
- }
+ parser_state *ps;
+ node *n;
+ int started = 0, handling_import = 0, handling_with = 0;
+
+ if ((ps = PyParser_New(g, start)) == NULL) {
+ fprintf(stderr, "no mem for new parser\n");
+ err_ret->error = E_NOMEM;
+ PyTokenizer_Free(tok);
+ return NULL;
+ }
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
- if (*flags & PyPARSE_BARRY_AS_BDFL)
- ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
+ if (*flags & PyPARSE_BARRY_AS_BDFL)
+ ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
#endif
- for (;;) {
- char *a, *b;
- int type;
- size_t len;
- char *str;
- int col_offset;
-
- type = PyTokenizer_Get(tok, &a, &b);
- if (type == ERRORTOKEN) {
- err_ret->error = tok->done;
- break;
- }
- if (type == ENDMARKER && started) {
- type = NEWLINE; /* Add an extra newline */
- handling_with = handling_import = 0;
- started = 0;
- /* Add the right number of dedent tokens,
- except if a certain flag is given --
- codeop.py uses this. */
- if (tok->indent &&
- !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
- {
- tok->pendin = -tok->indent;
- tok->indent = 0;
- }
- }
- else
- started = 1;
- len = b - a; /* XXX this may compute NULL - NULL */
- str = (char *) PyObject_MALLOC(len + 1);
- if (str == NULL) {
- fprintf(stderr, "no mem for next token\n");
- err_ret->error = E_NOMEM;
- break;
- }
- if (len > 0)
- strncpy(str, a, len);
- str[len] = '\0';
+ for (;;) {
+ char *a, *b;
+ int type;
+ size_t len;
+ char *str;
+ int col_offset;
+
+ type = PyTokenizer_Get(tok, &a, &b);
+ if (type == ERRORTOKEN) {
+ err_ret->error = tok->done;
+ break;
+ }
+ if (type == ENDMARKER && started) {
+ type = NEWLINE; /* Add an extra newline */
+ handling_with = handling_import = 0;
+ started = 0;
+ /* Add the right number of dedent tokens,
+ except if a certain flag is given --
+ codeop.py uses this. */
+ if (tok->indent &&
+ !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
+ {
+ tok->pendin = -tok->indent;
+ tok->indent = 0;
+ }
+ }
+ else
+ started = 1;
+ len = b - a; /* XXX this may compute NULL - NULL */
+ str = (char *) PyObject_MALLOC(len + 1);
+ if (str == NULL) {
+ fprintf(stderr, "no mem for next token\n");
+ err_ret->error = E_NOMEM;
+ break;
+ }
+ if (len > 0)
+ strncpy(str, a, len);
+ str[len] = '\0';
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
- if (type == NOTEQUAL) {
- if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
- strcmp(str, "!=")) {
- err_ret->error = E_SYNTAX;
- break;
- }
- else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
- strcmp(str, "<>")) {
- err_ret->text = "with Barry as BDFL, use '<>' "
- "instead of '!='";
- err_ret->error = E_SYNTAX;
- break;
- }
- }
+ if (type == NOTEQUAL) {
+ if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
+ strcmp(str, "!=")) {
+ err_ret->error = E_SYNTAX;
+ break;
+ }
+ else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
+ strcmp(str, "<>")) {
+ err_ret->text = "with Barry as BDFL, use '<>' "
+ "instead of '!='";
+ err_ret->error = E_SYNTAX;
+ break;
+ }
+ }
#endif
- if (a >= tok->line_start)
- col_offset = a - tok->line_start;
- else
- col_offset = -1;
-
- if ((err_ret->error =
- PyParser_AddToken(ps, (int)type, str,
- tok->lineno, col_offset,
- &(err_ret->expected))) != E_OK) {
- if (err_ret->error != E_DONE) {
- PyObject_FREE(str);
- err_ret->token = type;
- }
- break;
- }
- }
-
- if (err_ret->error == E_DONE) {
- n = ps->p_tree;
- ps->p_tree = NULL;
- }
- else
- n = NULL;
+ if (a >= tok->line_start)
+ col_offset = a - tok->line_start;
+ else
+ col_offset = -1;
+
+ if ((err_ret->error =
+ PyParser_AddToken(ps, (int)type, str,
+ tok->lineno, col_offset,
+ &(err_ret->expected))) != E_OK) {
+ if (err_ret->error != E_DONE) {
+ PyObject_FREE(str);
+ err_ret->token = type;
+ }
+ break;
+ }
+ }
+
+ if (err_ret->error == E_DONE) {
+ n = ps->p_tree;
+ ps->p_tree = NULL;
+ }
+ else
+ n = NULL;
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
- *flags = ps->p_flags;
+ *flags = ps->p_flags;
#endif
- PyParser_Delete(ps);
-
- if (n == NULL) {
- if (tok->lineno <= 1 && tok->done == E_EOF)
- err_ret->error = E_EOF;
- err_ret->lineno = tok->lineno;
- if (tok->buf != NULL) {
- size_t len;
- assert(tok->cur - tok->buf < INT_MAX);
- err_ret->offset = (int)(tok->cur - tok->buf);
- len = tok->inp - tok->buf;
- err_ret->text = (char *) PyObject_MALLOC(len + 1);
- if (err_ret->text != NULL) {
- if (len > 0)
- strncpy(err_ret->text, tok->buf, len);
- err_ret->text[len] = '\0';
- }
- }
- } else if (tok->encoding != NULL) {
- /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
- * allocated using PyMem_
- */
- node* r = PyNode_New(encoding_decl);
- if (r)
- r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
- if (!r || !r->n_str) {
- err_ret->error = E_NOMEM;
- if (r)
- PyObject_FREE(r);
- n = NULL;
- goto done;
- }
- strcpy(r->n_str, tok->encoding);
- PyMem_FREE(tok->encoding);
- tok->encoding = NULL;
- r->n_nchildren = 1;
- r->n_child = n;
- n = r;
- }
+ PyParser_Delete(ps);
+
+ if (n == NULL) {
+ if (tok->lineno <= 1 && tok->done == E_EOF)
+ err_ret->error = E_EOF;
+ err_ret->lineno = tok->lineno;
+ if (tok->buf != NULL) {
+ size_t len;
+ assert(tok->cur - tok->buf < INT_MAX);
+ err_ret->offset = (int)(tok->cur - tok->buf);
+ len = tok->inp - tok->buf;
+ err_ret->text = (char *) PyObject_MALLOC(len + 1);
+ if (err_ret->text != NULL) {
+ if (len > 0)
+ strncpy(err_ret->text, tok->buf, len);
+ err_ret->text[len] = '\0';
+ }
+ }
+ } else if (tok->encoding != NULL) {
+ /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
+ * allocated using PyMem_
+ */
+ node* r = PyNode_New(encoding_decl);
+ if (r)
+ r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
+ if (!r || !r->n_str) {
+ err_ret->error = E_NOMEM;
+ if (r)
+ PyObject_FREE(r);
+ n = NULL;
+ goto done;
+ }
+ strcpy(r->n_str, tok->encoding);
+ PyMem_FREE(tok->encoding);
+ tok->encoding = NULL;
+ r->n_nchildren = 1;
+ r->n_child = n;
+ n = r;
+ }
done:
- PyTokenizer_Free(tok);
+ PyTokenizer_Free(tok);
- return n;
+ return n;
}
static void
initerr(perrdetail *err_ret, const char *filename)
{
- err_ret->error = E_OK;
- err_ret->filename = filename;
- err_ret->lineno = 0;
- err_ret->offset = 0;
- err_ret->text = NULL;
- err_ret->token = -1;
- err_ret->expected = -1;
+ err_ret->error = E_OK;
+ err_ret->filename = filename;
+ err_ret->lineno = 0;
+ err_ret->offset = 0;
+ err_ret->text = NULL;
+ err_ret->token = -1;
+ err_ret->expected = -1;
}
diff --git a/Parser/pgen.c b/Parser/pgen.c
index 959a5d3..beaf53b 100644
--- a/Parser/pgen.c
+++ b/Parser/pgen.c
@@ -17,85 +17,85 @@ extern int Py_IgnoreEnvironmentFlag; /* needed by Py_GETENV */
/* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */
typedef struct _nfaarc {
- int ar_label;
- int ar_arrow;
+ int ar_label;
+ int ar_arrow;
} nfaarc;
typedef struct _nfastate {
- int st_narcs;
- nfaarc *st_arc;
+ int st_narcs;
+ nfaarc *st_arc;
} nfastate;
typedef struct _nfa {
- int nf_type;
- char *nf_name;
- int nf_nstates;
- nfastate *nf_state;
- int nf_start, nf_finish;
+ int nf_type;
+ char *nf_name;
+ int nf_nstates;
+ nfastate *nf_state;
+ int nf_start, nf_finish;
} nfa;
/* Forward */
static void compile_rhs(labellist *ll,
- nfa *nf, node *n, int *pa, int *pb);
+ nfa *nf, node *n, int *pa, int *pb);
static void compile_alt(labellist *ll,
- nfa *nf, node *n, int *pa, int *pb);
+ nfa *nf, node *n, int *pa, int *pb);
static void compile_item(labellist *ll,
- nfa *nf, node *n, int *pa, int *pb);
+ nfa *nf, node *n, int *pa, int *pb);
static void compile_atom(labellist *ll,
- nfa *nf, node *n, int *pa, int *pb);
+ nfa *nf, node *n, int *pa, int *pb);
static int
addnfastate(nfa *nf)
{
- nfastate *st;
-
- nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state,
- sizeof(nfastate) * (nf->nf_nstates + 1));
- if (nf->nf_state == NULL)
- Py_FatalError("out of mem");
- st = &nf->nf_state[nf->nf_nstates++];
- st->st_narcs = 0;
- st->st_arc = NULL;
- return st - nf->nf_state;
+ nfastate *st;
+
+ nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state,
+ sizeof(nfastate) * (nf->nf_nstates + 1));
+ if (nf->nf_state == NULL)
+ Py_FatalError("out of mem");
+ st = &nf->nf_state[nf->nf_nstates++];
+ st->st_narcs = 0;
+ st->st_arc = NULL;
+ return st - nf->nf_state;
}
static void
addnfaarc(nfa *nf, int from, int to, int lbl)
{
- nfastate *st;
- nfaarc *ar;
-
- st = &nf->nf_state[from];
- st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc,
- sizeof(nfaarc) * (st->st_narcs + 1));
- if (st->st_arc == NULL)
- Py_FatalError("out of mem");
- ar = &st->st_arc[st->st_narcs++];
- ar->ar_label = lbl;
- ar->ar_arrow = to;
+ nfastate *st;
+ nfaarc *ar;
+
+ st = &nf->nf_state[from];
+ st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc,
+ sizeof(nfaarc) * (st->st_narcs + 1));
+ if (st->st_arc == NULL)
+ Py_FatalError("out of mem");
+ ar = &st->st_arc[st->st_narcs++];
+ ar->ar_label = lbl;
+ ar->ar_arrow = to;
}
static nfa *
newnfa(char *name)
{
- nfa *nf;
- static int type = NT_OFFSET; /* All types will be disjunct */
-
- nf = (nfa *)PyObject_MALLOC(sizeof(nfa));
- if (nf == NULL)
- Py_FatalError("no mem for new nfa");
- nf->nf_type = type++;
- nf->nf_name = name; /* XXX strdup(name) ??? */
- nf->nf_nstates = 0;
- nf->nf_state = NULL;
- nf->nf_start = nf->nf_finish = -1;
- return nf;
+ nfa *nf;
+ static int type = NT_OFFSET; /* All types will be disjunct */
+
+ nf = (nfa *)PyObject_MALLOC(sizeof(nfa));
+ if (nf == NULL)
+ Py_FatalError("no mem for new nfa");
+ nf->nf_type = type++;
+ nf->nf_name = name; /* XXX strdup(name) ??? */
+ nf->nf_nstates = 0;
+ nf->nf_state = NULL;
+ nf->nf_start = nf->nf_finish = -1;
+ return nf;
}
typedef struct _nfagrammar {
- int gr_nnfas;
- nfa **gr_nfa;
- labellist gr_ll;
+ int gr_nnfas;
+ nfa **gr_nfa;
+ labellist gr_ll;
} nfagrammar;
/* Forward */
@@ -104,32 +104,32 @@ static void compile_rule(nfagrammar *gr, node *n);
static nfagrammar *
newnfagrammar(void)
{
- nfagrammar *gr;
-
- gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar));
- if (gr == NULL)
- Py_FatalError("no mem for new nfa grammar");
- gr->gr_nnfas = 0;
- gr->gr_nfa = NULL;
- gr->gr_ll.ll_nlabels = 0;
- gr->gr_ll.ll_label = NULL;
- addlabel(&gr->gr_ll, ENDMARKER, "EMPTY");
- return gr;
+ nfagrammar *gr;
+
+ gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar));
+ if (gr == NULL)
+ Py_FatalError("no mem for new nfa grammar");
+ gr->gr_nnfas = 0;
+ gr->gr_nfa = NULL;
+ gr->gr_ll.ll_nlabels = 0;
+ gr->gr_ll.ll_label = NULL;
+ addlabel(&gr->gr_ll, ENDMARKER, "EMPTY");
+ return gr;
}
static nfa *
addnfa(nfagrammar *gr, char *name)
{
- nfa *nf;
-
- nf = newnfa(name);
- gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa,
- sizeof(nfa*) * (gr->gr_nnfas + 1));
- if (gr->gr_nfa == NULL)
- Py_FatalError("out of mem");
- gr->gr_nfa[gr->gr_nnfas++] = nf;
- addlabel(&gr->gr_ll, NAME, nf->nf_name);
- return nf;
+ nfa *nf;
+
+ nf = newnfa(name);
+ gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa,
+ sizeof(nfa*) * (gr->gr_nnfas + 1));
+ if (gr->gr_nfa == NULL)
+ Py_FatalError("out of mem");
+ gr->gr_nfa[gr->gr_nnfas++] = nf;
+ addlabel(&gr->gr_ll, NAME, nf->nf_name);
+ return nf;
}
#ifdef Py_DEBUG
@@ -137,203 +137,203 @@ addnfa(nfagrammar *gr, char *name)
static char REQNFMT[] = "metacompile: less than %d children\n";
#define REQN(i, count) \
- if (i < count) { \
- fprintf(stderr, REQNFMT, count); \
- Py_FatalError("REQN"); \
- } else
+ if (i < count) { \
+ fprintf(stderr, REQNFMT, count); \
+ Py_FatalError("REQN"); \
+ } else
#else
-#define REQN(i, count) /* empty */
+#define REQN(i, count) /* empty */
#endif
static nfagrammar *
metacompile(node *n)
{
- nfagrammar *gr;
- int i;
-
- if (Py_DebugFlag)
- printf("Compiling (meta-) parse tree into NFA grammar\n");
- gr = newnfagrammar();
- REQ(n, MSTART);
- i = n->n_nchildren - 1; /* Last child is ENDMARKER */
- n = n->n_child;
- for (; --i >= 0; n++) {
- if (n->n_type != NEWLINE)
- compile_rule(gr, n);
- }
- return gr;
+ nfagrammar *gr;
+ int i;
+
+ if (Py_DebugFlag)
+ printf("Compiling (meta-) parse tree into NFA grammar\n");
+ gr = newnfagrammar();
+ REQ(n, MSTART);
+ i = n->n_nchildren - 1; /* Last child is ENDMARKER */
+ n = n->n_child;
+ for (; --i >= 0; n++) {
+ if (n->n_type != NEWLINE)
+ compile_rule(gr, n);
+ }
+ return gr;
}
static void
compile_rule(nfagrammar *gr, node *n)
{
- nfa *nf;
-
- REQ(n, RULE);
- REQN(n->n_nchildren, 4);
- n = n->n_child;
- REQ(n, NAME);
- nf = addnfa(gr, n->n_str);
- n++;
- REQ(n, COLON);
- n++;
- REQ(n, RHS);
- compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish);
- n++;
- REQ(n, NEWLINE);
+ nfa *nf;
+
+ REQ(n, RULE);
+ REQN(n->n_nchildren, 4);
+ n = n->n_child;
+ REQ(n, NAME);
+ nf = addnfa(gr, n->n_str);
+ n++;
+ REQ(n, COLON);
+ n++;
+ REQ(n, RHS);
+ compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish);
+ n++;
+ REQ(n, NEWLINE);
}
static void
compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
{
- int i;
- int a, b;
-
- REQ(n, RHS);
- i = n->n_nchildren;
- REQN(i, 1);
- n = n->n_child;
- REQ(n, ALT);
- compile_alt(ll, nf, n, pa, pb);
- if (--i <= 0)
- return;
- n++;
- a = *pa;
- b = *pb;
- *pa = addnfastate(nf);
- *pb = addnfastate(nf);
- addnfaarc(nf, *pa, a, EMPTY);
- addnfaarc(nf, b, *pb, EMPTY);
- for (; --i >= 0; n++) {
- REQ(n, VBAR);
- REQN(i, 1);
- --i;
- n++;
- REQ(n, ALT);
- compile_alt(ll, nf, n, &a, &b);
- addnfaarc(nf, *pa, a, EMPTY);
- addnfaarc(nf, b, *pb, EMPTY);
- }
+ int i;
+ int a, b;
+
+ REQ(n, RHS);
+ i = n->n_nchildren;
+ REQN(i, 1);
+ n = n->n_child;
+ REQ(n, ALT);
+ compile_alt(ll, nf, n, pa, pb);
+ if (--i <= 0)
+ return;
+ n++;
+ a = *pa;
+ b = *pb;
+ *pa = addnfastate(nf);
+ *pb = addnfastate(nf);
+ addnfaarc(nf, *pa, a, EMPTY);
+ addnfaarc(nf, b, *pb, EMPTY);
+ for (; --i >= 0; n++) {
+ REQ(n, VBAR);
+ REQN(i, 1);
+ --i;
+ n++;
+ REQ(n, ALT);
+ compile_alt(ll, nf, n, &a, &b);
+ addnfaarc(nf, *pa, a, EMPTY);
+ addnfaarc(nf, b, *pb, EMPTY);
+ }
}
static void
compile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
{
- int i;
- int a, b;
-
- REQ(n, ALT);
- i = n->n_nchildren;
- REQN(i, 1);
- n = n->n_child;
- REQ(n, ITEM);
- compile_item(ll, nf, n, pa, pb);
- --i;
- n++;
- for (; --i >= 0; n++) {
- REQ(n, ITEM);
- compile_item(ll, nf, n, &a, &b);
- addnfaarc(nf, *pb, a, EMPTY);
- *pb = b;
- }
+ int i;
+ int a, b;
+
+ REQ(n, ALT);
+ i = n->n_nchildren;
+ REQN(i, 1);
+ n = n->n_child;
+ REQ(n, ITEM);
+ compile_item(ll, nf, n, pa, pb);
+ --i;
+ n++;
+ for (; --i >= 0; n++) {
+ REQ(n, ITEM);
+ compile_item(ll, nf, n, &a, &b);
+ addnfaarc(nf, *pb, a, EMPTY);
+ *pb = b;
+ }
}
static void
compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
{
- int i;
- int a, b;
-
- REQ(n, ITEM);
- i = n->n_nchildren;
- REQN(i, 1);
- n = n->n_child;
- if (n->n_type == LSQB) {
- REQN(i, 3);
- n++;
- REQ(n, RHS);
- *pa = addnfastate(nf);
- *pb = addnfastate(nf);
- addnfaarc(nf, *pa, *pb, EMPTY);
- compile_rhs(ll, nf, n, &a, &b);
- addnfaarc(nf, *pa, a, EMPTY);
- addnfaarc(nf, b, *pb, EMPTY);
- REQN(i, 1);
- n++;
- REQ(n, RSQB);
- }
- else {
- compile_atom(ll, nf, n, pa, pb);
- if (--i <= 0)
- return;
- n++;
- addnfaarc(nf, *pb, *pa, EMPTY);
- if (n->n_type == STAR)
- *pb = *pa;
- else
- REQ(n, PLUS);
- }
+ int i;
+ int a, b;
+
+ REQ(n, ITEM);
+ i = n->n_nchildren;
+ REQN(i, 1);
+ n = n->n_child;
+ if (n->n_type == LSQB) {
+ REQN(i, 3);
+ n++;
+ REQ(n, RHS);
+ *pa = addnfastate(nf);
+ *pb = addnfastate(nf);
+ addnfaarc(nf, *pa, *pb, EMPTY);
+ compile_rhs(ll, nf, n, &a, &b);
+ addnfaarc(nf, *pa, a, EMPTY);
+ addnfaarc(nf, b, *pb, EMPTY);
+ REQN(i, 1);
+ n++;
+ REQ(n, RSQB);
+ }
+ else {
+ compile_atom(ll, nf, n, pa, pb);
+ if (--i <= 0)
+ return;
+ n++;
+ addnfaarc(nf, *pb, *pa, EMPTY);
+ if (n->n_type == STAR)
+ *pb = *pa;
+ else
+ REQ(n, PLUS);
+ }
}
static void
compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
{
- int i;
-
- REQ(n, ATOM);
- i = n->n_nchildren;
- REQN(i, 1);
- n = n->n_child;
- if (n->n_type == LPAR) {
- REQN(i, 3);
- n++;
- REQ(n, RHS);
- compile_rhs(ll, nf, n, pa, pb);
- n++;
- REQ(n, RPAR);
- }
- else if (n->n_type == NAME || n->n_type == STRING) {
- *pa = addnfastate(nf);
- *pb = addnfastate(nf);
- addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str));
- }
- else
- REQ(n, NAME);
+ int i;
+
+ REQ(n, ATOM);
+ i = n->n_nchildren;
+ REQN(i, 1);
+ n = n->n_child;
+ if (n->n_type == LPAR) {
+ REQN(i, 3);
+ n++;
+ REQ(n, RHS);
+ compile_rhs(ll, nf, n, pa, pb);
+ n++;
+ REQ(n, RPAR);
+ }
+ else if (n->n_type == NAME || n->n_type == STRING) {
+ *pa = addnfastate(nf);
+ *pb = addnfastate(nf);
+ addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str));
+ }
+ else
+ REQ(n, NAME);
}
static void
dumpstate(labellist *ll, nfa *nf, int istate)
{
- nfastate *st;
- int i;
- nfaarc *ar;
-
- printf("%c%2d%c",
- istate == nf->nf_start ? '*' : ' ',
- istate,
- istate == nf->nf_finish ? '.' : ' ');
- st = &nf->nf_state[istate];
- ar = st->st_arc;
- for (i = 0; i < st->st_narcs; i++) {
- if (i > 0)
- printf("\n ");
- printf("-> %2d %s", ar->ar_arrow,
- PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label]));
- ar++;
- }
- printf("\n");
+ nfastate *st;
+ int i;
+ nfaarc *ar;
+
+ printf("%c%2d%c",
+ istate == nf->nf_start ? '*' : ' ',
+ istate,
+ istate == nf->nf_finish ? '.' : ' ');
+ st = &nf->nf_state[istate];
+ ar = st->st_arc;
+ for (i = 0; i < st->st_narcs; i++) {
+ if (i > 0)
+ printf("\n ");
+ printf("-> %2d %s", ar->ar_arrow,
+ PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label]));
+ ar++;
+ }
+ printf("\n");
}
static void
dumpnfa(labellist *ll, nfa *nf)
{
- int i;
-
- printf("NFA '%s' has %d states; start %d, finish %d\n",
- nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish);
- for (i = 0; i < nf->nf_nstates; i++)
- dumpstate(ll, nf, i);
+ int i;
+
+ printf("NFA '%s' has %d states; start %d, finish %d\n",
+ nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish);
+ for (i = 0; i < nf->nf_nstates; i++)
+ dumpstate(ll, nf, i);
}
@@ -342,184 +342,184 @@ dumpnfa(labellist *ll, nfa *nf)
static void
addclosure(bitset ss, nfa *nf, int istate)
{
- if (addbit(ss, istate)) {
- nfastate *st = &nf->nf_state[istate];
- nfaarc *ar = st->st_arc;
- int i;
-
- for (i = st->st_narcs; --i >= 0; ) {
- if (ar->ar_label == EMPTY)
- addclosure(ss, nf, ar->ar_arrow);
- ar++;
- }
- }
+ if (addbit(ss, istate)) {
+ nfastate *st = &nf->nf_state[istate];
+ nfaarc *ar = st->st_arc;
+ int i;
+
+ for (i = st->st_narcs; --i >= 0; ) {
+ if (ar->ar_label == EMPTY)
+ addclosure(ss, nf, ar->ar_arrow);
+ ar++;
+ }
+ }
}
typedef struct _ss_arc {
- bitset sa_bitset;
- int sa_arrow;
- int sa_label;
+ bitset sa_bitset;
+ int sa_arrow;
+ int sa_label;
} ss_arc;
typedef struct _ss_state {
- bitset ss_ss;
- int ss_narcs;
- struct _ss_arc *ss_arc;
- int ss_deleted;
- int ss_finish;
- int ss_rename;
+ bitset ss_ss;
+ int ss_narcs;
+ struct _ss_arc *ss_arc;
+ int ss_deleted;
+ int ss_finish;
+ int ss_rename;
} ss_state;
typedef struct _ss_dfa {
- int sd_nstates;
- ss_state *sd_state;
+ int sd_nstates;
+ ss_state *sd_state;
} ss_dfa;
/* Forward */
static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
- labellist *ll, char *msg);
+ labellist *ll, char *msg);
static void simplify(int xx_nstates, ss_state *xx_state);
static void convert(dfa *d, int xx_nstates, ss_state *xx_state);
static void
makedfa(nfagrammar *gr, nfa *nf, dfa *d)
{
- int nbits = nf->nf_nstates;
- bitset ss;
- int xx_nstates;
- ss_state *xx_state, *yy;
- ss_arc *zz;
- int istate, jstate, iarc, jarc, ibit;
- nfastate *st;
- nfaarc *ar;
-
- ss = newbitset(nbits);
- addclosure(ss, nf, nf->nf_start);
- xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state));
- if (xx_state == NULL)
- Py_FatalError("no mem for xx_state in makedfa");
- xx_nstates = 1;
- yy = &xx_state[0];
- yy->ss_ss = ss;
- yy->ss_narcs = 0;
- yy->ss_arc = NULL;
- yy->ss_deleted = 0;
- yy->ss_finish = testbit(ss, nf->nf_finish);
- if (yy->ss_finish)
- printf("Error: nonterminal '%s' may produce empty.\n",
- nf->nf_name);
-
- /* This algorithm is from a book written before
- the invention of structured programming... */
-
- /* For each unmarked state... */
- for (istate = 0; istate < xx_nstates; ++istate) {
- size_t size;
- yy = &xx_state[istate];
- ss = yy->ss_ss;
- /* For all its states... */
- for (ibit = 0; ibit < nf->nf_nstates; ++ibit) {
- if (!testbit(ss, ibit))
- continue;
- st = &nf->nf_state[ibit];
- /* For all non-empty arcs from this state... */
- for (iarc = 0; iarc < st->st_narcs; iarc++) {
- ar = &st->st_arc[iarc];
- if (ar->ar_label == EMPTY)
- continue;
- /* Look up in list of arcs from this state */
- for (jarc = 0; jarc < yy->ss_narcs; ++jarc) {
- zz = &yy->ss_arc[jarc];
- if (ar->ar_label == zz->sa_label)
- goto found;
- }
- /* Add new arc for this state */
- size = sizeof(ss_arc) * (yy->ss_narcs + 1);
- yy->ss_arc = (ss_arc *)PyObject_REALLOC(
- yy->ss_arc, size);
- if (yy->ss_arc == NULL)
- Py_FatalError("out of mem");
- zz = &yy->ss_arc[yy->ss_narcs++];
- zz->sa_label = ar->ar_label;
- zz->sa_bitset = newbitset(nbits);
- zz->sa_arrow = -1;
- found: ;
- /* Add destination */
- addclosure(zz->sa_bitset, nf, ar->ar_arrow);
- }
- }
- /* Now look up all the arrow states */
- for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) {
- zz = &xx_state[istate].ss_arc[jarc];
- for (jstate = 0; jstate < xx_nstates; jstate++) {
- if (samebitset(zz->sa_bitset,
- xx_state[jstate].ss_ss, nbits)) {
- zz->sa_arrow = jstate;
- goto done;
- }
- }
- size = sizeof(ss_state) * (xx_nstates + 1);
- xx_state = (ss_state *)PyObject_REALLOC(xx_state,
- size);
- if (xx_state == NULL)
- Py_FatalError("out of mem");
- zz->sa_arrow = xx_nstates;
- yy = &xx_state[xx_nstates++];
- yy->ss_ss = zz->sa_bitset;
- yy->ss_narcs = 0;
- yy->ss_arc = NULL;
- yy->ss_deleted = 0;
- yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish);
- done: ;
- }
- }
-
- if (Py_DebugFlag)
- printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
- "before minimizing");
-
- simplify(xx_nstates, xx_state);
-
- if (Py_DebugFlag)
- printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
- "after minimizing");
-
- convert(d, xx_nstates, xx_state);
-
- /* XXX cleanup */
- PyObject_FREE(xx_state);
+ int nbits = nf->nf_nstates;
+ bitset ss;
+ int xx_nstates;
+ ss_state *xx_state, *yy;
+ ss_arc *zz;
+ int istate, jstate, iarc, jarc, ibit;
+ nfastate *st;
+ nfaarc *ar;
+
+ ss = newbitset(nbits);
+ addclosure(ss, nf, nf->nf_start);
+ xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state));
+ if (xx_state == NULL)
+ Py_FatalError("no mem for xx_state in makedfa");
+ xx_nstates = 1;
+ yy = &xx_state[0];
+ yy->ss_ss = ss;
+ yy->ss_narcs = 0;
+ yy->ss_arc = NULL;
+ yy->ss_deleted = 0;
+ yy->ss_finish = testbit(ss, nf->nf_finish);
+ if (yy->ss_finish)
+ printf("Error: nonterminal '%s' may produce empty.\n",
+ nf->nf_name);
+
+ /* This algorithm is from a book written before
+ the invention of structured programming... */
+
+ /* For each unmarked state... */
+ for (istate = 0; istate < xx_nstates; ++istate) {
+ size_t size;
+ yy = &xx_state[istate];
+ ss = yy->ss_ss;
+ /* For all its states... */
+ for (ibit = 0; ibit < nf->nf_nstates; ++ibit) {
+ if (!testbit(ss, ibit))
+ continue;
+ st = &nf->nf_state[ibit];
+ /* For all non-empty arcs from this state... */
+ for (iarc = 0; iarc < st->st_narcs; iarc++) {
+ ar = &st->st_arc[iarc];
+ if (ar->ar_label == EMPTY)
+ continue;
+ /* Look up in list of arcs from this state */
+ for (jarc = 0; jarc < yy->ss_narcs; ++jarc) {
+ zz = &yy->ss_arc[jarc];
+ if (ar->ar_label == zz->sa_label)
+ goto found;
+ }
+ /* Add new arc for this state */
+ size = sizeof(ss_arc) * (yy->ss_narcs + 1);
+ yy->ss_arc = (ss_arc *)PyObject_REALLOC(
+ yy->ss_arc, size);
+ if (yy->ss_arc == NULL)
+ Py_FatalError("out of mem");
+ zz = &yy->ss_arc[yy->ss_narcs++];
+ zz->sa_label = ar->ar_label;
+ zz->sa_bitset = newbitset(nbits);
+ zz->sa_arrow = -1;
+ found: ;
+ /* Add destination */
+ addclosure(zz->sa_bitset, nf, ar->ar_arrow);
+ }
+ }
+ /* Now look up all the arrow states */
+ for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) {
+ zz = &xx_state[istate].ss_arc[jarc];
+ for (jstate = 0; jstate < xx_nstates; jstate++) {
+ if (samebitset(zz->sa_bitset,
+ xx_state[jstate].ss_ss, nbits)) {
+ zz->sa_arrow = jstate;
+ goto done;
+ }
+ }
+ size = sizeof(ss_state) * (xx_nstates + 1);
+ xx_state = (ss_state *)PyObject_REALLOC(xx_state,
+ size);
+ if (xx_state == NULL)
+ Py_FatalError("out of mem");
+ zz->sa_arrow = xx_nstates;
+ yy = &xx_state[xx_nstates++];
+ yy->ss_ss = zz->sa_bitset;
+ yy->ss_narcs = 0;
+ yy->ss_arc = NULL;
+ yy->ss_deleted = 0;
+ yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish);
+ done: ;
+ }
+ }
+
+ if (Py_DebugFlag)
+ printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
+ "before minimizing");
+
+ simplify(xx_nstates, xx_state);
+
+ if (Py_DebugFlag)
+ printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
+ "after minimizing");
+
+ convert(d, xx_nstates, xx_state);
+
+ /* XXX cleanup */
+ PyObject_FREE(xx_state);
}
static void
printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
- labellist *ll, char *msg)
+ labellist *ll, char *msg)
{
- int i, ibit, iarc;
- ss_state *yy;
- ss_arc *zz;
-
- printf("Subset DFA %s\n", msg);
- for (i = 0; i < xx_nstates; i++) {
- yy = &xx_state[i];
- if (yy->ss_deleted)
- continue;
- printf(" Subset %d", i);
- if (yy->ss_finish)
- printf(" (finish)");
- printf(" { ");
- for (ibit = 0; ibit < nbits; ibit++) {
- if (testbit(yy->ss_ss, ibit))
- printf("%d ", ibit);
- }
- printf("}\n");
- for (iarc = 0; iarc < yy->ss_narcs; iarc++) {
- zz = &yy->ss_arc[iarc];
- printf(" Arc to state %d, label %s\n",
- zz->sa_arrow,
- PyGrammar_LabelRepr(
- &ll->ll_label[zz->sa_label]));
- }
- }
+ int i, ibit, iarc;
+ ss_state *yy;
+ ss_arc *zz;
+
+ printf("Subset DFA %s\n", msg);
+ for (i = 0; i < xx_nstates; i++) {
+ yy = &xx_state[i];
+ if (yy->ss_deleted)
+ continue;
+ printf(" Subset %d", i);
+ if (yy->ss_finish)
+ printf(" (finish)");
+ printf(" { ");
+ for (ibit = 0; ibit < nbits; ibit++) {
+ if (testbit(yy->ss_ss, ibit))
+ printf("%d ", ibit);
+ }
+ printf("}\n");
+ for (iarc = 0; iarc < yy->ss_narcs; iarc++) {
+ zz = &yy->ss_arc[iarc];
+ printf(" Arc to state %d, label %s\n",
+ zz->sa_arrow,
+ PyGrammar_LabelRepr(
+ &ll->ll_label[zz->sa_label]));
+ }
+ }
}
@@ -535,59 +535,59 @@ printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
static int
samestate(ss_state *s1, ss_state *s2)
{
- int i;
-
- if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish)
- return 0;
- for (i = 0; i < s1->ss_narcs; i++) {
- if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow ||
- s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label)
- return 0;
- }
- return 1;
+ int i;
+
+ if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish)
+ return 0;
+ for (i = 0; i < s1->ss_narcs; i++) {
+ if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow ||
+ s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label)
+ return 0;
+ }
+ return 1;
}
static void
renamestates(int xx_nstates, ss_state *xx_state, int from, int to)
{
- int i, j;
-
- if (Py_DebugFlag)
- printf("Rename state %d to %d.\n", from, to);
- for (i = 0; i < xx_nstates; i++) {
- if (xx_state[i].ss_deleted)
- continue;
- for (j = 0; j < xx_state[i].ss_narcs; j++) {
- if (xx_state[i].ss_arc[j].sa_arrow == from)
- xx_state[i].ss_arc[j].sa_arrow = to;
- }
- }
+ int i, j;
+
+ if (Py_DebugFlag)
+ printf("Rename state %d to %d.\n", from, to);
+ for (i = 0; i < xx_nstates; i++) {
+ if (xx_state[i].ss_deleted)
+ continue;
+ for (j = 0; j < xx_state[i].ss_narcs; j++) {
+ if (xx_state[i].ss_arc[j].sa_arrow == from)
+ xx_state[i].ss_arc[j].sa_arrow = to;
+ }
+ }
}
static void
simplify(int xx_nstates, ss_state *xx_state)
{
- int changes;
- int i, j;
-
- do {
- changes = 0;
- for (i = 1; i < xx_nstates; i++) {
- if (xx_state[i].ss_deleted)
- continue;
- for (j = 0; j < i; j++) {
- if (xx_state[j].ss_deleted)
- continue;
- if (samestate(&xx_state[i], &xx_state[j])) {
- xx_state[i].ss_deleted++;
- renamestates(xx_nstates, xx_state,
- i, j);
- changes++;
- break;
- }
- }
- }
- } while (changes);
+ int changes;
+ int i, j;
+
+ do {
+ changes = 0;
+ for (i = 1; i < xx_nstates; i++) {
+ if (xx_state[i].ss_deleted)
+ continue;
+ for (j = 0; j < i; j++) {
+ if (xx_state[j].ss_deleted)
+ continue;
+ if (samestate(&xx_state[i], &xx_state[j])) {
+ xx_state[i].ss_deleted++;
+ renamestates(xx_nstates, xx_state,
+ i, j);
+ changes++;
+ break;
+ }
+ }
+ }
+ } while (changes);
}
@@ -598,32 +598,32 @@ simplify(int xx_nstates, ss_state *xx_state)
static void
convert(dfa *d, int xx_nstates, ss_state *xx_state)
{
- int i, j;
- ss_state *yy;
- ss_arc *zz;
-
- for (i = 0; i < xx_nstates; i++) {
- yy = &xx_state[i];
- if (yy->ss_deleted)
- continue;
- yy->ss_rename = addstate(d);
- }
-
- for (i = 0; i < xx_nstates; i++) {
- yy = &xx_state[i];
- if (yy->ss_deleted)
- continue;
- for (j = 0; j < yy->ss_narcs; j++) {
- zz = &yy->ss_arc[j];
- addarc(d, yy->ss_rename,
- xx_state[zz->sa_arrow].ss_rename,
- zz->sa_label);
- }
- if (yy->ss_finish)
- addarc(d, yy->ss_rename, yy->ss_rename, 0);
- }
-
- d->d_initial = 0;
+ int i, j;
+ ss_state *yy;
+ ss_arc *zz;
+
+ for (i = 0; i < xx_nstates; i++) {
+ yy = &xx_state[i];
+ if (yy->ss_deleted)
+ continue;
+ yy->ss_rename = addstate(d);
+ }
+
+ for (i = 0; i < xx_nstates; i++) {
+ yy = &xx_state[i];
+ if (yy->ss_deleted)
+ continue;
+ for (j = 0; j < yy->ss_narcs; j++) {
+ zz = &yy->ss_arc[j];
+ addarc(d, yy->ss_rename,
+ xx_state[zz->sa_arrow].ss_rename,
+ zz->sa_label);
+ }
+ if (yy->ss_finish)
+ addarc(d, yy->ss_rename, yy->ss_rename, 0);
+ }
+
+ d->d_initial = 0;
}
@@ -632,43 +632,43 @@ convert(dfa *d, int xx_nstates, ss_state *xx_state)
static grammar *
maketables(nfagrammar *gr)
{
- int i;
- nfa *nf;
- dfa *d;
- grammar *g;
-
- if (gr->gr_nnfas == 0)
- return NULL;
- g = newgrammar(gr->gr_nfa[0]->nf_type);
- /* XXX first rule must be start rule */
- g->g_ll = gr->gr_ll;
-
- for (i = 0; i < gr->gr_nnfas; i++) {
- nf = gr->gr_nfa[i];
- if (Py_DebugFlag) {
- printf("Dump of NFA for '%s' ...\n", nf->nf_name);
- dumpnfa(&gr->gr_ll, nf);
- printf("Making DFA for '%s' ...\n", nf->nf_name);
- }
- d = adddfa(g, nf->nf_type, nf->nf_name);
- makedfa(gr, gr->gr_nfa[i], d);
- }
-
- return g;
+ int i;
+ nfa *nf;
+ dfa *d;
+ grammar *g;
+
+ if (gr->gr_nnfas == 0)
+ return NULL;
+ g = newgrammar(gr->gr_nfa[0]->nf_type);
+ /* XXX first rule must be start rule */
+ g->g_ll = gr->gr_ll;
+
+ for (i = 0; i < gr->gr_nnfas; i++) {
+ nf = gr->gr_nfa[i];
+ if (Py_DebugFlag) {
+ printf("Dump of NFA for '%s' ...\n", nf->nf_name);
+ dumpnfa(&gr->gr_ll, nf);
+ printf("Making DFA for '%s' ...\n", nf->nf_name);
+ }
+ d = adddfa(g, nf->nf_type, nf->nf_name);
+ makedfa(gr, gr->gr_nfa[i], d);
+ }
+
+ return g;
}
grammar *
pgen(node *n)
{
- nfagrammar *gr;
- grammar *g;
-
- gr = metacompile(n);
- g = maketables(gr);
- translatelabels(g);
- addfirstsets(g);
- PyObject_FREE(gr);
- return g;
+ nfagrammar *gr;
+ grammar *g;
+
+ gr = metacompile(n);
+ g = maketables(gr);
+ translatelabels(g);
+ addfirstsets(g);
+ PyObject_FREE(gr);
+ return g;
}
grammar *
@@ -702,7 +702,7 @@ Reference
---------
[Aho&Ullman 77]
- Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
- (first edition)
+ Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
+ (first edition)
*/
diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c
index fc27a2c..88fa7f1 100644
--- a/Parser/pgenmain.c
+++ b/Parser/pgenmain.c
@@ -30,104 +30,104 @@ grammar *getgrammar(char *filename);
void
Py_Exit(int sts)
{
- exit(sts);
+ exit(sts);
}
int
main(int argc, char **argv)
{
- grammar *g;
- FILE *fp;
- char *filename, *graminit_h, *graminit_c;
-
- if (argc != 4) {
- fprintf(stderr,
- "usage: %s grammar graminit.h graminit.c\n", argv[0]);
- Py_Exit(2);
- }
- filename = argv[1];
- graminit_h = argv[2];
- graminit_c = argv[3];
- g = getgrammar(filename);
- fp = fopen(graminit_c, "w");
- if (fp == NULL) {
- perror(graminit_c);
- Py_Exit(1);
- }
- if (Py_DebugFlag)
- printf("Writing %s ...\n", graminit_c);
- printgrammar(g, fp);
- fclose(fp);
- fp = fopen(graminit_h, "w");
- if (fp == NULL) {
- perror(graminit_h);
- Py_Exit(1);
- }
- if (Py_DebugFlag)
- printf("Writing %s ...\n", graminit_h);
- printnonterminals(g, fp);
- fclose(fp);
- Py_Exit(0);
- return 0; /* Make gcc -Wall happy */
+ grammar *g;
+ FILE *fp;
+ char *filename, *graminit_h, *graminit_c;
+
+ if (argc != 4) {
+ fprintf(stderr,
+ "usage: %s grammar graminit.h graminit.c\n", argv[0]);
+ Py_Exit(2);
+ }
+ filename = argv[1];
+ graminit_h = argv[2];
+ graminit_c = argv[3];
+ g = getgrammar(filename);
+ fp = fopen(graminit_c, "w");
+ if (fp == NULL) {
+ perror(graminit_c);
+ Py_Exit(1);
+ }
+ if (Py_DebugFlag)
+ printf("Writing %s ...\n", graminit_c);
+ printgrammar(g, fp);
+ fclose(fp);
+ fp = fopen(graminit_h, "w");
+ if (fp == NULL) {
+ perror(graminit_h);
+ Py_Exit(1);
+ }
+ if (Py_DebugFlag)
+ printf("Writing %s ...\n", graminit_h);
+ printnonterminals(g, fp);
+ fclose(fp);
+ Py_Exit(0);
+ return 0; /* Make gcc -Wall happy */
}
grammar *
getgrammar(char *filename)
{
- FILE *fp;
- node *n;
- grammar *g0, *g;
- perrdetail err;
-
- fp = fopen(filename, "r");
- if (fp == NULL) {
- perror(filename);
- Py_Exit(1);
- }
- g0 = meta_grammar();
- n = PyParser_ParseFile(fp, filename, g0, g0->g_start,
- (char *)NULL, (char *)NULL, &err);
- fclose(fp);
- if (n == NULL) {
- fprintf(stderr, "Parsing error %d, line %d.\n",
- err.error, err.lineno);
- if (err.text != NULL) {
- size_t i;
- fprintf(stderr, "%s", err.text);
- i = strlen(err.text);
- if (i == 0 || err.text[i-1] != '\n')
- fprintf(stderr, "\n");
- for (i = 0; i < err.offset; i++) {
- if (err.text[i] == '\t')
- putc('\t', stderr);
- else
- putc(' ', stderr);
- }
- fprintf(stderr, "^\n");
- PyObject_FREE(err.text);
- }
- Py_Exit(1);
- }
- g = pgen(n);
- if (g == NULL) {
- printf("Bad grammar.\n");
- Py_Exit(1);
- }
- return g;
+ FILE *fp;
+ node *n;
+ grammar *g0, *g;
+ perrdetail err;
+
+ fp = fopen(filename, "r");
+ if (fp == NULL) {
+ perror(filename);
+ Py_Exit(1);
+ }
+ g0 = meta_grammar();
+ n = PyParser_ParseFile(fp, filename, g0, g0->g_start,
+ (char *)NULL, (char *)NULL, &err);
+ fclose(fp);
+ if (n == NULL) {
+ fprintf(stderr, "Parsing error %d, line %d.\n",
+ err.error, err.lineno);
+ if (err.text != NULL) {
+ size_t i;
+ fprintf(stderr, "%s", err.text);
+ i = strlen(err.text);
+ if (i == 0 || err.text[i-1] != '\n')
+ fprintf(stderr, "\n");
+ for (i = 0; i < err.offset; i++) {
+ if (err.text[i] == '\t')
+ putc('\t', stderr);
+ else
+ putc(' ', stderr);
+ }
+ fprintf(stderr, "^\n");
+ PyObject_FREE(err.text);
+ }
+ Py_Exit(1);
+ }
+ g = pgen(n);
+ if (g == NULL) {
+ printf("Bad grammar.\n");
+ Py_Exit(1);
+ }
+ return g;
}
/* Can't happen in pgen */
PyObject*
PyErr_Occurred()
{
- return 0;
+ return 0;
}
void
Py_FatalError(const char *msg)
{
- fprintf(stderr, "pgen: FATAL ERROR: %s\n", msg);
- Py_Exit(1);
+ fprintf(stderr, "pgen: FATAL ERROR: %s\n", msg);
+ Py_Exit(1);
}
/* No-nonsense my_readline() for tokenizer.c */
@@ -135,28 +135,28 @@ Py_FatalError(const char *msg)
char *
PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, char *prompt)
{
- size_t n = 1000;
- char *p = (char *)PyMem_MALLOC(n);
- char *q;
- if (p == NULL)
- return NULL;
- fprintf(stderr, "%s", prompt);
- q = fgets(p, n, sys_stdin);
- if (q == NULL) {
- *p = '\0';
- return p;
- }
- n = strlen(p);
- if (n > 0 && p[n-1] != '\n')
- p[n-1] = '\n';
- return (char *)PyMem_REALLOC(p, n+1);
+ size_t n = 1000;
+ char *p = (char *)PyMem_MALLOC(n);
+ char *q;
+ if (p == NULL)
+ return NULL;
+ fprintf(stderr, "%s", prompt);
+ q = fgets(p, n, sys_stdin);
+ if (q == NULL) {
+ *p = '\0';
+ return p;
+ }
+ n = strlen(p);
+ if (n > 0 && p[n-1] != '\n')
+ p[n-1] = '\n';
+ return (char *)PyMem_REALLOC(p, n+1);
}
/* No-nonsense fgets */
char *
Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
{
- return fgets(buf, n, stream);
+ return fgets(buf, n, stream);
}
@@ -165,9 +165,9 @@ Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
void
PySys_WriteStderr(const char *format, ...)
{
- va_list va;
+ va_list va;
- va_start(va, format);
- vfprintf(stderr, format, va);
- va_end(va);
+ va_start(va, format);
+ vfprintf(stderr, format, va);
+ va_end(va);
}
diff --git a/Parser/printgrammar.c b/Parser/printgrammar.c
index ae180e1..3b54a05 100644
--- a/Parser/printgrammar.c
+++ b/Parser/printgrammar.c
@@ -13,105 +13,105 @@ static void printlabels(grammar *, FILE *);
void
printgrammar(grammar *g, FILE *fp)
{
- fprintf(fp, "/* Generated by Parser/pgen */\n\n");
- fprintf(fp, "#include \"pgenheaders.h\"\n");
- fprintf(fp, "#include \"grammar.h\"\n");
- fprintf(fp, "PyAPI_DATA(grammar) _PyParser_Grammar;\n");
- printdfas(g, fp);
- printlabels(g, fp);
- fprintf(fp, "grammar _PyParser_Grammar = {\n");
- fprintf(fp, "\t%d,\n", g->g_ndfas);
- fprintf(fp, "\tdfas,\n");
- fprintf(fp, "\t{%d, labels},\n", g->g_ll.ll_nlabels);
- fprintf(fp, "\t%d\n", g->g_start);
- fprintf(fp, "};\n");
+ fprintf(fp, "/* Generated by Parser/pgen */\n\n");
+ fprintf(fp, "#include \"pgenheaders.h\"\n");
+ fprintf(fp, "#include \"grammar.h\"\n");
+ fprintf(fp, "PyAPI_DATA(grammar) _PyParser_Grammar;\n");
+ printdfas(g, fp);
+ printlabels(g, fp);
+ fprintf(fp, "grammar _PyParser_Grammar = {\n");
+ fprintf(fp, "\t%d,\n", g->g_ndfas);
+ fprintf(fp, "\tdfas,\n");
+ fprintf(fp, "\t{%d, labels},\n", g->g_ll.ll_nlabels);
+ fprintf(fp, "\t%d\n", g->g_start);
+ fprintf(fp, "};\n");
}
void
printnonterminals(grammar *g, FILE *fp)
{
- dfa *d;
- int i;
-
- fprintf(fp, "/* Generated by Parser/pgen */\n\n");
-
- d = g->g_dfa;
- for (i = g->g_ndfas; --i >= 0; d++)
- fprintf(fp, "#define %s %d\n", d->d_name, d->d_type);
+ dfa *d;
+ int i;
+
+ fprintf(fp, "/* Generated by Parser/pgen */\n\n");
+
+ d = g->g_dfa;
+ for (i = g->g_ndfas; --i >= 0; d++)
+ fprintf(fp, "#define %s %d\n", d->d_name, d->d_type);
}
static void
printarcs(int i, dfa *d, FILE *fp)
{
- arc *a;
- state *s;
- int j, k;
-
- s = d->d_state;
- for (j = 0; j < d->d_nstates; j++, s++) {
- fprintf(fp, "static arc arcs_%d_%d[%d] = {\n",
- i, j, s->s_narcs);
- a = s->s_arc;
- for (k = 0; k < s->s_narcs; k++, a++)
- fprintf(fp, "\t{%d, %d},\n", a->a_lbl, a->a_arrow);
- fprintf(fp, "};\n");
- }
+ arc *a;
+ state *s;
+ int j, k;
+
+ s = d->d_state;
+ for (j = 0; j < d->d_nstates; j++, s++) {
+ fprintf(fp, "static arc arcs_%d_%d[%d] = {\n",
+ i, j, s->s_narcs);
+ a = s->s_arc;
+ for (k = 0; k < s->s_narcs; k++, a++)
+ fprintf(fp, "\t{%d, %d},\n", a->a_lbl, a->a_arrow);
+ fprintf(fp, "};\n");
+ }
}
static void
printstates(grammar *g, FILE *fp)
{
- state *s;
- dfa *d;
- int i, j;
-
- d = g->g_dfa;
- for (i = 0; i < g->g_ndfas; i++, d++) {
- printarcs(i, d, fp);
- fprintf(fp, "static state states_%d[%d] = {\n",
- i, d->d_nstates);
- s = d->d_state;
- for (j = 0; j < d->d_nstates; j++, s++)
- fprintf(fp, "\t{%d, arcs_%d_%d},\n",
- s->s_narcs, i, j);
- fprintf(fp, "};\n");
- }
+ state *s;
+ dfa *d;
+ int i, j;
+
+ d = g->g_dfa;
+ for (i = 0; i < g->g_ndfas; i++, d++) {
+ printarcs(i, d, fp);
+ fprintf(fp, "static state states_%d[%d] = {\n",
+ i, d->d_nstates);
+ s = d->d_state;
+ for (j = 0; j < d->d_nstates; j++, s++)
+ fprintf(fp, "\t{%d, arcs_%d_%d},\n",
+ s->s_narcs, i, j);
+ fprintf(fp, "};\n");
+ }
}
static void
printdfas(grammar *g, FILE *fp)
{
- dfa *d;
- int i, j;
-
- printstates(g, fp);
- fprintf(fp, "static dfa dfas[%d] = {\n", g->g_ndfas);
- d = g->g_dfa;
- for (i = 0; i < g->g_ndfas; i++, d++) {
- fprintf(fp, "\t{%d, \"%s\", %d, %d, states_%d,\n",
- d->d_type, d->d_name, d->d_initial, d->d_nstates, i);
- fprintf(fp, "\t \"");
- for (j = 0; j < NBYTES(g->g_ll.ll_nlabels); j++)
- fprintf(fp, "\\%03o", d->d_first[j] & 0xff);
- fprintf(fp, "\"},\n");
- }
- fprintf(fp, "};\n");
+ dfa *d;
+ int i, j;
+
+ printstates(g, fp);
+ fprintf(fp, "static dfa dfas[%d] = {\n", g->g_ndfas);
+ d = g->g_dfa;
+ for (i = 0; i < g->g_ndfas; i++, d++) {
+ fprintf(fp, "\t{%d, \"%s\", %d, %d, states_%d,\n",
+ d->d_type, d->d_name, d->d_initial, d->d_nstates, i);
+ fprintf(fp, "\t \"");
+ for (j = 0; j < NBYTES(g->g_ll.ll_nlabels); j++)
+ fprintf(fp, "\\%03o", d->d_first[j] & 0xff);
+ fprintf(fp, "\"},\n");
+ }
+ fprintf(fp, "};\n");
}
static void
printlabels(grammar *g, FILE *fp)
{
- label *l;
- int i;
-
- fprintf(fp, "static label labels[%d] = {\n", g->g_ll.ll_nlabels);
- l = g->g_ll.ll_label;
- for (i = g->g_ll.ll_nlabels; --i >= 0; l++) {
- if (l->lb_str == NULL)
- fprintf(fp, "\t{%d, 0},\n", l->lb_type);
- else
- fprintf(fp, "\t{%d, \"%s\"},\n",
- l->lb_type, l->lb_str);
- }
- fprintf(fp, "};\n");
+ label *l;
+ int i;
+
+ fprintf(fp, "static label labels[%d] = {\n", g->g_ll.ll_nlabels);
+ l = g->g_ll.ll_label;
+ for (i = g->g_ll.ll_nlabels; --i >= 0; l++) {
+ if (l->lb_str == NULL)
+ fprintf(fp, "\t{%d, 0},\n", l->lb_type);
+ else
+ fprintf(fp, "\t{%d, \"%s\"},\n",
+ l->lb_type, l->lb_str);
+ }
+ fprintf(fp, "};\n");
}
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index aef081d..90b1b68 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -19,17 +19,17 @@
#endif /* PGEN */
#define is_potential_identifier_start(c) (\
- (c >= 'a' && c <= 'z')\
- || (c >= 'A' && c <= 'Z')\
- || c == '_'\
- || (c >= 128))
+ (c >= 'a' && c <= 'z')\
+ || (c >= 'A' && c <= 'Z')\
+ || c == '_'\
+ || (c >= 128))
#define is_potential_identifier_char(c) (\
- (c >= 'a' && c <= 'z')\
- || (c >= 'A' && c <= 'Z')\
- || (c >= '0' && c <= '9')\
- || c == '_'\
- || (c >= 128))
+ (c >= 'a' && c <= 'z')\
+ || (c >= 'A' && c <= 'Z')\
+ || (c >= '0' && c <= '9')\
+ || c == '_'\
+ || (c >= 128))
extern char *PyOS_Readline(FILE *, FILE *, char *);
/* Return malloc'ed string including trailing \n;
@@ -48,62 +48,62 @@ static void tok_backup(struct tok_state *tok, int c);
/* Token names */
char *_PyParser_TokenNames[] = {
- "ENDMARKER",
- "NAME",
- "NUMBER",
- "STRING",
- "NEWLINE",
- "INDENT",
- "DEDENT",
- "LPAR",
- "RPAR",
- "LSQB",
- "RSQB",
- "COLON",
- "COMMA",
- "SEMI",
- "PLUS",
- "MINUS",
- "STAR",
- "SLASH",
- "VBAR",
- "AMPER",
- "LESS",
- "GREATER",
- "EQUAL",
- "DOT",
- "PERCENT",
- "LBRACE",
- "RBRACE",
- "EQEQUAL",
- "NOTEQUAL",
- "LESSEQUAL",
- "GREATEREQUAL",
- "TILDE",
- "CIRCUMFLEX",
- "LEFTSHIFT",
- "RIGHTSHIFT",
- "DOUBLESTAR",
- "PLUSEQUAL",
- "MINEQUAL",
- "STAREQUAL",
- "SLASHEQUAL",
- "PERCENTEQUAL",
- "AMPEREQUAL",
- "VBAREQUAL",
- "CIRCUMFLEXEQUAL",
- "LEFTSHIFTEQUAL",
- "RIGHTSHIFTEQUAL",
- "DOUBLESTAREQUAL",
- "DOUBLESLASH",
- "DOUBLESLASHEQUAL",
- "AT",
- "RARROW",
- "ELLIPSIS",
- /* This table must match the #defines in token.h! */
- "OP",
- "<ERRORTOKEN>",
- "<N_TOKENS>"
+ "ENDMARKER",
+ "NAME",
+ "NUMBER",
+ "STRING",
+ "NEWLINE",
+ "INDENT",
+ "DEDENT",
+ "LPAR",
+ "RPAR",
+ "LSQB",
+ "RSQB",
+ "COLON",
+ "COMMA",
+ "SEMI",
+ "PLUS",
+ "MINUS",
+ "STAR",
+ "SLASH",
+ "VBAR",
+ "AMPER",
+ "LESS",
+ "GREATER",
+ "EQUAL",
+ "DOT",
+ "PERCENT",
+ "LBRACE",
+ "RBRACE",
+ "EQEQUAL",
+ "NOTEQUAL",
+ "LESSEQUAL",
+ "GREATEREQUAL",
+ "TILDE",
+ "CIRCUMFLEX",
+ "LEFTSHIFT",
+ "RIGHTSHIFT",
+ "DOUBLESTAR",
+ "PLUSEQUAL",
+ "MINEQUAL",
+ "STAREQUAL",
+ "SLASHEQUAL",
+ "PERCENTEQUAL",
+ "AMPEREQUAL",
+ "VBAREQUAL",
+ "CIRCUMFLEXEQUAL",
+ "LEFTSHIFTEQUAL",
+ "RIGHTSHIFTEQUAL",
+ "DOUBLESTAREQUAL",
+ "DOUBLESLASH",
+ "DOUBLESLASHEQUAL",
+ "AT",
+ "RARROW",
+ "ELLIPSIS",
+ /* This table must match the #defines in token.h! */
+ "OP",
+ "<ERRORTOKEN>",
+ "<N_TOKENS>"
};
@@ -112,49 +112,49 @@ char *_PyParser_TokenNames[] = {
static struct tok_state *
tok_new(void)
{
- struct tok_state *tok = (struct tok_state *)PyMem_MALLOC(
- sizeof(struct tok_state));
- if (tok == NULL)
- return NULL;
- tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
- tok->done = E_OK;
- tok->fp = NULL;
- tok->input = NULL;
- tok->tabsize = TABSIZE;
- tok->indent = 0;
- tok->indstack[0] = 0;
- tok->atbol = 1;
- tok->pendin = 0;
- tok->prompt = tok->nextprompt = NULL;
- tok->lineno = 0;
- tok->level = 0;
- tok->filename = NULL;
- tok->altwarning = 1;
- tok->alterror = 1;
- tok->alttabsize = 1;
- tok->altindstack[0] = 0;
- tok->decoding_state = STATE_INIT;
- tok->decoding_erred = 0;
- tok->read_coding_spec = 0;
- tok->enc = NULL;
- tok->encoding = NULL;
- tok->cont_line = 0;
+ struct tok_state *tok = (struct tok_state *)PyMem_MALLOC(
+ sizeof(struct tok_state));
+ if (tok == NULL)
+ return NULL;
+ tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
+ tok->done = E_OK;
+ tok->fp = NULL;
+ tok->input = NULL;
+ tok->tabsize = TABSIZE;
+ tok->indent = 0;
+ tok->indstack[0] = 0;
+ tok->atbol = 1;
+ tok->pendin = 0;
+ tok->prompt = tok->nextprompt = NULL;
+ tok->lineno = 0;
+ tok->level = 0;
+ tok->filename = NULL;
+ tok->altwarning = 1;
+ tok->alterror = 1;
+ tok->alttabsize = 1;
+ tok->altindstack[0] = 0;
+ tok->decoding_state = STATE_INIT;
+ tok->decoding_erred = 0;
+ tok->read_coding_spec = 0;
+ tok->enc = NULL;
+ tok->encoding = NULL;
+ tok->cont_line = 0;
#ifndef PGEN
- tok->decoding_readline = NULL;
- tok->decoding_buffer = NULL;
+ tok->decoding_readline = NULL;
+ tok->decoding_buffer = NULL;
#endif
- return tok;
+ return tok;
}
static char *
new_string(const char *s, Py_ssize_t len)
{
- char* result = (char *)PyMem_MALLOC(len + 1);
- if (result != NULL) {
- memcpy(result, s, len);
- result[len] = '\0';
- }
- return result;
+ char* result = (char *)PyMem_MALLOC(len + 1);
+ if (result != NULL) {
+ memcpy(result, s, len);
+ result[len] = '\0';
+ }
+ return result;
}
#ifdef PGEN
@@ -162,19 +162,19 @@ new_string(const char *s, Py_ssize_t len)
static char *
decoding_fgets(char *s, int size, struct tok_state *tok)
{
- return fgets(s, size, tok->fp);
+ return fgets(s, size, tok->fp);
}
static int
decoding_feof(struct tok_state *tok)
{
- return feof(tok->fp);
+ return feof(tok->fp);
}
static char *
decode_str(const char *str, int exec_input, struct tok_state *tok)
{
- return new_string(str, strlen(str));
+ return new_string(str, strlen(str));
}
#else /* PGEN */
@@ -182,41 +182,41 @@ decode_str(const char *str, int exec_input, struct tok_state *tok)
static char *
error_ret(struct tok_state *tok) /* XXX */
{
- tok->decoding_erred = 1;
- if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
- PyMem_FREE(tok->buf);
- tok->buf = NULL;
- return NULL; /* as if it were EOF */
+ tok->decoding_erred = 1;
+ if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
+ PyMem_FREE(tok->buf);
+ tok->buf = NULL;
+ return NULL; /* as if it were EOF */
}
static char *
-get_normal_name(char *s) /* for utf-8 and latin-1 */
+get_normal_name(char *s) /* for utf-8 and latin-1 */
{
- char buf[13];
- int i;
- for (i = 0; i < 12; i++) {
- int c = s[i];
- if (c == '\0')
- break;
- else if (c == '_')
- buf[i] = '-';
- else
- buf[i] = tolower(c);
- }
- buf[i] = '\0';
- if (strcmp(buf, "utf-8") == 0 ||
- strncmp(buf, "utf-8-", 6) == 0)
- return "utf-8";
- else if (strcmp(buf, "latin-1") == 0 ||
- strcmp(buf, "iso-8859-1") == 0 ||
- strcmp(buf, "iso-latin-1") == 0 ||
- strncmp(buf, "latin-1-", 8) == 0 ||
- strncmp(buf, "iso-8859-1-", 11) == 0 ||
- strncmp(buf, "iso-latin-1-", 12) == 0)
- return "iso-8859-1";
- else
- return s;
+ char buf[13];
+ int i;
+ for (i = 0; i < 12; i++) {
+ int c = s[i];
+ if (c == '\0')
+ break;
+ else if (c == '_')
+ buf[i] = '-';
+ else
+ buf[i] = tolower(c);
+ }
+ buf[i] = '\0';
+ if (strcmp(buf, "utf-8") == 0 ||
+ strncmp(buf, "utf-8-", 6) == 0)
+ return "utf-8";
+ else if (strcmp(buf, "latin-1") == 0 ||
+ strcmp(buf, "iso-8859-1") == 0 ||
+ strcmp(buf, "iso-latin-1") == 0 ||
+ strncmp(buf, "latin-1-", 8) == 0 ||
+ strncmp(buf, "iso-8859-1-", 11) == 0 ||
+ strncmp(buf, "iso-latin-1-", 12) == 0)
+ return "iso-8859-1";
+ else
+ return s;
}
/* Return the coding spec in S, or NULL if none is found. */
@@ -224,43 +224,43 @@ get_normal_name(char *s) /* for utf-8 and latin-1 */
static char *
get_coding_spec(const char *s, Py_ssize_t size)
{
- Py_ssize_t i;
- /* Coding spec must be in a comment, and that comment must be
- * the only statement on the source code line. */
- for (i = 0; i < size - 6; i++) {
- if (s[i] == '#')
- break;
- if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
- return NULL;
- }
- for (; i < size - 6; i++) { /* XXX inefficient search */
- const char* t = s + i;
- if (strncmp(t, "coding", 6) == 0) {
- const char* begin = NULL;
- t += 6;
- if (t[0] != ':' && t[0] != '=')
- continue;
- do {
- t++;
- } while (t[0] == '\x20' || t[0] == '\t');
-
- begin = t;
- while (Py_ISALNUM(t[0]) ||
- t[0] == '-' || t[0] == '_' || t[0] == '.')
- t++;
-
- if (begin < t) {
- char* r = new_string(begin, t - begin);
- char* q = get_normal_name(r);
- if (r != q) {
- PyMem_FREE(r);
- r = new_string(q, strlen(q));
- }
- return r;
- }
- }
- }
- return NULL;
+ Py_ssize_t i;
+ /* Coding spec must be in a comment, and that comment must be
+ * the only statement on the source code line. */
+ for (i = 0; i < size - 6; i++) {
+ if (s[i] == '#')
+ break;
+ if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
+ return NULL;
+ }
+ for (; i < size - 6; i++) { /* XXX inefficient search */
+ const char* t = s + i;
+ if (strncmp(t, "coding", 6) == 0) {
+ const char* begin = NULL;
+ t += 6;
+ if (t[0] != ':' && t[0] != '=')
+ continue;
+ do {
+ t++;
+ } while (t[0] == '\x20' || t[0] == '\t');
+
+ begin = t;
+ while (Py_ISALNUM(t[0]) ||
+ t[0] == '-' || t[0] == '_' || t[0] == '.')
+ t++;
+
+ if (begin < t) {
+ char* r = new_string(begin, t - begin);
+ char* q = get_normal_name(r);
+ if (r != q) {
+ PyMem_FREE(r);
+ r = new_string(q, strlen(q));
+ }
+ return r;
+ }
+ }
+ }
+ return NULL;
}
/* Check whether the line contains a coding spec. If it does,
@@ -270,42 +270,42 @@ get_coding_spec(const char *s, Py_ssize_t size)
static int
check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
- int set_readline(struct tok_state *, const char *))
+ int set_readline(struct tok_state *, const char *))
{
- char * cs;
- int r = 1;
-
- if (tok->cont_line)
- /* It's a continuation line, so it can't be a coding spec. */
- return 1;
- cs = get_coding_spec(line, size);
- if (cs != NULL) {
- tok->read_coding_spec = 1;
- if (tok->encoding == NULL) {
- assert(tok->decoding_state == STATE_RAW);
- if (strcmp(cs, "utf-8") == 0) {
- tok->encoding = cs;
- } else {
- r = set_readline(tok, cs);
- if (r) {
- tok->encoding = cs;
- tok->decoding_state = STATE_NORMAL;
- }
- else
- PyMem_FREE(cs);
- }
- } else { /* then, compare cs with BOM */
- r = (strcmp(tok->encoding, cs) == 0);
- PyMem_FREE(cs);
- }
- }
- if (!r) {
- cs = tok->encoding;
- if (!cs)
- cs = "with BOM";
- PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs);
- }
- return r;
+ char * cs;
+ int r = 1;
+
+ if (tok->cont_line)
+ /* It's a continuation line, so it can't be a coding spec. */
+ return 1;
+ cs = get_coding_spec(line, size);
+ if (cs != NULL) {
+ tok->read_coding_spec = 1;
+ if (tok->encoding == NULL) {
+ assert(tok->decoding_state == STATE_RAW);
+ if (strcmp(cs, "utf-8") == 0) {
+ tok->encoding = cs;
+ } else {
+ r = set_readline(tok, cs);
+ if (r) {
+ tok->encoding = cs;
+ tok->decoding_state = STATE_NORMAL;
+ }
+ else
+ PyMem_FREE(cs);
+ }
+ } else { /* then, compare cs with BOM */
+ r = (strcmp(tok->encoding, cs) == 0);
+ PyMem_FREE(cs);
+ }
+ }
+ if (!r) {
+ cs = tok->encoding;
+ if (!cs)
+ cs = "with BOM";
+ PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs);
+ }
+ return r;
}
/* See whether the file starts with a BOM. If it does,
@@ -314,62 +314,62 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
static int
check_bom(int get_char(struct tok_state *),
- void unget_char(int, struct tok_state *),
- int set_readline(struct tok_state *, const char *),
- struct tok_state *tok)
+ void unget_char(int, struct tok_state *),
+ int set_readline(struct tok_state *, const char *),
+ struct tok_state *tok)
{
- int ch1, ch2, ch3;
- ch1 = get_char(tok);
- tok->decoding_state = STATE_RAW;
- if (ch1 == EOF) {
- return 1;
- } else if (ch1 == 0xEF) {
- ch2 = get_char(tok);
- if (ch2 != 0xBB) {
- unget_char(ch2, tok);
- unget_char(ch1, tok);
- return 1;
- }
- ch3 = get_char(tok);
- if (ch3 != 0xBF) {
- unget_char(ch3, tok);
- unget_char(ch2, tok);
- unget_char(ch1, tok);
- return 1;
- }
+ int ch1, ch2, ch3;
+ ch1 = get_char(tok);
+ tok->decoding_state = STATE_RAW;
+ if (ch1 == EOF) {
+ return 1;
+ } else if (ch1 == 0xEF) {
+ ch2 = get_char(tok);
+ if (ch2 != 0xBB) {
+ unget_char(ch2, tok);
+ unget_char(ch1, tok);
+ return 1;
+ }
+ ch3 = get_char(tok);
+ if (ch3 != 0xBF) {
+ unget_char(ch3, tok);
+ unget_char(ch2, tok);
+ unget_char(ch1, tok);
+ return 1;
+ }
#if 0
- /* Disable support for UTF-16 BOMs until a decision
- is made whether this needs to be supported. */
- } else if (ch1 == 0xFE) {
- ch2 = get_char(tok);
- if (ch2 != 0xFF) {
- unget_char(ch2, tok);
- unget_char(ch1, tok);
- return 1;
- }
- if (!set_readline(tok, "utf-16-be"))
- return 0;
- tok->decoding_state = STATE_NORMAL;
- } else if (ch1 == 0xFF) {
- ch2 = get_char(tok);
- if (ch2 != 0xFE) {
- unget_char(ch2, tok);
- unget_char(ch1, tok);
- return 1;
- }
- if (!set_readline(tok, "utf-16-le"))
- return 0;
- tok->decoding_state = STATE_NORMAL;
+ /* Disable support for UTF-16 BOMs until a decision
+ is made whether this needs to be supported. */
+ } else if (ch1 == 0xFE) {
+ ch2 = get_char(tok);
+ if (ch2 != 0xFF) {
+ unget_char(ch2, tok);
+ unget_char(ch1, tok);
+ return 1;
+ }
+ if (!set_readline(tok, "utf-16-be"))
+ return 0;
+ tok->decoding_state = STATE_NORMAL;
+ } else if (ch1 == 0xFF) {
+ ch2 = get_char(tok);
+ if (ch2 != 0xFE) {
+ unget_char(ch2, tok);
+ unget_char(ch1, tok);
+ return 1;
+ }
+ if (!set_readline(tok, "utf-16-le"))
+ return 0;
+ tok->decoding_state = STATE_NORMAL;
#endif
- } else {
- unget_char(ch1, tok);
- return 1;
- }
- if (tok->encoding != NULL)
- PyMem_FREE(tok->encoding);
- tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */
- /* No need to set_readline: input is already utf-8 */
- return 1;
+ } else {
+ unget_char(ch1, tok);
+ return 1;
+ }
+ if (tok->encoding != NULL)
+ PyMem_FREE(tok->encoding);
+ tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */
+ /* No need to set_readline: input is already utf-8 */
+ return 1;
}
/* Read a line of text from TOK into S, using the stream in TOK.
@@ -378,74 +378,74 @@ check_bom(int get_char(struct tok_state *),
On entry, tok->decoding_buffer will be one of:
1) NULL: need to call tok->decoding_readline to get a new line
2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and
- stored the result in tok->decoding_buffer
+ stored the result in tok->decoding_buffer
3) PyByteArrayObject *: previous call to fp_readl did not have enough room
- (in the s buffer) to copy entire contents of the line read
- by tok->decoding_readline. tok->decoding_buffer has the overflow.
- In this case, fp_readl is called in a loop (with an expanded buffer)
- until the buffer ends with a '\n' (or until the end of the file is
- reached): see tok_nextc and its calls to decoding_fgets.
+ (in the s buffer) to copy entire contents of the line read
+ by tok->decoding_readline. tok->decoding_buffer has the overflow.
+ In this case, fp_readl is called in a loop (with an expanded buffer)
+ until the buffer ends with a '\n' (or until the end of the file is
+ reached): see tok_nextc and its calls to decoding_fgets.
*/
static char *
fp_readl(char *s, int size, struct tok_state *tok)
{
- PyObject* bufobj;
- const char *buf;
- Py_ssize_t buflen;
-
- /* Ask for one less byte so we can terminate it */
- assert(size > 0);
- size--;
-
- if (tok->decoding_buffer) {
- bufobj = tok->decoding_buffer;
- Py_INCREF(bufobj);
- }
- else
- {
- bufobj = PyObject_CallObject(tok->decoding_readline, NULL);
- if (bufobj == NULL)
- goto error;
- }
- if (PyUnicode_CheckExact(bufobj))
- {
- buf = _PyUnicode_AsStringAndSize(bufobj, &buflen);
- if (buf == NULL) {
- goto error;
- }
- }
- else
- {
- buf = PyByteArray_AsString(bufobj);
- if (buf == NULL) {
- goto error;
- }
- buflen = PyByteArray_GET_SIZE(bufobj);
- }
-
- Py_XDECREF(tok->decoding_buffer);
- if (buflen > size) {
- /* Too many chars, the rest goes into tok->decoding_buffer */
- tok->decoding_buffer = PyByteArray_FromStringAndSize(buf+size,
- buflen-size);
- if (tok->decoding_buffer == NULL)
- goto error;
- buflen = size;
- }
- else
- tok->decoding_buffer = NULL;
-
- memcpy(s, buf, buflen);
- s[buflen] = '\0';
- if (buflen == 0) /* EOF */
- s = NULL;
- Py_DECREF(bufobj);
- return s;
+ PyObject* bufobj;
+ const char *buf;
+ Py_ssize_t buflen;
+
+ /* Ask for one less byte so we can terminate it */
+ assert(size > 0);
+ size--;
+
+ if (tok->decoding_buffer) {
+ bufobj = tok->decoding_buffer;
+ Py_INCREF(bufobj);
+ }
+ else
+ {
+ bufobj = PyObject_CallObject(tok->decoding_readline, NULL);
+ if (bufobj == NULL)
+ goto error;
+ }
+ if (PyUnicode_CheckExact(bufobj))
+ {
+ buf = _PyUnicode_AsStringAndSize(bufobj, &buflen);
+ if (buf == NULL) {
+ goto error;
+ }
+ }
+ else
+ {
+ buf = PyByteArray_AsString(bufobj);
+ if (buf == NULL) {
+ goto error;
+ }
+ buflen = PyByteArray_GET_SIZE(bufobj);
+ }
+
+ Py_XDECREF(tok->decoding_buffer);
+ if (buflen > size) {
+ /* Too many chars, the rest goes into tok->decoding_buffer */
+ tok->decoding_buffer = PyByteArray_FromStringAndSize(buf+size,
+ buflen-size);
+ if (tok->decoding_buffer == NULL)
+ goto error;
+ buflen = size;
+ }
+ else
+ tok->decoding_buffer = NULL;
+
+ memcpy(s, buf, buflen);
+ s[buflen] = '\0';
+ if (buflen == 0) /* EOF */
+ s = NULL;
+ Py_DECREF(bufobj);
+ return s;
error:
- Py_XDECREF(bufobj);
- return error_ret(tok);
+ Py_XDECREF(bufobj);
+ return error_ret(tok);
}
/* Set the readline function for TOK to a StreamReader's
@@ -461,49 +461,49 @@ error:
static int
fp_setreadl(struct tok_state *tok, const char* enc)
{
- PyObject *readline = NULL, *stream = NULL, *io = NULL;
-
- io = PyImport_ImportModuleNoBlock("io");
- if (io == NULL)
- goto cleanup;
-
- if (tok->filename)
- stream = PyObject_CallMethod(io, "open", "ssis",
- tok->filename, "r", -1, enc);
- else
- stream = PyObject_CallMethod(io, "open", "isisOOO",
- fileno(tok->fp), "r", -1, enc, Py_None, Py_None, Py_False);
- if (stream == NULL)
- goto cleanup;
-
- Py_XDECREF(tok->decoding_readline);
- readline = PyObject_GetAttrString(stream, "readline");
- tok->decoding_readline = readline;
-
- /* The file has been reopened; parsing will restart from
- * the beginning of the file, we have to reset the line number.
- * But this function has been called from inside tok_nextc() which
- * will increment lineno before it returns. So we set it -1 so that
- * the next call to tok_nextc() will start with tok->lineno == 0.
- */
- tok->lineno = -1;
+ PyObject *readline = NULL, *stream = NULL, *io = NULL;
+
+ io = PyImport_ImportModuleNoBlock("io");
+ if (io == NULL)
+ goto cleanup;
+
+ if (tok->filename)
+ stream = PyObject_CallMethod(io, "open", "ssis",
+ tok->filename, "r", -1, enc);
+ else
+ stream = PyObject_CallMethod(io, "open", "isisOOO",
+ fileno(tok->fp), "r", -1, enc, Py_None, Py_None, Py_False);
+ if (stream == NULL)
+ goto cleanup;
+
+ Py_XDECREF(tok->decoding_readline);
+ readline = PyObject_GetAttrString(stream, "readline");
+ tok->decoding_readline = readline;
+
+ /* The file has been reopened; parsing will restart from
+ * the beginning of the file, we have to reset the line number.
+ * But this function has been called from inside tok_nextc() which
+ * will increment lineno before it returns. So we set it -1 so that
+ * the next call to tok_nextc() will start with tok->lineno == 0.
+ */
+ tok->lineno = -1;
cleanup:
- Py_XDECREF(stream);
- Py_XDECREF(io);
- return readline != NULL;
+ Py_XDECREF(stream);
+ Py_XDECREF(io);
+ return readline != NULL;
}
/* Fetch the next byte from TOK. */
static int fp_getc(struct tok_state *tok) {
- return getc(tok->fp);
+ return getc(tok->fp);
}
/* Unfetch the last byte back into TOK. */
static void fp_ungetc(int c, struct tok_state *tok) {
- ungetc(c, tok->fp);
+ ungetc(c, tok->fp);
}
/* Check whether the characters at s start a valid
@@ -511,27 +511,27 @@ static void fp_ungetc(int c, struct tok_state *tok) {
the sequence if yes, 0 if not. */
static int valid_utf8(const unsigned char* s)
{
- int expected = 0;
- int length;
- if (*s < 0x80)
- /* single-byte code */
- return 1;
- if (*s < 0xc0)
- /* following byte */
- return 0;
- if (*s < 0xE0)
- expected = 1;
- else if (*s < 0xF0)
- expected = 2;
- else if (*s < 0xF8)
- expected = 3;
- else
- return 0;
- length = expected + 1;
- for (; expected; expected--)
- if (s[expected] < 0x80 || s[expected] >= 0xC0)
- return 0;
- return length;
+ int expected = 0;
+ int length;
+ if (*s < 0x80)
+ /* single-byte code */
+ return 1;
+ if (*s < 0xc0)
+ /* following byte */
+ return 0;
+ if (*s < 0xE0)
+ expected = 1;
+ else if (*s < 0xF0)
+ expected = 2;
+ else if (*s < 0xF8)
+ expected = 3;
+ else
+ return 0;
+ length = expected + 1;
+ for (; expected; expected--)
+ if (s[expected] < 0x80 || s[expected] >= 0xC0)
+ return 0;
+ return length;
}
/* Read a line of input from TOK. Determine encoding
@@ -540,93 +540,93 @@ static int valid_utf8(const unsigned char* s)
static char *
decoding_fgets(char *s, int size, struct tok_state *tok)
{
- char *line = NULL;
- int badchar = 0;
- for (;;) {
- if (tok->decoding_state == STATE_NORMAL) {
- /* We already have a codec associated with
- this input. */
- line = fp_readl(s, size, tok);
- break;
- } else if (tok->decoding_state == STATE_RAW) {
- /* We want a 'raw' read. */
- line = Py_UniversalNewlineFgets(s, size,
- tok->fp, NULL);
- break;
- } else {
- /* We have not yet determined the encoding.
- If an encoding is found, use the file-pointer
- reader functions from now on. */
- if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok))
- return error_ret(tok);
- assert(tok->decoding_state != STATE_INIT);
- }
- }
- if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) {
- if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) {
- return error_ret(tok);
- }
- }
+ char *line = NULL;
+ int badchar = 0;
+ for (;;) {
+ if (tok->decoding_state == STATE_NORMAL) {
+ /* We already have a codec associated with
+ this input. */
+ line = fp_readl(s, size, tok);
+ break;
+ } else if (tok->decoding_state == STATE_RAW) {
+ /* We want a 'raw' read. */
+ line = Py_UniversalNewlineFgets(s, size,
+ tok->fp, NULL);
+ break;
+ } else {
+ /* We have not yet determined the encoding.
+ If an encoding is found, use the file-pointer
+ reader functions from now on. */
+ if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok))
+ return error_ret(tok);
+ assert(tok->decoding_state != STATE_INIT);
+ }
+ }
+ if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) {
+ if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) {
+ return error_ret(tok);
+ }
+ }
#ifndef PGEN
- /* The default encoding is UTF-8, so make sure we don't have any
- non-UTF-8 sequences in it. */
- if (line && !tok->encoding) {
- unsigned char *c;
- int length;
- for (c = (unsigned char *)line; *c; c += length)
- if (!(length = valid_utf8(c))) {
- badchar = *c;
- break;
- }
- }
- if (badchar) {
- /* Need to add 1 to the line number, since this line
- has not been counted, yet. */
- PyErr_Format(PyExc_SyntaxError,
- "Non-UTF-8 code starting with '\\x%.2x' "
- "in file %.200s on line %i, "
- "but no encoding declared; "
- "see http://python.org/dev/peps/pep-0263/ for details",
- badchar, tok->filename, tok->lineno + 1);
- return error_ret(tok);
- }
+ /* The default encoding is UTF-8, so make sure we don't have any
+ non-UTF-8 sequences in it. */
+ if (line && !tok->encoding) {
+ unsigned char *c;
+ int length;
+ for (c = (unsigned char *)line; *c; c += length)
+ if (!(length = valid_utf8(c))) {
+ badchar = *c;
+ break;
+ }
+ }
+ if (badchar) {
+ /* Need to add 1 to the line number, since this line
+ has not been counted, yet. */
+ PyErr_Format(PyExc_SyntaxError,
+ "Non-UTF-8 code starting with '\\x%.2x' "
+ "in file %.200s on line %i, "
+ "but no encoding declared; "
+ "see http://python.org/dev/peps/pep-0263/ for details",
+ badchar, tok->filename, tok->lineno + 1);
+ return error_ret(tok);
+ }
#endif
- return line;
+ return line;
}
static int
decoding_feof(struct tok_state *tok)
{
- if (tok->decoding_state != STATE_NORMAL) {
- return feof(tok->fp);
- } else {
- PyObject* buf = tok->decoding_buffer;
- if (buf == NULL) {
- buf = PyObject_CallObject(tok->decoding_readline, NULL);
- if (buf == NULL) {
- error_ret(tok);
- return 1;
- } else {
- tok->decoding_buffer = buf;
- }
- }
- return PyObject_Length(buf) == 0;
- }
+ if (tok->decoding_state != STATE_NORMAL) {
+ return feof(tok->fp);
+ } else {
+ PyObject* buf = tok->decoding_buffer;
+ if (buf == NULL) {
+ buf = PyObject_CallObject(tok->decoding_readline, NULL);
+ if (buf == NULL) {
+ error_ret(tok);
+ return 1;
+ } else {
+ tok->decoding_buffer = buf;
+ }
+ }
+ return PyObject_Length(buf) == 0;
+ }
}
/* Fetch a byte from TOK, using the string buffer. */
static int
buf_getc(struct tok_state *tok) {
- return Py_CHARMASK(*tok->str++);
+ return Py_CHARMASK(*tok->str++);
}
/* Unfetch a byte from TOK, using the string buffer. */
static void
buf_ungetc(int c, struct tok_state *tok) {
- tok->str--;
- assert(Py_CHARMASK(*tok->str) == c); /* tok->cur may point to read-only segment */
+ tok->str--;
+ assert(Py_CHARMASK(*tok->str) == c); /* tok->cur may point to read-only segment */
}
/* Set the readline function for TOK to ENC. For the string-based
@@ -634,8 +634,8 @@ buf_ungetc(int c, struct tok_state *tok) {
static int
buf_setreadl(struct tok_state *tok, const char* enc) {
- tok->enc = enc;
- return 1;
+ tok->enc = enc;
+ return 1;
}
/* Return a UTF-8 encoding Python string object from the
@@ -643,54 +643,54 @@ buf_setreadl(struct tok_state *tok, const char* enc) {
static PyObject *
translate_into_utf8(const char* str, const char* enc) {
- PyObject *utf8;
- PyObject* buf = PyUnicode_Decode(str, strlen(str), enc, NULL);
- if (buf == NULL)
- return NULL;
- utf8 = PyUnicode_AsUTF8String(buf);
- Py_DECREF(buf);
- return utf8;
+ PyObject *utf8;
+ PyObject* buf = PyUnicode_Decode(str, strlen(str), enc, NULL);
+ if (buf == NULL)
+ return NULL;
+ utf8 = PyUnicode_AsUTF8String(buf);
+ Py_DECREF(buf);
+ return utf8;
}
static char *
translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
- int skip_next_lf = 0, needed_length = strlen(s) + 2, final_length;
- char *buf, *current;
- char c = '\0';
- buf = PyMem_MALLOC(needed_length);
- if (buf == NULL) {
- tok->done = E_NOMEM;
- return NULL;
- }
- for (current = buf; *s; s++, current++) {
- c = *s;
- if (skip_next_lf) {
- skip_next_lf = 0;
- if (c == '\n') {
- c = *++s;
- if (!c)
- break;
- }
- }
- if (c == '\r') {
- skip_next_lf = 1;
- c = '\n';
- }
- *current = c;
- }
- /* If this is exec input, add a newline to the end of the string if
- there isn't one already. */
- if (exec_input && c != '\n') {
- *current = '\n';
- current++;
- }
- *current = '\0';
- final_length = current - buf + 1;
- if (final_length < needed_length && final_length)
- /* should never fail */
- buf = PyMem_REALLOC(buf, final_length);
- return buf;
+ int skip_next_lf = 0, needed_length = strlen(s) + 2, final_length;
+ char *buf, *current;
+ char c = '\0';
+ buf = PyMem_MALLOC(needed_length);
+ if (buf == NULL) {
+ tok->done = E_NOMEM;
+ return NULL;
+ }
+ for (current = buf; *s; s++, current++) {
+ c = *s;
+ if (skip_next_lf) {
+ skip_next_lf = 0;
+ if (c == '\n') {
+ c = *++s;
+ if (!c)
+ break;
+ }
+ }
+ if (c == '\r') {
+ skip_next_lf = 1;
+ c = '\n';
+ }
+ *current = c;
+ }
+ /* If this is exec input, add a newline to the end of the string if
+ there isn't one already. */
+ if (exec_input && c != '\n') {
+ *current = '\n';
+ current++;
+ }
+ *current = '\0';
+ final_length = current - buf + 1;
+ if (final_length < needed_length && final_length)
+ /* should never fail */
+ buf = PyMem_REALLOC(buf, final_length);
+ return buf;
}
/* Decode a byte string STR for use as the buffer of TOK.
@@ -700,57 +700,57 @@ translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
static const char *
decode_str(const char *input, int single, struct tok_state *tok)
{
- PyObject* utf8 = NULL;
- const char *str;
- const char *s;
- const char *newl[2] = {NULL, NULL};
- int lineno = 0;
- tok->input = str = translate_newlines(input, single, tok);
- if (str == NULL)
- return NULL;
- tok->enc = NULL;
- tok->str = str;
- if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
- return error_ret(tok);
- str = tok->str; /* string after BOM if any */
- assert(str);
- if (tok->enc != NULL) {
- utf8 = translate_into_utf8(str, tok->enc);
- if (utf8 == NULL)
- return error_ret(tok);
- str = PyBytes_AsString(utf8);
- }
- for (s = str;; s++) {
- if (*s == '\0') break;
- else if (*s == '\n') {
- assert(lineno < 2);
- newl[lineno] = s;
- lineno++;
- if (lineno == 2) break;
- }
- }
- tok->enc = NULL;
- /* need to check line 1 and 2 separately since check_coding_spec
- assumes a single line as input */
- if (newl[0]) {
- if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl))
- return error_ret(tok);
- if (tok->enc == NULL && newl[1]) {
- if (!check_coding_spec(newl[0]+1, newl[1] - newl[0],
- tok, buf_setreadl))
- return error_ret(tok);
- }
- }
- if (tok->enc != NULL) {
- assert(utf8 == NULL);
- utf8 = translate_into_utf8(str, tok->enc);
- if (utf8 == NULL)
- return error_ret(tok);
- str = PyBytes_AS_STRING(utf8);
- }
- assert(tok->decoding_buffer == NULL);
- tok->decoding_buffer = utf8; /* CAUTION */
- return str;
+ PyObject* utf8 = NULL;
+ const char *str;
+ const char *s;
+ const char *newl[2] = {NULL, NULL};
+ int lineno = 0;
+ tok->input = str = translate_newlines(input, single, tok);
+ if (str == NULL)
+ return NULL;
+ tok->enc = NULL;
+ tok->str = str;
+ if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
+ return error_ret(tok);
+ str = tok->str; /* string after BOM if any */
+ assert(str);
+ if (tok->enc != NULL) {
+ utf8 = translate_into_utf8(str, tok->enc);
+ if (utf8 == NULL)
+ return error_ret(tok);
+ str = PyBytes_AsString(utf8);
+ }
+ for (s = str;; s++) {
+ if (*s == '\0') break;
+ else if (*s == '\n') {
+ assert(lineno < 2);
+ newl[lineno] = s;
+ lineno++;
+ if (lineno == 2) break;
+ }
+ }
+ tok->enc = NULL;
+ /* need to check line 1 and 2 separately since check_coding_spec
+ assumes a single line as input */
+ if (newl[0]) {
+ if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl))
+ return error_ret(tok);
+ if (tok->enc == NULL && newl[1]) {
+ if (!check_coding_spec(newl[0]+1, newl[1] - newl[0],
+ tok, buf_setreadl))
+ return error_ret(tok);
+ }
+ }
+ if (tok->enc != NULL) {
+ assert(utf8 == NULL);
+ utf8 = translate_into_utf8(str, tok->enc);
+ if (utf8 == NULL)
+ return error_ret(tok);
+ str = PyBytes_AS_STRING(utf8);
+ }
+ assert(tok->decoding_buffer == NULL);
+ tok->decoding_buffer = utf8; /* CAUTION */
+ return str;
}
#endif /* PGEN */
@@ -760,47 +760,47 @@ decode_str(const char *input, int single, struct tok_state *tok)
struct tok_state *
PyTokenizer_FromString(const char *str, int exec_input)
{
- struct tok_state *tok = tok_new();
- if (tok == NULL)
- return NULL;
- str = (char *)decode_str(str, exec_input, tok);
- if (str == NULL) {
- PyTokenizer_Free(tok);
- return NULL;
- }
-
- /* XXX: constify members. */
- tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
- return tok;
+ struct tok_state *tok = tok_new();
+ if (tok == NULL)
+ return NULL;
+ str = (char *)decode_str(str, exec_input, tok);
+ if (str == NULL) {
+ PyTokenizer_Free(tok);
+ return NULL;
+ }
+
+ /* XXX: constify members. */
+ tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
+ return tok;
}
struct tok_state *
PyTokenizer_FromUTF8(const char *str, int exec_input)
{
- struct tok_state *tok = tok_new();
- if (tok == NULL)
- return NULL;
+ struct tok_state *tok = tok_new();
+ if (tok == NULL)
+ return NULL;
#ifndef PGEN
- tok->input = str = translate_newlines(str, exec_input, tok);
+ tok->input = str = translate_newlines(str, exec_input, tok);
#endif
- if (str == NULL) {
- PyTokenizer_Free(tok);
- return NULL;
- }
- tok->decoding_state = STATE_RAW;
- tok->read_coding_spec = 1;
- tok->enc = NULL;
- tok->str = str;
- tok->encoding = (char *)PyMem_MALLOC(6);
- if (!tok->encoding) {
- PyTokenizer_Free(tok);
- return NULL;
- }
- strcpy(tok->encoding, "utf-8");
-
- /* XXX: constify members. */
- tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
- return tok;
+ if (str == NULL) {
+ PyTokenizer_Free(tok);
+ return NULL;
+ }
+ tok->decoding_state = STATE_RAW;
+ tok->read_coding_spec = 1;
+ tok->enc = NULL;
+ tok->str = str;
+ tok->encoding = (char *)PyMem_MALLOC(6);
+ if (!tok->encoding) {
+ PyTokenizer_Free(tok);
+ return NULL;
+ }
+ strcpy(tok->encoding, "utf-8");
+
+ /* XXX: constify members. */
+ tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
+ return tok;
}
/* Set up tokenizer for file */
@@ -808,30 +808,30 @@ PyTokenizer_FromUTF8(const char *str, int exec_input)
struct tok_state *
PyTokenizer_FromFile(FILE *fp, char* enc, char *ps1, char *ps2)
{
- struct tok_state *tok = tok_new();
- if (tok == NULL)
- return NULL;
- if ((tok->buf = (char *)PyMem_MALLOC(BUFSIZ)) == NULL) {
- PyTokenizer_Free(tok);
- return NULL;
- }
- tok->cur = tok->inp = tok->buf;
- tok->end = tok->buf + BUFSIZ;
- tok->fp = fp;
- tok->prompt = ps1;
- tok->nextprompt = ps2;
- if (enc != NULL) {
- /* Must copy encoding declaration since it
- gets copied into the parse tree. */
- tok->encoding = PyMem_MALLOC(strlen(enc)+1);
- if (!tok->encoding) {
- PyTokenizer_Free(tok);
- return NULL;
- }
- strcpy(tok->encoding, enc);
- tok->decoding_state = STATE_NORMAL;
- }
- return tok;
+ struct tok_state *tok = tok_new();
+ if (tok == NULL)
+ return NULL;
+ if ((tok->buf = (char *)PyMem_MALLOC(BUFSIZ)) == NULL) {
+ PyTokenizer_Free(tok);
+ return NULL;
+ }
+ tok->cur = tok->inp = tok->buf;
+ tok->end = tok->buf + BUFSIZ;
+ tok->fp = fp;
+ tok->prompt = ps1;
+ tok->nextprompt = ps2;
+ if (enc != NULL) {
+ /* Must copy encoding declaration since it
+ gets copied into the parse tree. */
+ tok->encoding = PyMem_MALLOC(strlen(enc)+1);
+ if (!tok->encoding) {
+ PyTokenizer_Free(tok);
+ return NULL;
+ }
+ strcpy(tok->encoding, enc);
+ tok->decoding_state = STATE_NORMAL;
+ }
+ return tok;
}
@@ -840,17 +840,17 @@ PyTokenizer_FromFile(FILE *fp, char* enc, char *ps1, char *ps2)
void
PyTokenizer_Free(struct tok_state *tok)
{
- if (tok->encoding != NULL)
- PyMem_FREE(tok->encoding);
+ if (tok->encoding != NULL)
+ PyMem_FREE(tok->encoding);
#ifndef PGEN
- Py_XDECREF(tok->decoding_readline);
- Py_XDECREF(tok->decoding_buffer);
+ Py_XDECREF(tok->decoding_readline);
+ Py_XDECREF(tok->decoding_buffer);
#endif
- if (tok->fp != NULL && tok->buf != NULL)
- PyMem_FREE(tok->buf);
- if (tok->input)
- PyMem_FREE((char *)tok->input);
- PyMem_FREE(tok);
+ if (tok->fp != NULL && tok->buf != NULL)
+ PyMem_FREE(tok->buf);
+ if (tok->input)
+ PyMem_FREE((char *)tok->input);
+ PyMem_FREE(tok);
}
/* Get next char, updating state; error code goes into tok->done */
@@ -858,188 +858,188 @@ PyTokenizer_Free(struct tok_state *tok)
static int
tok_nextc(register struct tok_state *tok)
{
- for (;;) {
- if (tok->cur != tok->inp) {
- return Py_CHARMASK(*tok->cur++); /* Fast path */
- }
- if (tok->done != E_OK)
- return EOF;
- if (tok->fp == NULL) {
- char *end = strchr(tok->inp, '\n');
- if (end != NULL)
- end++;
- else {
- end = strchr(tok->inp, '\0');
- if (end == tok->inp) {
- tok->done = E_EOF;
- return EOF;
- }
- }
- if (tok->start == NULL)
- tok->buf = tok->cur;
- tok->line_start = tok->cur;
- tok->lineno++;
- tok->inp = end;
- return Py_CHARMASK(*tok->cur++);
- }
- if (tok->prompt != NULL) {
- char *newtok = PyOS_Readline(stdin, stdout, tok->prompt);
+ for (;;) {
+ if (tok->cur != tok->inp) {
+ return Py_CHARMASK(*tok->cur++); /* Fast path */
+ }
+ if (tok->done != E_OK)
+ return EOF;
+ if (tok->fp == NULL) {
+ char *end = strchr(tok->inp, '\n');
+ if (end != NULL)
+ end++;
+ else {
+ end = strchr(tok->inp, '\0');
+ if (end == tok->inp) {
+ tok->done = E_EOF;
+ return EOF;
+ }
+ }
+ if (tok->start == NULL)
+ tok->buf = tok->cur;
+ tok->line_start = tok->cur;
+ tok->lineno++;
+ tok->inp = end;
+ return Py_CHARMASK(*tok->cur++);
+ }
+ if (tok->prompt != NULL) {
+ char *newtok = PyOS_Readline(stdin, stdout, tok->prompt);
#ifndef PGEN
- if (tok->encoding && newtok && *newtok) {
- /* Recode to UTF-8 */
- Py_ssize_t buflen;
- const char* buf;
- PyObject *u = translate_into_utf8(newtok, tok->encoding);
- PyMem_FREE(newtok);
- if (!u) {
- tok->done = E_DECODE;
- return EOF;
- }
- buflen = PyBytes_GET_SIZE(u);
- buf = PyBytes_AS_STRING(u);
- if (!buf) {
- Py_DECREF(u);
- tok->done = E_DECODE;
- return EOF;
- }
- newtok = PyMem_MALLOC(buflen+1);
- strcpy(newtok, buf);
- Py_DECREF(u);
- }
+ if (tok->encoding && newtok && *newtok) {
+ /* Recode to UTF-8 */
+ Py_ssize_t buflen;
+ const char* buf;
+ PyObject *u = translate_into_utf8(newtok, tok->encoding);
+ PyMem_FREE(newtok);
+ if (!u) {
+ tok->done = E_DECODE;
+ return EOF;
+ }
+ buflen = PyBytes_GET_SIZE(u);
+ buf = PyBytes_AS_STRING(u);
+ if (!buf) {
+ Py_DECREF(u);
+ tok->done = E_DECODE;
+ return EOF;
+ }
+ newtok = PyMem_MALLOC(buflen+1);
+ strcpy(newtok, buf);
+ Py_DECREF(u);
+ }
#endif
- if (tok->nextprompt != NULL)
- tok->prompt = tok->nextprompt;
- if (newtok == NULL)
- tok->done = E_INTR;
- else if (*newtok == '\0') {
- PyMem_FREE(newtok);
- tok->done = E_EOF;
- }
- else if (tok->start != NULL) {
- size_t start = tok->start - tok->buf;
- size_t oldlen = tok->cur - tok->buf;
- size_t newlen = oldlen + strlen(newtok);
- char *buf = tok->buf;
- buf = (char *)PyMem_REALLOC(buf, newlen+1);
- tok->lineno++;
- if (buf == NULL) {
- PyMem_FREE(tok->buf);
- tok->buf = NULL;
- PyMem_FREE(newtok);
- tok->done = E_NOMEM;
- return EOF;
- }
- tok->buf = buf;
- tok->cur = tok->buf + oldlen;
- tok->line_start = tok->cur;
- strcpy(tok->buf + oldlen, newtok);
- PyMem_FREE(newtok);
- tok->inp = tok->buf + newlen;
- tok->end = tok->inp + 1;
- tok->start = tok->buf + start;
- }
- else {
- tok->lineno++;
- if (tok->buf != NULL)
- PyMem_FREE(tok->buf);
- tok->buf = newtok;
- tok->line_start = tok->buf;
- tok->cur = tok->buf;
- tok->line_start = tok->buf;
- tok->inp = strchr(tok->buf, '\0');
- tok->end = tok->inp + 1;
- }
- }
- else {
- int done = 0;
- Py_ssize_t cur = 0;
- char *pt;
- if (tok->start == NULL) {
- if (tok->buf == NULL) {
- tok->buf = (char *)
- PyMem_MALLOC(BUFSIZ);
- if (tok->buf == NULL) {
- tok->done = E_NOMEM;
- return EOF;
- }
- tok->end = tok->buf + BUFSIZ;
- }
- if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
- tok) == NULL) {
- tok->done = E_EOF;
- done = 1;
- }
- else {
- tok->done = E_OK;
- tok->inp = strchr(tok->buf, '\0');
- done = tok->inp[-1] == '\n';
- }
- }
- else {
- cur = tok->cur - tok->buf;
- if (decoding_feof(tok)) {
- tok->done = E_EOF;
- done = 1;
- }
- else
- tok->done = E_OK;
- }
- tok->lineno++;
- /* Read until '\n' or EOF */
- while (!done) {
- Py_ssize_t curstart = tok->start == NULL ? -1 :
- tok->start - tok->buf;
- Py_ssize_t curvalid = tok->inp - tok->buf;
- Py_ssize_t newsize = curvalid + BUFSIZ;
- char *newbuf = tok->buf;
- newbuf = (char *)PyMem_REALLOC(newbuf,
- newsize);
- if (newbuf == NULL) {
- tok->done = E_NOMEM;
- tok->cur = tok->inp;
- return EOF;
- }
- tok->buf = newbuf;
- tok->inp = tok->buf + curvalid;
- tok->end = tok->buf + newsize;
- tok->start = curstart < 0 ? NULL :
- tok->buf + curstart;
- if (decoding_fgets(tok->inp,
- (int)(tok->end - tok->inp),
- tok) == NULL) {
- /* Break out early on decoding
- errors, as tok->buf will be NULL
- */
- if (tok->decoding_erred)
- return EOF;
- /* Last line does not end in \n,
- fake one */
- strcpy(tok->inp, "\n");
- }
- tok->inp = strchr(tok->inp, '\0');
- done = tok->inp[-1] == '\n';
- }
- if (tok->buf != NULL) {
- tok->cur = tok->buf + cur;
- tok->line_start = tok->cur;
- /* replace "\r\n" with "\n" */
- /* For Mac leave the \r, giving a syntax error */
- pt = tok->inp - 2;
- if (pt >= tok->buf && *pt == '\r') {
- *pt++ = '\n';
- *pt = '\0';
- tok->inp = pt;
- }
- }
- }
- if (tok->done != E_OK) {
- if (tok->prompt != NULL)
- PySys_WriteStderr("\n");
- tok->cur = tok->inp;
- return EOF;
- }
- }
- /*NOTREACHED*/
+ if (tok->nextprompt != NULL)
+ tok->prompt = tok->nextprompt;
+ if (newtok == NULL)
+ tok->done = E_INTR;
+ else if (*newtok == '\0') {
+ PyMem_FREE(newtok);
+ tok->done = E_EOF;
+ }
+ else if (tok->start != NULL) {
+ size_t start = tok->start - tok->buf;
+ size_t oldlen = tok->cur - tok->buf;
+ size_t newlen = oldlen + strlen(newtok);
+ char *buf = tok->buf;
+ buf = (char *)PyMem_REALLOC(buf, newlen+1);
+ tok->lineno++;
+ if (buf == NULL) {
+ PyMem_FREE(tok->buf);
+ tok->buf = NULL;
+ PyMem_FREE(newtok);
+ tok->done = E_NOMEM;
+ return EOF;
+ }
+ tok->buf = buf;
+ tok->cur = tok->buf + oldlen;
+ tok->line_start = tok->cur;
+ strcpy(tok->buf + oldlen, newtok);
+ PyMem_FREE(newtok);
+ tok->inp = tok->buf + newlen;
+ tok->end = tok->inp + 1;
+ tok->start = tok->buf + start;
+ }
+ else {
+ tok->lineno++;
+ if (tok->buf != NULL)
+ PyMem_FREE(tok->buf);
+ tok->buf = newtok;
+ tok->line_start = tok->buf;
+ tok->cur = tok->buf;
+ tok->line_start = tok->buf;
+ tok->inp = strchr(tok->buf, '\0');
+ tok->end = tok->inp + 1;
+ }
+ }
+ else {
+ int done = 0;
+ Py_ssize_t cur = 0;
+ char *pt;
+ if (tok->start == NULL) {
+ if (tok->buf == NULL) {
+ tok->buf = (char *)
+ PyMem_MALLOC(BUFSIZ);
+ if (tok->buf == NULL) {
+ tok->done = E_NOMEM;
+ return EOF;
+ }
+ tok->end = tok->buf + BUFSIZ;
+ }
+ if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
+ tok) == NULL) {
+ tok->done = E_EOF;
+ done = 1;
+ }
+ else {
+ tok->done = E_OK;
+ tok->inp = strchr(tok->buf, '\0');
+ done = tok->inp[-1] == '\n';
+ }
+ }
+ else {
+ cur = tok->cur - tok->buf;
+ if (decoding_feof(tok)) {
+ tok->done = E_EOF;
+ done = 1;
+ }
+ else
+ tok->done = E_OK;
+ }
+ tok->lineno++;
+ /* Read until '\n' or EOF */
+ while (!done) {
+ Py_ssize_t curstart = tok->start == NULL ? -1 :
+ tok->start - tok->buf;
+ Py_ssize_t curvalid = tok->inp - tok->buf;
+ Py_ssize_t newsize = curvalid + BUFSIZ;
+ char *newbuf = tok->buf;
+ newbuf = (char *)PyMem_REALLOC(newbuf,
+ newsize);
+ if (newbuf == NULL) {
+ tok->done = E_NOMEM;
+ tok->cur = tok->inp;
+ return EOF;
+ }
+ tok->buf = newbuf;
+ tok->inp = tok->buf + curvalid;
+ tok->end = tok->buf + newsize;
+ tok->start = curstart < 0 ? NULL :
+ tok->buf + curstart;
+ if (decoding_fgets(tok->inp,
+ (int)(tok->end - tok->inp),
+ tok) == NULL) {
+ /* Break out early on decoding
+ errors, as tok->buf will be NULL
+ */
+ if (tok->decoding_erred)
+ return EOF;
+ /* Last line does not end in \n,
+ fake one */
+ strcpy(tok->inp, "\n");
+ }
+ tok->inp = strchr(tok->inp, '\0');
+ done = tok->inp[-1] == '\n';
+ }
+ if (tok->buf != NULL) {
+ tok->cur = tok->buf + cur;
+ tok->line_start = tok->cur;
+ /* replace "\r\n" with "\n" */
+ /* For Mac leave the \r, giving a syntax error */
+ pt = tok->inp - 2;
+ if (pt >= tok->buf && *pt == '\r') {
+ *pt++ = '\n';
+ *pt = '\0';
+ tok->inp = pt;
+ }
+ }
+ }
+ if (tok->done != E_OK) {
+ if (tok->prompt != NULL)
+ PySys_WriteStderr("\n");
+ tok->cur = tok->inp;
+ return EOF;
+ }
+ }
+ /*NOTREACHED*/
}
@@ -1048,12 +1048,12 @@ tok_nextc(register struct tok_state *tok)
static void
tok_backup(register struct tok_state *tok, register int c)
{
- if (c != EOF) {
- if (--tok->cur < tok->buf)
- Py_FatalError("tok_backup: beginning of buffer");
- if (*tok->cur != c)
- *tok->cur = c;
- }
+ if (c != EOF) {
+ if (--tok->cur < tok->buf)
+ Py_FatalError("tok_backup: beginning of buffer");
+ if (*tok->cur != c)
+ *tok->cur = c;
+ }
}
@@ -1062,181 +1062,181 @@ tok_backup(register struct tok_state *tok, register int c)
int
PyToken_OneChar(int c)
{
- switch (c) {
- case '(': return LPAR;
- case ')': return RPAR;
- case '[': return LSQB;
- case ']': return RSQB;
- case ':': return COLON;
- case ',': return COMMA;
- case ';': return SEMI;
- case '+': return PLUS;
- case '-': return MINUS;
- case '*': return STAR;
- case '/': return SLASH;
- case '|': return VBAR;
- case '&': return AMPER;
- case '<': return LESS;
- case '>': return GREATER;
- case '=': return EQUAL;
- case '.': return DOT;
- case '%': return PERCENT;
- case '{': return LBRACE;
- case '}': return RBRACE;
- case '^': return CIRCUMFLEX;
- case '~': return TILDE;
- case '@': return AT;
- default: return OP;
- }
+ switch (c) {
+ case '(': return LPAR;
+ case ')': return RPAR;
+ case '[': return LSQB;
+ case ']': return RSQB;
+ case ':': return COLON;
+ case ',': return COMMA;
+ case ';': return SEMI;
+ case '+': return PLUS;
+ case '-': return MINUS;
+ case '*': return STAR;
+ case '/': return SLASH;
+ case '|': return VBAR;
+ case '&': return AMPER;
+ case '<': return LESS;
+ case '>': return GREATER;
+ case '=': return EQUAL;
+ case '.': return DOT;
+ case '%': return PERCENT;
+ case '{': return LBRACE;
+ case '}': return RBRACE;
+ case '^': return CIRCUMFLEX;
+ case '~': return TILDE;
+ case '@': return AT;
+ default: return OP;
+ }
}
int
PyToken_TwoChars(int c1, int c2)
{
- switch (c1) {
- case '=':
- switch (c2) {
- case '=': return EQEQUAL;
- }
- break;
- case '!':
- switch (c2) {
- case '=': return NOTEQUAL;
- }
- break;
- case '<':
- switch (c2) {
- case '>': return NOTEQUAL;
- case '=': return LESSEQUAL;
- case '<': return LEFTSHIFT;
- }
- break;
- case '>':
- switch (c2) {
- case '=': return GREATEREQUAL;
- case '>': return RIGHTSHIFT;
- }
- break;
- case '+':
- switch (c2) {
- case '=': return PLUSEQUAL;
- }
- break;
- case '-':
- switch (c2) {
- case '=': return MINEQUAL;
- case '>': return RARROW;
- }
- break;
- case '*':
- switch (c2) {
- case '*': return DOUBLESTAR;
- case '=': return STAREQUAL;
- }
- break;
- case '/':
- switch (c2) {
- case '/': return DOUBLESLASH;
- case '=': return SLASHEQUAL;
- }
- break;
- case '|':
- switch (c2) {
- case '=': return VBAREQUAL;
- }
- break;
- case '%':
- switch (c2) {
- case '=': return PERCENTEQUAL;
- }
- break;
- case '&':
- switch (c2) {
- case '=': return AMPEREQUAL;
- }
- break;
- case '^':
- switch (c2) {
- case '=': return CIRCUMFLEXEQUAL;
- }
- break;
- }
- return OP;
+ switch (c1) {
+ case '=':
+ switch (c2) {
+ case '=': return EQEQUAL;
+ }
+ break;
+ case '!':
+ switch (c2) {
+ case '=': return NOTEQUAL;
+ }
+ break;
+ case '<':
+ switch (c2) {
+ case '>': return NOTEQUAL;
+ case '=': return LESSEQUAL;
+ case '<': return LEFTSHIFT;
+ }
+ break;
+ case '>':
+ switch (c2) {
+ case '=': return GREATEREQUAL;
+ case '>': return RIGHTSHIFT;
+ }
+ break;
+ case '+':
+ switch (c2) {
+ case '=': return PLUSEQUAL;
+ }
+ break;
+ case '-':
+ switch (c2) {
+ case '=': return MINEQUAL;
+ case '>': return RARROW;
+ }
+ break;
+ case '*':
+ switch (c2) {
+ case '*': return DOUBLESTAR;
+ case '=': return STAREQUAL;
+ }
+ break;
+ case '/':
+ switch (c2) {
+ case '/': return DOUBLESLASH;
+ case '=': return SLASHEQUAL;
+ }
+ break;
+ case '|':
+ switch (c2) {
+ case '=': return VBAREQUAL;
+ }
+ break;
+ case '%':
+ switch (c2) {
+ case '=': return PERCENTEQUAL;
+ }
+ break;
+ case '&':
+ switch (c2) {
+ case '=': return AMPEREQUAL;
+ }
+ break;
+ case '^':
+ switch (c2) {
+ case '=': return CIRCUMFLEXEQUAL;
+ }
+ break;
+ }
+ return OP;
}
int
PyToken_ThreeChars(int c1, int c2, int c3)
{
- switch (c1) {
- case '<':
- switch (c2) {
- case '<':
- switch (c3) {
- case '=':
- return LEFTSHIFTEQUAL;
- }
- break;
- }
- break;
- case '>':
- switch (c2) {
- case '>':
- switch (c3) {
- case '=':
- return RIGHTSHIFTEQUAL;
- }
- break;
- }
- break;
- case '*':
- switch (c2) {
- case '*':
- switch (c3) {
- case '=':
- return DOUBLESTAREQUAL;
- }
- break;
- }
- break;
- case '/':
- switch (c2) {
- case '/':
- switch (c3) {
- case '=':
- return DOUBLESLASHEQUAL;
- }
- break;
- }
- break;
+ switch (c1) {
+ case '<':
+ switch (c2) {
+ case '<':
+ switch (c3) {
+ case '=':
+ return LEFTSHIFTEQUAL;
+ }
+ break;
+ }
+ break;
+ case '>':
+ switch (c2) {
+ case '>':
+ switch (c3) {
+ case '=':
+ return RIGHTSHIFTEQUAL;
+ }
+ break;
+ }
+ break;
+ case '*':
+ switch (c2) {
+ case '*':
+ switch (c3) {
+ case '=':
+ return DOUBLESTAREQUAL;
+ }
+ break;
+ }
+ break;
+ case '/':
+ switch (c2) {
+ case '/':
+ switch (c3) {
+ case '=':
+ return DOUBLESLASHEQUAL;
+ }
+ break;
+ }
+ break;
+ case '.':
+ switch (c2) {
case '.':
- switch (c2) {
- case '.':
- switch (c3) {
- case '.':
- return ELLIPSIS;
- }
- break;
- }
- break;
- }
- return OP;
+ switch (c3) {
+ case '.':
+ return ELLIPSIS;
+ }
+ break;
+ }
+ break;
+ }
+ return OP;
}
static int
indenterror(struct tok_state *tok)
{
- if (tok->alterror) {
- tok->done = E_TABSPACE;
- tok->cur = tok->inp;
- return 1;
- }
- if (tok->altwarning) {
- PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
- "in indentation\n", tok->filename);
- tok->altwarning = 0;
- }
- return 0;
+ if (tok->alterror) {
+ tok->done = E_TABSPACE;
+ tok->cur = tok->inp;
+ return 1;
+ }
+ if (tok->altwarning) {
+ PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
+ "in indentation\n", tok->filename);
+ tok->altwarning = 0;
+ }
+ return 0;
}
#ifdef PGEN
@@ -1246,23 +1246,23 @@ indenterror(struct tok_state *tok)
static int
verify_identifier(struct tok_state *tok)
{
- PyObject *s;
- int result;
- s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
- if (s == NULL) {
- if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
- PyErr_Clear();
- tok->done = E_IDENTIFIER;
- } else {
- tok->done = E_ERROR;
- }
- return 0;
- }
- result = PyUnicode_IsIdentifier(s);
- Py_DECREF(s);
- if (result == 0)
- tok->done = E_IDENTIFIER;
- return result;
+ PyObject *s;
+ int result;
+ s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
+ if (s == NULL) {
+ if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+ PyErr_Clear();
+ tok->done = E_IDENTIFIER;
+ } else {
+ tok->done = E_ERROR;
+ }
+ return 0;
+ }
+ result = PyUnicode_IsIdentifier(s);
+ Py_DECREF(s);
+ if (result == 0)
+ tok->done = E_IDENTIFIER;
+ return result;
}
#endif
@@ -1271,407 +1271,407 @@ verify_identifier(struct tok_state *tok)
static int
tok_get(register struct tok_state *tok, char **p_start, char **p_end)
{
- register int c;
- int blankline, nonascii;
+ register int c;
+ int blankline, nonascii;
- *p_start = *p_end = NULL;
+ *p_start = *p_end = NULL;
nextline:
- tok->start = NULL;
- blankline = 0;
-
- /* Get indentation level */
- if (tok->atbol) {
- register int col = 0;
- register int altcol = 0;
- tok->atbol = 0;
- for (;;) {
- c = tok_nextc(tok);
- if (c == ' ')
- col++, altcol++;
- else if (c == '\t') {
- col = (col/tok->tabsize + 1) * tok->tabsize;
- altcol = (altcol/tok->alttabsize + 1)
- * tok->alttabsize;
- }
- else if (c == '\014') /* Control-L (formfeed) */
- col = altcol = 0; /* For Emacs users */
- else
- break;
- }
- tok_backup(tok, c);
- if (c == '#' || c == '\n') {
- /* Lines with only whitespace and/or comments
- shouldn't affect the indentation and are
- not passed to the parser as NEWLINE tokens,
- except *totally* empty lines in interactive
- mode, which signal the end of a command group. */
- if (col == 0 && c == '\n' && tok->prompt != NULL)
- blankline = 0; /* Let it through */
- else
- blankline = 1; /* Ignore completely */
- /* We can't jump back right here since we still
- may need to skip to the end of a comment */
- }
- if (!blankline && tok->level == 0) {
- if (col == tok->indstack[tok->indent]) {
- /* No change */
- if (altcol != tok->altindstack[tok->indent]) {
- if (indenterror(tok))
- return ERRORTOKEN;
- }
- }
- else if (col > tok->indstack[tok->indent]) {
- /* Indent -- always one */
- if (tok->indent+1 >= MAXINDENT) {
- tok->done = E_TOODEEP;
- tok->cur = tok->inp;
- return ERRORTOKEN;
- }
- if (altcol <= tok->altindstack[tok->indent]) {
- if (indenterror(tok))
- return ERRORTOKEN;
- }
- tok->pendin++;
- tok->indstack[++tok->indent] = col;
- tok->altindstack[tok->indent] = altcol;
- }
- else /* col < tok->indstack[tok->indent] */ {
- /* Dedent -- any number, must be consistent */
- while (tok->indent > 0 &&
- col < tok->indstack[tok->indent]) {
- tok->pendin--;
- tok->indent--;
- }
- if (col != tok->indstack[tok->indent]) {
- tok->done = E_DEDENT;
- tok->cur = tok->inp;
- return ERRORTOKEN;
- }
- if (altcol != tok->altindstack[tok->indent]) {
- if (indenterror(tok))
- return ERRORTOKEN;
- }
- }
- }
- }
-
- tok->start = tok->cur;
-
- /* Return pending indents/dedents */
- if (tok->pendin != 0) {
- if (tok->pendin < 0) {
- tok->pendin++;
- return DEDENT;
- }
- else {
- tok->pendin--;
- return INDENT;
- }
- }
+ tok->start = NULL;
+ blankline = 0;
+
+ /* Get indentation level */
+ if (tok->atbol) {
+ register int col = 0;
+ register int altcol = 0;
+ tok->atbol = 0;
+ for (;;) {
+ c = tok_nextc(tok);
+ if (c == ' ')
+ col++, altcol++;
+ else if (c == '\t') {
+ col = (col/tok->tabsize + 1) * tok->tabsize;
+ altcol = (altcol/tok->alttabsize + 1)
+ * tok->alttabsize;
+ }
+ else if (c == '\014') /* Control-L (formfeed) */
+ col = altcol = 0; /* For Emacs users */
+ else
+ break;
+ }
+ tok_backup(tok, c);
+ if (c == '#' || c == '\n') {
+ /* Lines with only whitespace and/or comments
+ shouldn't affect the indentation and are
+ not passed to the parser as NEWLINE tokens,
+ except *totally* empty lines in interactive
+ mode, which signal the end of a command group. */
+ if (col == 0 && c == '\n' && tok->prompt != NULL)
+ blankline = 0; /* Let it through */
+ else
+ blankline = 1; /* Ignore completely */
+ /* We can't jump back right here since we still
+ may need to skip to the end of a comment */
+ }
+ if (!blankline && tok->level == 0) {
+ if (col == tok->indstack[tok->indent]) {
+ /* No change */
+ if (altcol != tok->altindstack[tok->indent]) {
+ if (indenterror(tok))
+ return ERRORTOKEN;
+ }
+ }
+ else if (col > tok->indstack[tok->indent]) {
+ /* Indent -- always one */
+ if (tok->indent+1 >= MAXINDENT) {
+ tok->done = E_TOODEEP;
+ tok->cur = tok->inp;
+ return ERRORTOKEN;
+ }
+ if (altcol <= tok->altindstack[tok->indent]) {
+ if (indenterror(tok))
+ return ERRORTOKEN;
+ }
+ tok->pendin++;
+ tok->indstack[++tok->indent] = col;
+ tok->altindstack[tok->indent] = altcol;
+ }
+ else /* col < tok->indstack[tok->indent] */ {
+ /* Dedent -- any number, must be consistent */
+ while (tok->indent > 0 &&
+ col < tok->indstack[tok->indent]) {
+ tok->pendin--;
+ tok->indent--;
+ }
+ if (col != tok->indstack[tok->indent]) {
+ tok->done = E_DEDENT;
+ tok->cur = tok->inp;
+ return ERRORTOKEN;
+ }
+ if (altcol != tok->altindstack[tok->indent]) {
+ if (indenterror(tok))
+ return ERRORTOKEN;
+ }
+ }
+ }
+ }
+
+ tok->start = tok->cur;
+
+ /* Return pending indents/dedents */
+ if (tok->pendin != 0) {
+ if (tok->pendin < 0) {
+ tok->pendin++;
+ return DEDENT;
+ }
+ else {
+ tok->pendin--;
+ return INDENT;
+ }
+ }
again:
- tok->start = NULL;
- /* Skip spaces */
- do {
- c = tok_nextc(tok);
- } while (c == ' ' || c == '\t' || c == '\014');
-
- /* Set start of current token */
- tok->start = tok->cur - 1;
-
- /* Skip comment */
- if (c == '#')
- while (c != EOF && c != '\n')
- c = tok_nextc(tok);
-
- /* Check for EOF and errors now */
- if (c == EOF) {
- return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
- }
-
- /* Identifier (most frequent token!) */
- nonascii = 0;
- if (is_potential_identifier_start(c)) {
- /* Process b"", r"" and br"" */
- if (c == 'b' || c == 'B') {
- c = tok_nextc(tok);
- if (c == '"' || c == '\'')
- goto letter_quote;
- }
- if (c == 'r' || c == 'R') {
- c = tok_nextc(tok);
- if (c == '"' || c == '\'')
- goto letter_quote;
- }
- while (is_potential_identifier_char(c)) {
- if (c >= 128)
- nonascii = 1;
- c = tok_nextc(tok);
- }
- tok_backup(tok, c);
- if (nonascii &&
- !verify_identifier(tok)) {
- tok->done = E_IDENTIFIER;
- return ERRORTOKEN;
- }
- *p_start = tok->start;
- *p_end = tok->cur;
- return NAME;
- }
-
- /* Newline */
- if (c == '\n') {
- tok->atbol = 1;
- if (blankline || tok->level > 0)
- goto nextline;
- *p_start = tok->start;
- *p_end = tok->cur - 1; /* Leave '\n' out of the string */
- tok->cont_line = 0;
- return NEWLINE;
- }
-
- /* Period or number starting with period? */
- if (c == '.') {
- c = tok_nextc(tok);
- if (isdigit(c)) {
- goto fraction;
- } else if (c == '.') {
- c = tok_nextc(tok);
- if (c == '.') {
- *p_start = tok->start;
- *p_end = tok->cur;
- return ELLIPSIS;
- } else {
- tok_backup(tok, c);
- }
- tok_backup(tok, '.');
- } else {
- tok_backup(tok, c);
- }
- *p_start = tok->start;
- *p_end = tok->cur;
- return DOT;
- }
-
- /* Number */
- if (isdigit(c)) {
- if (c == '0') {
- /* Hex, octal or binary -- maybe. */
- c = tok_nextc(tok);
- if (c == '.')
- goto fraction;
- if (c == 'j' || c == 'J')
- goto imaginary;
- if (c == 'x' || c == 'X') {
-
- /* Hex */
- c = tok_nextc(tok);
- if (!isxdigit(c)) {
- tok->done = E_TOKEN;
- tok_backup(tok, c);
- return ERRORTOKEN;
- }
- do {
- c = tok_nextc(tok);
- } while (isxdigit(c));
- }
- else if (c == 'o' || c == 'O') {
- /* Octal */
- c = tok_nextc(tok);
- if (c < '0' || c >= '8') {
- tok->done = E_TOKEN;
- tok_backup(tok, c);
- return ERRORTOKEN;
- }
- do {
- c = tok_nextc(tok);
- } while ('0' <= c && c < '8');
- }
- else if (c == 'b' || c == 'B') {
- /* Binary */
- c = tok_nextc(tok);
- if (c != '0' && c != '1') {
- tok->done = E_TOKEN;
- tok_backup(tok, c);
- return ERRORTOKEN;
- }
- do {
- c = tok_nextc(tok);
- } while (c == '0' || c == '1');
- }
- else {
- int nonzero = 0;
- /* maybe old-style octal; c is first char of it */
- /* in any case, allow '0' as a literal */
- while (c == '0')
- c = tok_nextc(tok);
- while (isdigit(c)) {
- nonzero = 1;
- c = tok_nextc(tok);
- }
- if (c == '.')
- goto fraction;
- else if (c == 'e' || c == 'E')
- goto exponent;
- else if (c == 'j' || c == 'J')
- goto imaginary;
- else if (nonzero) {
- tok->done = E_TOKEN;
- tok_backup(tok, c);
- return ERRORTOKEN;
- }
- }
- }
- else {
- /* Decimal */
- do {
- c = tok_nextc(tok);
- } while (isdigit(c));
- {
- /* Accept floating point numbers. */
- if (c == '.') {
- fraction:
- /* Fraction */
- do {
- c = tok_nextc(tok);
- } while (isdigit(c));
- }
- if (c == 'e' || c == 'E') {
- exponent:
- /* Exponent part */
- c = tok_nextc(tok);
- if (c == '+' || c == '-')
- c = tok_nextc(tok);
- if (!isdigit(c)) {
- tok->done = E_TOKEN;
- tok_backup(tok, c);
- return ERRORTOKEN;
- }
- do {
- c = tok_nextc(tok);
- } while (isdigit(c));
- }
- if (c == 'j' || c == 'J')
- /* Imaginary part */
- imaginary:
- c = tok_nextc(tok);
- }
- }
- tok_backup(tok, c);
- *p_start = tok->start;
- *p_end = tok->cur;
- return NUMBER;
- }
+ tok->start = NULL;
+ /* Skip spaces */
+ do {
+ c = tok_nextc(tok);
+ } while (c == ' ' || c == '\t' || c == '\014');
+
+ /* Set start of current token */
+ tok->start = tok->cur - 1;
+
+ /* Skip comment */
+ if (c == '#')
+ while (c != EOF && c != '\n')
+ c = tok_nextc(tok);
+
+ /* Check for EOF and errors now */
+ if (c == EOF) {
+ return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
+ }
+
+ /* Identifier (most frequent token!) */
+ nonascii = 0;
+ if (is_potential_identifier_start(c)) {
+ /* Process b"", r"" and br"" */
+ if (c == 'b' || c == 'B') {
+ c = tok_nextc(tok);
+ if (c == '"' || c == '\'')
+ goto letter_quote;
+ }
+ if (c == 'r' || c == 'R') {
+ c = tok_nextc(tok);
+ if (c == '"' || c == '\'')
+ goto letter_quote;
+ }
+ while (is_potential_identifier_char(c)) {
+ if (c >= 128)
+ nonascii = 1;
+ c = tok_nextc(tok);
+ }
+ tok_backup(tok, c);
+ if (nonascii &&
+ !verify_identifier(tok)) {
+ tok->done = E_IDENTIFIER;
+ return ERRORTOKEN;
+ }
+ *p_start = tok->start;
+ *p_end = tok->cur;
+ return NAME;
+ }
+
+ /* Newline */
+ if (c == '\n') {
+ tok->atbol = 1;
+ if (blankline || tok->level > 0)
+ goto nextline;
+ *p_start = tok->start;
+ *p_end = tok->cur - 1; /* Leave '\n' out of the string */
+ tok->cont_line = 0;
+ return NEWLINE;
+ }
+
+ /* Period or number starting with period? */
+ if (c == '.') {
+ c = tok_nextc(tok);
+ if (isdigit(c)) {
+ goto fraction;
+ } else if (c == '.') {
+ c = tok_nextc(tok);
+ if (c == '.') {
+ *p_start = tok->start;
+ *p_end = tok->cur;
+ return ELLIPSIS;
+ } else {
+ tok_backup(tok, c);
+ }
+ tok_backup(tok, '.');
+ } else {
+ tok_backup(tok, c);
+ }
+ *p_start = tok->start;
+ *p_end = tok->cur;
+ return DOT;
+ }
+
+ /* Number */
+ if (isdigit(c)) {
+ if (c == '0') {
+ /* Hex, octal or binary -- maybe. */
+ c = tok_nextc(tok);
+ if (c == '.')
+ goto fraction;
+ if (c == 'j' || c == 'J')
+ goto imaginary;
+ if (c == 'x' || c == 'X') {
+
+ /* Hex */
+ c = tok_nextc(tok);
+ if (!isxdigit(c)) {
+ tok->done = E_TOKEN;
+ tok_backup(tok, c);
+ return ERRORTOKEN;
+ }
+ do {
+ c = tok_nextc(tok);
+ } while (isxdigit(c));
+ }
+ else if (c == 'o' || c == 'O') {
+ /* Octal */
+ c = tok_nextc(tok);
+ if (c < '0' || c >= '8') {
+ tok->done = E_TOKEN;
+ tok_backup(tok, c);
+ return ERRORTOKEN;
+ }
+ do {
+ c = tok_nextc(tok);
+ } while ('0' <= c && c < '8');
+ }
+ else if (c == 'b' || c == 'B') {
+ /* Binary */
+ c = tok_nextc(tok);
+ if (c != '0' && c != '1') {
+ tok->done = E_TOKEN;
+ tok_backup(tok, c);
+ return ERRORTOKEN;
+ }
+ do {
+ c = tok_nextc(tok);
+ } while (c == '0' || c == '1');
+ }
+ else {
+ int nonzero = 0;
+ /* maybe old-style octal; c is first char of it */
+ /* in any case, allow '0' as a literal */
+ while (c == '0')
+ c = tok_nextc(tok);
+ while (isdigit(c)) {
+ nonzero = 1;
+ c = tok_nextc(tok);
+ }
+ if (c == '.')
+ goto fraction;
+ else if (c == 'e' || c == 'E')
+ goto exponent;
+ else if (c == 'j' || c == 'J')
+ goto imaginary;
+ else if (nonzero) {
+ tok->done = E_TOKEN;
+ tok_backup(tok, c);
+ return ERRORTOKEN;
+ }
+ }
+ }
+ else {
+ /* Decimal */
+ do {
+ c = tok_nextc(tok);
+ } while (isdigit(c));
+ {
+ /* Accept floating point numbers. */
+ if (c == '.') {
+ fraction:
+ /* Fraction */
+ do {
+ c = tok_nextc(tok);
+ } while (isdigit(c));
+ }
+ if (c == 'e' || c == 'E') {
+ exponent:
+ /* Exponent part */
+ c = tok_nextc(tok);
+ if (c == '+' || c == '-')
+ c = tok_nextc(tok);
+ if (!isdigit(c)) {
+ tok->done = E_TOKEN;
+ tok_backup(tok, c);
+ return ERRORTOKEN;
+ }
+ do {
+ c = tok_nextc(tok);
+ } while (isdigit(c));
+ }
+ if (c == 'j' || c == 'J')
+ /* Imaginary part */
+ imaginary:
+ c = tok_nextc(tok);
+ }
+ }
+ tok_backup(tok, c);
+ *p_start = tok->start;
+ *p_end = tok->cur;
+ return NUMBER;
+ }
letter_quote:
- /* String */
- if (c == '\'' || c == '"') {
- int quote = c;
- int quote_size = 1; /* 1 or 3 */
- int end_quote_size = 0;
-
- /* Find the quote size and start of string */
- c = tok_nextc(tok);
- if (c == quote) {
- c = tok_nextc(tok);
- if (c == quote)
- quote_size = 3;
- else
- end_quote_size = 1; /* empty string found */
- }
- if (c != quote)
- tok_backup(tok, c);
-
- /* Get rest of string */
- while (end_quote_size != quote_size) {
- c = tok_nextc(tok);
- if (c == EOF) {
- if (quote_size == 3)
- tok->done = E_EOFS;
- else
- tok->done = E_EOLS;
- tok->cur = tok->inp;
- return ERRORTOKEN;
- }
- if (quote_size == 1 && c == '\n') {
- tok->done = E_EOLS;
- tok->cur = tok->inp;
- return ERRORTOKEN;
- }
- if (c == quote)
- end_quote_size += 1;
- else {
- end_quote_size = 0;
- if (c == '\\')
- c = tok_nextc(tok); /* skip escaped char */
- }
- }
-
- *p_start = tok->start;
- *p_end = tok->cur;
- return STRING;
- }
-
- /* Line continuation */
- if (c == '\\') {
- c = tok_nextc(tok);
- if (c != '\n') {
- tok->done = E_LINECONT;
- tok->cur = tok->inp;
- return ERRORTOKEN;
- }
- tok->cont_line = 1;
- goto again; /* Read next line */
- }
-
- /* Check for two-character token */
- {
- int c2 = tok_nextc(tok);
- int token = PyToken_TwoChars(c, c2);
- if (token != OP) {
- int c3 = tok_nextc(tok);
- int token3 = PyToken_ThreeChars(c, c2, c3);
- if (token3 != OP) {
- token = token3;
- } else {
- tok_backup(tok, c3);
- }
- *p_start = tok->start;
- *p_end = tok->cur;
- return token;
- }
- tok_backup(tok, c2);
- }
-
- /* Keep track of parentheses nesting level */
- switch (c) {
- case '(':
- case '[':
- case '{':
- tok->level++;
- break;
- case ')':
- case ']':
- case '}':
- tok->level--;
- break;
- }
-
- /* Punctuation character */
- *p_start = tok->start;
- *p_end = tok->cur;
- return PyToken_OneChar(c);
+ /* String */
+ if (c == '\'' || c == '"') {
+ int quote = c;
+ int quote_size = 1; /* 1 or 3 */
+ int end_quote_size = 0;
+
+ /* Find the quote size and start of string */
+ c = tok_nextc(tok);
+ if (c == quote) {
+ c = tok_nextc(tok);
+ if (c == quote)
+ quote_size = 3;
+ else
+ end_quote_size = 1; /* empty string found */
+ }
+ if (c != quote)
+ tok_backup(tok, c);
+
+ /* Get rest of string */
+ while (end_quote_size != quote_size) {
+ c = tok_nextc(tok);
+ if (c == EOF) {
+ if (quote_size == 3)
+ tok->done = E_EOFS;
+ else
+ tok->done = E_EOLS;
+ tok->cur = tok->inp;
+ return ERRORTOKEN;
+ }
+ if (quote_size == 1 && c == '\n') {
+ tok->done = E_EOLS;
+ tok->cur = tok->inp;
+ return ERRORTOKEN;
+ }
+ if (c == quote)
+ end_quote_size += 1;
+ else {
+ end_quote_size = 0;
+ if (c == '\\')
+ c = tok_nextc(tok); /* skip escaped char */
+ }
+ }
+
+ *p_start = tok->start;
+ *p_end = tok->cur;
+ return STRING;
+ }
+
+ /* Line continuation */
+ if (c == '\\') {
+ c = tok_nextc(tok);
+ if (c != '\n') {
+ tok->done = E_LINECONT;
+ tok->cur = tok->inp;
+ return ERRORTOKEN;
+ }
+ tok->cont_line = 1;
+ goto again; /* Read next line */
+ }
+
+ /* Check for two-character token */
+ {
+ int c2 = tok_nextc(tok);
+ int token = PyToken_TwoChars(c, c2);
+ if (token != OP) {
+ int c3 = tok_nextc(tok);
+ int token3 = PyToken_ThreeChars(c, c2, c3);
+ if (token3 != OP) {
+ token = token3;
+ } else {
+ tok_backup(tok, c3);
+ }
+ *p_start = tok->start;
+ *p_end = tok->cur;
+ return token;
+ }
+ tok_backup(tok, c2);
+ }
+
+ /* Keep track of parentheses nesting level */
+ switch (c) {
+ case '(':
+ case '[':
+ case '{':
+ tok->level++;
+ break;
+ case ')':
+ case ']':
+ case '}':
+ tok->level--;
+ break;
+ }
+
+ /* Punctuation character */
+ *p_start = tok->start;
+ *p_end = tok->cur;
+ return PyToken_OneChar(c);
}
int
PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
{
- int result = tok_get(tok, p_start, p_end);
- if (tok->decoding_erred) {
- result = ERRORTOKEN;
- tok->done = E_DECODE;
- }
- return result;
+ int result = tok_get(tok, p_start, p_end);
+ if (tok->decoding_erred) {
+ result = ERRORTOKEN;
+ tok->done = E_DECODE;
+ }
+ return result;
}
/* Get -*- encoding -*- from a Python file.
@@ -1686,34 +1686,34 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
char *
PyTokenizer_FindEncoding(int fd)
{
- struct tok_state *tok;
- FILE *fp;
- char *p_start =NULL , *p_end =NULL , *encoding = NULL;
-
- fd = dup(fd);
- if (fd < 0) {
- return NULL;
- }
- fp = fdopen(fd, "r");
- if (fp == NULL) {
- return NULL;
- }
- tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL);
- if (tok == NULL) {
- fclose(fp);
- return NULL;
- }
- while (tok->lineno < 2 && tok->done == E_OK) {
- PyTokenizer_Get(tok, &p_start, &p_end);
- }
- fclose(fp);
- if (tok->encoding) {
- encoding = (char *)PyMem_MALLOC(strlen(tok->encoding) + 1);
- if (encoding)
- strcpy(encoding, tok->encoding);
- }
- PyTokenizer_Free(tok);
- return encoding;
+ struct tok_state *tok;
+ FILE *fp;
+ char *p_start =NULL , *p_end =NULL , *encoding = NULL;
+
+ fd = dup(fd);
+ if (fd < 0) {
+ return NULL;
+ }
+ fp = fdopen(fd, "r");
+ if (fp == NULL) {
+ return NULL;
+ }
+ tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL);
+ if (tok == NULL) {
+ fclose(fp);
+ return NULL;
+ }
+ while (tok->lineno < 2 && tok->done == E_OK) {
+ PyTokenizer_Get(tok, &p_start, &p_end);
+ }
+ fclose(fp);
+ if (tok->encoding) {
+ encoding = (char *)PyMem_MALLOC(strlen(tok->encoding) + 1);
+ if (encoding)
+ strcpy(encoding, tok->encoding);
+ }
+ PyTokenizer_Free(tok);
+ return encoding;
}
#ifdef Py_DEBUG
@@ -1721,9 +1721,9 @@ PyTokenizer_FindEncoding(int fd)
void
tok_dump(int type, char *start, char *end)
{
- printf("%s", _PyParser_TokenNames[type]);
- if (type == NAME || type == NUMBER || type == STRING || type == OP)
- printf("(%.*s)", (int)(end - start), start);
+ printf("%s", _PyParser_TokenNames[type]);
+ if (type == NAME || type == NUMBER || type == STRING || type == OP)
+ printf("(%.*s)", (int)(end - start), start);
}
#endif
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 1a81e33..c8e19c1 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -8,67 +8,67 @@ extern "C" {
/* Tokenizer interface */
-#include "token.h" /* For token types */
+#include "token.h" /* For token types */
-#define MAXINDENT 100 /* Max indentation level */
+#define MAXINDENT 100 /* Max indentation level */
enum decoding_state {
- STATE_INIT,
- STATE_RAW,
- STATE_NORMAL, /* have a codec associated with input */
+ STATE_INIT,
+ STATE_RAW,
+ STATE_NORMAL, /* have a codec associated with input */
};
/* Tokenizer state */
struct tok_state {
- /* Input state; buf <= cur <= inp <= end */
- /* NB an entire line is held in the buffer */
- char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */
- char *cur; /* Next character in buffer */
- char *inp; /* End of data in buffer */
- char *end; /* End of input buffer if buf != NULL */
- char *start; /* Start of current token if not NULL */
- int done; /* E_OK normally, E_EOF at EOF, otherwise error code */
- /* NB If done != E_OK, cur must be == inp!!! */
- FILE *fp; /* Rest of input; NULL if tokenizing a string */
- int tabsize; /* Tab spacing */
- int indent; /* Current indentation index */
- int indstack[MAXINDENT]; /* Stack of indents */
- int atbol; /* Nonzero if at begin of new line */
- int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
- char *prompt, *nextprompt; /* For interactive prompting */
- int lineno; /* Current line number */
- int level; /* () [] {} Parentheses nesting level */
- /* Used to allow free continuations inside them */
- /* Stuff for checking on different tab sizes */
- const char *filename; /* For error messages */
- int altwarning; /* Issue warning if alternate tabs don't match */
- int alterror; /* Issue error if alternate tabs don't match */
- int alttabsize; /* Alternate tab spacing */
- int altindstack[MAXINDENT]; /* Stack of alternate indents */
- /* Stuff for PEP 0263 */
- enum decoding_state decoding_state;
- int decoding_erred; /* whether erred in decoding */
- int read_coding_spec; /* whether 'coding:...' has been read */
- char *encoding; /* Source encoding. */
- int cont_line; /* whether we are in a continuation line. */
- const char* line_start; /* pointer to start of current line */
+ /* Input state; buf <= cur <= inp <= end */
+ /* NB an entire line is held in the buffer */
+ char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */
+ char *cur; /* Next character in buffer */
+ char *inp; /* End of data in buffer */
+ char *end; /* End of input buffer if buf != NULL */
+ char *start; /* Start of current token if not NULL */
+ int done; /* E_OK normally, E_EOF at EOF, otherwise error code */
+ /* NB If done != E_OK, cur must be == inp!!! */
+ FILE *fp; /* Rest of input; NULL if tokenizing a string */
+ int tabsize; /* Tab spacing */
+ int indent; /* Current indentation index */
+ int indstack[MAXINDENT]; /* Stack of indents */
+ int atbol; /* Nonzero if at begin of new line */
+ int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
+ char *prompt, *nextprompt; /* For interactive prompting */
+ int lineno; /* Current line number */
+ int level; /* () [] {} Parentheses nesting level */
+ /* Used to allow free continuations inside them */
+ /* Stuff for checking on different tab sizes */
+ const char *filename; /* For error messages */
+ int altwarning; /* Issue warning if alternate tabs don't match */
+ int alterror; /* Issue error if alternate tabs don't match */
+ int alttabsize; /* Alternate tab spacing */
+ int altindstack[MAXINDENT]; /* Stack of alternate indents */
+ /* Stuff for PEP 0263 */
+ enum decoding_state decoding_state;
+ int decoding_erred; /* whether erred in decoding */
+ int read_coding_spec; /* whether 'coding:...' has been read */
+ char *encoding; /* Source encoding. */
+ int cont_line; /* whether we are in a continuation line. */
+ const char* line_start; /* pointer to start of current line */
#ifndef PGEN
- PyObject *decoding_readline; /* codecs.open(...).readline */
- PyObject *decoding_buffer;
+ PyObject *decoding_readline; /* codecs.open(...).readline */
+ PyObject *decoding_buffer;
#endif
- const char* enc; /* Encoding for the current str. */
- const char* str;
- const char* input; /* Tokenizer's newline translated copy of the string. */
+ const char* enc; /* Encoding for the current str. */
+ const char* str;
+ const char* input; /* Tokenizer's newline translated copy of the string. */
};
extern struct tok_state *PyTokenizer_FromString(const char *, int);
extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
- char *, char *);
+ char *, char *);
extern void PyTokenizer_Free(struct tok_state *);
extern int PyTokenizer_Get(struct tok_state *, char **, char **);
-extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
- int len, int *offset);
+extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
+ int len, int *offset);
extern char * PyTokenizer_FindEncoding(int);
#ifdef __cplusplus