diff options
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/acceler.c | 168 | ||||
-rw-r--r-- | Parser/bitset.c | 58 | ||||
-rw-r--r-- | Parser/firstsets.c | 180 | ||||
-rw-r--r-- | Parser/grammar.c | 380 | ||||
-rw-r--r-- | Parser/grammar1.c | 70 | ||||
-rw-r--r-- | Parser/intrcheck.c | 70 | ||||
-rw-r--r-- | Parser/listnode.c | 78 | ||||
-rw-r--r-- | Parser/metagrammar.c | 170 | ||||
-rw-r--r-- | Parser/myreadline.c | 240 | ||||
-rw-r--r-- | Parser/node.c | 122 | ||||
-rw-r--r-- | Parser/parser.c | 540 | ||||
-rw-r--r-- | Parser/parsetok.c | 348 | ||||
-rw-r--r-- | Parser/pgen.c | 964 | ||||
-rw-r--r-- | Parser/pgenmain.c | 194 | ||||
-rw-r--r-- | Parser/printgrammar.c | 152 | ||||
-rw-r--r-- | Parser/tokenizer.c | 2676 | ||||
-rw-r--r-- | Parser/tokenizer.h | 88 |
17 files changed, 3249 insertions, 3249 deletions
diff --git a/Parser/acceler.c b/Parser/acceler.c index b41b265..9b14263 100644 --- a/Parser/acceler.c +++ b/Parser/acceler.c @@ -23,103 +23,103 @@ static void fixstate(grammar *, state *); void PyGrammar_AddAccelerators(grammar *g) { - dfa *d; - int i; - d = g->g_dfa; - for (i = g->g_ndfas; --i >= 0; d++) - fixdfa(g, d); - g->g_accel = 1; + dfa *d; + int i; + d = g->g_dfa; + for (i = g->g_ndfas; --i >= 0; d++) + fixdfa(g, d); + g->g_accel = 1; } void PyGrammar_RemoveAccelerators(grammar *g) { - dfa *d; - int i; - g->g_accel = 0; - d = g->g_dfa; - for (i = g->g_ndfas; --i >= 0; d++) { - state *s; - int j; - s = d->d_state; - for (j = 0; j < d->d_nstates; j++, s++) { - if (s->s_accel) - PyObject_FREE(s->s_accel); - s->s_accel = NULL; - } - } + dfa *d; + int i; + g->g_accel = 0; + d = g->g_dfa; + for (i = g->g_ndfas; --i >= 0; d++) { + state *s; + int j; + s = d->d_state; + for (j = 0; j < d->d_nstates; j++, s++) { + if (s->s_accel) + PyObject_FREE(s->s_accel); + s->s_accel = NULL; + } + } } static void fixdfa(grammar *g, dfa *d) { - state *s; - int j; - s = d->d_state; - for (j = 0; j < d->d_nstates; j++, s++) - fixstate(g, s); + state *s; + int j; + s = d->d_state; + for (j = 0; j < d->d_nstates; j++, s++) + fixstate(g, s); } static void fixstate(grammar *g, state *s) { - arc *a; - int k; - int *accel; - int nl = g->g_ll.ll_nlabels; - s->s_accept = 0; - accel = (int *) PyObject_MALLOC(nl * sizeof(int)); - if (accel == NULL) { - fprintf(stderr, "no mem to build parser accelerators\n"); - exit(1); - } - for (k = 0; k < nl; k++) - accel[k] = -1; - a = s->s_arc; - for (k = s->s_narcs; --k >= 0; a++) { - int lbl = a->a_lbl; - label *l = &g->g_ll.ll_label[lbl]; - int type = l->lb_type; - if (a->a_arrow >= (1 << 7)) { - printf("XXX too many states!\n"); - continue; - } - if (ISNONTERMINAL(type)) { - dfa *d1 = PyGrammar_FindDFA(g, type); - int ibit; - if (type - NT_OFFSET >= (1 << 7)) { - printf("XXX too high nonterminal number!\n"); - continue; - } - for (ibit = 0; ibit < g->g_ll.ll_nlabels; ibit++) { - if (testbit(d1->d_first, ibit)) { - if (accel[ibit] != -1) - printf("XXX ambiguity!\n"); - accel[ibit] = a->a_arrow | (1 << 7) | - ((type - NT_OFFSET) << 8); - } - } - } - else if (lbl == EMPTY) - s->s_accept = 1; - else if (lbl >= 0 && lbl < nl) - accel[lbl] = a->a_arrow; - } - while (nl > 0 && accel[nl-1] == -1) - nl--; - for (k = 0; k < nl && accel[k] == -1;) - k++; - if (k < nl) { - int i; - s->s_accel = (int *) PyObject_MALLOC((nl-k) * sizeof(int)); - if (s->s_accel == NULL) { - fprintf(stderr, "no mem to add parser accelerators\n"); - exit(1); - } - s->s_lower = k; - s->s_upper = nl; - for (i = 0; k < nl; i++, k++) - s->s_accel[i] = accel[k]; - } - PyObject_FREE(accel); + arc *a; + int k; + int *accel; + int nl = g->g_ll.ll_nlabels; + s->s_accept = 0; + accel = (int *) PyObject_MALLOC(nl * sizeof(int)); + if (accel == NULL) { + fprintf(stderr, "no mem to build parser accelerators\n"); + exit(1); + } + for (k = 0; k < nl; k++) + accel[k] = -1; + a = s->s_arc; + for (k = s->s_narcs; --k >= 0; a++) { + int lbl = a->a_lbl; + label *l = &g->g_ll.ll_label[lbl]; + int type = l->lb_type; + if (a->a_arrow >= (1 << 7)) { + printf("XXX too many states!\n"); + continue; + } + if (ISNONTERMINAL(type)) { + dfa *d1 = PyGrammar_FindDFA(g, type); + int ibit; + if (type - NT_OFFSET >= (1 << 7)) { + printf("XXX too high nonterminal number!\n"); + continue; + } + for (ibit = 0; ibit < g->g_ll.ll_nlabels; ibit++) { + if (testbit(d1->d_first, ibit)) { + if (accel[ibit] != -1) + printf("XXX ambiguity!\n"); + accel[ibit] = a->a_arrow | (1 << 7) | + ((type - NT_OFFSET) << 8); + } + } + } + else if (lbl == EMPTY) + s->s_accept = 1; + else if (lbl >= 0 && lbl < nl) + accel[lbl] = a->a_arrow; + } + while (nl > 0 && accel[nl-1] == -1) + nl--; + for (k = 0; k < nl && accel[k] == -1;) + k++; + if (k < nl) { + int i; + s->s_accel = (int *) PyObject_MALLOC((nl-k) * sizeof(int)); + if (s->s_accel == NULL) { + fprintf(stderr, "no mem to add parser accelerators\n"); + exit(1); + } + s->s_lower = k; + s->s_upper = nl; + for (i = 0; k < nl; i++, k++) + s->s_accel[i] = accel[k]; + } + PyObject_FREE(accel); } diff --git a/Parser/bitset.c b/Parser/bitset.c index b5543b8..f5bfd41 100644 --- a/Parser/bitset.c +++ b/Parser/bitset.c @@ -7,60 +7,60 @@ bitset newbitset(int nbits) { - int nbytes = NBYTES(nbits); - bitset ss = (char *)PyObject_MALLOC(sizeof(BYTE) * nbytes); - - if (ss == NULL) - Py_FatalError("no mem for bitset"); - - ss += nbytes; - while (--nbytes >= 0) - *--ss = 0; - return ss; + int nbytes = NBYTES(nbits); + bitset ss = (char *)PyObject_MALLOC(sizeof(BYTE) * nbytes); + + if (ss == NULL) + Py_FatalError("no mem for bitset"); + + ss += nbytes; + while (--nbytes >= 0) + *--ss = 0; + return ss; } void delbitset(bitset ss) { - PyObject_FREE(ss); + PyObject_FREE(ss); } int addbit(bitset ss, int ibit) { - int ibyte = BIT2BYTE(ibit); - BYTE mask = BIT2MASK(ibit); - - if (ss[ibyte] & mask) - return 0; /* Bit already set */ - ss[ibyte] |= mask; - return 1; + int ibyte = BIT2BYTE(ibit); + BYTE mask = BIT2MASK(ibit); + + if (ss[ibyte] & mask) + return 0; /* Bit already set */ + ss[ibyte] |= mask; + return 1; } #if 0 /* Now a macro */ int testbit(bitset ss, int ibit) { - return (ss[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0; + return (ss[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0; } #endif int samebitset(bitset ss1, bitset ss2, int nbits) { - int i; - - for (i = NBYTES(nbits); --i >= 0; ) - if (*ss1++ != *ss2++) - return 0; - return 1; + int i; + + for (i = NBYTES(nbits); --i >= 0; ) + if (*ss1++ != *ss2++) + return 0; + return 1; } void mergebitset(bitset ss1, bitset ss2, int nbits) { - int i; - - for (i = NBYTES(nbits); --i >= 0; ) - *ss1++ |= *ss2++; + int i; + + for (i = NBYTES(nbits); --i >= 0; ) + *ss1++ |= *ss2++; } diff --git a/Parser/firstsets.c b/Parser/firstsets.c index 00467b3..ee75d1b 100644 --- a/Parser/firstsets.c +++ b/Parser/firstsets.c @@ -13,101 +13,101 @@ static void calcfirstset(grammar *, dfa *); void addfirstsets(grammar *g) { - int i; - dfa *d; + int i; + dfa *d; - if (Py_DebugFlag) - printf("Adding FIRST sets ...\n"); - for (i = 0; i < g->g_ndfas; i++) { - d = &g->g_dfa[i]; - if (d->d_first == NULL) - calcfirstset(g, d); - } + if (Py_DebugFlag) + printf("Adding FIRST sets ...\n"); + for (i = 0; i < g->g_ndfas; i++) { + d = &g->g_dfa[i]; + if (d->d_first == NULL) + calcfirstset(g, d); + } } static void calcfirstset(grammar *g, dfa *d) { - int i, j; - state *s; - arc *a; - int nsyms; - int *sym; - int nbits; - static bitset dummy; - bitset result; - int type; - dfa *d1; - label *l0; - - if (Py_DebugFlag) - printf("Calculate FIRST set for '%s'\n", d->d_name); - - if (dummy == NULL) - dummy = newbitset(1); - if (d->d_first == dummy) { - fprintf(stderr, "Left-recursion for '%s'\n", d->d_name); - return; - } - if (d->d_first != NULL) { - fprintf(stderr, "Re-calculating FIRST set for '%s' ???\n", - d->d_name); - } - d->d_first = dummy; - - l0 = g->g_ll.ll_label; - nbits = g->g_ll.ll_nlabels; - result = newbitset(nbits); - - sym = (int *)PyObject_MALLOC(sizeof(int)); - if (sym == NULL) - Py_FatalError("no mem for new sym in calcfirstset"); - nsyms = 1; - sym[0] = findlabel(&g->g_ll, d->d_type, (char *)NULL); - - s = &d->d_state[d->d_initial]; - for (i = 0; i < s->s_narcs; i++) { - a = &s->s_arc[i]; - for (j = 0; j < nsyms; j++) { - if (sym[j] == a->a_lbl) - break; - } - if (j >= nsyms) { /* New label */ - sym = (int *)PyObject_REALLOC(sym, - sizeof(int) * (nsyms + 1)); - if (sym == NULL) - Py_FatalError( - "no mem to resize sym in calcfirstset"); - sym[nsyms++] = a->a_lbl; - type = l0[a->a_lbl].lb_type; - if (ISNONTERMINAL(type)) { - d1 = PyGrammar_FindDFA(g, type); - if (d1->d_first == dummy) { - fprintf(stderr, - "Left-recursion below '%s'\n", - d->d_name); - } - else { - if (d1->d_first == NULL) - calcfirstset(g, d1); - mergebitset(result, - d1->d_first, nbits); - } - } - else if (ISTERMINAL(type)) { - addbit(result, a->a_lbl); - } - } - } - d->d_first = result; - if (Py_DebugFlag) { - printf("FIRST set for '%s': {", d->d_name); - for (i = 0; i < nbits; i++) { - if (testbit(result, i)) - printf(" %s", PyGrammar_LabelRepr(&l0[i])); - } - printf(" }\n"); - } + int i, j; + state *s; + arc *a; + int nsyms; + int *sym; + int nbits; + static bitset dummy; + bitset result; + int type; + dfa *d1; + label *l0; - PyObject_FREE(sym); + if (Py_DebugFlag) + printf("Calculate FIRST set for '%s'\n", d->d_name); + + if (dummy == NULL) + dummy = newbitset(1); + if (d->d_first == dummy) { + fprintf(stderr, "Left-recursion for '%s'\n", d->d_name); + return; + } + if (d->d_first != NULL) { + fprintf(stderr, "Re-calculating FIRST set for '%s' ???\n", + d->d_name); + } + d->d_first = dummy; + + l0 = g->g_ll.ll_label; + nbits = g->g_ll.ll_nlabels; + result = newbitset(nbits); + + sym = (int *)PyObject_MALLOC(sizeof(int)); + if (sym == NULL) + Py_FatalError("no mem for new sym in calcfirstset"); + nsyms = 1; + sym[0] = findlabel(&g->g_ll, d->d_type, (char *)NULL); + + s = &d->d_state[d->d_initial]; + for (i = 0; i < s->s_narcs; i++) { + a = &s->s_arc[i]; + for (j = 0; j < nsyms; j++) { + if (sym[j] == a->a_lbl) + break; + } + if (j >= nsyms) { /* New label */ + sym = (int *)PyObject_REALLOC(sym, + sizeof(int) * (nsyms + 1)); + if (sym == NULL) + Py_FatalError( + "no mem to resize sym in calcfirstset"); + sym[nsyms++] = a->a_lbl; + type = l0[a->a_lbl].lb_type; + if (ISNONTERMINAL(type)) { + d1 = PyGrammar_FindDFA(g, type); + if (d1->d_first == dummy) { + fprintf(stderr, + "Left-recursion below '%s'\n", + d->d_name); + } + else { + if (d1->d_first == NULL) + calcfirstset(g, d1); + mergebitset(result, + d1->d_first, nbits); + } + } + else if (ISTERMINAL(type)) { + addbit(result, a->a_lbl); + } + } + } + d->d_first = result; + if (Py_DebugFlag) { + printf("FIRST set for '%s': {", d->d_name); + for (i = 0; i < nbits; i++) { + if (testbit(result, i)) + printf(" %s", PyGrammar_LabelRepr(&l0[i])); + } + printf(" }\n"); + } + + PyObject_FREE(sym); } diff --git a/Parser/grammar.c b/Parser/grammar.c index fa27300..f2a25ca 100644 --- a/Parser/grammar.c +++ b/Parser/grammar.c @@ -14,98 +14,98 @@ extern int Py_DebugFlag; grammar * newgrammar(int start) { - grammar *g; - - g = (grammar *)PyObject_MALLOC(sizeof(grammar)); - if (g == NULL) - Py_FatalError("no mem for new grammar"); - g->g_ndfas = 0; - g->g_dfa = NULL; - g->g_start = start; - g->g_ll.ll_nlabels = 0; - g->g_ll.ll_label = NULL; - g->g_accel = 0; - return g; + grammar *g; + + g = (grammar *)PyObject_MALLOC(sizeof(grammar)); + if (g == NULL) + Py_FatalError("no mem for new grammar"); + g->g_ndfas = 0; + g->g_dfa = NULL; + g->g_start = start; + g->g_ll.ll_nlabels = 0; + g->g_ll.ll_label = NULL; + g->g_accel = 0; + return g; } dfa * adddfa(grammar *g, int type, char *name) { - dfa *d; - - g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa, - sizeof(dfa) * (g->g_ndfas + 1)); - if (g->g_dfa == NULL) - Py_FatalError("no mem to resize dfa in adddfa"); - d = &g->g_dfa[g->g_ndfas++]; - d->d_type = type; - d->d_name = strdup(name); - d->d_nstates = 0; - d->d_state = NULL; - d->d_initial = -1; - d->d_first = NULL; - return d; /* Only use while fresh! */ + dfa *d; + + g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa, + sizeof(dfa) * (g->g_ndfas + 1)); + if (g->g_dfa == NULL) + Py_FatalError("no mem to resize dfa in adddfa"); + d = &g->g_dfa[g->g_ndfas++]; + d->d_type = type; + d->d_name = strdup(name); + d->d_nstates = 0; + d->d_state = NULL; + d->d_initial = -1; + d->d_first = NULL; + return d; /* Only use while fresh! */ } int addstate(dfa *d) { - state *s; - - d->d_state = (state *)PyObject_REALLOC(d->d_state, - sizeof(state) * (d->d_nstates + 1)); - if (d->d_state == NULL) - Py_FatalError("no mem to resize state in addstate"); - s = &d->d_state[d->d_nstates++]; - s->s_narcs = 0; - s->s_arc = NULL; - s->s_lower = 0; - s->s_upper = 0; - s->s_accel = NULL; - s->s_accept = 0; - return s - d->d_state; + state *s; + + d->d_state = (state *)PyObject_REALLOC(d->d_state, + sizeof(state) * (d->d_nstates + 1)); + if (d->d_state == NULL) + Py_FatalError("no mem to resize state in addstate"); + s = &d->d_state[d->d_nstates++]; + s->s_narcs = 0; + s->s_arc = NULL; + s->s_lower = 0; + s->s_upper = 0; + s->s_accel = NULL; + s->s_accept = 0; + return s - d->d_state; } void addarc(dfa *d, int from, int to, int lbl) { - state *s; - arc *a; - - assert(0 <= from && from < d->d_nstates); - assert(0 <= to && to < d->d_nstates); - - s = &d->d_state[from]; - s->s_arc = (arc *)PyObject_REALLOC(s->s_arc, sizeof(arc) * (s->s_narcs + 1)); - if (s->s_arc == NULL) - Py_FatalError("no mem to resize arc list in addarc"); - a = &s->s_arc[s->s_narcs++]; - a->a_lbl = lbl; - a->a_arrow = to; + state *s; + arc *a; + + assert(0 <= from && from < d->d_nstates); + assert(0 <= to && to < d->d_nstates); + + s = &d->d_state[from]; + s->s_arc = (arc *)PyObject_REALLOC(s->s_arc, sizeof(arc) * (s->s_narcs + 1)); + if (s->s_arc == NULL) + Py_FatalError("no mem to resize arc list in addarc"); + a = &s->s_arc[s->s_narcs++]; + a->a_lbl = lbl; + a->a_arrow = to; } int addlabel(labellist *ll, int type, char *str) { - int i; - label *lb; - - for (i = 0; i < ll->ll_nlabels; i++) { - if (ll->ll_label[i].lb_type == type && - strcmp(ll->ll_label[i].lb_str, str) == 0) - return i; - } - ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label, - sizeof(label) * (ll->ll_nlabels + 1)); - if (ll->ll_label == NULL) - Py_FatalError("no mem to resize labellist in addlabel"); - lb = &ll->ll_label[ll->ll_nlabels++]; - lb->lb_type = type; - lb->lb_str = strdup(str); - if (Py_DebugFlag) - printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels, - PyGrammar_LabelRepr(lb)); - return lb - ll->ll_label; + int i; + label *lb; + + for (i = 0; i < ll->ll_nlabels; i++) { + if (ll->ll_label[i].lb_type == type && + strcmp(ll->ll_label[i].lb_str, str) == 0) + return i; + } + ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label, + sizeof(label) * (ll->ll_nlabels + 1)); + if (ll->ll_label == NULL) + Py_FatalError("no mem to resize labellist in addlabel"); + lb = &ll->ll_label[ll->ll_nlabels++]; + lb->lb_type = type; + lb->lb_str = strdup(str); + if (Py_DebugFlag) + printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels, + PyGrammar_LabelRepr(lb)); + return lb - ll->ll_label; } /* Same, but rather dies than adds */ @@ -113,16 +113,16 @@ addlabel(labellist *ll, int type, char *str) int findlabel(labellist *ll, int type, char *str) { - int i; - - for (i = 0; i < ll->ll_nlabels; i++) { - if (ll->ll_label[i].lb_type == type /*&& - strcmp(ll->ll_label[i].lb_str, str) == 0*/) - return i; - } - fprintf(stderr, "Label %d/'%s' not found\n", type, str); - Py_FatalError("grammar.c:findlabel()"); - return 0; /* Make gcc -Wall happy */ + int i; + + for (i = 0; i < ll->ll_nlabels; i++) { + if (ll->ll_label[i].lb_type == type /*&& + strcmp(ll->ll_label[i].lb_str, str) == 0*/) + return i; + } + fprintf(stderr, "Label %d/'%s' not found\n", type, str); + Py_FatalError("grammar.c:findlabel()"); + return 0; /* Make gcc -Wall happy */ } /* Forward */ @@ -131,120 +131,120 @@ static void translabel(grammar *, label *); void translatelabels(grammar *g) { - int i; + int i; #ifdef Py_DEBUG - printf("Translating labels ...\n"); + printf("Translating labels ...\n"); #endif - /* Don't translate EMPTY */ - for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++) - translabel(g, &g->g_ll.ll_label[i]); + /* Don't translate EMPTY */ + for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++) + translabel(g, &g->g_ll.ll_label[i]); } static void translabel(grammar *g, label *lb) { - int i; - - if (Py_DebugFlag) - printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb)); - - if (lb->lb_type == NAME) { - for (i = 0; i < g->g_ndfas; i++) { - if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) { - if (Py_DebugFlag) - printf( - "Label %s is non-terminal %d.\n", - lb->lb_str, - g->g_dfa[i].d_type); - lb->lb_type = g->g_dfa[i].d_type; - free(lb->lb_str); - lb->lb_str = NULL; - return; - } - } - for (i = 0; i < (int)N_TOKENS; i++) { - if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) { - if (Py_DebugFlag) - printf("Label %s is terminal %d.\n", - lb->lb_str, i); - lb->lb_type = i; - free(lb->lb_str); - lb->lb_str = NULL; - return; - } - } - printf("Can't translate NAME label '%s'\n", lb->lb_str); - return; - } - - if (lb->lb_type == STRING) { - if (isalpha(Py_CHARMASK(lb->lb_str[1])) || - lb->lb_str[1] == '_') { - char *p; - char *src; - char *dest; - size_t name_len; - if (Py_DebugFlag) - printf("Label %s is a keyword\n", lb->lb_str); - lb->lb_type = NAME; - src = lb->lb_str + 1; - p = strchr(src, '\''); - if (p) - name_len = p - src; - else - name_len = strlen(src); - dest = (char *)malloc(name_len + 1); - if (!dest) { - printf("Can't alloc dest '%s'\n", src); - return; - } - strncpy(dest, src, name_len); - dest[name_len] = '\0'; - free(lb->lb_str); - lb->lb_str = dest; - } - else if (lb->lb_str[2] == lb->lb_str[0]) { - int type = (int) PyToken_OneChar(lb->lb_str[1]); - if (type != OP) { - lb->lb_type = type; - free(lb->lb_str); - lb->lb_str = NULL; - } - else - printf("Unknown OP label %s\n", - lb->lb_str); - } - else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) { - int type = (int) PyToken_TwoChars(lb->lb_str[1], - lb->lb_str[2]); - if (type != OP) { - lb->lb_type = type; - free(lb->lb_str); - lb->lb_str = NULL; - } - else - printf("Unknown OP label %s\n", - lb->lb_str); - } - else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) { - int type = (int) PyToken_ThreeChars(lb->lb_str[1], - lb->lb_str[2], - lb->lb_str[3]); - if (type != OP) { - lb->lb_type = type; - free(lb->lb_str); - lb->lb_str = NULL; - } - else - printf("Unknown OP label %s\n", - lb->lb_str); - } - else - printf("Can't translate STRING label %s\n", - lb->lb_str); - } - else - printf("Can't translate label '%s'\n", - PyGrammar_LabelRepr(lb)); + int i; + + if (Py_DebugFlag) + printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb)); + + if (lb->lb_type == NAME) { + for (i = 0; i < g->g_ndfas; i++) { + if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) { + if (Py_DebugFlag) + printf( + "Label %s is non-terminal %d.\n", + lb->lb_str, + g->g_dfa[i].d_type); + lb->lb_type = g->g_dfa[i].d_type; + free(lb->lb_str); + lb->lb_str = NULL; + return; + } + } + for (i = 0; i < (int)N_TOKENS; i++) { + if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) { + if (Py_DebugFlag) + printf("Label %s is terminal %d.\n", + lb->lb_str, i); + lb->lb_type = i; + free(lb->lb_str); + lb->lb_str = NULL; + return; + } + } + printf("Can't translate NAME label '%s'\n", lb->lb_str); + return; + } + + if (lb->lb_type == STRING) { + if (isalpha(Py_CHARMASK(lb->lb_str[1])) || + lb->lb_str[1] == '_') { + char *p; + char *src; + char *dest; + size_t name_len; + if (Py_DebugFlag) + printf("Label %s is a keyword\n", lb->lb_str); + lb->lb_type = NAME; + src = lb->lb_str + 1; + p = strchr(src, '\''); + if (p) + name_len = p - src; + else + name_len = strlen(src); + dest = (char *)malloc(name_len + 1); + if (!dest) { + printf("Can't alloc dest '%s'\n", src); + return; + } + strncpy(dest, src, name_len); + dest[name_len] = '\0'; + free(lb->lb_str); + lb->lb_str = dest; + } + else if (lb->lb_str[2] == lb->lb_str[0]) { + int type = (int) PyToken_OneChar(lb->lb_str[1]); + if (type != OP) { + lb->lb_type = type; + free(lb->lb_str); + lb->lb_str = NULL; + } + else + printf("Unknown OP label %s\n", + lb->lb_str); + } + else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) { + int type = (int) PyToken_TwoChars(lb->lb_str[1], + lb->lb_str[2]); + if (type != OP) { + lb->lb_type = type; + free(lb->lb_str); + lb->lb_str = NULL; + } + else + printf("Unknown OP label %s\n", + lb->lb_str); + } + else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) { + int type = (int) PyToken_ThreeChars(lb->lb_str[1], + lb->lb_str[2], + lb->lb_str[3]); + if (type != OP) { + lb->lb_type = type; + free(lb->lb_str); + lb->lb_str = NULL; + } + else + printf("Unknown OP label %s\n", + lb->lb_str); + } + else + printf("Can't translate STRING label %s\n", + lb->lb_str); + } + else + printf("Can't translate label '%s'\n", + PyGrammar_LabelRepr(lb)); } diff --git a/Parser/grammar1.c b/Parser/grammar1.c index b76719a..1f7d264 100644 --- a/Parser/grammar1.c +++ b/Parser/grammar1.c @@ -11,47 +11,47 @@ dfa * PyGrammar_FindDFA(grammar *g, register int type) { - register dfa *d; + register dfa *d; #if 1 - /* Massive speed-up */ - d = &g->g_dfa[type - NT_OFFSET]; - assert(d->d_type == type); - return d; + /* Massive speed-up */ + d = &g->g_dfa[type - NT_OFFSET]; + assert(d->d_type == type); + return d; #else - /* Old, slow version */ - register int i; - - for (i = g->g_ndfas, d = g->g_dfa; --i >= 0; d++) { - if (d->d_type == type) - return d; - } - assert(0); - /* NOTREACHED */ + /* Old, slow version */ + register int i; + + for (i = g->g_ndfas, d = g->g_dfa; --i >= 0; d++) { + if (d->d_type == type) + return d; + } + assert(0); + /* NOTREACHED */ #endif } char * PyGrammar_LabelRepr(label *lb) { - static char buf[100]; - - if (lb->lb_type == ENDMARKER) - return "EMPTY"; - else if (ISNONTERMINAL(lb->lb_type)) { - if (lb->lb_str == NULL) { - PyOS_snprintf(buf, sizeof(buf), "NT%d", lb->lb_type); - return buf; - } - else - return lb->lb_str; - } - else { - if (lb->lb_str == NULL) - return _PyParser_TokenNames[lb->lb_type]; - else { - PyOS_snprintf(buf, sizeof(buf), "%.32s(%.32s)", - _PyParser_TokenNames[lb->lb_type], lb->lb_str); - return buf; - } - } + static char buf[100]; + + if (lb->lb_type == ENDMARKER) + return "EMPTY"; + else if (ISNONTERMINAL(lb->lb_type)) { + if (lb->lb_str == NULL) { + PyOS_snprintf(buf, sizeof(buf), "NT%d", lb->lb_type); + return buf; + } + else + return lb->lb_str; + } + else { + if (lb->lb_str == NULL) + return _PyParser_TokenNames[lb->lb_type]; + else { + PyOS_snprintf(buf, sizeof(buf), "%.32s(%.32s)", + _PyParser_TokenNames[lb->lb_type], lb->lb_str); + return buf; + } + } } diff --git a/Parser/intrcheck.c b/Parser/intrcheck.c index 06b5840..4439864 100644 --- a/Parser/intrcheck.c +++ b/Parser/intrcheck.c @@ -21,7 +21,7 @@ PyOS_FiniInterrupts(void) int PyOS_InterruptOccurred(void) { - _wyield(); + _wyield(); } #define OK @@ -47,7 +47,7 @@ PyOS_InterruptOccurred(void) void PyOS_InitInterrupts(void) { - _go32_want_ctrl_break(1 /* TRUE */); + _go32_want_ctrl_break(1 /* TRUE */); } void @@ -58,7 +58,7 @@ PyOS_FiniInterrupts(void) int PyOS_InterruptOccurred(void) { - return _go32_was_ctrl_break_hit(); + return _go32_was_ctrl_break_hit(); } #else /* !__GNUC__ */ @@ -78,12 +78,12 @@ PyOS_FiniInterrupts(void) int PyOS_InterruptOccurred(void) { - int interrupted = 0; - while (kbhit()) { - if (getch() == '\003') - interrupted = 1; - } - return interrupted; + int interrupted = 0; + while (kbhit()) { + if (getch() == '\003') + interrupted = 1; + } + return interrupted; } #endif /* __GNUC__ */ @@ -106,7 +106,7 @@ static int interrupted; void PyErr_SetInterrupt(void) { - interrupted = 1; + interrupted = 1; } extern int PyErr_CheckSignals(void); @@ -114,28 +114,28 @@ extern int PyErr_CheckSignals(void); static int checksignals_witharg(void * arg) { - return PyErr_CheckSignals(); + return PyErr_CheckSignals(); } static void intcatcher(int sig) { - extern void Py_Exit(int); - static char message[] = + extern void Py_Exit(int); + static char message[] = "python: to interrupt a truly hanging Python program, interrupt once more.\n"; - switch (interrupted++) { - case 0: - break; - case 1: - write(2, message, strlen(message)); - break; - case 2: - interrupted = 0; - Py_Exit(1); - break; - } - PyOS_setsig(SIGINT, intcatcher); - Py_AddPendingCall(checksignals_witharg, NULL); + switch (interrupted++) { + case 0: + break; + case 1: + write(2, message, strlen(message)); + break; + case 2: + interrupted = 0; + Py_Exit(1); + break; + } + PyOS_setsig(SIGINT, intcatcher); + Py_AddPendingCall(checksignals_witharg, NULL); } static void (*old_siginthandler)(int) = SIG_DFL; @@ -143,23 +143,23 @@ static void (*old_siginthandler)(int) = SIG_DFL; void PyOS_InitInterrupts(void) { - if ((old_siginthandler = PyOS_setsig(SIGINT, SIG_IGN)) != SIG_IGN) - PyOS_setsig(SIGINT, intcatcher); + if ((old_siginthandler = PyOS_setsig(SIGINT, SIG_IGN)) != SIG_IGN) + PyOS_setsig(SIGINT, intcatcher); } void PyOS_FiniInterrupts(void) { - PyOS_setsig(SIGINT, old_siginthandler); + PyOS_setsig(SIGINT, old_siginthandler); } int PyOS_InterruptOccurred(void) { - if (!interrupted) - return 0; - interrupted = 0; - return 1; + if (!interrupted) + return 0; + interrupted = 0; + return 1; } #endif /* !OK */ @@ -168,7 +168,7 @@ void PyOS_AfterFork(void) { #ifdef WITH_THREAD - PyEval_ReInitThreads(); - PyThread_ReInitTLS(); + PyEval_ReInitThreads(); + PyThread_ReInitTLS(); #endif } diff --git a/Parser/listnode.c b/Parser/listnode.c index c0b3b66..b5f8ad2 100644 --- a/Parser/listnode.c +++ b/Parser/listnode.c @@ -12,7 +12,7 @@ static void listnode(FILE *, node *); void PyNode_ListTree(node *n) { - listnode(stdout, n); + listnode(stdout, n); } static int level, atbol; @@ -20,47 +20,47 @@ static int level, atbol; static void listnode(FILE *fp, node *n) { - level = 0; - atbol = 1; - list1node(fp, n); + level = 0; + atbol = 1; + list1node(fp, n); } static void list1node(FILE *fp, node *n) { - if (n == 0) - return; - if (ISNONTERMINAL(TYPE(n))) { - int i; - for (i = 0; i < NCH(n); i++) - list1node(fp, CHILD(n, i)); - } - else if (ISTERMINAL(TYPE(n))) { - switch (TYPE(n)) { - case INDENT: - ++level; - break; - case DEDENT: - --level; - break; - default: - if (atbol) { - int i; - for (i = 0; i < level; ++i) - fprintf(fp, "\t"); - atbol = 0; - } - if (TYPE(n) == NEWLINE) { - if (STR(n) != NULL) - fprintf(fp, "%s", STR(n)); - fprintf(fp, "\n"); - atbol = 1; - } - else - fprintf(fp, "%s ", STR(n)); - break; - } - } - else - fprintf(fp, "? "); + if (n == 0) + return; + if (ISNONTERMINAL(TYPE(n))) { + int i; + for (i = 0; i < NCH(n); i++) + list1node(fp, CHILD(n, i)); + } + else if (ISTERMINAL(TYPE(n))) { + switch (TYPE(n)) { + case INDENT: + ++level; + break; + case DEDENT: + --level; + break; + default: + if (atbol) { + int i; + for (i = 0; i < level; ++i) + fprintf(fp, "\t"); + atbol = 0; + } + if (TYPE(n) == NEWLINE) { + if (STR(n) != NULL) + fprintf(fp, "%s", STR(n)); + fprintf(fp, "\n"); + atbol = 1; + } + else + fprintf(fp, "%s ", STR(n)); + break; + } + } + else + fprintf(fp, "? "); } diff --git a/Parser/metagrammar.c b/Parser/metagrammar.c index b61bc6d..53810b8 100644 --- a/Parser/metagrammar.c +++ b/Parser/metagrammar.c @@ -4,152 +4,152 @@ #include "grammar.h" #include "pgen.h" static arc arcs_0_0[3] = { - {2, 0}, - {3, 0}, - {4, 1}, + {2, 0}, + {3, 0}, + {4, 1}, }; static arc arcs_0_1[1] = { - {0, 1}, + {0, 1}, }; static state states_0[2] = { - {3, arcs_0_0}, - {1, arcs_0_1}, + {3, arcs_0_0}, + {1, arcs_0_1}, }; static arc arcs_1_0[1] = { - {5, 1}, + {5, 1}, }; static arc arcs_1_1[1] = { - {6, 2}, + {6, 2}, }; static arc arcs_1_2[1] = { - {7, 3}, + {7, 3}, }; static arc arcs_1_3[1] = { - {3, 4}, + {3, 4}, }; static arc arcs_1_4[1] = { - {0, 4}, + {0, 4}, }; static state states_1[5] = { - {1, arcs_1_0}, - {1, arcs_1_1}, - {1, arcs_1_2}, - {1, arcs_1_3}, - {1, arcs_1_4}, + {1, arcs_1_0}, + {1, arcs_1_1}, + {1, arcs_1_2}, + {1, arcs_1_3}, + {1, arcs_1_4}, }; static arc arcs_2_0[1] = { - {8, 1}, + {8, 1}, }; static arc arcs_2_1[2] = { - {9, 0}, - {0, 1}, + {9, 0}, + {0, 1}, }; static state states_2[2] = { - {1, arcs_2_0}, - {2, arcs_2_1}, + {1, arcs_2_0}, + {2, arcs_2_1}, }; static arc arcs_3_0[1] = { - {10, 1}, + {10, 1}, }; static arc arcs_3_1[2] = { - {10, 1}, - {0, 1}, + {10, 1}, + {0, 1}, }; static state states_3[2] = { - {1, arcs_3_0}, - {2, arcs_3_1}, + {1, arcs_3_0}, + {2, arcs_3_1}, }; static arc arcs_4_0[2] = { - {11, 1}, - {13, 2}, + {11, 1}, + {13, 2}, }; static arc arcs_4_1[1] = { - {7, 3}, + {7, 3}, }; static arc arcs_4_2[3] = { - {14, 4}, - {15, 4}, - {0, 2}, + {14, 4}, + {15, 4}, + {0, 2}, }; static arc arcs_4_3[1] = { - {12, 4}, + {12, 4}, }; static arc arcs_4_4[1] = { - {0, 4}, + {0, 4}, }; static state states_4[5] = { - {2, arcs_4_0}, - {1, arcs_4_1}, - {3, arcs_4_2}, - {1, arcs_4_3}, - {1, arcs_4_4}, + {2, arcs_4_0}, + {1, arcs_4_1}, + {3, arcs_4_2}, + {1, arcs_4_3}, + {1, arcs_4_4}, }; static arc arcs_5_0[3] = { - {5, 1}, - {16, 1}, - {17, 2}, + {5, 1}, + {16, 1}, + {17, 2}, }; static arc arcs_5_1[1] = { - {0, 1}, + {0, 1}, }; static arc arcs_5_2[1] = { - {7, 3}, + {7, 3}, }; static arc arcs_5_3[1] = { - {18, 1}, + {18, 1}, }; static state states_5[4] = { - {3, arcs_5_0}, - {1, arcs_5_1}, - {1, arcs_5_2}, - {1, arcs_5_3}, + {3, arcs_5_0}, + {1, arcs_5_1}, + {1, arcs_5_2}, + {1, arcs_5_3}, }; static dfa dfas[6] = { - {256, "MSTART", 0, 2, states_0, - "\070\000\000"}, - {257, "RULE", 0, 5, states_1, - "\040\000\000"}, - {258, "RHS", 0, 2, states_2, - "\040\010\003"}, - {259, "ALT", 0, 2, states_3, - "\040\010\003"}, - {260, "ITEM", 0, 5, states_4, - "\040\010\003"}, - {261, "ATOM", 0, 4, states_5, - "\040\000\003"}, + {256, "MSTART", 0, 2, states_0, + "\070\000\000"}, + {257, "RULE", 0, 5, states_1, + "\040\000\000"}, + {258, "RHS", 0, 2, states_2, + "\040\010\003"}, + {259, "ALT", 0, 2, states_3, + "\040\010\003"}, + {260, "ITEM", 0, 5, states_4, + "\040\010\003"}, + {261, "ATOM", 0, 4, states_5, + "\040\000\003"}, }; static label labels[19] = { - {0, "EMPTY"}, - {256, 0}, - {257, 0}, - {4, 0}, - {0, 0}, - {1, 0}, - {11, 0}, - {258, 0}, - {259, 0}, - {18, 0}, - {260, 0}, - {9, 0}, - {10, 0}, - {261, 0}, - {16, 0}, - {14, 0}, - {3, 0}, - {7, 0}, - {8, 0}, + {0, "EMPTY"}, + {256, 0}, + {257, 0}, + {4, 0}, + {0, 0}, + {1, 0}, + {11, 0}, + {258, 0}, + {259, 0}, + {18, 0}, + {260, 0}, + {9, 0}, + {10, 0}, + {261, 0}, + {16, 0}, + {14, 0}, + {3, 0}, + {7, 0}, + {8, 0}, }; static grammar _PyParser_Grammar = { - 6, - dfas, - {19, labels}, - 256 + 6, + dfas, + {19, labels}, + 256 }; grammar * meta_grammar(void) { - return &_PyParser_Grammar; + return &_PyParser_Grammar; } grammar * diff --git a/Parser/myreadline.c b/Parser/myreadline.c index a144850..a62e208 100644 --- a/Parser/myreadline.c +++ b/Parser/myreadline.c @@ -35,67 +35,67 @@ int (*PyOS_InputHook)(void) = NULL; static int my_fgets(char *buf, int len, FILE *fp) { - char *p; - for (;;) { - if (PyOS_InputHook != NULL) - (void)(PyOS_InputHook)(); - errno = 0; - p = fgets(buf, len, fp); - if (p != NULL) - return 0; /* No error */ + char *p; + for (;;) { + if (PyOS_InputHook != NULL) + (void)(PyOS_InputHook)(); + errno = 0; + p = fgets(buf, len, fp); + if (p != NULL) + return 0; /* No error */ #ifdef MS_WINDOWS - /* In the case of a Ctrl+C or some other external event - interrupting the operation: - Win2k/NT: ERROR_OPERATION_ABORTED is the most recent Win32 - error code (and feof() returns TRUE). - Win9x: Ctrl+C seems to have no effect on fgets() returning - early - the signal handler is called, but the fgets() - only returns "normally" (ie, when Enter hit or feof()) - */ - if (GetLastError()==ERROR_OPERATION_ABORTED) { - /* Signals come asynchronously, so we sleep a brief - moment before checking if the handler has been - triggered (we cant just return 1 before the - signal handler has been called, as the later - signal may be treated as a separate interrupt). - */ - Sleep(1); - if (PyOS_InterruptOccurred()) { - return 1; /* Interrupt */ - } - /* Either the sleep wasn't long enough (need a - short loop retrying?) or not interrupted at all - (in which case we should revisit the whole thing!) - Logging some warning would be nice. assert is not - viable as under the debugger, the various dialogs - mean the condition is not true. - */ - } + /* In the case of a Ctrl+C or some other external event + interrupting the operation: + Win2k/NT: ERROR_OPERATION_ABORTED is the most recent Win32 + error code (and feof() returns TRUE). + Win9x: Ctrl+C seems to have no effect on fgets() returning + early - the signal handler is called, but the fgets() + only returns "normally" (ie, when Enter hit or feof()) + */ + if (GetLastError()==ERROR_OPERATION_ABORTED) { + /* Signals come asynchronously, so we sleep a brief + moment before checking if the handler has been + triggered (we cant just return 1 before the + signal handler has been called, as the later + signal may be treated as a separate interrupt). + */ + Sleep(1); + if (PyOS_InterruptOccurred()) { + return 1; /* Interrupt */ + } + /* Either the sleep wasn't long enough (need a + short loop retrying?) or not interrupted at all + (in which case we should revisit the whole thing!) + Logging some warning would be nice. assert is not + viable as under the debugger, the various dialogs + mean the condition is not true. + */ + } #endif /* MS_WINDOWS */ - if (feof(fp)) { - return -1; /* EOF */ - } + if (feof(fp)) { + return -1; /* EOF */ + } #ifdef EINTR - if (errno == EINTR) { - int s; + if (errno == EINTR) { + int s; #ifdef WITH_THREAD - PyEval_RestoreThread(_PyOS_ReadlineTState); + PyEval_RestoreThread(_PyOS_ReadlineTState); #endif - s = PyErr_CheckSignals(); + s = PyErr_CheckSignals(); #ifdef WITH_THREAD - PyEval_SaveThread(); + PyEval_SaveThread(); #endif - if (s < 0) { - return 1; - } - } + if (s < 0) { + return 1; + } + } #endif - if (PyOS_InterruptOccurred()) { - return 1; /* Interrupt */ - } - return -2; /* Error */ - } - /* NOTREACHED */ + if (PyOS_InterruptOccurred()) { + return 1; /* Interrupt */ + } + return -2; /* Error */ + } + /* NOTREACHED */ } @@ -104,41 +104,41 @@ my_fgets(char *buf, int len, FILE *fp) char * PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, char *prompt) { - size_t n; - char *p; - n = 100; - if ((p = (char *)PyMem_MALLOC(n)) == NULL) - return NULL; - fflush(sys_stdout); - if (prompt) - fprintf(stderr, "%s", prompt); - fflush(stderr); - switch (my_fgets(p, (int)n, sys_stdin)) { - case 0: /* Normal case */ - break; - case 1: /* Interrupt */ - PyMem_FREE(p); - return NULL; - case -1: /* EOF */ - case -2: /* Error */ - default: /* Shouldn't happen */ - *p = '\0'; - break; - } - n = strlen(p); - while (n > 0 && p[n-1] != '\n') { - size_t incr = n+2; - p = (char *)PyMem_REALLOC(p, n + incr); - if (p == NULL) - return NULL; - if (incr > INT_MAX) { - PyErr_SetString(PyExc_OverflowError, "input line too long"); - } - if (my_fgets(p+n, (int)incr, sys_stdin) != 0) - break; - n += strlen(p+n); - } - return (char *)PyMem_REALLOC(p, n+1); + size_t n; + char *p; + n = 100; + if ((p = (char *)PyMem_MALLOC(n)) == NULL) + return NULL; + fflush(sys_stdout); + if (prompt) + fprintf(stderr, "%s", prompt); + fflush(stderr); + switch (my_fgets(p, (int)n, sys_stdin)) { + case 0: /* Normal case */ + break; + case 1: /* Interrupt */ + PyMem_FREE(p); + return NULL; + case -1: /* EOF */ + case -2: /* Error */ + default: /* Shouldn't happen */ + *p = '\0'; + break; + } + n = strlen(p); + while (n > 0 && p[n-1] != '\n') { + size_t incr = n+2; + p = (char *)PyMem_REALLOC(p, n + incr); + if (p == NULL) + return NULL; + if (incr > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, "input line too long"); + } + if (my_fgets(p+n, (int)incr, sys_stdin) != 0) + break; + n += strlen(p+n); + } + return (char *)PyMem_REALLOC(p, n+1); } @@ -155,52 +155,52 @@ char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, char *); char * PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, char *prompt) { - char *rv; + char *rv; - if (_PyOS_ReadlineTState == PyThreadState_GET()) { - PyErr_SetString(PyExc_RuntimeError, - "can't re-enter readline"); - return NULL; - } - + if (_PyOS_ReadlineTState == PyThreadState_GET()) { + PyErr_SetString(PyExc_RuntimeError, + "can't re-enter readline"); + return NULL; + } - if (PyOS_ReadlineFunctionPointer == NULL) { + + if (PyOS_ReadlineFunctionPointer == NULL) { #ifdef __VMS - PyOS_ReadlineFunctionPointer = vms__StdioReadline; + PyOS_ReadlineFunctionPointer = vms__StdioReadline; #else - PyOS_ReadlineFunctionPointer = PyOS_StdioReadline; + PyOS_ReadlineFunctionPointer = PyOS_StdioReadline; #endif - } - + } + #ifdef WITH_THREAD - if (_PyOS_ReadlineLock == NULL) { - _PyOS_ReadlineLock = PyThread_allocate_lock(); - } + if (_PyOS_ReadlineLock == NULL) { + _PyOS_ReadlineLock = PyThread_allocate_lock(); + } #endif - _PyOS_ReadlineTState = PyThreadState_GET(); - Py_BEGIN_ALLOW_THREADS + _PyOS_ReadlineTState = PyThreadState_GET(); + Py_BEGIN_ALLOW_THREADS #ifdef WITH_THREAD - PyThread_acquire_lock(_PyOS_ReadlineLock, 1); + PyThread_acquire_lock(_PyOS_ReadlineLock, 1); #endif - /* This is needed to handle the unlikely case that the - * interpreter is in interactive mode *and* stdin/out are not - * a tty. This can happen, for example if python is run like - * this: python -i < test1.py - */ - if (!isatty (fileno (sys_stdin)) || !isatty (fileno (sys_stdout))) - rv = PyOS_StdioReadline (sys_stdin, sys_stdout, prompt); - else - rv = (*PyOS_ReadlineFunctionPointer)(sys_stdin, sys_stdout, - prompt); - Py_END_ALLOW_THREADS + /* This is needed to handle the unlikely case that the + * interpreter is in interactive mode *and* stdin/out are not + * a tty. This can happen, for example if python is run like + * this: python -i < test1.py + */ + if (!isatty (fileno (sys_stdin)) || !isatty (fileno (sys_stdout))) + rv = PyOS_StdioReadline (sys_stdin, sys_stdout, prompt); + else + rv = (*PyOS_ReadlineFunctionPointer)(sys_stdin, sys_stdout, + prompt); + Py_END_ALLOW_THREADS #ifdef WITH_THREAD - PyThread_release_lock(_PyOS_ReadlineLock); + PyThread_release_lock(_PyOS_ReadlineLock); #endif - _PyOS_ReadlineTState = NULL; + _PyOS_ReadlineTState = NULL; - return rv; + return rv; } diff --git a/Parser/node.c b/Parser/node.c index f4c86cb..9eba76b 100644 --- a/Parser/node.c +++ b/Parser/node.c @@ -7,30 +7,30 @@ node * PyNode_New(int type) { - node *n = (node *) PyObject_MALLOC(1 * sizeof(node)); - if (n == NULL) - return NULL; - n->n_type = type; - n->n_str = NULL; - n->n_lineno = 0; - n->n_nchildren = 0; - n->n_child = NULL; - return n; + node *n = (node *) PyObject_MALLOC(1 * sizeof(node)); + if (n == NULL) + return NULL; + n->n_type = type; + n->n_str = NULL; + n->n_lineno = 0; + n->n_nchildren = 0; + n->n_child = NULL; + return n; } /* See comments at XXXROUNDUP below. Returns -1 on overflow. */ static int fancy_roundup(int n) { - /* Round up to the closest power of 2 >= n. */ - int result = 256; - assert(n > 128); - while (result < n) { - result <<= 1; - if (result <= 0) - return -1; - } - return result; + /* Round up to the closest power of 2 >= n. */ + int result = 256; + assert(n > 128); + while (result < n) { + result <<= 1; + if (result <= 0) + return -1; + } + return result; } /* A gimmick to make massive numbers of reallocs quicker. The result is @@ -70,46 +70,46 @@ fancy_roundup(int n) * Note that this would be straightforward if a node stored its current * capacity. The code is tricky to avoid that. */ -#define XXXROUNDUP(n) ((n) <= 1 ? (n) : \ - (n) <= 128 ? (((n) + 3) & ~3) : \ - fancy_roundup(n)) +#define XXXROUNDUP(n) ((n) <= 1 ? (n) : \ + (n) <= 128 ? (((n) + 3) & ~3) : \ + fancy_roundup(n)) int PyNode_AddChild(register node *n1, int type, char *str, int lineno, int col_offset) { - const int nch = n1->n_nchildren; - int current_capacity; - int required_capacity; - node *n; + const int nch = n1->n_nchildren; + int current_capacity; + int required_capacity; + node *n; - if (nch == INT_MAX || nch < 0) - return E_OVERFLOW; + if (nch == INT_MAX || nch < 0) + return E_OVERFLOW; - current_capacity = XXXROUNDUP(nch); - required_capacity = XXXROUNDUP(nch + 1); - if (current_capacity < 0 || required_capacity < 0) - return E_OVERFLOW; - if (current_capacity < required_capacity) { - if (required_capacity > PY_SIZE_MAX / sizeof(node)) { - return E_NOMEM; - } - n = n1->n_child; - n = (node *) PyObject_REALLOC(n, - required_capacity * sizeof(node)); - if (n == NULL) - return E_NOMEM; - n1->n_child = n; - } + current_capacity = XXXROUNDUP(nch); + required_capacity = XXXROUNDUP(nch + 1); + if (current_capacity < 0 || required_capacity < 0) + return E_OVERFLOW; + if (current_capacity < required_capacity) { + if (required_capacity > PY_SIZE_MAX / sizeof(node)) { + return E_NOMEM; + } + n = n1->n_child; + n = (node *) PyObject_REALLOC(n, + required_capacity * sizeof(node)); + if (n == NULL) + return E_NOMEM; + n1->n_child = n; + } - n = &n1->n_child[n1->n_nchildren++]; - n->n_type = type; - n->n_str = str; - n->n_lineno = lineno; - n->n_col_offset = col_offset; - n->n_nchildren = 0; - n->n_child = NULL; - return 0; + n = &n1->n_child[n1->n_nchildren++]; + n->n_type = type; + n->n_str = str; + n->n_lineno = lineno; + n->n_col_offset = col_offset; + n->n_nchildren = 0; + n->n_child = NULL; + return 0; } /* Forward */ @@ -119,20 +119,20 @@ static void freechildren(node *); void PyNode_Free(node *n) { - if (n != NULL) { - freechildren(n); - PyObject_FREE(n); - } + if (n != NULL) { + freechildren(n); + PyObject_FREE(n); + } } static void freechildren(node *n) { - int i; - for (i = NCH(n); --i >= 0; ) - freechildren(CHILD(n, i)); - if (n->n_child != NULL) - PyObject_FREE(n->n_child); - if (STR(n) != NULL) - PyObject_FREE(STR(n)); + int i; + for (i = NCH(n); --i >= 0; ) + freechildren(CHILD(n, i)); + if (n->n_child != NULL) + PyObject_FREE(n->n_child); + if (STR(n) != NULL) + PyObject_FREE(STR(n)); } diff --git a/Parser/parser.c b/Parser/parser.c index 83e5e6d..b505fe0 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -29,7 +29,7 @@ static void s_reset(stack *); static void s_reset(stack *s) { - s->s_top = &s->s_base[MAXSTACK]; + s->s_top = &s->s_base[MAXSTACK]; } #define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK]) @@ -37,16 +37,16 @@ s_reset(stack *s) static int s_push(register stack *s, dfa *d, node *parent) { - register stackentry *top; - if (s->s_top == s->s_base) { - fprintf(stderr, "s_push: parser stack overflow\n"); - return E_NOMEM; - } - top = --s->s_top; - top->s_dfa = d; - top->s_parent = parent; - top->s_state = 0; - return 0; + register stackentry *top; + if (s->s_top == s->s_base) { + fprintf(stderr, "s_push: parser stack overflow\n"); + return E_NOMEM; + } + top = --s->s_top; + top->s_dfa = d; + top->s_parent = parent; + top->s_state = 0; + return 0; } #ifdef Py_DEBUG @@ -54,9 +54,9 @@ s_push(register stack *s, dfa *d, node *parent) static void s_pop(register stack *s) { - if (s_empty(s)) - Py_FatalError("s_pop: parser stack underflow -- FATAL"); - s->s_top++; + if (s_empty(s)) + Py_FatalError("s_pop: parser stack underflow -- FATAL"); + s->s_top++; } #else /* !Py_DEBUG */ @@ -71,34 +71,34 @@ s_pop(register stack *s) parser_state * PyParser_New(grammar *g, int start) { - parser_state *ps; - - if (!g->g_accel) - PyGrammar_AddAccelerators(g); - ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state)); - if (ps == NULL) - return NULL; - ps->p_grammar = g; + parser_state *ps; + + if (!g->g_accel) + PyGrammar_AddAccelerators(g); + ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state)); + if (ps == NULL) + return NULL; + ps->p_grammar = g; #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD - ps->p_flags = 0; + ps->p_flags = 0; #endif - ps->p_tree = PyNode_New(start); - if (ps->p_tree == NULL) { - PyMem_FREE(ps); - return NULL; - } - s_reset(&ps->p_stack); - (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree); - return ps; + ps->p_tree = PyNode_New(start); + if (ps->p_tree == NULL) { + PyMem_FREE(ps); + return NULL; + } + s_reset(&ps->p_stack); + (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree); + return ps; } void PyParser_Delete(parser_state *ps) { - /* NB If you want to save the parse tree, - you must set p_tree to NULL before calling delparser! */ - PyNode_Free(ps->p_tree); - PyMem_FREE(ps); + /* NB If you want to save the parse tree, + you must set p_tree to NULL before calling delparser! */ + PyNode_Free(ps->p_tree); + PyMem_FREE(ps); } @@ -107,27 +107,27 @@ PyParser_Delete(parser_state *ps) static int shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset) { - int err; - assert(!s_empty(s)); - err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset); - if (err) - return err; - s->s_top->s_state = newstate; - return 0; + int err; + assert(!s_empty(s)); + err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset); + if (err) + return err; + s->s_top->s_state = newstate; + return 0; } static int push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset) { - int err; - register node *n; - n = s->s_top->s_parent; - assert(!s_empty(s)); - err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset); - if (err) - return err; - s->s_top->s_state = newstate; - return s_push(s, d, CHILD(n, NCH(n)-1)); + int err; + register node *n; + n = s->s_top->s_parent; + assert(!s_empty(s)); + err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset); + if (err) + return err; + s->s_top->s_state = newstate; + return s_push(s, d, CHILD(n, NCH(n)-1)); } @@ -136,47 +136,47 @@ push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offs static int classify(parser_state *ps, int type, char *str) { - grammar *g = ps->p_grammar; - register int n = g->g_ll.ll_nlabels; - - if (type == NAME) { - register char *s = str; - register label *l = g->g_ll.ll_label; - register int i; - for (i = n; i > 0; i--, l++) { - if (l->lb_type != NAME || l->lb_str == NULL || - l->lb_str[0] != s[0] || - strcmp(l->lb_str, s) != 0) - continue; + grammar *g = ps->p_grammar; + register int n = g->g_ll.ll_nlabels; + + if (type == NAME) { + register char *s = str; + register label *l = g->g_ll.ll_label; + register int i; + for (i = n; i > 0; i--, l++) { + if (l->lb_type != NAME || l->lb_str == NULL || + l->lb_str[0] != s[0] || + strcmp(l->lb_str, s) != 0) + continue; #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD #if 0 - /* Leaving this in as an example */ - if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) { - if (s[0] == 'w' && strcmp(s, "with") == 0) - break; /* not a keyword yet */ - else if (s[0] == 'a' && strcmp(s, "as") == 0) - break; /* not a keyword yet */ - } + /* Leaving this in as an example */ + if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) { + if (s[0] == 'w' && strcmp(s, "with") == 0) + break; /* not a keyword yet */ + else if (s[0] == 'a' && strcmp(s, "as") == 0) + break; /* not a keyword yet */ + } #endif #endif - D(printf("It's a keyword\n")); - return n - i; - } - } - - { - register label *l = g->g_ll.ll_label; - register int i; - for (i = n; i > 0; i--, l++) { - if (l->lb_type == type && l->lb_str == NULL) { - D(printf("It's a token we know\n")); - return n - i; - } - } - } - - D(printf("Illegal token\n")); - return -1; + D(printf("It's a keyword\n")); + return n - i; + } + } + + { + register label *l = g->g_ll.ll_label; + register int i; + for (i = n; i > 0; i--, l++) { + if (l->lb_type == type && l->lb_str == NULL) { + D(printf("It's a token we know\n")); + return n - i; + } + } + } + + D(printf("Illegal token\n")); + return -1; } #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD @@ -185,152 +185,152 @@ classify(parser_state *ps, int type, char *str) static void future_hack(parser_state *ps) { - node *n = ps->p_stack.s_top->s_parent; - node *ch, *cch; - int i; - - /* from __future__ import ..., must have at least 4 children */ - n = CHILD(n, 0); - if (NCH(n) < 4) - return; - ch = CHILD(n, 0); - if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0) - return; - ch = CHILD(n, 1); - if (NCH(ch) == 1 && STR(CHILD(ch, 0)) && - strcmp(STR(CHILD(ch, 0)), "__future__") != 0) - return; - ch = CHILD(n, 3); - /* ch can be a star, a parenthesis or import_as_names */ - if (TYPE(ch) == STAR) - return; - if (TYPE(ch) == LPAR) - ch = CHILD(n, 4); - - for (i = 0; i < NCH(ch); i += 2) { - cch = CHILD(ch, i); - if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) { - char *str_ch = STR(CHILD(cch, 0)); - if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) { - ps->p_flags |= CO_FUTURE_WITH_STATEMENT; - } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) { - ps->p_flags |= CO_FUTURE_PRINT_FUNCTION; - } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) { - ps->p_flags |= CO_FUTURE_UNICODE_LITERALS; - } - } - } + node *n = ps->p_stack.s_top->s_parent; + node *ch, *cch; + int i; + + /* from __future__ import ..., must have at least 4 children */ + n = CHILD(n, 0); + if (NCH(n) < 4) + return; + ch = CHILD(n, 0); + if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0) + return; + ch = CHILD(n, 1); + if (NCH(ch) == 1 && STR(CHILD(ch, 0)) && + strcmp(STR(CHILD(ch, 0)), "__future__") != 0) + return; + ch = CHILD(n, 3); + /* ch can be a star, a parenthesis or import_as_names */ + if (TYPE(ch) == STAR) + return; + if (TYPE(ch) == LPAR) + ch = CHILD(n, 4); + + for (i = 0; i < NCH(ch); i += 2) { + cch = CHILD(ch, i); + if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) { + char *str_ch = STR(CHILD(cch, 0)); + if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) { + ps->p_flags |= CO_FUTURE_WITH_STATEMENT; + } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) { + ps->p_flags |= CO_FUTURE_PRINT_FUNCTION; + } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) { + ps->p_flags |= CO_FUTURE_UNICODE_LITERALS; + } + } + } } #endif #endif /* future keyword */ int PyParser_AddToken(register parser_state *ps, register int type, char *str, - int lineno, int col_offset, int *expected_ret) + int lineno, int col_offset, int *expected_ret) { - register int ilabel; - int err; - - D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str)); - - /* Find out which label this token is */ - ilabel = classify(ps, type, str); - if (ilabel < 0) - return E_SYNTAX; - - /* Loop until the token is shifted or an error occurred */ - for (;;) { - /* Fetch the current dfa and state */ - register dfa *d = ps->p_stack.s_top->s_dfa; - register state *s = &d->d_state[ps->p_stack.s_top->s_state]; - - D(printf(" DFA '%s', state %d:", - d->d_name, ps->p_stack.s_top->s_state)); - - /* Check accelerator */ - if (s->s_lower <= ilabel && ilabel < s->s_upper) { - register int x = s->s_accel[ilabel - s->s_lower]; - if (x != -1) { - if (x & (1<<7)) { - /* Push non-terminal */ - int nt = (x >> 8) + NT_OFFSET; - int arrow = x & ((1<<7)-1); - dfa *d1 = PyGrammar_FindDFA( - ps->p_grammar, nt); - if ((err = push(&ps->p_stack, nt, d1, - arrow, lineno, col_offset)) > 0) { - D(printf(" MemError: push\n")); - return err; - } - D(printf(" Push ...\n")); - continue; - } - - /* Shift the token */ - if ((err = shift(&ps->p_stack, type, str, - x, lineno, col_offset)) > 0) { - D(printf(" MemError: shift.\n")); - return err; - } - D(printf(" Shift.\n")); - /* Pop while we are in an accept-only state */ - while (s = &d->d_state - [ps->p_stack.s_top->s_state], - s->s_accept && s->s_narcs == 1) { - D(printf(" DFA '%s', state %d: " - "Direct pop.\n", - d->d_name, - ps->p_stack.s_top->s_state)); + register int ilabel; + int err; + + D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str)); + + /* Find out which label this token is */ + ilabel = classify(ps, type, str); + if (ilabel < 0) + return E_SYNTAX; + + /* Loop until the token is shifted or an error occurred */ + for (;;) { + /* Fetch the current dfa and state */ + register dfa *d = ps->p_stack.s_top->s_dfa; + register state *s = &d->d_state[ps->p_stack.s_top->s_state]; + + D(printf(" DFA '%s', state %d:", + d->d_name, ps->p_stack.s_top->s_state)); + + /* Check accelerator */ + if (s->s_lower <= ilabel && ilabel < s->s_upper) { + register int x = s->s_accel[ilabel - s->s_lower]; + if (x != -1) { + if (x & (1<<7)) { + /* Push non-terminal */ + int nt = (x >> 8) + NT_OFFSET; + int arrow = x & ((1<<7)-1); + dfa *d1 = PyGrammar_FindDFA( + ps->p_grammar, nt); + if ((err = push(&ps->p_stack, nt, d1, + arrow, lineno, col_offset)) > 0) { + D(printf(" MemError: push\n")); + return err; + } + D(printf(" Push ...\n")); + continue; + } + + /* Shift the token */ + if ((err = shift(&ps->p_stack, type, str, + x, lineno, col_offset)) > 0) { + D(printf(" MemError: shift.\n")); + return err; + } + D(printf(" Shift.\n")); + /* Pop while we are in an accept-only state */ + while (s = &d->d_state + [ps->p_stack.s_top->s_state], + s->s_accept && s->s_narcs == 1) { + D(printf(" DFA '%s', state %d: " + "Direct pop.\n", + d->d_name, + ps->p_stack.s_top->s_state)); #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD #if 0 - if (d->d_name[0] == 'i' && - strcmp(d->d_name, - "import_stmt") == 0) - future_hack(ps); + if (d->d_name[0] == 'i' && + strcmp(d->d_name, + "import_stmt") == 0) + future_hack(ps); #endif #endif - s_pop(&ps->p_stack); - if (s_empty(&ps->p_stack)) { - D(printf(" ACCEPT.\n")); - return E_DONE; - } - d = ps->p_stack.s_top->s_dfa; - } - return E_OK; - } - } - - if (s->s_accept) { + s_pop(&ps->p_stack); + if (s_empty(&ps->p_stack)) { + D(printf(" ACCEPT.\n")); + return E_DONE; + } + d = ps->p_stack.s_top->s_dfa; + } + return E_OK; + } + } + + if (s->s_accept) { #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD #if 0 - if (d->d_name[0] == 'i' && - strcmp(d->d_name, "import_stmt") == 0) - future_hack(ps); + if (d->d_name[0] == 'i' && + strcmp(d->d_name, "import_stmt") == 0) + future_hack(ps); #endif #endif - /* Pop this dfa and try again */ - s_pop(&ps->p_stack); - D(printf(" Pop ...\n")); - if (s_empty(&ps->p_stack)) { - D(printf(" Error: bottom of stack.\n")); - return E_SYNTAX; - } - continue; - } - - /* Stuck, report syntax error */ - D(printf(" Error.\n")); - if (expected_ret) { - if (s->s_lower == s->s_upper - 1) { - /* Only one possible expected token */ - *expected_ret = ps->p_grammar-> - g_ll.ll_label[s->s_lower].lb_type; - } - else - *expected_ret = -1; - } - return E_SYNTAX; - } + /* Pop this dfa and try again */ + s_pop(&ps->p_stack); + D(printf(" Pop ...\n")); + if (s_empty(&ps->p_stack)) { + D(printf(" Error: bottom of stack.\n")); + return E_SYNTAX; + } + continue; + } + + /* Stuck, report syntax error */ + D(printf(" Error.\n")); + if (expected_ret) { + if (s->s_lower == s->s_upper - 1) { + /* Only one possible expected token */ + *expected_ret = ps->p_grammar-> + g_ll.ll_label[s->s_lower].lb_type; + } + else + *expected_ret = -1; + } + return E_SYNTAX; + } } @@ -341,62 +341,62 @@ PyParser_AddToken(register parser_state *ps, register int type, char *str, void dumptree(grammar *g, node *n) { - int i; - - if (n == NULL) - printf("NIL"); - else { - label l; - l.lb_type = TYPE(n); - l.lb_str = STR(n); - printf("%s", PyGrammar_LabelRepr(&l)); - if (ISNONTERMINAL(TYPE(n))) { - printf("("); - for (i = 0; i < NCH(n); i++) { - if (i > 0) - printf(","); - dumptree(g, CHILD(n, i)); - } - printf(")"); - } - } + int i; + + if (n == NULL) + printf("NIL"); + else { + label l; + l.lb_type = TYPE(n); + l.lb_str = STR(n); + printf("%s", PyGrammar_LabelRepr(&l)); + if (ISNONTERMINAL(TYPE(n))) { + printf("("); + for (i = 0; i < NCH(n); i++) { + if (i > 0) + printf(","); + dumptree(g, CHILD(n, i)); + } + printf(")"); + } + } } void showtree(grammar *g, node *n) { - int i; - - if (n == NULL) - return; - if (ISNONTERMINAL(TYPE(n))) { - for (i = 0; i < NCH(n); i++) - showtree(g, CHILD(n, i)); - } - else if (ISTERMINAL(TYPE(n))) { - printf("%s", _PyParser_TokenNames[TYPE(n)]); - if (TYPE(n) == NUMBER || TYPE(n) == NAME) - printf("(%s)", STR(n)); - printf(" "); - } - else - printf("? "); + int i; + + if (n == NULL) + return; + if (ISNONTERMINAL(TYPE(n))) { + for (i = 0; i < NCH(n); i++) + showtree(g, CHILD(n, i)); + } + else if (ISTERMINAL(TYPE(n))) { + printf("%s", _PyParser_TokenNames[TYPE(n)]); + if (TYPE(n) == NUMBER || TYPE(n) == NAME) + printf("(%s)", STR(n)); + printf(" "); + } + else + printf("? "); } void printtree(parser_state *ps) { - if (Py_DebugFlag) { - printf("Parse tree:\n"); - dumptree(ps->p_grammar, ps->p_tree); - printf("\n"); - printf("Tokens:\n"); - showtree(ps->p_grammar, ps->p_tree); - printf("\n"); - } - printf("Listing:\n"); - PyNode_ListTree(ps->p_tree); - printf("\n"); + if (Py_DebugFlag) { + printf("Parse tree:\n"); + dumptree(ps->p_grammar, ps->p_tree); + printf("\n"); + printf("Tokens:\n"); + showtree(ps->p_grammar, ps->p_tree); + printf("\n"); + } + printf("Listing:\n"); + PyNode_ListTree(ps->p_tree); + printf("\n"); } #endif /* Py_DEBUG */ @@ -431,15 +431,15 @@ symbol. As an example, consider this grammar: -expr: term (OP term)* -term: CONSTANT | '(' expr ')' +expr: term (OP term)* +term: CONSTANT | '(' expr ')' The DFA corresponding to the rule for expr is: ------->.---term-->.-------> - ^ | - | | - \----OP----/ + ^ | + | | + \----OP----/ The parse tree generated for the input a+b is: diff --git a/Parser/parsetok.c b/Parser/parsetok.c index 1470327..16cf5cb 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -19,84 +19,84 @@ static void initerr(perrdetail *err_ret, const char* filename); node * PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret) { - return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0); + return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0); } node * PyParser_ParseStringFlags(const char *s, grammar *g, int start, - perrdetail *err_ret, int flags) + perrdetail *err_ret, int flags) { - return PyParser_ParseStringFlagsFilename(s, NULL, - g, start, err_ret, flags); + return PyParser_ParseStringFlagsFilename(s, NULL, + g, start, err_ret, flags); } node * PyParser_ParseStringFlagsFilename(const char *s, const char *filename, - grammar *g, int start, - perrdetail *err_ret, int flags) + grammar *g, int start, + perrdetail *err_ret, int flags) { - int iflags = flags; - return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start, - err_ret, &iflags); + int iflags = flags; + return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start, + err_ret, &iflags); } node * PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, - grammar *g, int start, - perrdetail *err_ret, int *flags) + grammar *g, int start, + perrdetail *err_ret, int *flags) { - struct tok_state *tok; + struct tok_state *tok; - initerr(err_ret, filename); + initerr(err_ret, filename); - if (*flags & PyPARSE_IGNORE_COOKIE) - tok = PyTokenizer_FromUTF8(s); - else - tok = PyTokenizer_FromString(s); - if (tok == NULL) { - err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM; - return NULL; - } + if (*flags & PyPARSE_IGNORE_COOKIE) + tok = PyTokenizer_FromUTF8(s); + else + tok = PyTokenizer_FromString(s); + if (tok == NULL) { + err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM; + return NULL; + } - tok->filename = filename ? filename : "<string>"; - return parsetok(tok, g, start, err_ret, flags); + tok->filename = filename ? filename : "<string>"; + return parsetok(tok, g, start, err_ret, flags); } /* Parse input coming from a file. Return error code, print some errors. */ node * PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start, - char *ps1, char *ps2, perrdetail *err_ret) + char *ps1, char *ps2, perrdetail *err_ret) { - return PyParser_ParseFileFlags(fp, filename, NULL, - g, start, ps1, ps2, err_ret, 0); + return PyParser_ParseFileFlags(fp, filename, NULL, + g, start, ps1, ps2, err_ret, 0); } node * PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc, - grammar *g, int start, - char *ps1, char *ps2, perrdetail *err_ret, int flags) + grammar *g, int start, + char *ps1, char *ps2, perrdetail *err_ret, int flags) { - int iflags = flags; - return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1, - ps2, err_ret, &iflags); + int iflags = flags; + return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1, + ps2, err_ret, &iflags); } node * -PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, - const char *enc, grammar *g, int start, - char *ps1, char *ps2, perrdetail *err_ret, int *flags) +PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, + const char *enc, grammar *g, int start, + char *ps1, char *ps2, perrdetail *err_ret, int *flags) { - struct tok_state *tok; + struct tok_state *tok; - initerr(err_ret, filename); + initerr(err_ret, filename); - if ((tok = PyTokenizer_FromFile(fp, (char *)enc, ps1, ps2)) == NULL) { - err_ret->error = E_NOMEM; - return NULL; - } - tok->filename = filename; - return parsetok(tok, g, start, err_ret, flags); + if ((tok = PyTokenizer_FromFile(fp, (char *)enc, ps1, ps2)) == NULL) { + err_ret->error = E_NOMEM; + return NULL; + } + tok->filename = filename; + return parsetok(tok, g, start, err_ret, flags); } #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD @@ -110,9 +110,9 @@ static char as_msg[] = static void warn(const char *msg, const char *filename, int lineno) { - if (filename == NULL) - filename = "<string>"; - PySys_WriteStderr(msg, filename, lineno); + if (filename == NULL) + filename = "<string>"; + PySys_WriteStderr(msg, filename, lineno); } #endif #endif @@ -122,151 +122,151 @@ warn(const char *msg, const char *filename, int lineno) static node * parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, - int *flags) + int *flags) { - parser_state *ps; - node *n; - int started = 0, handling_import = 0, handling_with = 0; - - if ((ps = PyParser_New(g, start)) == NULL) { - fprintf(stderr, "no mem for new parser\n"); - err_ret->error = E_NOMEM; - PyTokenizer_Free(tok); - return NULL; - } + parser_state *ps; + node *n; + int started = 0, handling_import = 0, handling_with = 0; + + if ((ps = PyParser_New(g, start)) == NULL) { + fprintf(stderr, "no mem for new parser\n"); + err_ret->error = E_NOMEM; + PyTokenizer_Free(tok); + return NULL; + } #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD - if (*flags & PyPARSE_BARRY_AS_BDFL) - ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL; + if (*flags & PyPARSE_BARRY_AS_BDFL) + ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL; #endif - for (;;) { - char *a, *b; - int type; - size_t len; - char *str; - int col_offset; - - type = PyTokenizer_Get(tok, &a, &b); - if (type == ERRORTOKEN) { - err_ret->error = tok->done; - break; - } - if (type == ENDMARKER && started) { - type = NEWLINE; /* Add an extra newline */ - handling_with = handling_import = 0; - started = 0; - /* Add the right number of dedent tokens, - except if a certain flag is given -- - codeop.py uses this. */ - if (tok->indent && - !(*flags & PyPARSE_DONT_IMPLY_DEDENT)) - { - tok->pendin = -tok->indent; - tok->indent = 0; - } - } - else - started = 1; - len = b - a; /* XXX this may compute NULL - NULL */ - str = (char *) PyObject_MALLOC(len + 1); - if (str == NULL) { - fprintf(stderr, "no mem for next token\n"); - err_ret->error = E_NOMEM; - break; - } - if (len > 0) - strncpy(str, a, len); - str[len] = '\0'; + for (;;) { + char *a, *b; + int type; + size_t len; + char *str; + int col_offset; + + type = PyTokenizer_Get(tok, &a, &b); + if (type == ERRORTOKEN) { + err_ret->error = tok->done; + break; + } + if (type == ENDMARKER && started) { + type = NEWLINE; /* Add an extra newline */ + handling_with = handling_import = 0; + started = 0; + /* Add the right number of dedent tokens, + except if a certain flag is given -- + codeop.py uses this. */ + if (tok->indent && + !(*flags & PyPARSE_DONT_IMPLY_DEDENT)) + { + tok->pendin = -tok->indent; + tok->indent = 0; + } + } + else + started = 1; + len = b - a; /* XXX this may compute NULL - NULL */ + str = (char *) PyObject_MALLOC(len + 1); + if (str == NULL) { + fprintf(stderr, "no mem for next token\n"); + err_ret->error = E_NOMEM; + break; + } + if (len > 0) + strncpy(str, a, len); + str[len] = '\0'; #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD - if (type == NOTEQUAL) { - if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) && - strcmp(str, "!=")) { - err_ret->error = E_SYNTAX; - break; - } - else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) && - strcmp(str, "<>")) { - err_ret->text = "with Barry as BDFL, use '<>' " - "instead of '!='"; - err_ret->error = E_SYNTAX; - break; - } - } + if (type == NOTEQUAL) { + if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) && + strcmp(str, "!=")) { + err_ret->error = E_SYNTAX; + break; + } + else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) && + strcmp(str, "<>")) { + err_ret->text = "with Barry as BDFL, use '<>' " + "instead of '!='"; + err_ret->error = E_SYNTAX; + break; + } + } #endif - if (a >= tok->line_start) - col_offset = a - tok->line_start; - else - col_offset = -1; - - if ((err_ret->error = - PyParser_AddToken(ps, (int)type, str, - tok->lineno, col_offset, - &(err_ret->expected))) != E_OK) { - if (err_ret->error != E_DONE) { - PyObject_FREE(str); - err_ret->token = type; - } - break; - } - } - - if (err_ret->error == E_DONE) { - n = ps->p_tree; - ps->p_tree = NULL; - } - else - n = NULL; + if (a >= tok->line_start) + col_offset = a - tok->line_start; + else + col_offset = -1; + + if ((err_ret->error = + PyParser_AddToken(ps, (int)type, str, + tok->lineno, col_offset, + &(err_ret->expected))) != E_OK) { + if (err_ret->error != E_DONE) { + PyObject_FREE(str); + err_ret->token = type; + } + break; + } + } + + if (err_ret->error == E_DONE) { + n = ps->p_tree; + ps->p_tree = NULL; + } + else + n = NULL; #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD - *flags = ps->p_flags; + *flags = ps->p_flags; #endif - PyParser_Delete(ps); - - if (n == NULL) { - if (tok->lineno <= 1 && tok->done == E_EOF) - err_ret->error = E_EOF; - err_ret->lineno = tok->lineno; - if (tok->buf != NULL) { - size_t len; - assert(tok->cur - tok->buf < INT_MAX); - err_ret->offset = (int)(tok->cur - tok->buf); - len = tok->inp - tok->buf; - err_ret->text = (char *) PyObject_MALLOC(len + 1); - if (err_ret->text != NULL) { - if (len > 0) - strncpy(err_ret->text, tok->buf, len); - err_ret->text[len] = '\0'; - } - } - } else if (tok->encoding != NULL) { - node* r = PyNode_New(encoding_decl); - if (!r) { - err_ret->error = E_NOMEM; - n = NULL; - goto done; - } - r->n_str = tok->encoding; - r->n_nchildren = 1; - r->n_child = n; - tok->encoding = NULL; - n = r; - } + PyParser_Delete(ps); + + if (n == NULL) { + if (tok->lineno <= 1 && tok->done == E_EOF) + err_ret->error = E_EOF; + err_ret->lineno = tok->lineno; + if (tok->buf != NULL) { + size_t len; + assert(tok->cur - tok->buf < INT_MAX); + err_ret->offset = (int)(tok->cur - tok->buf); + len = tok->inp - tok->buf; + err_ret->text = (char *) PyObject_MALLOC(len + 1); + if (err_ret->text != NULL) { + if (len > 0) + strncpy(err_ret->text, tok->buf, len); + err_ret->text[len] = '\0'; + } + } + } else if (tok->encoding != NULL) { + node* r = PyNode_New(encoding_decl); + if (!r) { + err_ret->error = E_NOMEM; + n = NULL; + goto done; + } + r->n_str = tok->encoding; + r->n_nchildren = 1; + r->n_child = n; + tok->encoding = NULL; + n = r; + } done: - PyTokenizer_Free(tok); + PyTokenizer_Free(tok); - return n; + return n; } static void initerr(perrdetail *err_ret, const char *filename) { - err_ret->error = E_OK; - err_ret->filename = filename; - err_ret->lineno = 0; - err_ret->offset = 0; - err_ret->text = NULL; - err_ret->token = -1; - err_ret->expected = -1; + err_ret->error = E_OK; + err_ret->filename = filename; + err_ret->lineno = 0; + err_ret->offset = 0; + err_ret->text = NULL; + err_ret->token = -1; + err_ret->expected = -1; } diff --git a/Parser/pgen.c b/Parser/pgen.c index 959a5d3..beaf53b 100644 --- a/Parser/pgen.c +++ b/Parser/pgen.c @@ -17,85 +17,85 @@ extern int Py_IgnoreEnvironmentFlag; /* needed by Py_GETENV */ /* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */ typedef struct _nfaarc { - int ar_label; - int ar_arrow; + int ar_label; + int ar_arrow; } nfaarc; typedef struct _nfastate { - int st_narcs; - nfaarc *st_arc; + int st_narcs; + nfaarc *st_arc; } nfastate; typedef struct _nfa { - int nf_type; - char *nf_name; - int nf_nstates; - nfastate *nf_state; - int nf_start, nf_finish; + int nf_type; + char *nf_name; + int nf_nstates; + nfastate *nf_state; + int nf_start, nf_finish; } nfa; /* Forward */ static void compile_rhs(labellist *ll, - nfa *nf, node *n, int *pa, int *pb); + nfa *nf, node *n, int *pa, int *pb); static void compile_alt(labellist *ll, - nfa *nf, node *n, int *pa, int *pb); + nfa *nf, node *n, int *pa, int *pb); static void compile_item(labellist *ll, - nfa *nf, node *n, int *pa, int *pb); + nfa *nf, node *n, int *pa, int *pb); static void compile_atom(labellist *ll, - nfa *nf, node *n, int *pa, int *pb); + nfa *nf, node *n, int *pa, int *pb); static int addnfastate(nfa *nf) { - nfastate *st; - - nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state, - sizeof(nfastate) * (nf->nf_nstates + 1)); - if (nf->nf_state == NULL) - Py_FatalError("out of mem"); - st = &nf->nf_state[nf->nf_nstates++]; - st->st_narcs = 0; - st->st_arc = NULL; - return st - nf->nf_state; + nfastate *st; + + nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state, + sizeof(nfastate) * (nf->nf_nstates + 1)); + if (nf->nf_state == NULL) + Py_FatalError("out of mem"); + st = &nf->nf_state[nf->nf_nstates++]; + st->st_narcs = 0; + st->st_arc = NULL; + return st - nf->nf_state; } static void addnfaarc(nfa *nf, int from, int to, int lbl) { - nfastate *st; - nfaarc *ar; - - st = &nf->nf_state[from]; - st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc, - sizeof(nfaarc) * (st->st_narcs + 1)); - if (st->st_arc == NULL) - Py_FatalError("out of mem"); - ar = &st->st_arc[st->st_narcs++]; - ar->ar_label = lbl; - ar->ar_arrow = to; + nfastate *st; + nfaarc *ar; + + st = &nf->nf_state[from]; + st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc, + sizeof(nfaarc) * (st->st_narcs + 1)); + if (st->st_arc == NULL) + Py_FatalError("out of mem"); + ar = &st->st_arc[st->st_narcs++]; + ar->ar_label = lbl; + ar->ar_arrow = to; } static nfa * newnfa(char *name) { - nfa *nf; - static int type = NT_OFFSET; /* All types will be disjunct */ - - nf = (nfa *)PyObject_MALLOC(sizeof(nfa)); - if (nf == NULL) - Py_FatalError("no mem for new nfa"); - nf->nf_type = type++; - nf->nf_name = name; /* XXX strdup(name) ??? */ - nf->nf_nstates = 0; - nf->nf_state = NULL; - nf->nf_start = nf->nf_finish = -1; - return nf; + nfa *nf; + static int type = NT_OFFSET; /* All types will be disjunct */ + + nf = (nfa *)PyObject_MALLOC(sizeof(nfa)); + if (nf == NULL) + Py_FatalError("no mem for new nfa"); + nf->nf_type = type++; + nf->nf_name = name; /* XXX strdup(name) ??? */ + nf->nf_nstates = 0; + nf->nf_state = NULL; + nf->nf_start = nf->nf_finish = -1; + return nf; } typedef struct _nfagrammar { - int gr_nnfas; - nfa **gr_nfa; - labellist gr_ll; + int gr_nnfas; + nfa **gr_nfa; + labellist gr_ll; } nfagrammar; /* Forward */ @@ -104,32 +104,32 @@ static void compile_rule(nfagrammar *gr, node *n); static nfagrammar * newnfagrammar(void) { - nfagrammar *gr; - - gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar)); - if (gr == NULL) - Py_FatalError("no mem for new nfa grammar"); - gr->gr_nnfas = 0; - gr->gr_nfa = NULL; - gr->gr_ll.ll_nlabels = 0; - gr->gr_ll.ll_label = NULL; - addlabel(&gr->gr_ll, ENDMARKER, "EMPTY"); - return gr; + nfagrammar *gr; + + gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar)); + if (gr == NULL) + Py_FatalError("no mem for new nfa grammar"); + gr->gr_nnfas = 0; + gr->gr_nfa = NULL; + gr->gr_ll.ll_nlabels = 0; + gr->gr_ll.ll_label = NULL; + addlabel(&gr->gr_ll, ENDMARKER, "EMPTY"); + return gr; } static nfa * addnfa(nfagrammar *gr, char *name) { - nfa *nf; - - nf = newnfa(name); - gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa, - sizeof(nfa*) * (gr->gr_nnfas + 1)); - if (gr->gr_nfa == NULL) - Py_FatalError("out of mem"); - gr->gr_nfa[gr->gr_nnfas++] = nf; - addlabel(&gr->gr_ll, NAME, nf->nf_name); - return nf; + nfa *nf; + + nf = newnfa(name); + gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa, + sizeof(nfa*) * (gr->gr_nnfas + 1)); + if (gr->gr_nfa == NULL) + Py_FatalError("out of mem"); + gr->gr_nfa[gr->gr_nnfas++] = nf; + addlabel(&gr->gr_ll, NAME, nf->nf_name); + return nf; } #ifdef Py_DEBUG @@ -137,203 +137,203 @@ addnfa(nfagrammar *gr, char *name) static char REQNFMT[] = "metacompile: less than %d children\n"; #define REQN(i, count) \ - if (i < count) { \ - fprintf(stderr, REQNFMT, count); \ - Py_FatalError("REQN"); \ - } else + if (i < count) { \ + fprintf(stderr, REQNFMT, count); \ + Py_FatalError("REQN"); \ + } else #else -#define REQN(i, count) /* empty */ +#define REQN(i, count) /* empty */ #endif static nfagrammar * metacompile(node *n) { - nfagrammar *gr; - int i; - - if (Py_DebugFlag) - printf("Compiling (meta-) parse tree into NFA grammar\n"); - gr = newnfagrammar(); - REQ(n, MSTART); - i = n->n_nchildren - 1; /* Last child is ENDMARKER */ - n = n->n_child; - for (; --i >= 0; n++) { - if (n->n_type != NEWLINE) - compile_rule(gr, n); - } - return gr; + nfagrammar *gr; + int i; + + if (Py_DebugFlag) + printf("Compiling (meta-) parse tree into NFA grammar\n"); + gr = newnfagrammar(); + REQ(n, MSTART); + i = n->n_nchildren - 1; /* Last child is ENDMARKER */ + n = n->n_child; + for (; --i >= 0; n++) { + if (n->n_type != NEWLINE) + compile_rule(gr, n); + } + return gr; } static void compile_rule(nfagrammar *gr, node *n) { - nfa *nf; - - REQ(n, RULE); - REQN(n->n_nchildren, 4); - n = n->n_child; - REQ(n, NAME); - nf = addnfa(gr, n->n_str); - n++; - REQ(n, COLON); - n++; - REQ(n, RHS); - compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish); - n++; - REQ(n, NEWLINE); + nfa *nf; + + REQ(n, RULE); + REQN(n->n_nchildren, 4); + n = n->n_child; + REQ(n, NAME); + nf = addnfa(gr, n->n_str); + n++; + REQ(n, COLON); + n++; + REQ(n, RHS); + compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish); + n++; + REQ(n, NEWLINE); } static void compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb) { - int i; - int a, b; - - REQ(n, RHS); - i = n->n_nchildren; - REQN(i, 1); - n = n->n_child; - REQ(n, ALT); - compile_alt(ll, nf, n, pa, pb); - if (--i <= 0) - return; - n++; - a = *pa; - b = *pb; - *pa = addnfastate(nf); - *pb = addnfastate(nf); - addnfaarc(nf, *pa, a, EMPTY); - addnfaarc(nf, b, *pb, EMPTY); - for (; --i >= 0; n++) { - REQ(n, VBAR); - REQN(i, 1); - --i; - n++; - REQ(n, ALT); - compile_alt(ll, nf, n, &a, &b); - addnfaarc(nf, *pa, a, EMPTY); - addnfaarc(nf, b, *pb, EMPTY); - } + int i; + int a, b; + + REQ(n, RHS); + i = n->n_nchildren; + REQN(i, 1); + n = n->n_child; + REQ(n, ALT); + compile_alt(ll, nf, n, pa, pb); + if (--i <= 0) + return; + n++; + a = *pa; + b = *pb; + *pa = addnfastate(nf); + *pb = addnfastate(nf); + addnfaarc(nf, *pa, a, EMPTY); + addnfaarc(nf, b, *pb, EMPTY); + for (; --i >= 0; n++) { + REQ(n, VBAR); + REQN(i, 1); + --i; + n++; + REQ(n, ALT); + compile_alt(ll, nf, n, &a, &b); + addnfaarc(nf, *pa, a, EMPTY); + addnfaarc(nf, b, *pb, EMPTY); + } } static void compile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb) { - int i; - int a, b; - - REQ(n, ALT); - i = n->n_nchildren; - REQN(i, 1); - n = n->n_child; - REQ(n, ITEM); - compile_item(ll, nf, n, pa, pb); - --i; - n++; - for (; --i >= 0; n++) { - REQ(n, ITEM); - compile_item(ll, nf, n, &a, &b); - addnfaarc(nf, *pb, a, EMPTY); - *pb = b; - } + int i; + int a, b; + + REQ(n, ALT); + i = n->n_nchildren; + REQN(i, 1); + n = n->n_child; + REQ(n, ITEM); + compile_item(ll, nf, n, pa, pb); + --i; + n++; + for (; --i >= 0; n++) { + REQ(n, ITEM); + compile_item(ll, nf, n, &a, &b); + addnfaarc(nf, *pb, a, EMPTY); + *pb = b; + } } static void compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb) { - int i; - int a, b; - - REQ(n, ITEM); - i = n->n_nchildren; - REQN(i, 1); - n = n->n_child; - if (n->n_type == LSQB) { - REQN(i, 3); - n++; - REQ(n, RHS); - *pa = addnfastate(nf); - *pb = addnfastate(nf); - addnfaarc(nf, *pa, *pb, EMPTY); - compile_rhs(ll, nf, n, &a, &b); - addnfaarc(nf, *pa, a, EMPTY); - addnfaarc(nf, b, *pb, EMPTY); - REQN(i, 1); - n++; - REQ(n, RSQB); - } - else { - compile_atom(ll, nf, n, pa, pb); - if (--i <= 0) - return; - n++; - addnfaarc(nf, *pb, *pa, EMPTY); - if (n->n_type == STAR) - *pb = *pa; - else - REQ(n, PLUS); - } + int i; + int a, b; + + REQ(n, ITEM); + i = n->n_nchildren; + REQN(i, 1); + n = n->n_child; + if (n->n_type == LSQB) { + REQN(i, 3); + n++; + REQ(n, RHS); + *pa = addnfastate(nf); + *pb = addnfastate(nf); + addnfaarc(nf, *pa, *pb, EMPTY); + compile_rhs(ll, nf, n, &a, &b); + addnfaarc(nf, *pa, a, EMPTY); + addnfaarc(nf, b, *pb, EMPTY); + REQN(i, 1); + n++; + REQ(n, RSQB); + } + else { + compile_atom(ll, nf, n, pa, pb); + if (--i <= 0) + return; + n++; + addnfaarc(nf, *pb, *pa, EMPTY); + if (n->n_type == STAR) + *pb = *pa; + else + REQ(n, PLUS); + } } static void compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb) { - int i; - - REQ(n, ATOM); - i = n->n_nchildren; - REQN(i, 1); - n = n->n_child; - if (n->n_type == LPAR) { - REQN(i, 3); - n++; - REQ(n, RHS); - compile_rhs(ll, nf, n, pa, pb); - n++; - REQ(n, RPAR); - } - else if (n->n_type == NAME || n->n_type == STRING) { - *pa = addnfastate(nf); - *pb = addnfastate(nf); - addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str)); - } - else - REQ(n, NAME); + int i; + + REQ(n, ATOM); + i = n->n_nchildren; + REQN(i, 1); + n = n->n_child; + if (n->n_type == LPAR) { + REQN(i, 3); + n++; + REQ(n, RHS); + compile_rhs(ll, nf, n, pa, pb); + n++; + REQ(n, RPAR); + } + else if (n->n_type == NAME || n->n_type == STRING) { + *pa = addnfastate(nf); + *pb = addnfastate(nf); + addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str)); + } + else + REQ(n, NAME); } static void dumpstate(labellist *ll, nfa *nf, int istate) { - nfastate *st; - int i; - nfaarc *ar; - - printf("%c%2d%c", - istate == nf->nf_start ? '*' : ' ', - istate, - istate == nf->nf_finish ? '.' : ' '); - st = &nf->nf_state[istate]; - ar = st->st_arc; - for (i = 0; i < st->st_narcs; i++) { - if (i > 0) - printf("\n "); - printf("-> %2d %s", ar->ar_arrow, - PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label])); - ar++; - } - printf("\n"); + nfastate *st; + int i; + nfaarc *ar; + + printf("%c%2d%c", + istate == nf->nf_start ? '*' : ' ', + istate, + istate == nf->nf_finish ? '.' : ' '); + st = &nf->nf_state[istate]; + ar = st->st_arc; + for (i = 0; i < st->st_narcs; i++) { + if (i > 0) + printf("\n "); + printf("-> %2d %s", ar->ar_arrow, + PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label])); + ar++; + } + printf("\n"); } static void dumpnfa(labellist *ll, nfa *nf) { - int i; - - printf("NFA '%s' has %d states; start %d, finish %d\n", - nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish); - for (i = 0; i < nf->nf_nstates; i++) - dumpstate(ll, nf, i); + int i; + + printf("NFA '%s' has %d states; start %d, finish %d\n", + nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish); + for (i = 0; i < nf->nf_nstates; i++) + dumpstate(ll, nf, i); } @@ -342,184 +342,184 @@ dumpnfa(labellist *ll, nfa *nf) static void addclosure(bitset ss, nfa *nf, int istate) { - if (addbit(ss, istate)) { - nfastate *st = &nf->nf_state[istate]; - nfaarc *ar = st->st_arc; - int i; - - for (i = st->st_narcs; --i >= 0; ) { - if (ar->ar_label == EMPTY) - addclosure(ss, nf, ar->ar_arrow); - ar++; - } - } + if (addbit(ss, istate)) { + nfastate *st = &nf->nf_state[istate]; + nfaarc *ar = st->st_arc; + int i; + + for (i = st->st_narcs; --i >= 0; ) { + if (ar->ar_label == EMPTY) + addclosure(ss, nf, ar->ar_arrow); + ar++; + } + } } typedef struct _ss_arc { - bitset sa_bitset; - int sa_arrow; - int sa_label; + bitset sa_bitset; + int sa_arrow; + int sa_label; } ss_arc; typedef struct _ss_state { - bitset ss_ss; - int ss_narcs; - struct _ss_arc *ss_arc; - int ss_deleted; - int ss_finish; - int ss_rename; + bitset ss_ss; + int ss_narcs; + struct _ss_arc *ss_arc; + int ss_deleted; + int ss_finish; + int ss_rename; } ss_state; typedef struct _ss_dfa { - int sd_nstates; - ss_state *sd_state; + int sd_nstates; + ss_state *sd_state; } ss_dfa; /* Forward */ static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits, - labellist *ll, char *msg); + labellist *ll, char *msg); static void simplify(int xx_nstates, ss_state *xx_state); static void convert(dfa *d, int xx_nstates, ss_state *xx_state); static void makedfa(nfagrammar *gr, nfa *nf, dfa *d) { - int nbits = nf->nf_nstates; - bitset ss; - int xx_nstates; - ss_state *xx_state, *yy; - ss_arc *zz; - int istate, jstate, iarc, jarc, ibit; - nfastate *st; - nfaarc *ar; - - ss = newbitset(nbits); - addclosure(ss, nf, nf->nf_start); - xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state)); - if (xx_state == NULL) - Py_FatalError("no mem for xx_state in makedfa"); - xx_nstates = 1; - yy = &xx_state[0]; - yy->ss_ss = ss; - yy->ss_narcs = 0; - yy->ss_arc = NULL; - yy->ss_deleted = 0; - yy->ss_finish = testbit(ss, nf->nf_finish); - if (yy->ss_finish) - printf("Error: nonterminal '%s' may produce empty.\n", - nf->nf_name); - - /* This algorithm is from a book written before - the invention of structured programming... */ - - /* For each unmarked state... */ - for (istate = 0; istate < xx_nstates; ++istate) { - size_t size; - yy = &xx_state[istate]; - ss = yy->ss_ss; - /* For all its states... */ - for (ibit = 0; ibit < nf->nf_nstates; ++ibit) { - if (!testbit(ss, ibit)) - continue; - st = &nf->nf_state[ibit]; - /* For all non-empty arcs from this state... */ - for (iarc = 0; iarc < st->st_narcs; iarc++) { - ar = &st->st_arc[iarc]; - if (ar->ar_label == EMPTY) - continue; - /* Look up in list of arcs from this state */ - for (jarc = 0; jarc < yy->ss_narcs; ++jarc) { - zz = &yy->ss_arc[jarc]; - if (ar->ar_label == zz->sa_label) - goto found; - } - /* Add new arc for this state */ - size = sizeof(ss_arc) * (yy->ss_narcs + 1); - yy->ss_arc = (ss_arc *)PyObject_REALLOC( - yy->ss_arc, size); - if (yy->ss_arc == NULL) - Py_FatalError("out of mem"); - zz = &yy->ss_arc[yy->ss_narcs++]; - zz->sa_label = ar->ar_label; - zz->sa_bitset = newbitset(nbits); - zz->sa_arrow = -1; - found: ; - /* Add destination */ - addclosure(zz->sa_bitset, nf, ar->ar_arrow); - } - } - /* Now look up all the arrow states */ - for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) { - zz = &xx_state[istate].ss_arc[jarc]; - for (jstate = 0; jstate < xx_nstates; jstate++) { - if (samebitset(zz->sa_bitset, - xx_state[jstate].ss_ss, nbits)) { - zz->sa_arrow = jstate; - goto done; - } - } - size = sizeof(ss_state) * (xx_nstates + 1); - xx_state = (ss_state *)PyObject_REALLOC(xx_state, - size); - if (xx_state == NULL) - Py_FatalError("out of mem"); - zz->sa_arrow = xx_nstates; - yy = &xx_state[xx_nstates++]; - yy->ss_ss = zz->sa_bitset; - yy->ss_narcs = 0; - yy->ss_arc = NULL; - yy->ss_deleted = 0; - yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish); - done: ; - } - } - - if (Py_DebugFlag) - printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll, - "before minimizing"); - - simplify(xx_nstates, xx_state); - - if (Py_DebugFlag) - printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll, - "after minimizing"); - - convert(d, xx_nstates, xx_state); - - /* XXX cleanup */ - PyObject_FREE(xx_state); + int nbits = nf->nf_nstates; + bitset ss; + int xx_nstates; + ss_state *xx_state, *yy; + ss_arc *zz; + int istate, jstate, iarc, jarc, ibit; + nfastate *st; + nfaarc *ar; + + ss = newbitset(nbits); + addclosure(ss, nf, nf->nf_start); + xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state)); + if (xx_state == NULL) + Py_FatalError("no mem for xx_state in makedfa"); + xx_nstates = 1; + yy = &xx_state[0]; + yy->ss_ss = ss; + yy->ss_narcs = 0; + yy->ss_arc = NULL; + yy->ss_deleted = 0; + yy->ss_finish = testbit(ss, nf->nf_finish); + if (yy->ss_finish) + printf("Error: nonterminal '%s' may produce empty.\n", + nf->nf_name); + + /* This algorithm is from a book written before + the invention of structured programming... */ + + /* For each unmarked state... */ + for (istate = 0; istate < xx_nstates; ++istate) { + size_t size; + yy = &xx_state[istate]; + ss = yy->ss_ss; + /* For all its states... */ + for (ibit = 0; ibit < nf->nf_nstates; ++ibit) { + if (!testbit(ss, ibit)) + continue; + st = &nf->nf_state[ibit]; + /* For all non-empty arcs from this state... */ + for (iarc = 0; iarc < st->st_narcs; iarc++) { + ar = &st->st_arc[iarc]; + if (ar->ar_label == EMPTY) + continue; + /* Look up in list of arcs from this state */ + for (jarc = 0; jarc < yy->ss_narcs; ++jarc) { + zz = &yy->ss_arc[jarc]; + if (ar->ar_label == zz->sa_label) + goto found; + } + /* Add new arc for this state */ + size = sizeof(ss_arc) * (yy->ss_narcs + 1); + yy->ss_arc = (ss_arc *)PyObject_REALLOC( + yy->ss_arc, size); + if (yy->ss_arc == NULL) + Py_FatalError("out of mem"); + zz = &yy->ss_arc[yy->ss_narcs++]; + zz->sa_label = ar->ar_label; + zz->sa_bitset = newbitset(nbits); + zz->sa_arrow = -1; + found: ; + /* Add destination */ + addclosure(zz->sa_bitset, nf, ar->ar_arrow); + } + } + /* Now look up all the arrow states */ + for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) { + zz = &xx_state[istate].ss_arc[jarc]; + for (jstate = 0; jstate < xx_nstates; jstate++) { + if (samebitset(zz->sa_bitset, + xx_state[jstate].ss_ss, nbits)) { + zz->sa_arrow = jstate; + goto done; + } + } + size = sizeof(ss_state) * (xx_nstates + 1); + xx_state = (ss_state *)PyObject_REALLOC(xx_state, + size); + if (xx_state == NULL) + Py_FatalError("out of mem"); + zz->sa_arrow = xx_nstates; + yy = &xx_state[xx_nstates++]; + yy->ss_ss = zz->sa_bitset; + yy->ss_narcs = 0; + yy->ss_arc = NULL; + yy->ss_deleted = 0; + yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish); + done: ; + } + } + + if (Py_DebugFlag) + printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll, + "before minimizing"); + + simplify(xx_nstates, xx_state); + + if (Py_DebugFlag) + printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll, + "after minimizing"); + + convert(d, xx_nstates, xx_state); + + /* XXX cleanup */ + PyObject_FREE(xx_state); } static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits, - labellist *ll, char *msg) + labellist *ll, char *msg) { - int i, ibit, iarc; - ss_state *yy; - ss_arc *zz; - - printf("Subset DFA %s\n", msg); - for (i = 0; i < xx_nstates; i++) { - yy = &xx_state[i]; - if (yy->ss_deleted) - continue; - printf(" Subset %d", i); - if (yy->ss_finish) - printf(" (finish)"); - printf(" { "); - for (ibit = 0; ibit < nbits; ibit++) { - if (testbit(yy->ss_ss, ibit)) - printf("%d ", ibit); - } - printf("}\n"); - for (iarc = 0; iarc < yy->ss_narcs; iarc++) { - zz = &yy->ss_arc[iarc]; - printf(" Arc to state %d, label %s\n", - zz->sa_arrow, - PyGrammar_LabelRepr( - &ll->ll_label[zz->sa_label])); - } - } + int i, ibit, iarc; + ss_state *yy; + ss_arc *zz; + + printf("Subset DFA %s\n", msg); + for (i = 0; i < xx_nstates; i++) { + yy = &xx_state[i]; + if (yy->ss_deleted) + continue; + printf(" Subset %d", i); + if (yy->ss_finish) + printf(" (finish)"); + printf(" { "); + for (ibit = 0; ibit < nbits; ibit++) { + if (testbit(yy->ss_ss, ibit)) + printf("%d ", ibit); + } + printf("}\n"); + for (iarc = 0; iarc < yy->ss_narcs; iarc++) { + zz = &yy->ss_arc[iarc]; + printf(" Arc to state %d, label %s\n", + zz->sa_arrow, + PyGrammar_LabelRepr( + &ll->ll_label[zz->sa_label])); + } + } } @@ -535,59 +535,59 @@ printssdfa(int xx_nstates, ss_state *xx_state, int nbits, static int samestate(ss_state *s1, ss_state *s2) { - int i; - - if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish) - return 0; - for (i = 0; i < s1->ss_narcs; i++) { - if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow || - s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label) - return 0; - } - return 1; + int i; + + if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish) + return 0; + for (i = 0; i < s1->ss_narcs; i++) { + if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow || + s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label) + return 0; + } + return 1; } static void renamestates(int xx_nstates, ss_state *xx_state, int from, int to) { - int i, j; - - if (Py_DebugFlag) - printf("Rename state %d to %d.\n", from, to); - for (i = 0; i < xx_nstates; i++) { - if (xx_state[i].ss_deleted) - continue; - for (j = 0; j < xx_state[i].ss_narcs; j++) { - if (xx_state[i].ss_arc[j].sa_arrow == from) - xx_state[i].ss_arc[j].sa_arrow = to; - } - } + int i, j; + + if (Py_DebugFlag) + printf("Rename state %d to %d.\n", from, to); + for (i = 0; i < xx_nstates; i++) { + if (xx_state[i].ss_deleted) + continue; + for (j = 0; j < xx_state[i].ss_narcs; j++) { + if (xx_state[i].ss_arc[j].sa_arrow == from) + xx_state[i].ss_arc[j].sa_arrow = to; + } + } } static void simplify(int xx_nstates, ss_state *xx_state) { - int changes; - int i, j; - - do { - changes = 0; - for (i = 1; i < xx_nstates; i++) { - if (xx_state[i].ss_deleted) - continue; - for (j = 0; j < i; j++) { - if (xx_state[j].ss_deleted) - continue; - if (samestate(&xx_state[i], &xx_state[j])) { - xx_state[i].ss_deleted++; - renamestates(xx_nstates, xx_state, - i, j); - changes++; - break; - } - } - } - } while (changes); + int changes; + int i, j; + + do { + changes = 0; + for (i = 1; i < xx_nstates; i++) { + if (xx_state[i].ss_deleted) + continue; + for (j = 0; j < i; j++) { + if (xx_state[j].ss_deleted) + continue; + if (samestate(&xx_state[i], &xx_state[j])) { + xx_state[i].ss_deleted++; + renamestates(xx_nstates, xx_state, + i, j); + changes++; + break; + } + } + } + } while (changes); } @@ -598,32 +598,32 @@ simplify(int xx_nstates, ss_state *xx_state) static void convert(dfa *d, int xx_nstates, ss_state *xx_state) { - int i, j; - ss_state *yy; - ss_arc *zz; - - for (i = 0; i < xx_nstates; i++) { - yy = &xx_state[i]; - if (yy->ss_deleted) - continue; - yy->ss_rename = addstate(d); - } - - for (i = 0; i < xx_nstates; i++) { - yy = &xx_state[i]; - if (yy->ss_deleted) - continue; - for (j = 0; j < yy->ss_narcs; j++) { - zz = &yy->ss_arc[j]; - addarc(d, yy->ss_rename, - xx_state[zz->sa_arrow].ss_rename, - zz->sa_label); - } - if (yy->ss_finish) - addarc(d, yy->ss_rename, yy->ss_rename, 0); - } - - d->d_initial = 0; + int i, j; + ss_state *yy; + ss_arc *zz; + + for (i = 0; i < xx_nstates; i++) { + yy = &xx_state[i]; + if (yy->ss_deleted) + continue; + yy->ss_rename = addstate(d); + } + + for (i = 0; i < xx_nstates; i++) { + yy = &xx_state[i]; + if (yy->ss_deleted) + continue; + for (j = 0; j < yy->ss_narcs; j++) { + zz = &yy->ss_arc[j]; + addarc(d, yy->ss_rename, + xx_state[zz->sa_arrow].ss_rename, + zz->sa_label); + } + if (yy->ss_finish) + addarc(d, yy->ss_rename, yy->ss_rename, 0); + } + + d->d_initial = 0; } @@ -632,43 +632,43 @@ convert(dfa *d, int xx_nstates, ss_state *xx_state) static grammar * maketables(nfagrammar *gr) { - int i; - nfa *nf; - dfa *d; - grammar *g; - - if (gr->gr_nnfas == 0) - return NULL; - g = newgrammar(gr->gr_nfa[0]->nf_type); - /* XXX first rule must be start rule */ - g->g_ll = gr->gr_ll; - - for (i = 0; i < gr->gr_nnfas; i++) { - nf = gr->gr_nfa[i]; - if (Py_DebugFlag) { - printf("Dump of NFA for '%s' ...\n", nf->nf_name); - dumpnfa(&gr->gr_ll, nf); - printf("Making DFA for '%s' ...\n", nf->nf_name); - } - d = adddfa(g, nf->nf_type, nf->nf_name); - makedfa(gr, gr->gr_nfa[i], d); - } - - return g; + int i; + nfa *nf; + dfa *d; + grammar *g; + + if (gr->gr_nnfas == 0) + return NULL; + g = newgrammar(gr->gr_nfa[0]->nf_type); + /* XXX first rule must be start rule */ + g->g_ll = gr->gr_ll; + + for (i = 0; i < gr->gr_nnfas; i++) { + nf = gr->gr_nfa[i]; + if (Py_DebugFlag) { + printf("Dump of NFA for '%s' ...\n", nf->nf_name); + dumpnfa(&gr->gr_ll, nf); + printf("Making DFA for '%s' ...\n", nf->nf_name); + } + d = adddfa(g, nf->nf_type, nf->nf_name); + makedfa(gr, gr->gr_nfa[i], d); + } + + return g; } grammar * pgen(node *n) { - nfagrammar *gr; - grammar *g; - - gr = metacompile(n); - g = maketables(gr); - translatelabels(g); - addfirstsets(g); - PyObject_FREE(gr); - return g; + nfagrammar *gr; + grammar *g; + + gr = metacompile(n); + g = maketables(gr); + translatelabels(g); + addfirstsets(g); + PyObject_FREE(gr); + return g; } grammar * @@ -702,7 +702,7 @@ Reference --------- [Aho&Ullman 77] - Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977 - (first edition) + Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977 + (first edition) */ diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c index fc27a2c..88fa7f1 100644 --- a/Parser/pgenmain.c +++ b/Parser/pgenmain.c @@ -30,104 +30,104 @@ grammar *getgrammar(char *filename); void Py_Exit(int sts) { - exit(sts); + exit(sts); } int main(int argc, char **argv) { - grammar *g; - FILE *fp; - char *filename, *graminit_h, *graminit_c; - - if (argc != 4) { - fprintf(stderr, - "usage: %s grammar graminit.h graminit.c\n", argv[0]); - Py_Exit(2); - } - filename = argv[1]; - graminit_h = argv[2]; - graminit_c = argv[3]; - g = getgrammar(filename); - fp = fopen(graminit_c, "w"); - if (fp == NULL) { - perror(graminit_c); - Py_Exit(1); - } - if (Py_DebugFlag) - printf("Writing %s ...\n", graminit_c); - printgrammar(g, fp); - fclose(fp); - fp = fopen(graminit_h, "w"); - if (fp == NULL) { - perror(graminit_h); - Py_Exit(1); - } - if (Py_DebugFlag) - printf("Writing %s ...\n", graminit_h); - printnonterminals(g, fp); - fclose(fp); - Py_Exit(0); - return 0; /* Make gcc -Wall happy */ + grammar *g; + FILE *fp; + char *filename, *graminit_h, *graminit_c; + + if (argc != 4) { + fprintf(stderr, + "usage: %s grammar graminit.h graminit.c\n", argv[0]); + Py_Exit(2); + } + filename = argv[1]; + graminit_h = argv[2]; + graminit_c = argv[3]; + g = getgrammar(filename); + fp = fopen(graminit_c, "w"); + if (fp == NULL) { + perror(graminit_c); + Py_Exit(1); + } + if (Py_DebugFlag) + printf("Writing %s ...\n", graminit_c); + printgrammar(g, fp); + fclose(fp); + fp = fopen(graminit_h, "w"); + if (fp == NULL) { + perror(graminit_h); + Py_Exit(1); + } + if (Py_DebugFlag) + printf("Writing %s ...\n", graminit_h); + printnonterminals(g, fp); + fclose(fp); + Py_Exit(0); + return 0; /* Make gcc -Wall happy */ } grammar * getgrammar(char *filename) { - FILE *fp; - node *n; - grammar *g0, *g; - perrdetail err; - - fp = fopen(filename, "r"); - if (fp == NULL) { - perror(filename); - Py_Exit(1); - } - g0 = meta_grammar(); - n = PyParser_ParseFile(fp, filename, g0, g0->g_start, - (char *)NULL, (char *)NULL, &err); - fclose(fp); - if (n == NULL) { - fprintf(stderr, "Parsing error %d, line %d.\n", - err.error, err.lineno); - if (err.text != NULL) { - size_t i; - fprintf(stderr, "%s", err.text); - i = strlen(err.text); - if (i == 0 || err.text[i-1] != '\n') - fprintf(stderr, "\n"); - for (i = 0; i < err.offset; i++) { - if (err.text[i] == '\t') - putc('\t', stderr); - else - putc(' ', stderr); - } - fprintf(stderr, "^\n"); - PyObject_FREE(err.text); - } - Py_Exit(1); - } - g = pgen(n); - if (g == NULL) { - printf("Bad grammar.\n"); - Py_Exit(1); - } - return g; + FILE *fp; + node *n; + grammar *g0, *g; + perrdetail err; + + fp = fopen(filename, "r"); + if (fp == NULL) { + perror(filename); + Py_Exit(1); + } + g0 = meta_grammar(); + n = PyParser_ParseFile(fp, filename, g0, g0->g_start, + (char *)NULL, (char *)NULL, &err); + fclose(fp); + if (n == NULL) { + fprintf(stderr, "Parsing error %d, line %d.\n", + err.error, err.lineno); + if (err.text != NULL) { + size_t i; + fprintf(stderr, "%s", err.text); + i = strlen(err.text); + if (i == 0 || err.text[i-1] != '\n') + fprintf(stderr, "\n"); + for (i = 0; i < err.offset; i++) { + if (err.text[i] == '\t') + putc('\t', stderr); + else + putc(' ', stderr); + } + fprintf(stderr, "^\n"); + PyObject_FREE(err.text); + } + Py_Exit(1); + } + g = pgen(n); + if (g == NULL) { + printf("Bad grammar.\n"); + Py_Exit(1); + } + return g; } /* Can't happen in pgen */ PyObject* PyErr_Occurred() { - return 0; + return 0; } void Py_FatalError(const char *msg) { - fprintf(stderr, "pgen: FATAL ERROR: %s\n", msg); - Py_Exit(1); + fprintf(stderr, "pgen: FATAL ERROR: %s\n", msg); + Py_Exit(1); } /* No-nonsense my_readline() for tokenizer.c */ @@ -135,28 +135,28 @@ Py_FatalError(const char *msg) char * PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, char *prompt) { - size_t n = 1000; - char *p = (char *)PyMem_MALLOC(n); - char *q; - if (p == NULL) - return NULL; - fprintf(stderr, "%s", prompt); - q = fgets(p, n, sys_stdin); - if (q == NULL) { - *p = '\0'; - return p; - } - n = strlen(p); - if (n > 0 && p[n-1] != '\n') - p[n-1] = '\n'; - return (char *)PyMem_REALLOC(p, n+1); + size_t n = 1000; + char *p = (char *)PyMem_MALLOC(n); + char *q; + if (p == NULL) + return NULL; + fprintf(stderr, "%s", prompt); + q = fgets(p, n, sys_stdin); + if (q == NULL) { + *p = '\0'; + return p; + } + n = strlen(p); + if (n > 0 && p[n-1] != '\n') + p[n-1] = '\n'; + return (char *)PyMem_REALLOC(p, n+1); } /* No-nonsense fgets */ char * Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) { - return fgets(buf, n, stream); + return fgets(buf, n, stream); } @@ -165,9 +165,9 @@ Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) void PySys_WriteStderr(const char *format, ...) { - va_list va; + va_list va; - va_start(va, format); - vfprintf(stderr, format, va); - va_end(va); + va_start(va, format); + vfprintf(stderr, format, va); + va_end(va); } diff --git a/Parser/printgrammar.c b/Parser/printgrammar.c index ae180e1..3b54a05 100644 --- a/Parser/printgrammar.c +++ b/Parser/printgrammar.c @@ -13,105 +13,105 @@ static void printlabels(grammar *, FILE *); void printgrammar(grammar *g, FILE *fp) { - fprintf(fp, "/* Generated by Parser/pgen */\n\n"); - fprintf(fp, "#include \"pgenheaders.h\"\n"); - fprintf(fp, "#include \"grammar.h\"\n"); - fprintf(fp, "PyAPI_DATA(grammar) _PyParser_Grammar;\n"); - printdfas(g, fp); - printlabels(g, fp); - fprintf(fp, "grammar _PyParser_Grammar = {\n"); - fprintf(fp, "\t%d,\n", g->g_ndfas); - fprintf(fp, "\tdfas,\n"); - fprintf(fp, "\t{%d, labels},\n", g->g_ll.ll_nlabels); - fprintf(fp, "\t%d\n", g->g_start); - fprintf(fp, "};\n"); + fprintf(fp, "/* Generated by Parser/pgen */\n\n"); + fprintf(fp, "#include \"pgenheaders.h\"\n"); + fprintf(fp, "#include \"grammar.h\"\n"); + fprintf(fp, "PyAPI_DATA(grammar) _PyParser_Grammar;\n"); + printdfas(g, fp); + printlabels(g, fp); + fprintf(fp, "grammar _PyParser_Grammar = {\n"); + fprintf(fp, "\t%d,\n", g->g_ndfas); + fprintf(fp, "\tdfas,\n"); + fprintf(fp, "\t{%d, labels},\n", g->g_ll.ll_nlabels); + fprintf(fp, "\t%d\n", g->g_start); + fprintf(fp, "};\n"); } void printnonterminals(grammar *g, FILE *fp) { - dfa *d; - int i; - - fprintf(fp, "/* Generated by Parser/pgen */\n\n"); - - d = g->g_dfa; - for (i = g->g_ndfas; --i >= 0; d++) - fprintf(fp, "#define %s %d\n", d->d_name, d->d_type); + dfa *d; + int i; + + fprintf(fp, "/* Generated by Parser/pgen */\n\n"); + + d = g->g_dfa; + for (i = g->g_ndfas; --i >= 0; d++) + fprintf(fp, "#define %s %d\n", d->d_name, d->d_type); } static void printarcs(int i, dfa *d, FILE *fp) { - arc *a; - state *s; - int j, k; - - s = d->d_state; - for (j = 0; j < d->d_nstates; j++, s++) { - fprintf(fp, "static arc arcs_%d_%d[%d] = {\n", - i, j, s->s_narcs); - a = s->s_arc; - for (k = 0; k < s->s_narcs; k++, a++) - fprintf(fp, "\t{%d, %d},\n", a->a_lbl, a->a_arrow); - fprintf(fp, "};\n"); - } + arc *a; + state *s; + int j, k; + + s = d->d_state; + for (j = 0; j < d->d_nstates; j++, s++) { + fprintf(fp, "static arc arcs_%d_%d[%d] = {\n", + i, j, s->s_narcs); + a = s->s_arc; + for (k = 0; k < s->s_narcs; k++, a++) + fprintf(fp, "\t{%d, %d},\n", a->a_lbl, a->a_arrow); + fprintf(fp, "};\n"); + } } static void printstates(grammar *g, FILE *fp) { - state *s; - dfa *d; - int i, j; - - d = g->g_dfa; - for (i = 0; i < g->g_ndfas; i++, d++) { - printarcs(i, d, fp); - fprintf(fp, "static state states_%d[%d] = {\n", - i, d->d_nstates); - s = d->d_state; - for (j = 0; j < d->d_nstates; j++, s++) - fprintf(fp, "\t{%d, arcs_%d_%d},\n", - s->s_narcs, i, j); - fprintf(fp, "};\n"); - } + state *s; + dfa *d; + int i, j; + + d = g->g_dfa; + for (i = 0; i < g->g_ndfas; i++, d++) { + printarcs(i, d, fp); + fprintf(fp, "static state states_%d[%d] = {\n", + i, d->d_nstates); + s = d->d_state; + for (j = 0; j < d->d_nstates; j++, s++) + fprintf(fp, "\t{%d, arcs_%d_%d},\n", + s->s_narcs, i, j); + fprintf(fp, "};\n"); + } } static void printdfas(grammar *g, FILE *fp) { - dfa *d; - int i, j; - - printstates(g, fp); - fprintf(fp, "static dfa dfas[%d] = {\n", g->g_ndfas); - d = g->g_dfa; - for (i = 0; i < g->g_ndfas; i++, d++) { - fprintf(fp, "\t{%d, \"%s\", %d, %d, states_%d,\n", - d->d_type, d->d_name, d->d_initial, d->d_nstates, i); - fprintf(fp, "\t \""); - for (j = 0; j < NBYTES(g->g_ll.ll_nlabels); j++) - fprintf(fp, "\\%03o", d->d_first[j] & 0xff); - fprintf(fp, "\"},\n"); - } - fprintf(fp, "};\n"); + dfa *d; + int i, j; + + printstates(g, fp); + fprintf(fp, "static dfa dfas[%d] = {\n", g->g_ndfas); + d = g->g_dfa; + for (i = 0; i < g->g_ndfas; i++, d++) { + fprintf(fp, "\t{%d, \"%s\", %d, %d, states_%d,\n", + d->d_type, d->d_name, d->d_initial, d->d_nstates, i); + fprintf(fp, "\t \""); + for (j = 0; j < NBYTES(g->g_ll.ll_nlabels); j++) + fprintf(fp, "\\%03o", d->d_first[j] & 0xff); + fprintf(fp, "\"},\n"); + } + fprintf(fp, "};\n"); } static void printlabels(grammar *g, FILE *fp) { - label *l; - int i; - - fprintf(fp, "static label labels[%d] = {\n", g->g_ll.ll_nlabels); - l = g->g_ll.ll_label; - for (i = g->g_ll.ll_nlabels; --i >= 0; l++) { - if (l->lb_str == NULL) - fprintf(fp, "\t{%d, 0},\n", l->lb_type); - else - fprintf(fp, "\t{%d, \"%s\"},\n", - l->lb_type, l->lb_str); - } - fprintf(fp, "};\n"); + label *l; + int i; + + fprintf(fp, "static label labels[%d] = {\n", g->g_ll.ll_nlabels); + l = g->g_ll.ll_label; + for (i = g->g_ll.ll_nlabels; --i >= 0; l++) { + if (l->lb_str == NULL) + fprintf(fp, "\t{%d, 0},\n", l->lb_type); + else + fprintf(fp, "\t{%d, \"%s\"},\n", + l->lb_type, l->lb_str); + } + fprintf(fp, "};\n"); } diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index af99def..d985131 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -19,17 +19,17 @@ #endif /* PGEN */ #define is_potential_identifier_start(c) (\ - (c >= 'a' && c <= 'z')\ - || (c >= 'A' && c <= 'Z')\ - || c == '_'\ - || (c >= 128)) + (c >= 'a' && c <= 'z')\ + || (c >= 'A' && c <= 'Z')\ + || c == '_'\ + || (c >= 128)) #define is_potential_identifier_char(c) (\ - (c >= 'a' && c <= 'z')\ - || (c >= 'A' && c <= 'Z')\ - || (c >= '0' && c <= '9')\ - || c == '_'\ - || (c >= 128)) + (c >= 'a' && c <= 'z')\ + || (c >= 'A' && c <= 'Z')\ + || (c >= '0' && c <= '9')\ + || c == '_'\ + || (c >= 128)) extern char *PyOS_Readline(FILE *, FILE *, char *); /* Return malloc'ed string including trailing \n; @@ -48,62 +48,62 @@ static void tok_backup(struct tok_state *tok, int c); /* Token names */ char *_PyParser_TokenNames[] = { - "ENDMARKER", - "NAME", - "NUMBER", - "STRING", - "NEWLINE", - "INDENT", - "DEDENT", - "LPAR", - "RPAR", - "LSQB", - "RSQB", - "COLON", - "COMMA", - "SEMI", - "PLUS", - "MINUS", - "STAR", - "SLASH", - "VBAR", - "AMPER", - "LESS", - "GREATER", - "EQUAL", - "DOT", - "PERCENT", - "LBRACE", - "RBRACE", - "EQEQUAL", - "NOTEQUAL", - "LESSEQUAL", - "GREATEREQUAL", - "TILDE", - "CIRCUMFLEX", - "LEFTSHIFT", - "RIGHTSHIFT", - "DOUBLESTAR", - "PLUSEQUAL", - "MINEQUAL", - "STAREQUAL", - "SLASHEQUAL", - "PERCENTEQUAL", - "AMPEREQUAL", - "VBAREQUAL", - "CIRCUMFLEXEQUAL", - "LEFTSHIFTEQUAL", - "RIGHTSHIFTEQUAL", - "DOUBLESTAREQUAL", - "DOUBLESLASH", - "DOUBLESLASHEQUAL", - "AT", - "RARROW", - "ELLIPSIS", - /* This table must match the #defines in token.h! */ - "OP", - "<ERRORTOKEN>", - "<N_TOKENS>" + "ENDMARKER", + "NAME", + "NUMBER", + "STRING", + "NEWLINE", + "INDENT", + "DEDENT", + "LPAR", + "RPAR", + "LSQB", + "RSQB", + "COLON", + "COMMA", + "SEMI", + "PLUS", + "MINUS", + "STAR", + "SLASH", + "VBAR", + "AMPER", + "LESS", + "GREATER", + "EQUAL", + "DOT", + "PERCENT", + "LBRACE", + "RBRACE", + "EQEQUAL", + "NOTEQUAL", + "LESSEQUAL", + "GREATEREQUAL", + "TILDE", + "CIRCUMFLEX", + "LEFTSHIFT", + "RIGHTSHIFT", + "DOUBLESTAR", + "PLUSEQUAL", + "MINEQUAL", + "STAREQUAL", + "SLASHEQUAL", + "PERCENTEQUAL", + "AMPEREQUAL", + "VBAREQUAL", + "CIRCUMFLEXEQUAL", + "LEFTSHIFTEQUAL", + "RIGHTSHIFTEQUAL", + "DOUBLESTAREQUAL", + "DOUBLESLASH", + "DOUBLESLASHEQUAL", + "AT", + "RARROW", + "ELLIPSIS", + /* This table must match the #defines in token.h! */ + "OP", + "<ERRORTOKEN>", + "<N_TOKENS>" }; @@ -112,37 +112,37 @@ char *_PyParser_TokenNames[] = { static struct tok_state * tok_new(void) { - struct tok_state *tok = (struct tok_state *)PyMem_MALLOC( - sizeof(struct tok_state)); - if (tok == NULL) - return NULL; - tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL; - tok->done = E_OK; - tok->fp = NULL; - tok->tabsize = TABSIZE; - tok->indent = 0; - tok->indstack[0] = 0; - tok->atbol = 1; - tok->pendin = 0; - tok->prompt = tok->nextprompt = NULL; - tok->lineno = 0; - tok->level = 0; - tok->filename = NULL; - tok->altwarning = 1; - tok->alterror = 1; - tok->alttabsize = 1; - tok->altindstack[0] = 0; - tok->decoding_state = STATE_INIT; - tok->decoding_erred = 0; - tok->read_coding_spec = 0; - tok->enc = NULL; - tok->encoding = NULL; - tok->cont_line = 0; + struct tok_state *tok = (struct tok_state *)PyMem_MALLOC( + sizeof(struct tok_state)); + if (tok == NULL) + return NULL; + tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL; + tok->done = E_OK; + tok->fp = NULL; + tok->tabsize = TABSIZE; + tok->indent = 0; + tok->indstack[0] = 0; + tok->atbol = 1; + tok->pendin = 0; + tok->prompt = tok->nextprompt = NULL; + tok->lineno = 0; + tok->level = 0; + tok->filename = NULL; + tok->altwarning = 1; + tok->alterror = 1; + tok->alttabsize = 1; + tok->altindstack[0] = 0; + tok->decoding_state = STATE_INIT; + tok->decoding_erred = 0; + tok->read_coding_spec = 0; + tok->enc = NULL; + tok->encoding = NULL; + tok->cont_line = 0; #ifndef PGEN - tok->decoding_readline = NULL; - tok->decoding_buffer = NULL; + tok->decoding_readline = NULL; + tok->decoding_buffer = NULL; #endif - return tok; + return tok; } #ifdef PGEN @@ -150,19 +150,19 @@ tok_new(void) static char * decoding_fgets(char *s, int size, struct tok_state *tok) { - return fgets(s, size, tok->fp); + return fgets(s, size, tok->fp); } static int decoding_feof(struct tok_state *tok) { - return feof(tok->fp); + return feof(tok->fp); } static const char * decode_str(const char *str, struct tok_state *tok) { - return str; + return str; } #else /* PGEN */ @@ -170,51 +170,51 @@ decode_str(const char *str, struct tok_state *tok) static char * error_ret(struct tok_state *tok) /* XXX */ { - tok->decoding_erred = 1; - if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */ - PyMem_FREE(tok->buf); - tok->buf = NULL; - return NULL; /* as if it were EOF */ + tok->decoding_erred = 1; + if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */ + PyMem_FREE(tok->buf); + tok->buf = NULL; + return NULL; /* as if it were EOF */ } static char * new_string(const char *s, Py_ssize_t len) { - char* result = (char *)PyMem_MALLOC(len + 1); - if (result != NULL) { - memcpy(result, s, len); - result[len] = '\0'; - } - return result; + char* result = (char *)PyMem_MALLOC(len + 1); + if (result != NULL) { + memcpy(result, s, len); + result[len] = '\0'; + } + return result; } static char * -get_normal_name(char *s) /* for utf-8 and latin-1 */ +get_normal_name(char *s) /* for utf-8 and latin-1 */ { - char buf[13]; - int i; - for (i = 0; i < 12; i++) { - int c = s[i]; - if (c == '\0') - break; - else if (c == '_') - buf[i] = '-'; - else - buf[i] = tolower(c); - } - buf[i] = '\0'; - if (strcmp(buf, "utf-8") == 0 || - strncmp(buf, "utf-8-", 6) == 0) - return "utf-8"; - else if (strcmp(buf, "latin-1") == 0 || - strcmp(buf, "iso-8859-1") == 0 || - strcmp(buf, "iso-latin-1") == 0 || - strncmp(buf, "latin-1-", 8) == 0 || - strncmp(buf, "iso-8859-1-", 11) == 0 || - strncmp(buf, "iso-latin-1-", 12) == 0) - return "iso-8859-1"; - else - return s; + char buf[13]; + int i; + for (i = 0; i < 12; i++) { + int c = s[i]; + if (c == '\0') + break; + else if (c == '_') + buf[i] = '-'; + else + buf[i] = tolower(c); + } + buf[i] = '\0'; + if (strcmp(buf, "utf-8") == 0 || + strncmp(buf, "utf-8-", 6) == 0) + return "utf-8"; + else if (strcmp(buf, "latin-1") == 0 || + strcmp(buf, "iso-8859-1") == 0 || + strcmp(buf, "iso-latin-1") == 0 || + strncmp(buf, "latin-1-", 8) == 0 || + strncmp(buf, "iso-8859-1-", 11) == 0 || + strncmp(buf, "iso-latin-1-", 12) == 0) + return "iso-8859-1"; + else + return s; } /* Return the coding spec in S, or NULL if none is found. */ @@ -222,43 +222,43 @@ get_normal_name(char *s) /* for utf-8 and latin-1 */ static char * get_coding_spec(const char *s, Py_ssize_t size) { - Py_ssize_t i; - /* Coding spec must be in a comment, and that comment must be - * the only statement on the source code line. */ - for (i = 0; i < size - 6; i++) { - if (s[i] == '#') - break; - if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014') - return NULL; - } - for (; i < size - 6; i++) { /* XXX inefficient search */ - const char* t = s + i; - if (strncmp(t, "coding", 6) == 0) { - const char* begin = NULL; - t += 6; - if (t[0] != ':' && t[0] != '=') - continue; - do { - t++; - } while (t[0] == '\x20' || t[0] == '\t'); - - begin = t; - while (isalnum(Py_CHARMASK(t[0])) || - t[0] == '-' || t[0] == '_' || t[0] == '.') - t++; - - if (begin < t) { - char* r = new_string(begin, t - begin); - char* q = get_normal_name(r); - if (r != q) { - PyMem_FREE(r); - r = new_string(q, strlen(q)); - } - return r; - } - } - } - return NULL; + Py_ssize_t i; + /* Coding spec must be in a comment, and that comment must be + * the only statement on the source code line. */ + for (i = 0; i < size - 6; i++) { + if (s[i] == '#') + break; + if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014') + return NULL; + } + for (; i < size - 6; i++) { /* XXX inefficient search */ + const char* t = s + i; + if (strncmp(t, "coding", 6) == 0) { + const char* begin = NULL; + t += 6; + if (t[0] != ':' && t[0] != '=') + continue; + do { + t++; + } while (t[0] == '\x20' || t[0] == '\t'); + + begin = t; + while (isalnum(Py_CHARMASK(t[0])) || + t[0] == '-' || t[0] == '_' || t[0] == '.') + t++; + + if (begin < t) { + char* r = new_string(begin, t - begin); + char* q = get_normal_name(r); + if (r != q) { + PyMem_FREE(r); + r = new_string(q, strlen(q)); + } + return r; + } + } + } + return NULL; } /* Check whether the line contains a coding spec. If it does, @@ -268,42 +268,42 @@ get_coding_spec(const char *s, Py_ssize_t size) static int check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, - int set_readline(struct tok_state *, const char *)) + int set_readline(struct tok_state *, const char *)) { - char * cs; - int r = 1; - - if (tok->cont_line) - /* It's a continuation line, so it can't be a coding spec. */ - return 1; - cs = get_coding_spec(line, size); - if (cs != NULL) { - tok->read_coding_spec = 1; - if (tok->encoding == NULL) { - assert(tok->decoding_state == STATE_RAW); - if (strcmp(cs, "utf-8") == 0) { - tok->encoding = cs; - } else { - r = set_readline(tok, cs); - if (r) { - tok->encoding = cs; - tok->decoding_state = STATE_NORMAL; - } - else - PyMem_FREE(cs); - } - } else { /* then, compare cs with BOM */ - r = (strcmp(tok->encoding, cs) == 0); - PyMem_FREE(cs); - } - } - if (!r) { - cs = tok->encoding; - if (!cs) - cs = "with BOM"; - PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs); - } - return r; + char * cs; + int r = 1; + + if (tok->cont_line) + /* It's a continuation line, so it can't be a coding spec. */ + return 1; + cs = get_coding_spec(line, size); + if (cs != NULL) { + tok->read_coding_spec = 1; + if (tok->encoding == NULL) { + assert(tok->decoding_state == STATE_RAW); + if (strcmp(cs, "utf-8") == 0) { + tok->encoding = cs; + } else { + r = set_readline(tok, cs); + if (r) { + tok->encoding = cs; + tok->decoding_state = STATE_NORMAL; + } + else + PyMem_FREE(cs); + } + } else { /* then, compare cs with BOM */ + r = (strcmp(tok->encoding, cs) == 0); + PyMem_FREE(cs); + } + } + if (!r) { + cs = tok->encoding; + if (!cs) + cs = "with BOM"; + PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs); + } + return r; } /* See whether the file starts with a BOM. If it does, @@ -312,62 +312,62 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, static int check_bom(int get_char(struct tok_state *), - void unget_char(int, struct tok_state *), - int set_readline(struct tok_state *, const char *), - struct tok_state *tok) + void unget_char(int, struct tok_state *), + int set_readline(struct tok_state *, const char *), + struct tok_state *tok) { - int ch1, ch2, ch3; - ch1 = get_char(tok); - tok->decoding_state = STATE_RAW; - if (ch1 == EOF) { - return 1; - } else if (ch1 == 0xEF) { - ch2 = get_char(tok); - if (ch2 != 0xBB) { - unget_char(ch2, tok); - unget_char(ch1, tok); - return 1; - } - ch3 = get_char(tok); - if (ch3 != 0xBF) { - unget_char(ch3, tok); - unget_char(ch2, tok); - unget_char(ch1, tok); - return 1; - } + int ch1, ch2, ch3; + ch1 = get_char(tok); + tok->decoding_state = STATE_RAW; + if (ch1 == EOF) { + return 1; + } else if (ch1 == 0xEF) { + ch2 = get_char(tok); + if (ch2 != 0xBB) { + unget_char(ch2, tok); + unget_char(ch1, tok); + return 1; + } + ch3 = get_char(tok); + if (ch3 != 0xBF) { + unget_char(ch3, tok); + unget_char(ch2, tok); + unget_char(ch1, tok); + return 1; + } #if 0 - /* Disable support for UTF-16 BOMs until a decision - is made whether this needs to be supported. */ - } else if (ch1 == 0xFE) { - ch2 = get_char(tok); - if (ch2 != 0xFF) { - unget_char(ch2, tok); - unget_char(ch1, tok); - return 1; - } - if (!set_readline(tok, "utf-16-be")) - return 0; - tok->decoding_state = STATE_NORMAL; - } else if (ch1 == 0xFF) { - ch2 = get_char(tok); - if (ch2 != 0xFE) { - unget_char(ch2, tok); - unget_char(ch1, tok); - return 1; - } - if (!set_readline(tok, "utf-16-le")) - return 0; - tok->decoding_state = STATE_NORMAL; + /* Disable support for UTF-16 BOMs until a decision + is made whether this needs to be supported. */ + } else if (ch1 == 0xFE) { + ch2 = get_char(tok); + if (ch2 != 0xFF) { + unget_char(ch2, tok); + unget_char(ch1, tok); + return 1; + } + if (!set_readline(tok, "utf-16-be")) + return 0; + tok->decoding_state = STATE_NORMAL; + } else if (ch1 == 0xFF) { + ch2 = get_char(tok); + if (ch2 != 0xFE) { + unget_char(ch2, tok); + unget_char(ch1, tok); + return 1; + } + if (!set_readline(tok, "utf-16-le")) + return 0; + tok->decoding_state = STATE_NORMAL; #endif - } else { - unget_char(ch1, tok); - return 1; - } - if (tok->encoding != NULL) - PyMem_FREE(tok->encoding); - tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */ - /* No need to set_readline: input is already utf-8 */ - return 1; + } else { + unget_char(ch1, tok); + return 1; + } + if (tok->encoding != NULL) + PyMem_FREE(tok->encoding); + tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */ + /* No need to set_readline: input is already utf-8 */ + return 1; } /* Read a line of text from TOK into S, using the stream in TOK. @@ -376,74 +376,74 @@ check_bom(int get_char(struct tok_state *), On entry, tok->decoding_buffer will be one of: 1) NULL: need to call tok->decoding_readline to get a new line 2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and - stored the result in tok->decoding_buffer + stored the result in tok->decoding_buffer 3) PyByteArrayObject *: previous call to fp_readl did not have enough room - (in the s buffer) to copy entire contents of the line read - by tok->decoding_readline. tok->decoding_buffer has the overflow. - In this case, fp_readl is called in a loop (with an expanded buffer) - until the buffer ends with a '\n' (or until the end of the file is - reached): see tok_nextc and its calls to decoding_fgets. + (in the s buffer) to copy entire contents of the line read + by tok->decoding_readline. tok->decoding_buffer has the overflow. + In this case, fp_readl is called in a loop (with an expanded buffer) + until the buffer ends with a '\n' (or until the end of the file is + reached): see tok_nextc and its calls to decoding_fgets. */ static char * fp_readl(char *s, int size, struct tok_state *tok) { - PyObject* bufobj; - const char *buf; - Py_ssize_t buflen; - - /* Ask for one less byte so we can terminate it */ - assert(size > 0); - size--; - - if (tok->decoding_buffer) { - bufobj = tok->decoding_buffer; - Py_INCREF(bufobj); - } - else - { - bufobj = PyObject_CallObject(tok->decoding_readline, NULL); - if (bufobj == NULL) - goto error; - } - if (PyUnicode_CheckExact(bufobj)) - { - buf = _PyUnicode_AsStringAndSize(bufobj, &buflen); - if (buf == NULL) { - goto error; - } - } - else - { - buf = PyByteArray_AsString(bufobj); - if (buf == NULL) { - goto error; - } - buflen = PyByteArray_GET_SIZE(bufobj); - } - - Py_XDECREF(tok->decoding_buffer); - if (buflen > size) { - /* Too many chars, the rest goes into tok->decoding_buffer */ - tok->decoding_buffer = PyByteArray_FromStringAndSize(buf+size, - buflen-size); - if (tok->decoding_buffer == NULL) - goto error; - buflen = size; - } - else - tok->decoding_buffer = NULL; - - memcpy(s, buf, buflen); - s[buflen] = '\0'; - if (buflen == 0) /* EOF */ - s = NULL; - Py_DECREF(bufobj); - return s; + PyObject* bufobj; + const char *buf; + Py_ssize_t buflen; + + /* Ask for one less byte so we can terminate it */ + assert(size > 0); + size--; + + if (tok->decoding_buffer) { + bufobj = tok->decoding_buffer; + Py_INCREF(bufobj); + } + else + { + bufobj = PyObject_CallObject(tok->decoding_readline, NULL); + if (bufobj == NULL) + goto error; + } + if (PyUnicode_CheckExact(bufobj)) + { + buf = _PyUnicode_AsStringAndSize(bufobj, &buflen); + if (buf == NULL) { + goto error; + } + } + else + { + buf = PyByteArray_AsString(bufobj); + if (buf == NULL) { + goto error; + } + buflen = PyByteArray_GET_SIZE(bufobj); + } + + Py_XDECREF(tok->decoding_buffer); + if (buflen > size) { + /* Too many chars, the rest goes into tok->decoding_buffer */ + tok->decoding_buffer = PyByteArray_FromStringAndSize(buf+size, + buflen-size); + if (tok->decoding_buffer == NULL) + goto error; + buflen = size; + } + else + tok->decoding_buffer = NULL; + + memcpy(s, buf, buflen); + s[buflen] = '\0'; + if (buflen == 0) /* EOF */ + s = NULL; + Py_DECREF(bufobj); + return s; error: - Py_XDECREF(bufobj); - return error_ret(tok); + Py_XDECREF(bufobj); + return error_ret(tok); } /* Set the readline function for TOK to a StreamReader's @@ -459,49 +459,49 @@ error: static int fp_setreadl(struct tok_state *tok, const char* enc) { - PyObject *readline = NULL, *stream = NULL, *io = NULL; - - io = PyImport_ImportModuleNoBlock("io"); - if (io == NULL) - goto cleanup; - - if (tok->filename) - stream = PyObject_CallMethod(io, "open", "ssis", - tok->filename, "r", -1, enc); - else - stream = PyObject_CallMethod(io, "open", "isisOOO", - fileno(tok->fp), "r", -1, enc, Py_None, Py_None, Py_False); - if (stream == NULL) - goto cleanup; - - Py_XDECREF(tok->decoding_readline); - readline = PyObject_GetAttrString(stream, "readline"); - tok->decoding_readline = readline; - - /* The file has been reopened; parsing will restart from - * the beginning of the file, we have to reset the line number. - * But this function has been called from inside tok_nextc() which - * will increment lineno before it returns. So we set it -1 so that - * the next call to tok_nextc() will start with tok->lineno == 0. - */ - tok->lineno = -1; + PyObject *readline = NULL, *stream = NULL, *io = NULL; + + io = PyImport_ImportModuleNoBlock("io"); + if (io == NULL) + goto cleanup; + + if (tok->filename) + stream = PyObject_CallMethod(io, "open", "ssis", + tok->filename, "r", -1, enc); + else + stream = PyObject_CallMethod(io, "open", "isisOOO", + fileno(tok->fp), "r", -1, enc, Py_None, Py_None, Py_False); + if (stream == NULL) + goto cleanup; + + Py_XDECREF(tok->decoding_readline); + readline = PyObject_GetAttrString(stream, "readline"); + tok->decoding_readline = readline; + + /* The file has been reopened; parsing will restart from + * the beginning of the file, we have to reset the line number. + * But this function has been called from inside tok_nextc() which + * will increment lineno before it returns. So we set it -1 so that + * the next call to tok_nextc() will start with tok->lineno == 0. + */ + tok->lineno = -1; cleanup: - Py_XDECREF(stream); - Py_XDECREF(io); - return readline != NULL; + Py_XDECREF(stream); + Py_XDECREF(io); + return readline != NULL; } /* Fetch the next byte from TOK. */ static int fp_getc(struct tok_state *tok) { - return getc(tok->fp); + return getc(tok->fp); } /* Unfetch the last byte back into TOK. */ static void fp_ungetc(int c, struct tok_state *tok) { - ungetc(c, tok->fp); + ungetc(c, tok->fp); } /* Check whether the characters at s start a valid @@ -509,27 +509,27 @@ static void fp_ungetc(int c, struct tok_state *tok) { the sequence if yes, 0 if not. */ static int valid_utf8(const unsigned char* s) { - int expected = 0; - int length; - if (*s < 0x80) - /* single-byte code */ - return 1; - if (*s < 0xc0) - /* following byte */ - return 0; - if (*s < 0xE0) - expected = 1; - else if (*s < 0xF0) - expected = 2; - else if (*s < 0xF8) - expected = 3; - else - return 0; - length = expected + 1; - for (; expected; expected--) - if (s[expected] < 0x80 || s[expected] >= 0xC0) - return 0; - return length; + int expected = 0; + int length; + if (*s < 0x80) + /* single-byte code */ + return 1; + if (*s < 0xc0) + /* following byte */ + return 0; + if (*s < 0xE0) + expected = 1; + else if (*s < 0xF0) + expected = 2; + else if (*s < 0xF8) + expected = 3; + else + return 0; + length = expected + 1; + for (; expected; expected--) + if (s[expected] < 0x80 || s[expected] >= 0xC0) + return 0; + return length; } /* Read a line of input from TOK. Determine encoding @@ -538,95 +538,95 @@ static int valid_utf8(const unsigned char* s) static char * decoding_fgets(char *s, int size, struct tok_state *tok) { - char *line = NULL; - int badchar = 0; - for (;;) { - if (tok->decoding_state == STATE_NORMAL) { - /* We already have a codec associated with - this input. */ - line = fp_readl(s, size, tok); - break; - } else if (tok->decoding_state == STATE_RAW) { - /* We want a 'raw' read. */ - line = Py_UniversalNewlineFgets(s, size, - tok->fp, NULL); - break; - } else { - /* We have not yet determined the encoding. - If an encoding is found, use the file-pointer - reader functions from now on. */ - if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok)) - return error_ret(tok); - assert(tok->decoding_state != STATE_INIT); - } - } - if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) { - if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) { - return error_ret(tok); - } - } + char *line = NULL; + int badchar = 0; + for (;;) { + if (tok->decoding_state == STATE_NORMAL) { + /* We already have a codec associated with + this input. */ + line = fp_readl(s, size, tok); + break; + } else if (tok->decoding_state == STATE_RAW) { + /* We want a 'raw' read. */ + line = Py_UniversalNewlineFgets(s, size, + tok->fp, NULL); + break; + } else { + /* We have not yet determined the encoding. + If an encoding is found, use the file-pointer + reader functions from now on. */ + if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok)) + return error_ret(tok); + assert(tok->decoding_state != STATE_INIT); + } + } + if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) { + if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) { + return error_ret(tok); + } + } #ifndef PGEN - /* The default encoding is UTF-8, so make sure we don't have any - non-UTF-8 sequences in it. */ - if (line && !tok->encoding) { - unsigned char *c; - int length; - for (c = (unsigned char *)line; *c; c += length) - if (!(length = valid_utf8(c))) { - badchar = *c; - break; - } - } - if (badchar) { - char buf[500]; - /* Need to add 1 to the line number, since this line - has not been counted, yet. */ - sprintf(buf, - "Non-UTF-8 code starting with '\\x%.2x' " - "in file %.200s on line %i, " - "but no encoding declared; " - "see http://python.org/dev/peps/pep-0263/ for details", - badchar, tok->filename, tok->lineno + 1); - PyErr_SetString(PyExc_SyntaxError, buf); - return error_ret(tok); - } + /* The default encoding is UTF-8, so make sure we don't have any + non-UTF-8 sequences in it. */ + if (line && !tok->encoding) { + unsigned char *c; + int length; + for (c = (unsigned char *)line; *c; c += length) + if (!(length = valid_utf8(c))) { + badchar = *c; + break; + } + } + if (badchar) { + char buf[500]; + /* Need to add 1 to the line number, since this line + has not been counted, yet. */ + sprintf(buf, + "Non-UTF-8 code starting with '\\x%.2x' " + "in file %.200s on line %i, " + "but no encoding declared; " + "see http://python.org/dev/peps/pep-0263/ for details", + badchar, tok->filename, tok->lineno + 1); + PyErr_SetString(PyExc_SyntaxError, buf); + return error_ret(tok); + } #endif - return line; + return line; } static int decoding_feof(struct tok_state *tok) { - if (tok->decoding_state != STATE_NORMAL) { - return feof(tok->fp); - } else { - PyObject* buf = tok->decoding_buffer; - if (buf == NULL) { - buf = PyObject_CallObject(tok->decoding_readline, NULL); - if (buf == NULL) { - error_ret(tok); - return 1; - } else { - tok->decoding_buffer = buf; - } - } - return PyObject_Length(buf) == 0; - } + if (tok->decoding_state != STATE_NORMAL) { + return feof(tok->fp); + } else { + PyObject* buf = tok->decoding_buffer; + if (buf == NULL) { + buf = PyObject_CallObject(tok->decoding_readline, NULL); + if (buf == NULL) { + error_ret(tok); + return 1; + } else { + tok->decoding_buffer = buf; + } + } + return PyObject_Length(buf) == 0; + } } /* Fetch a byte from TOK, using the string buffer. */ static int buf_getc(struct tok_state *tok) { - return Py_CHARMASK(*tok->str++); + return Py_CHARMASK(*tok->str++); } /* Unfetch a byte from TOK, using the string buffer. */ static void buf_ungetc(int c, struct tok_state *tok) { - tok->str--; - assert(Py_CHARMASK(*tok->str) == c); /* tok->cur may point to read-only segment */ + tok->str--; + assert(Py_CHARMASK(*tok->str) == c); /* tok->cur may point to read-only segment */ } /* Set the readline function for TOK to ENC. For the string-based @@ -634,8 +634,8 @@ buf_ungetc(int c, struct tok_state *tok) { static int buf_setreadl(struct tok_state *tok, const char* enc) { - tok->enc = enc; - return 1; + tok->enc = enc; + return 1; } /* Return a UTF-8 encoding Python string object from the @@ -643,13 +643,13 @@ buf_setreadl(struct tok_state *tok, const char* enc) { static PyObject * translate_into_utf8(const char* str, const char* enc) { - PyObject *utf8; - PyObject* buf = PyUnicode_Decode(str, strlen(str), enc, NULL); - if (buf == NULL) - return NULL; - utf8 = PyUnicode_AsUTF8String(buf); - Py_DECREF(buf); - return utf8; + PyObject *utf8; + PyObject* buf = PyUnicode_Decode(str, strlen(str), enc, NULL); + if (buf == NULL) + return NULL; + utf8 = PyUnicode_AsUTF8String(buf); + Py_DECREF(buf); + return utf8; } /* Decode a byte string STR for use as the buffer of TOK. @@ -659,53 +659,53 @@ translate_into_utf8(const char* str, const char* enc) { static const char * decode_str(const char *str, struct tok_state *tok) { - PyObject* utf8 = NULL; - const char *s; - const char *newl[2] = {NULL, NULL}; - int lineno = 0; - tok->enc = NULL; - tok->str = str; - if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok)) - return error_ret(tok); - str = tok->str; /* string after BOM if any */ - assert(str); - if (tok->enc != NULL) { - utf8 = translate_into_utf8(str, tok->enc); - if (utf8 == NULL) - return error_ret(tok); - str = PyBytes_AsString(utf8); - } - for (s = str;; s++) { - if (*s == '\0') break; - else if (*s == '\n') { - assert(lineno < 2); - newl[lineno] = s; - lineno++; - if (lineno == 2) break; - } - } - tok->enc = NULL; - /* need to check line 1 and 2 separately since check_coding_spec - assumes a single line as input */ - if (newl[0]) { - if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl)) - return error_ret(tok); - if (tok->enc == NULL && newl[1]) { - if (!check_coding_spec(newl[0]+1, newl[1] - newl[0], - tok, buf_setreadl)) - return error_ret(tok); - } - } - if (tok->enc != NULL) { - assert(utf8 == NULL); - utf8 = translate_into_utf8(str, tok->enc); - if (utf8 == NULL) - return error_ret(tok); - str = PyBytes_AS_STRING(utf8); - } - assert(tok->decoding_buffer == NULL); - tok->decoding_buffer = utf8; /* CAUTION */ - return str; + PyObject* utf8 = NULL; + const char *s; + const char *newl[2] = {NULL, NULL}; + int lineno = 0; + tok->enc = NULL; + tok->str = str; + if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok)) + return error_ret(tok); + str = tok->str; /* string after BOM if any */ + assert(str); + if (tok->enc != NULL) { + utf8 = translate_into_utf8(str, tok->enc); + if (utf8 == NULL) + return error_ret(tok); + str = PyBytes_AsString(utf8); + } + for (s = str;; s++) { + if (*s == '\0') break; + else if (*s == '\n') { + assert(lineno < 2); + newl[lineno] = s; + lineno++; + if (lineno == 2) break; + } + } + tok->enc = NULL; + /* need to check line 1 and 2 separately since check_coding_spec + assumes a single line as input */ + if (newl[0]) { + if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl)) + return error_ret(tok); + if (tok->enc == NULL && newl[1]) { + if (!check_coding_spec(newl[0]+1, newl[1] - newl[0], + tok, buf_setreadl)) + return error_ret(tok); + } + } + if (tok->enc != NULL) { + assert(utf8 == NULL); + utf8 = translate_into_utf8(str, tok->enc); + if (utf8 == NULL) + return error_ret(tok); + str = PyBytes_AS_STRING(utf8); + } + assert(tok->decoding_buffer == NULL); + tok->decoding_buffer = utf8; /* CAUTION */ + return str; } #endif /* PGEN */ @@ -715,40 +715,40 @@ decode_str(const char *str, struct tok_state *tok) struct tok_state * PyTokenizer_FromString(const char *str) { - struct tok_state *tok = tok_new(); - if (tok == NULL) - return NULL; - str = (char *)decode_str(str, tok); - if (str == NULL) { - PyTokenizer_Free(tok); - return NULL; - } - - /* XXX: constify members. */ - tok->buf = tok->cur = tok->end = tok->inp = (char*)str; - return tok; + struct tok_state *tok = tok_new(); + if (tok == NULL) + return NULL; + str = (char *)decode_str(str, tok); + if (str == NULL) { + PyTokenizer_Free(tok); + return NULL; + } + + /* XXX: constify members. */ + tok->buf = tok->cur = tok->end = tok->inp = (char*)str; + return tok; } struct tok_state * PyTokenizer_FromUTF8(const char *str) { - struct tok_state *tok = tok_new(); - if (tok == NULL) - return NULL; - tok->decoding_state = STATE_RAW; - tok->read_coding_spec = 1; - tok->enc = NULL; - tok->str = str; - tok->encoding = (char *)PyMem_MALLOC(6); - if (!tok->encoding) { - PyTokenizer_Free(tok); - return NULL; - } - strcpy(tok->encoding, "utf-8"); - - /* XXX: constify members. */ - tok->buf = tok->cur = tok->end = tok->inp = (char*)str; - return tok; + struct tok_state *tok = tok_new(); + if (tok == NULL) + return NULL; + tok->decoding_state = STATE_RAW; + tok->read_coding_spec = 1; + tok->enc = NULL; + tok->str = str; + tok->encoding = (char *)PyMem_MALLOC(6); + if (!tok->encoding) { + PyTokenizer_Free(tok); + return NULL; + } + strcpy(tok->encoding, "utf-8"); + + /* XXX: constify members. */ + tok->buf = tok->cur = tok->end = tok->inp = (char*)str; + return tok; } @@ -757,30 +757,30 @@ PyTokenizer_FromUTF8(const char *str) struct tok_state * PyTokenizer_FromFile(FILE *fp, char* enc, char *ps1, char *ps2) { - struct tok_state *tok = tok_new(); - if (tok == NULL) - return NULL; - if ((tok->buf = (char *)PyMem_MALLOC(BUFSIZ)) == NULL) { - PyTokenizer_Free(tok); - return NULL; - } - tok->cur = tok->inp = tok->buf; - tok->end = tok->buf + BUFSIZ; - tok->fp = fp; - tok->prompt = ps1; - tok->nextprompt = ps2; - if (enc != NULL) { - /* Must copy encoding declaration since it - gets copied into the parse tree. */ - tok->encoding = PyMem_MALLOC(strlen(enc)+1); - if (!tok->encoding) { - PyTokenizer_Free(tok); - return NULL; - } - strcpy(tok->encoding, enc); - tok->decoding_state = STATE_NORMAL; - } - return tok; + struct tok_state *tok = tok_new(); + if (tok == NULL) + return NULL; + if ((tok->buf = (char *)PyMem_MALLOC(BUFSIZ)) == NULL) { + PyTokenizer_Free(tok); + return NULL; + } + tok->cur = tok->inp = tok->buf; + tok->end = tok->buf + BUFSIZ; + tok->fp = fp; + tok->prompt = ps1; + tok->nextprompt = ps2; + if (enc != NULL) { + /* Must copy encoding declaration since it + gets copied into the parse tree. */ + tok->encoding = PyMem_MALLOC(strlen(enc)+1); + if (!tok->encoding) { + PyTokenizer_Free(tok); + return NULL; + } + strcpy(tok->encoding, enc); + tok->decoding_state = STATE_NORMAL; + } + return tok; } @@ -789,15 +789,15 @@ PyTokenizer_FromFile(FILE *fp, char* enc, char *ps1, char *ps2) void PyTokenizer_Free(struct tok_state *tok) { - if (tok->encoding != NULL) - PyMem_FREE(tok->encoding); + if (tok->encoding != NULL) + PyMem_FREE(tok->encoding); #ifndef PGEN - Py_XDECREF(tok->decoding_readline); - Py_XDECREF(tok->decoding_buffer); + Py_XDECREF(tok->decoding_readline); + Py_XDECREF(tok->decoding_buffer); #endif - if (tok->fp != NULL && tok->buf != NULL) - PyMem_FREE(tok->buf); - PyMem_FREE(tok); + if (tok->fp != NULL && tok->buf != NULL) + PyMem_FREE(tok->buf); + PyMem_FREE(tok); } /* Get next char, updating state; error code goes into tok->done */ @@ -805,188 +805,188 @@ PyTokenizer_Free(struct tok_state *tok) static int tok_nextc(register struct tok_state *tok) { - for (;;) { - if (tok->cur != tok->inp) { - return Py_CHARMASK(*tok->cur++); /* Fast path */ - } - if (tok->done != E_OK) - return EOF; - if (tok->fp == NULL) { - char *end = strchr(tok->inp, '\n'); - if (end != NULL) - end++; - else { - end = strchr(tok->inp, '\0'); - if (end == tok->inp) { - tok->done = E_EOF; - return EOF; - } - } - if (tok->start == NULL) - tok->buf = tok->cur; - tok->line_start = tok->cur; - tok->lineno++; - tok->inp = end; - return Py_CHARMASK(*tok->cur++); - } - if (tok->prompt != NULL) { - char *newtok = PyOS_Readline(stdin, stdout, tok->prompt); + for (;;) { + if (tok->cur != tok->inp) { + return Py_CHARMASK(*tok->cur++); /* Fast path */ + } + if (tok->done != E_OK) + return EOF; + if (tok->fp == NULL) { + char *end = strchr(tok->inp, '\n'); + if (end != NULL) + end++; + else { + end = strchr(tok->inp, '\0'); + if (end == tok->inp) { + tok->done = E_EOF; + return EOF; + } + } + if (tok->start == NULL) + tok->buf = tok->cur; + tok->line_start = tok->cur; + tok->lineno++; + tok->inp = end; + return Py_CHARMASK(*tok->cur++); + } + if (tok->prompt != NULL) { + char *newtok = PyOS_Readline(stdin, stdout, tok->prompt); #ifndef PGEN - if (tok->encoding && newtok && *newtok) { - /* Recode to UTF-8 */ - Py_ssize_t buflen; - const char* buf; - PyObject *u = translate_into_utf8(newtok, tok->encoding); - PyMem_FREE(newtok); - if (!u) { - tok->done = E_DECODE; - return EOF; - } - buflen = PyBytes_GET_SIZE(u); - buf = PyBytes_AS_STRING(u); - if (!buf) { - Py_DECREF(u); - tok->done = E_DECODE; - return EOF; - } - newtok = PyMem_MALLOC(buflen+1); - strcpy(newtok, buf); - Py_DECREF(u); - } + if (tok->encoding && newtok && *newtok) { + /* Recode to UTF-8 */ + Py_ssize_t buflen; + const char* buf; + PyObject *u = translate_into_utf8(newtok, tok->encoding); + PyMem_FREE(newtok); + if (!u) { + tok->done = E_DECODE; + return EOF; + } + buflen = PyBytes_GET_SIZE(u); + buf = PyBytes_AS_STRING(u); + if (!buf) { + Py_DECREF(u); + tok->done = E_DECODE; + return EOF; + } + newtok = PyMem_MALLOC(buflen+1); + strcpy(newtok, buf); + Py_DECREF(u); + } #endif - if (tok->nextprompt != NULL) - tok->prompt = tok->nextprompt; - if (newtok == NULL) - tok->done = E_INTR; - else if (*newtok == '\0') { - PyMem_FREE(newtok); - tok->done = E_EOF; - } - else if (tok->start != NULL) { - size_t start = tok->start - tok->buf; - size_t oldlen = tok->cur - tok->buf; - size_t newlen = oldlen + strlen(newtok); - char *buf = tok->buf; - buf = (char *)PyMem_REALLOC(buf, newlen+1); - tok->lineno++; - if (buf == NULL) { - PyMem_FREE(tok->buf); - tok->buf = NULL; - PyMem_FREE(newtok); - tok->done = E_NOMEM; - return EOF; - } - tok->buf = buf; - tok->cur = tok->buf + oldlen; - tok->line_start = tok->cur; - strcpy(tok->buf + oldlen, newtok); - PyMem_FREE(newtok); - tok->inp = tok->buf + newlen; - tok->end = tok->inp + 1; - tok->start = tok->buf + start; - } - else { - tok->lineno++; - if (tok->buf != NULL) - PyMem_FREE(tok->buf); - tok->buf = newtok; - tok->line_start = tok->buf; - tok->cur = tok->buf; - tok->line_start = tok->buf; - tok->inp = strchr(tok->buf, '\0'); - tok->end = tok->inp + 1; - } - } - else { - int done = 0; - Py_ssize_t cur = 0; - char *pt; - if (tok->start == NULL) { - if (tok->buf == NULL) { - tok->buf = (char *) - PyMem_MALLOC(BUFSIZ); - if (tok->buf == NULL) { - tok->done = E_NOMEM; - return EOF; - } - tok->end = tok->buf + BUFSIZ; - } - if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf), - tok) == NULL) { - tok->done = E_EOF; - done = 1; - } - else { - tok->done = E_OK; - tok->inp = strchr(tok->buf, '\0'); - done = tok->inp[-1] == '\n'; - } - } - else { - cur = tok->cur - tok->buf; - if (decoding_feof(tok)) { - tok->done = E_EOF; - done = 1; - } - else - tok->done = E_OK; - } - tok->lineno++; - /* Read until '\n' or EOF */ - while (!done) { - Py_ssize_t curstart = tok->start == NULL ? -1 : - tok->start - tok->buf; - Py_ssize_t curvalid = tok->inp - tok->buf; - Py_ssize_t newsize = curvalid + BUFSIZ; - char *newbuf = tok->buf; - newbuf = (char *)PyMem_REALLOC(newbuf, - newsize); - if (newbuf == NULL) { - tok->done = E_NOMEM; - tok->cur = tok->inp; - return EOF; - } - tok->buf = newbuf; - tok->inp = tok->buf + curvalid; - tok->end = tok->buf + newsize; - tok->start = curstart < 0 ? NULL : - tok->buf + curstart; - if (decoding_fgets(tok->inp, - (int)(tok->end - tok->inp), - tok) == NULL) { - /* Break out early on decoding - errors, as tok->buf will be NULL - */ - if (tok->decoding_erred) - return EOF; - /* Last line does not end in \n, - fake one */ - strcpy(tok->inp, "\n"); - } - tok->inp = strchr(tok->inp, '\0'); - done = tok->inp[-1] == '\n'; - } - if (tok->buf != NULL) { - tok->cur = tok->buf + cur; - tok->line_start = tok->cur; - /* replace "\r\n" with "\n" */ - /* For Mac leave the \r, giving a syntax error */ - pt = tok->inp - 2; - if (pt >= tok->buf && *pt == '\r') { - *pt++ = '\n'; - *pt = '\0'; - tok->inp = pt; - } - } - } - if (tok->done != E_OK) { - if (tok->prompt != NULL) - PySys_WriteStderr("\n"); - tok->cur = tok->inp; - return EOF; - } - } - /*NOTREACHED*/ + if (tok->nextprompt != NULL) + tok->prompt = tok->nextprompt; + if (newtok == NULL) + tok->done = E_INTR; + else if (*newtok == '\0') { + PyMem_FREE(newtok); + tok->done = E_EOF; + } + else if (tok->start != NULL) { + size_t start = tok->start - tok->buf; + size_t oldlen = tok->cur - tok->buf; + size_t newlen = oldlen + strlen(newtok); + char *buf = tok->buf; + buf = (char *)PyMem_REALLOC(buf, newlen+1); + tok->lineno++; + if (buf == NULL) { + PyMem_FREE(tok->buf); + tok->buf = NULL; + PyMem_FREE(newtok); + tok->done = E_NOMEM; + return EOF; + } + tok->buf = buf; + tok->cur = tok->buf + oldlen; + tok->line_start = tok->cur; + strcpy(tok->buf + oldlen, newtok); + PyMem_FREE(newtok); + tok->inp = tok->buf + newlen; + tok->end = tok->inp + 1; + tok->start = tok->buf + start; + } + else { + tok->lineno++; + if (tok->buf != NULL) + PyMem_FREE(tok->buf); + tok->buf = newtok; + tok->line_start = tok->buf; + tok->cur = tok->buf; + tok->line_start = tok->buf; + tok->inp = strchr(tok->buf, '\0'); + tok->end = tok->inp + 1; + } + } + else { + int done = 0; + Py_ssize_t cur = 0; + char *pt; + if (tok->start == NULL) { + if (tok->buf == NULL) { + tok->buf = (char *) + PyMem_MALLOC(BUFSIZ); + if (tok->buf == NULL) { + tok->done = E_NOMEM; + return EOF; + } + tok->end = tok->buf + BUFSIZ; + } + if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf), + tok) == NULL) { + tok->done = E_EOF; + done = 1; + } + else { + tok->done = E_OK; + tok->inp = strchr(tok->buf, '\0'); + done = tok->inp[-1] == '\n'; + } + } + else { + cur = tok->cur - tok->buf; + if (decoding_feof(tok)) { + tok->done = E_EOF; + done = 1; + } + else + tok->done = E_OK; + } + tok->lineno++; + /* Read until '\n' or EOF */ + while (!done) { + Py_ssize_t curstart = tok->start == NULL ? -1 : + tok->start - tok->buf; + Py_ssize_t curvalid = tok->inp - tok->buf; + Py_ssize_t newsize = curvalid + BUFSIZ; + char *newbuf = tok->buf; + newbuf = (char *)PyMem_REALLOC(newbuf, + newsize); + if (newbuf == NULL) { + tok->done = E_NOMEM; + tok->cur = tok->inp; + return EOF; + } + tok->buf = newbuf; + tok->inp = tok->buf + curvalid; + tok->end = tok->buf + newsize; + tok->start = curstart < 0 ? NULL : + tok->buf + curstart; + if (decoding_fgets(tok->inp, + (int)(tok->end - tok->inp), + tok) == NULL) { + /* Break out early on decoding + errors, as tok->buf will be NULL + */ + if (tok->decoding_erred) + return EOF; + /* Last line does not end in \n, + fake one */ + strcpy(tok->inp, "\n"); + } + tok->inp = strchr(tok->inp, '\0'); + done = tok->inp[-1] == '\n'; + } + if (tok->buf != NULL) { + tok->cur = tok->buf + cur; + tok->line_start = tok->cur; + /* replace "\r\n" with "\n" */ + /* For Mac leave the \r, giving a syntax error */ + pt = tok->inp - 2; + if (pt >= tok->buf && *pt == '\r') { + *pt++ = '\n'; + *pt = '\0'; + tok->inp = pt; + } + } + } + if (tok->done != E_OK) { + if (tok->prompt != NULL) + PySys_WriteStderr("\n"); + tok->cur = tok->inp; + return EOF; + } + } + /*NOTREACHED*/ } @@ -995,12 +995,12 @@ tok_nextc(register struct tok_state *tok) static void tok_backup(register struct tok_state *tok, register int c) { - if (c != EOF) { - if (--tok->cur < tok->buf) - Py_FatalError("tok_backup: beginning of buffer"); - if (*tok->cur != c) - *tok->cur = c; - } + if (c != EOF) { + if (--tok->cur < tok->buf) + Py_FatalError("tok_backup: beginning of buffer"); + if (*tok->cur != c) + *tok->cur = c; + } } @@ -1009,181 +1009,181 @@ tok_backup(register struct tok_state *tok, register int c) int PyToken_OneChar(int c) { - switch (c) { - case '(': return LPAR; - case ')': return RPAR; - case '[': return LSQB; - case ']': return RSQB; - case ':': return COLON; - case ',': return COMMA; - case ';': return SEMI; - case '+': return PLUS; - case '-': return MINUS; - case '*': return STAR; - case '/': return SLASH; - case '|': return VBAR; - case '&': return AMPER; - case '<': return LESS; - case '>': return GREATER; - case '=': return EQUAL; - case '.': return DOT; - case '%': return PERCENT; - case '{': return LBRACE; - case '}': return RBRACE; - case '^': return CIRCUMFLEX; - case '~': return TILDE; - case '@': return AT; - default: return OP; - } + switch (c) { + case '(': return LPAR; + case ')': return RPAR; + case '[': return LSQB; + case ']': return RSQB; + case ':': return COLON; + case ',': return COMMA; + case ';': return SEMI; + case '+': return PLUS; + case '-': return MINUS; + case '*': return STAR; + case '/': return SLASH; + case '|': return VBAR; + case '&': return AMPER; + case '<': return LESS; + case '>': return GREATER; + case '=': return EQUAL; + case '.': return DOT; + case '%': return PERCENT; + case '{': return LBRACE; + case '}': return RBRACE; + case '^': return CIRCUMFLEX; + case '~': return TILDE; + case '@': return AT; + default: return OP; + } } int PyToken_TwoChars(int c1, int c2) { - switch (c1) { - case '=': - switch (c2) { - case '=': return EQEQUAL; - } - break; - case '!': - switch (c2) { - case '=': return NOTEQUAL; - } - break; - case '<': - switch (c2) { - case '>': return NOTEQUAL; - case '=': return LESSEQUAL; - case '<': return LEFTSHIFT; - } - break; - case '>': - switch (c2) { - case '=': return GREATEREQUAL; - case '>': return RIGHTSHIFT; - } - break; - case '+': - switch (c2) { - case '=': return PLUSEQUAL; - } - break; - case '-': - switch (c2) { - case '=': return MINEQUAL; - case '>': return RARROW; - } - break; - case '*': - switch (c2) { - case '*': return DOUBLESTAR; - case '=': return STAREQUAL; - } - break; - case '/': - switch (c2) { - case '/': return DOUBLESLASH; - case '=': return SLASHEQUAL; - } - break; - case '|': - switch (c2) { - case '=': return VBAREQUAL; - } - break; - case '%': - switch (c2) { - case '=': return PERCENTEQUAL; - } - break; - case '&': - switch (c2) { - case '=': return AMPEREQUAL; - } - break; - case '^': - switch (c2) { - case '=': return CIRCUMFLEXEQUAL; - } - break; - } - return OP; + switch (c1) { + case '=': + switch (c2) { + case '=': return EQEQUAL; + } + break; + case '!': + switch (c2) { + case '=': return NOTEQUAL; + } + break; + case '<': + switch (c2) { + case '>': return NOTEQUAL; + case '=': return LESSEQUAL; + case '<': return LEFTSHIFT; + } + break; + case '>': + switch (c2) { + case '=': return GREATEREQUAL; + case '>': return RIGHTSHIFT; + } + break; + case '+': + switch (c2) { + case '=': return PLUSEQUAL; + } + break; + case '-': + switch (c2) { + case '=': return MINEQUAL; + case '>': return RARROW; + } + break; + case '*': + switch (c2) { + case '*': return DOUBLESTAR; + case '=': return STAREQUAL; + } + break; + case '/': + switch (c2) { + case '/': return DOUBLESLASH; + case '=': return SLASHEQUAL; + } + break; + case '|': + switch (c2) { + case '=': return VBAREQUAL; + } + break; + case '%': + switch (c2) { + case '=': return PERCENTEQUAL; + } + break; + case '&': + switch (c2) { + case '=': return AMPEREQUAL; + } + break; + case '^': + switch (c2) { + case '=': return CIRCUMFLEXEQUAL; + } + break; + } + return OP; } int PyToken_ThreeChars(int c1, int c2, int c3) { - switch (c1) { - case '<': - switch (c2) { - case '<': - switch (c3) { - case '=': - return LEFTSHIFTEQUAL; - } - break; - } - break; - case '>': - switch (c2) { - case '>': - switch (c3) { - case '=': - return RIGHTSHIFTEQUAL; - } - break; - } - break; - case '*': - switch (c2) { - case '*': - switch (c3) { - case '=': - return DOUBLESTAREQUAL; - } - break; - } - break; - case '/': - switch (c2) { - case '/': - switch (c3) { - case '=': - return DOUBLESLASHEQUAL; - } - break; - } - break; + switch (c1) { + case '<': + switch (c2) { + case '<': + switch (c3) { + case '=': + return LEFTSHIFTEQUAL; + } + break; + } + break; + case '>': + switch (c2) { + case '>': + switch (c3) { + case '=': + return RIGHTSHIFTEQUAL; + } + break; + } + break; + case '*': + switch (c2) { + case '*': + switch (c3) { + case '=': + return DOUBLESTAREQUAL; + } + break; + } + break; + case '/': + switch (c2) { + case '/': + switch (c3) { + case '=': + return DOUBLESLASHEQUAL; + } + break; + } + break; + case '.': + switch (c2) { case '.': - switch (c2) { - case '.': - switch (c3) { - case '.': - return ELLIPSIS; - } - break; - } - break; - } - return OP; + switch (c3) { + case '.': + return ELLIPSIS; + } + break; + } + break; + } + return OP; } static int indenterror(struct tok_state *tok) { - if (tok->alterror) { - tok->done = E_TABSPACE; - tok->cur = tok->inp; - return 1; - } - if (tok->altwarning) { - PySys_WriteStderr("%s: inconsistent use of tabs and spaces " - "in indentation\n", tok->filename); - tok->altwarning = 0; - } - return 0; + if (tok->alterror) { + tok->done = E_TABSPACE; + tok->cur = tok->inp; + return 1; + } + if (tok->altwarning) { + PySys_WriteStderr("%s: inconsistent use of tabs and spaces " + "in indentation\n", tok->filename); + tok->altwarning = 0; + } + return 0; } #ifdef PGEN @@ -1193,23 +1193,23 @@ indenterror(struct tok_state *tok) static int verify_identifier(struct tok_state *tok) { - PyObject *s; - int result; - s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL); - if (s == NULL) { - if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { - PyErr_Clear(); - tok->done = E_IDENTIFIER; - } else { - tok->done = E_ERROR; - } - return 0; - } - result = PyUnicode_IsIdentifier(s); - Py_DECREF(s); - if (result == 0) - tok->done = E_IDENTIFIER; - return result; + PyObject *s; + int result; + s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL); + if (s == NULL) { + if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { + PyErr_Clear(); + tok->done = E_IDENTIFIER; + } else { + tok->done = E_ERROR; + } + return 0; + } + result = PyUnicode_IsIdentifier(s); + Py_DECREF(s); + if (result == 0) + tok->done = E_IDENTIFIER; + return result; } #endif @@ -1218,413 +1218,413 @@ verify_identifier(struct tok_state *tok) static int tok_get(register struct tok_state *tok, char **p_start, char **p_end) { - register int c; - int blankline, nonascii; + register int c; + int blankline, nonascii; - *p_start = *p_end = NULL; + *p_start = *p_end = NULL; nextline: - tok->start = NULL; - blankline = 0; - - /* Get indentation level */ - if (tok->atbol) { - register int col = 0; - register int altcol = 0; - tok->atbol = 0; - for (;;) { - c = tok_nextc(tok); - if (c == ' ') - col++, altcol++; - else if (c == '\t') { - col = (col/tok->tabsize + 1) * tok->tabsize; - altcol = (altcol/tok->alttabsize + 1) - * tok->alttabsize; - } - else if (c == '\014') /* Control-L (formfeed) */ - col = altcol = 0; /* For Emacs users */ - else - break; - } - tok_backup(tok, c); - if (c == '#' || c == '\n') { - /* Lines with only whitespace and/or comments - shouldn't affect the indentation and are - not passed to the parser as NEWLINE tokens, - except *totally* empty lines in interactive - mode, which signal the end of a command group. */ - if (col == 0 && c == '\n' && tok->prompt != NULL) - blankline = 0; /* Let it through */ - else - blankline = 1; /* Ignore completely */ - /* We can't jump back right here since we still - may need to skip to the end of a comment */ - } - if (!blankline && tok->level == 0) { - if (col == tok->indstack[tok->indent]) { - /* No change */ - if (altcol != tok->altindstack[tok->indent]) { - if (indenterror(tok)) - return ERRORTOKEN; - } - } - else if (col > tok->indstack[tok->indent]) { - /* Indent -- always one */ - if (tok->indent+1 >= MAXINDENT) { - tok->done = E_TOODEEP; - tok->cur = tok->inp; - return ERRORTOKEN; - } - if (altcol <= tok->altindstack[tok->indent]) { - if (indenterror(tok)) - return ERRORTOKEN; - } - tok->pendin++; - tok->indstack[++tok->indent] = col; - tok->altindstack[tok->indent] = altcol; - } - else /* col < tok->indstack[tok->indent] */ { - /* Dedent -- any number, must be consistent */ - while (tok->indent > 0 && - col < tok->indstack[tok->indent]) { - tok->pendin--; - tok->indent--; - } - if (col != tok->indstack[tok->indent]) { - tok->done = E_DEDENT; - tok->cur = tok->inp; - return ERRORTOKEN; - } - if (altcol != tok->altindstack[tok->indent]) { - if (indenterror(tok)) - return ERRORTOKEN; - } - } - } - } - - tok->start = tok->cur; - - /* Return pending indents/dedents */ - if (tok->pendin != 0) { - if (tok->pendin < 0) { - tok->pendin++; - return DEDENT; - } - else { - tok->pendin--; - return INDENT; - } - } + tok->start = NULL; + blankline = 0; + + /* Get indentation level */ + if (tok->atbol) { + register int col = 0; + register int altcol = 0; + tok->atbol = 0; + for (;;) { + c = tok_nextc(tok); + if (c == ' ') + col++, altcol++; + else if (c == '\t') { + col = (col/tok->tabsize + 1) * tok->tabsize; + altcol = (altcol/tok->alttabsize + 1) + * tok->alttabsize; + } + else if (c == '\014') /* Control-L (formfeed) */ + col = altcol = 0; /* For Emacs users */ + else + break; + } + tok_backup(tok, c); + if (c == '#' || c == '\n') { + /* Lines with only whitespace and/or comments + shouldn't affect the indentation and are + not passed to the parser as NEWLINE tokens, + except *totally* empty lines in interactive + mode, which signal the end of a command group. */ + if (col == 0 && c == '\n' && tok->prompt != NULL) + blankline = 0; /* Let it through */ + else + blankline = 1; /* Ignore completely */ + /* We can't jump back right here since we still + may need to skip to the end of a comment */ + } + if (!blankline && tok->level == 0) { + if (col == tok->indstack[tok->indent]) { + /* No change */ + if (altcol != tok->altindstack[tok->indent]) { + if (indenterror(tok)) + return ERRORTOKEN; + } + } + else if (col > tok->indstack[tok->indent]) { + /* Indent -- always one */ + if (tok->indent+1 >= MAXINDENT) { + tok->done = E_TOODEEP; + tok->cur = tok->inp; + return ERRORTOKEN; + } + if (altcol <= tok->altindstack[tok->indent]) { + if (indenterror(tok)) + return ERRORTOKEN; + } + tok->pendin++; + tok->indstack[++tok->indent] = col; + tok->altindstack[tok->indent] = altcol; + } + else /* col < tok->indstack[tok->indent] */ { + /* Dedent -- any number, must be consistent */ + while (tok->indent > 0 && + col < tok->indstack[tok->indent]) { + tok->pendin--; + tok->indent--; + } + if (col != tok->indstack[tok->indent]) { + tok->done = E_DEDENT; + tok->cur = tok->inp; + return ERRORTOKEN; + } + if (altcol != tok->altindstack[tok->indent]) { + if (indenterror(tok)) + return ERRORTOKEN; + } + } + } + } + + tok->start = tok->cur; + + /* Return pending indents/dedents */ + if (tok->pendin != 0) { + if (tok->pendin < 0) { + tok->pendin++; + return DEDENT; + } + else { + tok->pendin--; + return INDENT; + } + } again: - tok->start = NULL; - /* Skip spaces */ - do { - c = tok_nextc(tok); - } while (c == ' ' || c == '\t' || c == '\014'); - - /* Set start of current token */ - tok->start = tok->cur - 1; - - /* Skip comment */ - if (c == '#') - while (c != EOF && c != '\n') - c = tok_nextc(tok); - - /* Check for EOF and errors now */ - if (c == EOF) { - return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN; - } - - /* Identifier (most frequent token!) */ - nonascii = 0; - if (is_potential_identifier_start(c)) { - /* Process b"", r"" and br"" */ - if (c == 'b' || c == 'B') { - c = tok_nextc(tok); - if (c == '"' || c == '\'') - goto letter_quote; - } - if (c == 'r' || c == 'R') { - c = tok_nextc(tok); - if (c == '"' || c == '\'') - goto letter_quote; - } - while (is_potential_identifier_char(c)) { - if (c >= 128) - nonascii = 1; - c = tok_nextc(tok); - } - tok_backup(tok, c); - if (nonascii && - !verify_identifier(tok)) { - tok->done = E_IDENTIFIER; - return ERRORTOKEN; - } - *p_start = tok->start; - *p_end = tok->cur; - return NAME; - } - - /* Newline */ - if (c == '\n') { - tok->atbol = 1; - if (blankline || tok->level > 0) - goto nextline; - *p_start = tok->start; - *p_end = tok->cur - 1; /* Leave '\n' out of the string */ - tok->cont_line = 0; - return NEWLINE; - } - - /* Period or number starting with period? */ - if (c == '.') { - c = tok_nextc(tok); - if (isdigit(c)) { - goto fraction; - } else if (c == '.') { - c = tok_nextc(tok); - if (c == '.') { - *p_start = tok->start; - *p_end = tok->cur; - return ELLIPSIS; - } else { - tok_backup(tok, c); - } - tok_backup(tok, '.'); - } else { - tok_backup(tok, c); - } - *p_start = tok->start; - *p_end = tok->cur; - return DOT; - } - - /* Number */ - if (isdigit(c)) { - if (c == '0') { - /* Hex, octal or binary -- maybe. */ - c = tok_nextc(tok); - if (c == '.') - goto fraction; + tok->start = NULL; + /* Skip spaces */ + do { + c = tok_nextc(tok); + } while (c == ' ' || c == '\t' || c == '\014'); + + /* Set start of current token */ + tok->start = tok->cur - 1; + + /* Skip comment */ + if (c == '#') + while (c != EOF && c != '\n') + c = tok_nextc(tok); + + /* Check for EOF and errors now */ + if (c == EOF) { + return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN; + } + + /* Identifier (most frequent token!) */ + nonascii = 0; + if (is_potential_identifier_start(c)) { + /* Process b"", r"" and br"" */ + if (c == 'b' || c == 'B') { + c = tok_nextc(tok); + if (c == '"' || c == '\'') + goto letter_quote; + } + if (c == 'r' || c == 'R') { + c = tok_nextc(tok); + if (c == '"' || c == '\'') + goto letter_quote; + } + while (is_potential_identifier_char(c)) { + if (c >= 128) + nonascii = 1; + c = tok_nextc(tok); + } + tok_backup(tok, c); + if (nonascii && + !verify_identifier(tok)) { + tok->done = E_IDENTIFIER; + return ERRORTOKEN; + } + *p_start = tok->start; + *p_end = tok->cur; + return NAME; + } + + /* Newline */ + if (c == '\n') { + tok->atbol = 1; + if (blankline || tok->level > 0) + goto nextline; + *p_start = tok->start; + *p_end = tok->cur - 1; /* Leave '\n' out of the string */ + tok->cont_line = 0; + return NEWLINE; + } + + /* Period or number starting with period? */ + if (c == '.') { + c = tok_nextc(tok); + if (isdigit(c)) { + goto fraction; + } else if (c == '.') { + c = tok_nextc(tok); + if (c == '.') { + *p_start = tok->start; + *p_end = tok->cur; + return ELLIPSIS; + } else { + tok_backup(tok, c); + } + tok_backup(tok, '.'); + } else { + tok_backup(tok, c); + } + *p_start = tok->start; + *p_end = tok->cur; + return DOT; + } + + /* Number */ + if (isdigit(c)) { + if (c == '0') { + /* Hex, octal or binary -- maybe. */ + c = tok_nextc(tok); + if (c == '.') + goto fraction; #ifndef WITHOUT_COMPLEX - if (c == 'j' || c == 'J') - goto imaginary; + if (c == 'j' || c == 'J') + goto imaginary; #endif - if (c == 'x' || c == 'X') { - - /* Hex */ - c = tok_nextc(tok); - if (!isxdigit(c)) { - tok->done = E_TOKEN; - tok_backup(tok, c); - return ERRORTOKEN; - } - do { - c = tok_nextc(tok); - } while (isxdigit(c)); - } - else if (c == 'o' || c == 'O') { - /* Octal */ - c = tok_nextc(tok); - if (c < '0' || c >= '8') { - tok->done = E_TOKEN; - tok_backup(tok, c); - return ERRORTOKEN; - } - do { - c = tok_nextc(tok); - } while ('0' <= c && c < '8'); - } - else if (c == 'b' || c == 'B') { - /* Binary */ - c = tok_nextc(tok); - if (c != '0' && c != '1') { - tok->done = E_TOKEN; - tok_backup(tok, c); - return ERRORTOKEN; - } - do { - c = tok_nextc(tok); - } while (c == '0' || c == '1'); - } - else { - int nonzero = 0; - /* maybe old-style octal; c is first char of it */ - /* in any case, allow '0' as a literal */ - while (c == '0') - c = tok_nextc(tok); - while (isdigit(c)) { - nonzero = 1; - c = tok_nextc(tok); - } - if (c == '.') - goto fraction; - else if (c == 'e' || c == 'E') - goto exponent; + if (c == 'x' || c == 'X') { + + /* Hex */ + c = tok_nextc(tok); + if (!isxdigit(c)) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while (isxdigit(c)); + } + else if (c == 'o' || c == 'O') { + /* Octal */ + c = tok_nextc(tok); + if (c < '0' || c >= '8') { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while ('0' <= c && c < '8'); + } + else if (c == 'b' || c == 'B') { + /* Binary */ + c = tok_nextc(tok); + if (c != '0' && c != '1') { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while (c == '0' || c == '1'); + } + else { + int nonzero = 0; + /* maybe old-style octal; c is first char of it */ + /* in any case, allow '0' as a literal */ + while (c == '0') + c = tok_nextc(tok); + while (isdigit(c)) { + nonzero = 1; + c = tok_nextc(tok); + } + if (c == '.') + goto fraction; + else if (c == 'e' || c == 'E') + goto exponent; #ifndef WITHOUT_COMPLEX - else if (c == 'j' || c == 'J') - goto imaginary; + else if (c == 'j' || c == 'J') + goto imaginary; #endif - else if (nonzero) { - tok->done = E_TOKEN; - tok_backup(tok, c); - return ERRORTOKEN; - } - } - } - else { - /* Decimal */ - do { - c = tok_nextc(tok); - } while (isdigit(c)); - { - /* Accept floating point numbers. */ - if (c == '.') { - fraction: - /* Fraction */ - do { - c = tok_nextc(tok); - } while (isdigit(c)); - } - if (c == 'e' || c == 'E') { - exponent: - /* Exponent part */ - c = tok_nextc(tok); - if (c == '+' || c == '-') - c = tok_nextc(tok); - if (!isdigit(c)) { - tok->done = E_TOKEN; - tok_backup(tok, c); - return ERRORTOKEN; - } - do { - c = tok_nextc(tok); - } while (isdigit(c)); - } + else if (nonzero) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + } + } + else { + /* Decimal */ + do { + c = tok_nextc(tok); + } while (isdigit(c)); + { + /* Accept floating point numbers. */ + if (c == '.') { + fraction: + /* Fraction */ + do { + c = tok_nextc(tok); + } while (isdigit(c)); + } + if (c == 'e' || c == 'E') { + exponent: + /* Exponent part */ + c = tok_nextc(tok); + if (c == '+' || c == '-') + c = tok_nextc(tok); + if (!isdigit(c)) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while (isdigit(c)); + } #ifndef WITHOUT_COMPLEX - if (c == 'j' || c == 'J') - /* Imaginary part */ - imaginary: - c = tok_nextc(tok); + if (c == 'j' || c == 'J') + /* Imaginary part */ + imaginary: + c = tok_nextc(tok); #endif - } - } - tok_backup(tok, c); - *p_start = tok->start; - *p_end = tok->cur; - return NUMBER; - } + } + } + tok_backup(tok, c); + *p_start = tok->start; + *p_end = tok->cur; + return NUMBER; + } letter_quote: - /* String */ - if (c == '\'' || c == '"') { - int quote = c; - int quote_size = 1; /* 1 or 3 */ - int end_quote_size = 0; - - /* Find the quote size and start of string */ - c = tok_nextc(tok); - if (c == quote) { - c = tok_nextc(tok); - if (c == quote) - quote_size = 3; - else - end_quote_size = 1; /* empty string found */ - } - if (c != quote) - tok_backup(tok, c); - - /* Get rest of string */ - while (end_quote_size != quote_size) { - c = tok_nextc(tok); - if (c == EOF) { - if (quote_size == 3) - tok->done = E_EOFS; - else - tok->done = E_EOLS; - tok->cur = tok->inp; - return ERRORTOKEN; - } - if (quote_size == 1 && c == '\n') { - tok->done = E_EOLS; - tok->cur = tok->inp; - return ERRORTOKEN; - } - if (c == quote) - end_quote_size += 1; - else { - end_quote_size = 0; - if (c == '\\') - c = tok_nextc(tok); /* skip escaped char */ - } - } - - *p_start = tok->start; - *p_end = tok->cur; - return STRING; - } - - /* Line continuation */ - if (c == '\\') { - c = tok_nextc(tok); - if (c != '\n') { - tok->done = E_LINECONT; - tok->cur = tok->inp; - return ERRORTOKEN; - } - tok->cont_line = 1; - goto again; /* Read next line */ - } - - /* Check for two-character token */ - { - int c2 = tok_nextc(tok); - int token = PyToken_TwoChars(c, c2); - if (token != OP) { - int c3 = tok_nextc(tok); - int token3 = PyToken_ThreeChars(c, c2, c3); - if (token3 != OP) { - token = token3; - } else { - tok_backup(tok, c3); - } - *p_start = tok->start; - *p_end = tok->cur; - return token; - } - tok_backup(tok, c2); - } - - /* Keep track of parentheses nesting level */ - switch (c) { - case '(': - case '[': - case '{': - tok->level++; - break; - case ')': - case ']': - case '}': - tok->level--; - break; - } - - /* Punctuation character */ - *p_start = tok->start; - *p_end = tok->cur; - return PyToken_OneChar(c); + /* String */ + if (c == '\'' || c == '"') { + int quote = c; + int quote_size = 1; /* 1 or 3 */ + int end_quote_size = 0; + + /* Find the quote size and start of string */ + c = tok_nextc(tok); + if (c == quote) { + c = tok_nextc(tok); + if (c == quote) + quote_size = 3; + else + end_quote_size = 1; /* empty string found */ + } + if (c != quote) + tok_backup(tok, c); + + /* Get rest of string */ + while (end_quote_size != quote_size) { + c = tok_nextc(tok); + if (c == EOF) { + if (quote_size == 3) + tok->done = E_EOFS; + else + tok->done = E_EOLS; + tok->cur = tok->inp; + return ERRORTOKEN; + } + if (quote_size == 1 && c == '\n') { + tok->done = E_EOLS; + tok->cur = tok->inp; + return ERRORTOKEN; + } + if (c == quote) + end_quote_size += 1; + else { + end_quote_size = 0; + if (c == '\\') + c = tok_nextc(tok); /* skip escaped char */ + } + } + + *p_start = tok->start; + *p_end = tok->cur; + return STRING; + } + + /* Line continuation */ + if (c == '\\') { + c = tok_nextc(tok); + if (c != '\n') { + tok->done = E_LINECONT; + tok->cur = tok->inp; + return ERRORTOKEN; + } + tok->cont_line = 1; + goto again; /* Read next line */ + } + + /* Check for two-character token */ + { + int c2 = tok_nextc(tok); + int token = PyToken_TwoChars(c, c2); + if (token != OP) { + int c3 = tok_nextc(tok); + int token3 = PyToken_ThreeChars(c, c2, c3); + if (token3 != OP) { + token = token3; + } else { + tok_backup(tok, c3); + } + *p_start = tok->start; + *p_end = tok->cur; + return token; + } + tok_backup(tok, c2); + } + + /* Keep track of parentheses nesting level */ + switch (c) { + case '(': + case '[': + case '{': + tok->level++; + break; + case ')': + case ']': + case '}': + tok->level--; + break; + } + + /* Punctuation character */ + *p_start = tok->start; + *p_end = tok->cur; + return PyToken_OneChar(c); } int PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end) { - int result = tok_get(tok, p_start, p_end); - if (tok->decoding_erred) { - result = ERRORTOKEN; - tok->done = E_DECODE; - } - return result; + int result = tok_get(tok, p_start, p_end); + if (tok->decoding_erred) { + result = ERRORTOKEN; + tok->done = E_DECODE; + } + return result; } /* Get -*- encoding -*- from a Python file. @@ -1639,34 +1639,34 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end) char * PyTokenizer_FindEncoding(int fd) { - struct tok_state *tok; - FILE *fp; - char *p_start =NULL , *p_end =NULL , *encoding = NULL; - - fd = dup(fd); - if (fd < 0) { - return NULL; - } - fp = fdopen(fd, "r"); - if (fp == NULL) { - return NULL; - } - tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL); - if (tok == NULL) { - fclose(fp); - return NULL; - } - while (tok->lineno < 2 && tok->done == E_OK) { - PyTokenizer_Get(tok, &p_start, &p_end); - } - fclose(fp); - if (tok->encoding) { - encoding = (char *)PyMem_MALLOC(strlen(tok->encoding) + 1); - if (encoding) - strcpy(encoding, tok->encoding); - } - PyTokenizer_Free(tok); - return encoding; + struct tok_state *tok; + FILE *fp; + char *p_start =NULL , *p_end =NULL , *encoding = NULL; + + fd = dup(fd); + if (fd < 0) { + return NULL; + } + fp = fdopen(fd, "r"); + if (fp == NULL) { + return NULL; + } + tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL); + if (tok == NULL) { + fclose(fp); + return NULL; + } + while (tok->lineno < 2 && tok->done == E_OK) { + PyTokenizer_Get(tok, &p_start, &p_end); + } + fclose(fp); + if (tok->encoding) { + encoding = (char *)PyMem_MALLOC(strlen(tok->encoding) + 1); + if (encoding) + strcpy(encoding, tok->encoding); + } + PyTokenizer_Free(tok); + return encoding; } #ifdef Py_DEBUG @@ -1674,9 +1674,9 @@ PyTokenizer_FindEncoding(int fd) void tok_dump(int type, char *start, char *end) { - printf("%s", _PyParser_TokenNames[type]); - if (type == NAME || type == NUMBER || type == STRING || type == OP) - printf("(%.*s)", (int)(end - start), start); + printf("%s", _PyParser_TokenNames[type]); + if (type == NAME || type == NUMBER || type == STRING || type == OP) + printf("(%.*s)", (int)(end - start), start); } #endif diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index e3328f1..5a6d060 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -8,66 +8,66 @@ extern "C" { /* Tokenizer interface */ -#include "token.h" /* For token types */ +#include "token.h" /* For token types */ -#define MAXINDENT 100 /* Max indentation level */ +#define MAXINDENT 100 /* Max indentation level */ enum decoding_state { - STATE_INIT, - STATE_RAW, - STATE_NORMAL, /* have a codec associated with input */ + STATE_INIT, + STATE_RAW, + STATE_NORMAL, /* have a codec associated with input */ }; /* Tokenizer state */ struct tok_state { - /* Input state; buf <= cur <= inp <= end */ - /* NB an entire line is held in the buffer */ - char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */ - char *cur; /* Next character in buffer */ - char *inp; /* End of data in buffer */ - char *end; /* End of input buffer if buf != NULL */ - char *start; /* Start of current token if not NULL */ - int done; /* E_OK normally, E_EOF at EOF, otherwise error code */ - /* NB If done != E_OK, cur must be == inp!!! */ - FILE *fp; /* Rest of input; NULL if tokenizing a string */ - int tabsize; /* Tab spacing */ - int indent; /* Current indentation index */ - int indstack[MAXINDENT]; /* Stack of indents */ - int atbol; /* Nonzero if at begin of new line */ - int pendin; /* Pending indents (if > 0) or dedents (if < 0) */ - char *prompt, *nextprompt; /* For interactive prompting */ - int lineno; /* Current line number */ - int level; /* () [] {} Parentheses nesting level */ - /* Used to allow free continuations inside them */ - /* Stuff for checking on different tab sizes */ - const char *filename; /* For error messages */ - int altwarning; /* Issue warning if alternate tabs don't match */ - int alterror; /* Issue error if alternate tabs don't match */ - int alttabsize; /* Alternate tab spacing */ - int altindstack[MAXINDENT]; /* Stack of alternate indents */ - /* Stuff for PEP 0263 */ - enum decoding_state decoding_state; - int decoding_erred; /* whether erred in decoding */ - int read_coding_spec; /* whether 'coding:...' has been read */ - char *encoding; /* Source encoding. */ - int cont_line; /* whether we are in a continuation line. */ - const char* line_start; /* pointer to start of current line */ + /* Input state; buf <= cur <= inp <= end */ + /* NB an entire line is held in the buffer */ + char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */ + char *cur; /* Next character in buffer */ + char *inp; /* End of data in buffer */ + char *end; /* End of input buffer if buf != NULL */ + char *start; /* Start of current token if not NULL */ + int done; /* E_OK normally, E_EOF at EOF, otherwise error code */ + /* NB If done != E_OK, cur must be == inp!!! */ + FILE *fp; /* Rest of input; NULL if tokenizing a string */ + int tabsize; /* Tab spacing */ + int indent; /* Current indentation index */ + int indstack[MAXINDENT]; /* Stack of indents */ + int atbol; /* Nonzero if at begin of new line */ + int pendin; /* Pending indents (if > 0) or dedents (if < 0) */ + char *prompt, *nextprompt; /* For interactive prompting */ + int lineno; /* Current line number */ + int level; /* () [] {} Parentheses nesting level */ + /* Used to allow free continuations inside them */ + /* Stuff for checking on different tab sizes */ + const char *filename; /* For error messages */ + int altwarning; /* Issue warning if alternate tabs don't match */ + int alterror; /* Issue error if alternate tabs don't match */ + int alttabsize; /* Alternate tab spacing */ + int altindstack[MAXINDENT]; /* Stack of alternate indents */ + /* Stuff for PEP 0263 */ + enum decoding_state decoding_state; + int decoding_erred; /* whether erred in decoding */ + int read_coding_spec; /* whether 'coding:...' has been read */ + char *encoding; /* Source encoding. */ + int cont_line; /* whether we are in a continuation line. */ + const char* line_start; /* pointer to start of current line */ #ifndef PGEN - PyObject *decoding_readline; /* codecs.open(...).readline */ - PyObject *decoding_buffer; + PyObject *decoding_readline; /* codecs.open(...).readline */ + PyObject *decoding_buffer; #endif - const char* enc; /* Encoding for the current str. */ - const char* str; + const char* enc; /* Encoding for the current str. */ + const char* str; }; extern struct tok_state *PyTokenizer_FromString(const char *); extern struct tok_state *PyTokenizer_FromUTF8(const char *); extern struct tok_state *PyTokenizer_FromFile(FILE *, char*, - char *, char *); + char *, char *); extern void PyTokenizer_Free(struct tok_state *); extern int PyTokenizer_Get(struct tok_state *, char **, char **); -extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok, - int len, int *offset); +extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok, + int len, int *offset); extern char * PyTokenizer_FindEncoding(int); #ifdef __cplusplus |