Initial revision

author: Guido van Rossum <guido@python.org> 1990-10-14 12:07:46 (GMT)
committer: Guido van Rossum <guido@python.org> 1990-10-14 12:07:46 (GMT)
commit: 85a5fbbdfea617f6cc8fae82c9e8c2b5c424436d (patch)
tree: a1bf57db1c75e2a7029c8f2fad5f8dba4b9ba25c /Parser
parent: c636014c430620325f8d213e9ba10d925991b8d7 (diff)
download: cpython-85a5fbbdfea617f6cc8fae82c9e8c2b5c424436d.zip
cpython-85a5fbbdfea617f6cc8fae82c9e8c2b5c424436d.tar.gz
cpython-85a5fbbdfea617f6cc8fae82c9e8c2b5c424436d.tar.bz2
19 files changed, 2945 insertions, 0 deletions
diff --git a/Parser/acceler.c b/Parser/acceler.c
new file mode 100644
index 0000000..7ff10b2
--- /dev/null
+++ b/Parser/acceler.c
@@ -0,0 +1,101 @@
+/* Parser accelerator module */
+
+#include <stdio.h>
+
+#include "PROTO.h"
+#include "grammar.h"
+#include "token.h"
+#include "malloc.h"
+
+static void
+fixstate(g, d, s)
+	grammar *g;
+	dfa *d;
+	state *s;
+{
+	arc *a;
+	int k;
+	int *accel;
+	int nl = g->g_ll.ll_nlabels;
+	s->s_accept = 0;
+	accel = NEW(int, nl);
+	for (k = 0; k < nl; k++)
+		accel[k] = -1;
+	a = s->s_arc;
+	for (k = s->s_narcs; --k >= 0; a++) {
+		int lbl = a->a_lbl;
+		label *l = &g->g_ll.ll_label[lbl];
+		int type = l->lb_type;
+		if (a->a_arrow >= (1 << 7)) {
+			printf("XXX too many states!\n");
+			continue;
+		}
+		if (ISNONTERMINAL(type)) {
+			dfa *d1 = finddfa(g, type);
+			int ibit;
+			if (type - NT_OFFSET >= (1 << 7)) {
+				printf("XXX too high nonterminal number!\n");
+				continue;
+			}
+			for (ibit = 0; ibit < g->g_ll.ll_nlabels; ibit++) {
+				if (testbit(d1->d_first, ibit)) {
+					if (accel[ibit] != -1)
+						printf("XXX ambiguity!\n");
+					accel[ibit] = a->a_arrow | (1 << 7) |
+						((type - NT_OFFSET) << 8);
+				}
+			}
+		}
+		else if (lbl == EMPTY)
+			s->s_accept = 1;
+		else if (lbl >= 0 && lbl < nl)
+			accel[lbl] = a->a_arrow;
+	}
+	while (nl > 0 && accel[nl-1] == -1)
+		nl--;
+	for (k = 0; k < nl && accel[k] == -1;)
+		k++;
+	if (k < nl) {
+		int i;
+		s->s_accel = NEW(int, nl-k);
+		if (s->s_accel == NULL) {
+			fprintf(stderr, "no mem to add parser accelerators\n");
+			exit(1);
+		}
+		s->s_lower = k;
+		s->s_upper = nl;
+		for (i = 0; k < nl; i++, k++)
+			s->s_accel[i] = accel[k];
+	}
+	DEL(accel);
+}
+
+static void
+fixdfa(g, d)
+	grammar *g;
+	dfa *d;
+{
+	state *s;
+	int j;
+	s = d->d_state;
+	for (j = 0; j < d->d_nstates; j++, s++)
+		fixstate(g, d, s);
+}
+
+void
+addaccelerators(g)
+	grammar *g;
+{
+	dfa *d;
+	int i;
+#ifdef DEBUG
+	printf("Adding parser accellerators ...\n");
+#endif
+	d = g->g_dfa;
+	for (i = g->g_ndfas; --i >= 0; d++)
+		fixdfa(g, d);
+	g->g_accel = 1;
+#ifdef DEBUG
+	printf("Done.\n");
+#endif
+}
diff --git a/Parser/assert.h b/Parser/assert.h
new file mode 100644
index 0000000..e8edafc
--- /dev/null
+++ b/Parser/assert.h
@@ -0,0 +1 @@
+#define assert(e) { if (!(e)) { printf("Assertion failed\n"); abort(); } }
diff --git a/Parser/bitset.c b/Parser/bitset.c
new file mode 100644
index 0000000..b43fb82
--- /dev/null
+++ b/Parser/bitset.c
@@ -0,0 +1,76 @@
+/* Bitset primitives */
+
+#include "PROTO.h"
+#include "malloc.h"
+#include "bitset.h"
+
+bitset
+newbitset(nbits)
+	int nbits;
+{
+	int nbytes = NBYTES(nbits);
+	bitset ss = NEW(BYTE, nbytes);
+	
+	if (ss == NULL)
+		fatal("no mem for bitset");
+	
+	ss += nbytes;
+	while (--nbytes >= 0)
+		*--ss = 0;
+	return ss;
+}
+
+void
+delbitset(ss)
+	bitset ss;
+{
+	DEL(ss);
+}
+
+int
+addbit(ss, ibit)
+	bitset ss;
+	int ibit;
+{
+	int ibyte = BIT2BYTE(ibit);
+	BYTE mask = BIT2MASK(ibit);
+	
+	if (ss[ibyte] & mask)
+		return 0; /* Bit already set */
+	ss[ibyte] |= mask;
+	return 1;
+}
+
+#if 0 /* Now a macro */
+int
+testbit(ss, ibit)
+	bitset ss;
+	int ibit;
+{
+	return (ss[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0;
+}
+#endif
+
+int
+samebitset(ss1, ss2, nbits)
+	bitset ss1, ss2;
+	int nbits;
+{
+	int i;
+	
+	for (i = NBYTES(nbits); --i >= 0; )
+		if (*ss1++ != *ss2++)
+			return 0;
+	return 1;
+}
+
+void
+mergebitset(ss1, ss2, nbits)
+	bitset ss1, ss2;
+	int nbits;
+{
+	int i;
+	
+	for (i = NBYTES(nbits); --i >= 0; )
+		*ss1++ |= *ss2++;
+}
diff --git a/Parser/firstsets.c b/Parser/firstsets.c
new file mode 100644
index 0000000..0f28dd0
--- /dev/null
+++ b/Parser/firstsets.c
@@ -0,0 +1,109 @@
+/* Computation of FIRST stets */
+
+#include <stdio.h>
+
+#include "PROTO.h"
+#include "malloc.h"
+#include "grammar.h"
+#include "token.h"
+
+extern int debugging;
+
+static void
+calcfirstset(g, d)
+	grammar *g;
+	dfa *d;
+{
+	int i, j;
+	state *s;
+	arc *a;
+	int nsyms;
+	int *sym;
+	int nbits;
+	static bitset dummy;
+	bitset result;
+	int type;
+	dfa *d1;
+	label *l0;
+	
+	if (debugging)
+		printf("Calculate FIRST set for '%s'\n", d->d_name);
+	
+	if (dummy == NULL)
+		dummy = newbitset(1);
+	if (d->d_first == dummy) {
+		fprintf(stderr, "Left-recursion for '%s'\n", d->d_name);
+		return;
+	}
+	if (d->d_first != NULL) {
+		fprintf(stderr, "Re-calculating FIRST set for '%s' ???\n",
+			d->d_name);
+	}
+	d->d_first = dummy;
+	
+	l0 = g->g_ll.ll_label;
+	nbits = g->g_ll.ll_nlabels;
+	result = newbitset(nbits);
+	
+	sym = NEW(int, 1);
+	if (sym == NULL)
+		fatal("no mem for new sym in calcfirstset");
+	nsyms = 1;
+	sym[0] = findlabel(&g->g_ll, d->d_type, (char *)NULL);
+	
+	s = &d->d_state[d->d_initial];
+	for (i = 0; i < s->s_narcs; i++) {
+		a = &s->s_arc[i];
+		for (j = 0; j < nsyms; j++) {
+			if (sym[j] == a->a_lbl)
+				break;
+		}
+		if (j >= nsyms) { /* New label */
+			RESIZE(sym, int, nsyms + 1);
+			if (sym == NULL)
+				fatal("no mem to resize sym in calcfirstset");
+			sym[nsyms++] = a->a_lbl;
+			type = l0[a->a_lbl].lb_type;
+			if (ISNONTERMINAL(type)) {
+				d1 = finddfa(g, type);
+				if (d1->d_first == dummy) {
+					fprintf(stderr,
+						"Left-recursion below '%s'\n",
+						d->d_name);
+				}
+				else {
+					if (d1->d_first == NULL)
+						calcfirstset(g, d1);
+					mergebitset(result, d1->d_first, nbits);
+				}
+			}
+			else if (ISTERMINAL(type)) {
+				addbit(result, a->a_lbl);
+			}
+		}
+	}
+	d->d_first = result;
+	if (debugging) {
+		printf("FIRST set for '%s': {", d->d_name);
+		for (i = 0; i < nbits; i++) {
+			if (testbit(result, i))
+				printf(" %s", labelrepr(&l0[i]));
+		}
+		printf(" }\n");
+	}
+}
+
+void
+addfirstsets(g)
+	grammar *g;
+{
+	int i;
+	dfa *d;
+	
+	printf("Adding FIRST sets ...\n");
+	for (i = 0; i < g->g_ndfas; i++) {
+		d = &g->g_dfa[i];
+		if (d->d_first == NULL)
+			calcfirstset(g, d);
+	}
+}
diff --git a/Parser/grammar.c b/Parser/grammar.c
new file mode 100644
index 0000000..ad9c2b0
--- /dev/null
+++ b/Parser/grammar.c
@@ -0,0 +1,207 @@
+/* Grammar implementation */
+
+#include <stdio.h>
+#include <ctype.h>
+
+#include "PROTO.h"
+#include "malloc.h"
+#include "assert.h"
+#include "token.h"
+#include "grammar.h"
+
+extern int debugging;
+
+grammar *
+newgrammar(start)
+	int start;
+{
+	grammar *g;
+	
+	g = NEW(grammar, 1);
+	if (g == NULL)
+		fatal("no mem for new grammar");
+	g->g_ndfas = 0;
+	g->g_dfa = NULL;
+	g->g_start = start;
+	g->g_ll.ll_nlabels = 0;
+	g->g_ll.ll_label = NULL;
+	return g;
+}
+
+dfa *
+adddfa(g, type, name)
+	grammar *g;
+	int type;
+	char *name;
+{
+	dfa *d;
+	
+	RESIZE(g->g_dfa, dfa, g->g_ndfas + 1);
+	if (g->g_dfa == NULL)
+		fatal("no mem to resize dfa in adddfa");
+	d = &g->g_dfa[g->g_ndfas++];
+	d->d_type = type;
+	d->d_name = name;
+	d->d_nstates = 0;
+	d->d_state = NULL;
+	d->d_initial = -1;
+	d->d_first = NULL;
+	return d; /* Only use while fresh! */
+}
+
+int
+addstate(d)
+	dfa *d;
+{
+	state *s;
+	
+	RESIZE(d->d_state, state, d->d_nstates + 1);
+	if (d->d_state == NULL)
+		fatal("no mem to resize state in addstate");
+	s = &d->d_state[d->d_nstates++];
+	s->s_narcs = 0;
+	s->s_arc = NULL;
+	return s - d->d_state;
+}
+
+void
+addarc(d, from, to, lbl)
+	dfa *d;
+	int lbl;
+{
+	state *s;
+	arc *a;
+	
+	assert(0 <= from && from < d->d_nstates);
+	assert(0 <= to && to < d->d_nstates);
+	
+	s = &d->d_state[from];
+	RESIZE(s->s_arc, arc, s->s_narcs + 1);
+	if (s->s_arc == NULL)
+		fatal("no mem to resize arc list in addarc");
+	a = &s->s_arc[s->s_narcs++];
+	a->a_lbl = lbl;
+	a->a_arrow = to;
+}
+
+int
+addlabel(ll, type, str)
+	labellist *ll;
+	int type;
+	char *str;
+{
+	int i;
+	label *lb;
+	
+	for (i = 0; i < ll->ll_nlabels; i++) {
+		if (ll->ll_label[i].lb_type == type &&
+			strcmp(ll->ll_label[i].lb_str, str) == 0)
+			return i;
+	}
+	RESIZE(ll->ll_label, label, ll->ll_nlabels + 1);
+	if (ll->ll_label == NULL)
+		fatal("no mem to resize labellist in addlabel");
+	lb = &ll->ll_label[ll->ll_nlabels++];
+	lb->lb_type = type;
+	lb->lb_str = str; /* XXX strdup(str) ??? */
+	return lb - ll->ll_label;
+}
+
+/* Same, but rather dies than adds */
+
+int
+findlabel(ll, type, str)
+	labellist *ll;
+	int type;
+	char *str;
+{
+	int i;
+	label *lb;
+	
+	for (i = 0; i < ll->ll_nlabels; i++) {
+		if (ll->ll_label[i].lb_type == type /*&&
+			strcmp(ll->ll_label[i].lb_str, str) == 0*/)
+			return i;
+	}
+	fprintf(stderr, "Label %d/'%s' not found\n", type, str);
+	abort();
+}
+
+static void
+translabel(g, lb)
+	grammar *g;
+	label *lb;
+{
+	int i;
+	
+	if (debugging)
+		printf("Translating label %s ...\n", labelrepr(lb));
+	
+	if (lb->lb_type == NAME) {
+		for (i = 0; i < g->g_ndfas; i++) {
+			if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) {
+				if (debugging)
+					printf("Label %s is non-terminal %d.\n",
+						lb->lb_str,
+						g->g_dfa[i].d_type);
+				lb->lb_type = g->g_dfa[i].d_type;
+				lb->lb_str = NULL;
+				return;
+			}
+		}
+		for (i = 0; i < (int)N_TOKENS; i++) {
+			if (strcmp(lb->lb_str, tok_name[i]) == 0) {
+				if (debugging)
+					printf("Label %s is terminal %d.\n",
+						lb->lb_str, i);
+				lb->lb_type = i;
+				lb->lb_str = NULL;
+				return;
+			}
+		}
+		printf("Can't translate NAME label '%s'\n", lb->lb_str);
+		return;
+	}
+	
+	if (lb->lb_type == STRING) {
+		if (isalpha(lb->lb_str[1])) {
+			char *p, *strchr();
+			if (debugging)
+				printf("Label %s is a keyword\n", lb->lb_str);
+			lb->lb_type = NAME;
+			lb->lb_str++;
+			p = strchr(lb->lb_str, '\'');
+			if (p)
+				*p = '\0';
+		}
+		else {
+			if (lb->lb_str[2] == lb->lb_str[0]) {
+				int type = (int) tok_1char(lb->lb_str[1]);
+				if (type != OP) {
+					lb->lb_type = type;
+					lb->lb_str = NULL;
+				}
+				else
+					printf("Unknown OP label %s\n",
+						lb->lb_str);
+			}
+			else
+				printf("Can't translate STRING label %s\n",
+					lb->lb_str);
+		}
+	}
+	else
+		printf("Can't translate label '%s'\n", labelrepr(lb));
+}
+
+void
+translatelabels(g)
+	grammar *g;
+{
+	int i;
+	
+	printf("Translating labels ...\n");
+	/* Don't translate EMPTY */
+	for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++)
+		translabel(g, &g->g_ll.ll_label[i]);
+}
diff --git a/Parser/grammar1.c b/Parser/grammar1.c
new file mode 100644
index 0000000..0cd66af
--- /dev/null
+++ b/Parser/grammar1.c
@@ -0,0 +1,52 @@
+/* Grammar subroutines needed by parser */
+
+#include "PROTO.h"
+#define NULL 0
+#include "assert.h"
+#include "grammar.h"
+#include "token.h"
+
+/* Return the DFA for the given type */
+
+dfa *
+finddfa(g, type)
+	grammar *g;
+	register int type;
+{
+	register int i;
+	register dfa *d;
+	
+	for (i = g->g_ndfas, d = g->g_dfa; --i >= 0; d++) {
+		if (d->d_type == type)
+			return d;
+	}
+	assert(0);
+	/* NOTREACHED */
+}
+
+char *
+labelrepr(lb)
+	label *lb;
+{
+	static char buf[100];
+	
+	if (lb->lb_type == ENDMARKER)
+		return "EMPTY";
+	else if (ISNONTERMINAL(lb->lb_type)) {
+		if (lb->lb_str == NULL) {
+			sprintf(buf, "NT%d", lb->lb_type);
+			return buf;
+		}
+		else
+			return lb->lb_str;
+	}
+	else {
+		if (lb->lb_str == NULL)
+			return tok_name[lb->lb_type];
+		else {
+			sprintf(buf, "%.32s(%.32s)",
+				tok_name[lb->lb_type], lb->lb_str);
+			return buf;
+		}
+	}
+}
diff --git a/Parser/intrcheck.c b/Parser/intrcheck.c
new file mode 100644
index 0000000..f963208
--- /dev/null
+++ b/Parser/intrcheck.c
@@ -0,0 +1,95 @@
+/* Check for interrupts */
+
+#ifdef MSDOS
+
+/* This might work for MS-DOS: */
+
+void
+initintr()
+{
+}
+
+int
+intrcheck()
+{
+	int interrupted = 0;
+	while (kbhit()) {
+		if (getch() == '\003')
+			interrupted = 1;
+	}
+	return interrupted;
+}
+
+#define OK
+
+#endif
+
+
+#ifdef THINK_C
+
+#include <MacHeaders>
+
+void
+initintr()
+{
+}
+
+int
+intrcheck()
+{
+	/* Static to make it faster(?) only */
+	static EventRecord e;
+	
+	/* XXX This fails if the user first types ahead and then
+	   decides to interrupt -- repeating Command-. until the
+	   event queue overflows may work though. */
+	if (EventAvail(keyDownMask|autoKeyMask, &e) &&
+				(e.modifiers & cmdKey) &&
+				(e.message & charCodeMask) == '.') {
+		(void) GetNextEvent(keyDownMask|autoKeyMask, &e);
+		return 1;
+	}
+	return 0;
+}
+
+#define OK
+
+#endif /* THINK_C */
+
+
+#ifndef OK
+
+/* Default version -- should work for Unix and Standard C */
+
+#include <stdio.h>
+#include <signal.h>
+
+#include "sigtype.h"
+
+static int interrupted;
+
+static SIGTYPE
+intcatcher(sig)
+	int sig;
+{
+	interrupted = 1;
+	signal(SIGINT, intcatcher);
+}
+
+void
+initintr()
+{
+	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
+		signal(SIGINT, intcatcher);
+}
+
+int
+intrcheck()
+{
+	if (!interrupted)
+		return 0;
+	interrupted = 0;
+	return 1;
+}
+
+#endif /* !OK */
diff --git a/Parser/listnode.c b/Parser/listnode.c
new file mode 100644
index 0000000..e0a979e
--- /dev/null
+++ b/Parser/listnode.c
@@ -0,0 +1,68 @@
+/* List a node on a file */
+
+#include <stdio.h>
+
+#include "PROTO.h"
+#include "token.h"
+#include "node.h"
+
+static int level, atbol;
+
+static void
+list1node(fp, n)
+	FILE *fp;
+	node *n;
+{
+	if (n == 0)
+		return;
+	if (ISNONTERMINAL(TYPE(n))) {
+		int i;
+		for (i = 0; i < NCH(n); i++)
+			list1node(fp, CHILD(n, i));
+	}
+	else if (ISTERMINAL(TYPE(n))) {
+		switch (TYPE(n)) {
+		case INDENT:
+			++level;
+			break;
+		case DEDENT:
+			--level;
+			break;
+		default:
+			if (atbol) {
+				int i;
+				for (i = 0; i < level; ++i)
+					fprintf(fp, "\t");
+				atbol = 0;
+			}
+			if (TYPE(n) == NEWLINE) {
+				if (STR(n) != NULL)
+					fprintf(fp, "%s", STR(n));
+				fprintf(fp, "\n");
+				atbol = 1;
+			}
+			else
+				fprintf(fp, "%s ", STR(n));
+			break;
+		}
+	}
+	else
+		fprintf(fp, "? ");
+}
+
+void
+listnode(fp, n)
+	FILE *fp;
+	node *n;
+{
+	level = 0;
+	atbol = 1;
+	list1node(fp, n);
+}
+
+void
+listtree(n)
+	node *n;
+{
+	listnode(stdout, n);
+}
diff --git a/Parser/metagrammar.c b/Parser/metagrammar.c
new file mode 100644
index 0000000..a7e6364
--- /dev/null
+++ b/Parser/metagrammar.c
@@ -0,0 +1,151 @@
+#include "PROTO.h"
+#include "metagrammar.h"
+#include "grammar.h"
+static arc arcs_0_0[3] = {
+	{2, 0},
+	{3, 0},
+	{4, 1},
+};
+static arc arcs_0_1[1] = {
+	{0, 1},
+};
+static state states_0[2] = {
+	{3, arcs_0_0},
+	{1, arcs_0_1},
+};
+static arc arcs_1_0[1] = {
+	{5, 1},
+};
+static arc arcs_1_1[1] = {
+	{6, 2},
+};
+static arc arcs_1_2[1] = {
+	{7, 3},
+};
+static arc arcs_1_3[1] = {
+	{3, 4},
+};
+static arc arcs_1_4[1] = {
+	{0, 4},
+};
+static state states_1[5] = {
+	{1, arcs_1_0},
+	{1, arcs_1_1},
+	{1, arcs_1_2},
+	{1, arcs_1_3},
+	{1, arcs_1_4},
+};
+static arc arcs_2_0[1] = {
+	{8, 1},
+};
+static arc arcs_2_1[2] = {
+	{9, 0},
+	{0, 1},
+};
+static state states_2[2] = {
+	{1, arcs_2_0},
+	{2, arcs_2_1},
+};
+static arc arcs_3_0[1] = {
+	{10, 1},
+};
+static arc arcs_3_1[2] = {
+	{10, 1},
+	{0, 1},
+};
+static state states_3[2] = {
+	{1, arcs_3_0},
+	{2, arcs_3_1},
+};
+static arc arcs_4_0[2] = {
+	{11, 1},
+	{13, 2},
+};
+static arc arcs_4_1[1] = {
+	{7, 3},
+};
+static arc arcs_4_2[3] = {
+	{14, 4},
+	{15, 4},
+	{0, 2},
+};
+static arc arcs_4_3[1] = {
+	{12, 4},
+};
+static arc arcs_4_4[1] = {
+	{0, 4},
+};
+static state states_4[5] = {
+	{2, arcs_4_0},
+	{1, arcs_4_1},
+	{3, arcs_4_2},
+	{1, arcs_4_3},
+	{1, arcs_4_4},
+};
+static arc arcs_5_0[3] = {
+	{5, 1},
+	{16, 1},
+	{17, 2},
+};
+static arc arcs_5_1[1] = {
+	{0, 1},
+};
+static arc arcs_5_2[1] = {
+	{7, 3},
+};
+static arc arcs_5_3[1] = {
+	{18, 1},
+};
+static state states_5[4] = {
+	{3, arcs_5_0},
+	{1, arcs_5_1},
+	{1, arcs_5_2},
+	{1, arcs_5_3},
+};
+static dfa dfas[6] = {
+	{256, "MSTART", 0, 2, states_0,
+	 "\070\000\000"},
+	{257, "RULE", 0, 5, states_1,
+	 "\040\000\000"},
+	{258, "RHS", 0, 2, states_2,
+	 "\040\010\003"},
+	{259, "ALT", 0, 2, states_3,
+	 "\040\010\003"},
+	{260, "ITEM", 0, 5, states_4,
+	 "\040\010\003"},
+	{261, "ATOM", 0, 4, states_5,
+	 "\040\000\003"},
+};
+static label labels[19] = {
+	{0, "EMPTY"},
+	{256, 0},
+	{257, 0},
+	{4, 0},
+	{0, 0},
+	{1, 0},
+	{11, 0},
+	{258, 0},
+	{259, 0},
+	{18, 0},
+	{260, 0},
+	{9, 0},
+	{10, 0},
+	{261, 0},
+	{16, 0},
+	{14, 0},
+	{3, 0},
+	{7, 0},
+	{8, 0},
+};
+static grammar gram = {
+	6,
+	dfas,
+	{19, labels},
+	256
+};
+
+grammar *
+meta_grammar()
+{
+	return &gram;
+}
diff --git a/Parser/node.c b/Parser/node.c
new file mode 100644
index 0000000..86d607a
--- /dev/null
+++ b/Parser/node.c
@@ -0,0 +1,47 @@
+/* Parse tree node implementation */
+
+#include "PROTO.h"
+#include "malloc.h"
+#include "node.h"
+
+node *
+newnode(type)
+	int type;
+{
+	node *n = NEW(node, 1);
+	if (n == NULL)
+		return NULL;
+	n->n_type = type;
+	n->n_str = NULL;
+	n->n_nchildren = 0;
+	n->n_child = NULL;
+	return n;
+}
+
+#define XXX 3 /* Node alignment factor to speed up realloc */
+#define XXXROUNDUP(n) ((n) == 1 ? 1 : ((n) + XXX - 1) / XXX * XXX)
+
+node *
+addchild(n1, type, str)
+	register node *n1;
+	int type;
+	char *str;
+{
+	register int nch = n1->n_nchildren;
+	register int nch1 = nch+1;
+	register node *n;
+	if (XXXROUNDUP(nch) < nch1) {
+		n = n1->n_child;
+		nch1 = XXXROUNDUP(nch1);
+		RESIZE(n, node, nch1);
+		if (n == NULL)
+			return NULL;
+		n1->n_child = n;
+	}
+	n = &n1->n_child[n1->n_nchildren++];
+	n->n_type = type;
+	n->n_str = str;
+	n->n_nchildren = 0;
+	n->n_child = NULL;
+	return n;
+}
diff --git a/Parser/parser.c b/Parser/parser.c
new file mode 100644
index 0000000..e0b3530
--- /dev/null
+++ b/Parser/parser.c
@@ -0,0 +1,396 @@
+/* Parser implementation */
+
+/* For a description, see the comments at end of this file */
+
+/* XXX To do: error recovery */
+
+#include <stdio.h>
+#include "assert.h"
+
+#include "PROTO.h"
+#include "malloc.h"
+#include "token.h"
+#include "grammar.h"
+#include "node.h"
+#include "parser.h"
+#include "errcode.h"
+
+extern int debugging;
+
+#ifdef DEBUG
+#define D(x) if (!debugging); else x
+#else
+#define D(x)
+#endif
+
+
+/* STACK DATA TYPE */
+
+static void s_reset PROTO((stack *));
+
+static void
+s_reset(s)
+	stack *s;
+{
+	s->s_top = &s->s_base[MAXSTACK];
+}
+
+#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
+
+static int s_push PROTO((stack *, dfa *, node *));
+
+static int
+s_push(s, d, parent)
+	register stack *s;
+	dfa *d;
+	node *parent;
+{
+	register stackentry *top;
+	if (s->s_top == s->s_base) {
+		fprintf(stderr, "s_push: parser stack overflow\n");
+		return -1;
+	}
+	top = --s->s_top;
+	top->s_dfa = d;
+	top->s_parent = parent;
+	top->s_state = 0;
+	return 0;
+}
+
+#ifdef DEBUG
+
+static void s_pop PROTO((stack *));
+
+static void
+s_pop(s)
+	register stack *s;
+{
+	if (s_empty(s)) {
+		fprintf(stderr, "s_pop: parser stack underflow -- FATAL\n");
+		abort();
+	}
+	s->s_top++;
+}
+
+#else /* !DEBUG */
+
+#define s_pop(s) (s)->s_top++
+
+#endif
+
+
+/* PARSER CREATION */
+
+parser_state *
+newparser(g, start)
+	grammar *g;
+	int start;
+{
+	parser_state *ps;
+	
+	if (!g->g_accel)
+		addaccelerators(g);
+	ps = NEW(parser_state, 1);
+	if (ps == NULL)
+		return NULL;
+	ps->p_grammar = g;
+	ps->p_tree = newnode(start);
+	if (ps->p_tree == NULL) {
+		if (ps->p_tree != NULL)
+			DEL(ps->p_tree); /* XXX freeing a node!?! */
+		DEL(ps);
+		return NULL;
+	}
+	s_reset(&ps->p_stack);
+	(void) s_push(&ps->p_stack, finddfa(g, start), ps->p_tree);
+	return ps;
+}
+
+void
+delparser(ps)
+	parser_state *ps;
+{
+	DEL(ps);
+}
+
+
+/* PARSER STACK OPERATIONS */
+
+static int shift PROTO((stack *, int, char *, int));
+
+static int
+shift(s, type, str, newstate)
+	register stack *s;
+	int type;
+	char *str;
+	int newstate;
+{
+	assert(!s_empty(s));
+	if (addchild(s->s_top->s_parent, type, str) == NULL) {
+		fprintf(stderr, "shift: no mem in addchild\n");
+		return -1;
+	}
+	s->s_top->s_state = newstate;
+	return 0;
+}
+
+static int push PROTO((stack *, int, dfa *, int));
+
+static int
+push(s, type, d, newstate)
+	register stack *s;
+	int type;
+	dfa *d;
+	int newstate;
+{
+	register node *n;
+	n = s->s_top->s_parent;
+	assert(!s_empty(s));
+	if (addchild(n, type, (char *)NULL) == NULL) {
+		fprintf(stderr, "push: no mem in addchild\n");
+		return -1;
+	}
+	s->s_top->s_state = newstate;
+	return s_push(s, d, CHILD(n, NCH(n)-1));
+}
+
+
+/* PARSER PROPER */
+
+static int classify PROTO((grammar *, int, char *));
+
+static int
+classify(g, type, str)
+	grammar *g;
+	register int type;
+	char *str;
+{
+	register int n = g->g_ll.ll_nlabels;
+	
+	if (type == NAME) {
+		register char *s = str;
+		register label *l = g->g_ll.ll_label;
+		register int i;
+		for (i = n; i > 0; i--, l++) {
+			if (l->lb_type == NAME && l->lb_str != NULL &&
+					l->lb_str[0] == s[0] &&
+					strcmp(l->lb_str, s) == 0) {
+				D(printf("It's a keyword\n"));
+				return n - i;
+			}
+		}
+	}
+	
+	{
+		register label *l = g->g_ll.ll_label;
+		register int i;
+		for (i = n; i > 0; i--, l++) {
+			if (l->lb_type == type && l->lb_str == NULL) {
+				D(printf("It's a token we know\n"));
+				return n - i;
+			}
+		}
+	}
+	
+	D(printf("Illegal token\n"));
+	return -1;
+}
+
+int
+addtoken(ps, type, str)
+	register parser_state *ps;
+	register int type;
+	char *str;
+{
+	register int ilabel;
+	
+	D(printf("Token %s/'%s' ... ", tok_name[type], str));
+	
+	/* Find out which label this token is */
+	ilabel = classify(ps->p_grammar, type, str);
+	if (ilabel < 0)
+		return E_SYNTAX;
+	
+	/* Loop until the token is shifted or an error occurred */
+	for (;;) {
+		/* Fetch the current dfa and state */
+		register dfa *d = ps->p_stack.s_top->s_dfa;
+		register state *s = &d->d_state[ps->p_stack.s_top->s_state];
+		
+		D(printf(" DFA '%s', state %d:",
+			d->d_name, ps->p_stack.s_top->s_state));
+		
+		/* Check accelerator */
+		if (s->s_lower <= ilabel && ilabel < s->s_upper) {
+			register int x = s->s_accel[ilabel - s->s_lower];
+			if (x != -1) {
+				if (x & (1<<7)) {
+					/* Push non-terminal */
+					int nt = (x >> 8) + NT_OFFSET;
+					int arrow = x & ((1<<7)-1);
+					dfa *d1 = finddfa(ps->p_grammar, nt);
+					if (push(&ps->p_stack, nt, d1, arrow) < 0) {
+						D(printf(" MemError: push.\n"));
+						return E_NOMEM;
+					}
+					D(printf(" Push ...\n"));
+					continue;
+				}
+				
+				/* Shift the token */
+				if (shift(&ps->p_stack, type, str, x) < 0) {
+					D(printf(" MemError: shift.\n"));
+					return E_NOMEM;
+				}
+				D(printf(" Shift.\n"));
+				/* Pop while we are in an accept-only state */
+				while (s = &d->d_state
+						[ps->p_stack.s_top->s_state],
+					s->s_accept && s->s_narcs == 1) {
+					D(printf("  Direct pop.\n"));
+					s_pop(&ps->p_stack);
+					if (s_empty(&ps->p_stack)) {
+						D(printf("  ACCEPT.\n"));
+						return E_DONE;
+					}
+					d = ps->p_stack.s_top->s_dfa;
+				}
+				return E_OK;
+			}
+		}
+		
+		if (s->s_accept) {
+			/* Pop this dfa and try again */
+			s_pop(&ps->p_stack);
+			D(printf(" Pop ...\n"));
+			if (s_empty(&ps->p_stack)) {
+				D(printf(" Error: bottom of stack.\n"));
+				return E_SYNTAX;
+			}
+			continue;
+		}
+		
+		/* Stuck, report syntax error */
+		D(printf(" Error.\n"));
+		return E_SYNTAX;
+	}
+}
+
+
+#ifdef DEBUG
+
+/* DEBUG OUTPUT */
+
+void
+dumptree(g, n)
+	grammar *g;
+	node *n;
+{
+	int i;
+	
+	if (n == NULL)
+		printf("NIL");
+	else {
+		label l;
+		l.lb_type = TYPE(n);
+		l.lb_str = TYPE(str);
+		printf("%s", labelrepr(&l));
+		if (ISNONTERMINAL(TYPE(n))) {
+			printf("(");
+			for (i = 0; i < NCH(n); i++) {
+				if (i > 0)
+					printf(",");
+				dumptree(g, CHILD(n, i));
+			}
+			printf(")");
+		}
+	}
+}
+
+void
+showtree(g, n)
+	grammar *g;
+	node *n;
+{
+	int i;
+	
+	if (n == NULL)
+		return;
+	if (ISNONTERMINAL(TYPE(n))) {
+		for (i = 0; i < NCH(n); i++)
+			showtree(g, CHILD(n, i));
+	}
+	else if (ISTERMINAL(TYPE(n))) {
+		printf("%s", tok_name[TYPE(n)]);
+		if (TYPE(n) == NUMBER || TYPE(n) == NAME)
+			printf("(%s)", STR(n));
+		printf(" ");
+	}
+	else
+		printf("? ");
+}
+
+void
+printtree(ps)
+	parser_state *ps;
+{
+	if (debugging) {
+		printf("Parse tree:\n");
+		dumptree(ps->p_grammar, ps->p_tree);
+		printf("\n");
+		printf("Tokens:\n");
+		showtree(ps->p_grammar, ps->p_tree);
+		printf("\n");
+	}
+	printf("Listing:\n");
+	listtree(ps->p_tree);
+	printf("\n");
+}
+
+#endif /* DEBUG */
+
+/*
+
+Description
+-----------
+
+The parser's interface is different than usual: the function addtoken()
+must be called for each token in the input.  This makes it possible to
+turn it into an incremental parsing system later.  The parsing system
+constructs a parse tree as it goes.
+
+A parsing rule is represented as a Deterministic Finite-state Automaton
+(DFA).  A node in a DFA represents a state of the parser; an arc represents
+a transition.  Transitions are either labeled with terminal symbols or
+with non-terminals.  When the parser decides to follow an arc labeled
+with a non-terminal, it is invoked recursively with the DFA representing
+the parsing rule for that as its initial state; when that DFA accepts,
+the parser that invoked it continues.  The parse tree constructed by the
+recursively called parser is inserted as a child in the current parse tree.
+
+The DFA's can be constructed automatically from a more conventional
+language description.  An extended LL(1) grammar (ELL(1)) is suitable.
+Certain restrictions make the parser's life easier: rules that can produce
+the empty string should be outlawed (there are other ways to put loops
+or optional parts in the language).  To avoid the need to construct
+FIRST sets, we can require that all but the last alternative of a rule
+(really: arc going out of a DFA's state) must begin with a terminal
+symbol.
+
+As an example, consider this grammar:
+
+expr:	term (OP term)*
+term:	CONSTANT | '(' expr ')'
+
+The DFA corresponding to the rule for expr is:
+
+------->.---term-->.------->
+	^          |
+	|          |
+	\----OP----/
+
+The parse tree generated for the input a+b is:
+
+(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
+
+*/
diff --git a/Parser/parser.h b/Parser/parser.h
new file mode 100644
index 0000000..16eee0e
--- /dev/null
+++ b/Parser/parser.h
@@ -0,0 +1,25 @@
+/* Parser interface */
+
+#define MAXSTACK 100
+
+typedef struct _stackentry {
+	int		 s_state;	/* State in current DFA */
+	dfa		*s_dfa;		/* Current DFA */
+	node		*s_parent;	/* Where to add next node */
+} stackentry;
+
+typedef struct _stack {
+	stackentry	*s_top;		/* Top entry */
+	stackentry	 s_base[MAXSTACK];/* Array of stack entries */
+					/* NB The stack grows down */
+} stack;
+
+typedef struct {
+	struct _stack	 p_stack;	/* Stack of parser states */
+	struct _grammar	*p_grammar;	/* Grammar to use */
+	struct _node	*p_tree;	/* Top of parse tree */
+} parser_state;
+
+parser_state *newparser PROTO((struct _grammar *g, int start));
+void delparser PROTO((parser_state *ps));
+int addtoken PROTO((parser_state *ps, int type, char *str));
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
new file mode 100644
index 0000000..01877a1
--- /dev/null
+++ b/Parser/parsetok.c
@@ -0,0 +1,131 @@
+/* Parser-tokenizer link implementation */
+
+#include <stdio.h>
+
+#include "PROTO.h"
+#include "malloc.h"
+#include "tokenizer.h"
+#include "node.h"
+#include "grammar.h"
+#include "parser.h"
+#include "errcode.h"
+
+extern int debugging;
+
+
+/* Parse input coming from the given tokenizer structure.
+   Return error code. */
+
+static int
+parsetok(tok, g, start, n_ret)
+	struct tok_state *tok;
+	grammar *g;
+	int start;
+	node **n_ret;
+{
+	parser_state *ps;
+	int ret;
+	
+	if ((ps = newparser(g, start)) == NULL) {
+		fprintf(stderr, "no mem for new parser\n");
+		return E_NOMEM;
+	}
+	
+	for (;;) {
+		char *a, *b;
+		int type;
+		int len;
+		char *str;
+		
+		type = tok_get(tok, &a, &b);
+		if (type == ERRORTOKEN) {
+			ret = tok->done;
+			break;
+		}
+		len = b - a;
+		str = NEW(char, len + 1);
+		if (str == NULL) {
+			fprintf(stderr, "no mem for next token\n");
+			ret = E_NOMEM;
+			break;
+		}
+		strncpy(str, a, len);
+		str[len] = '\0';
+		ret = addtoken(ps, (int)type, str);
+		if (ret != E_OK) {
+			if (ret == E_DONE)
+				*n_ret = ps->p_tree;
+			else if (tok->lineno <= 1 && tok->done == E_EOF)
+				ret = E_EOF;
+			break;
+		}
+	}
+	
+	delparser(ps);
+	return ret;
+}
+
+
+/* Parse input coming from a string.  Return error code. */
+
+int
+parsestring(s, g, start, n_ret)
+	char *s;
+	grammar *g;
+	int start;
+	node **n_ret;
+{
+	struct tok_state *tok = tok_setups(s);
+	int ret;
+	
+	if (tok == NULL) {
+		fprintf(stderr, "no mem for tok_setups\n");
+		return E_NOMEM;
+	}
+	ret = parsetok(tok, g, start, n_ret);
+	if (ret == E_TOKEN || ret == E_SYNTAX) {
+		fprintf(stderr, "String parsing error at line %d\n",
+			tok->lineno);
+	}
+	tok_free(tok);
+	return ret;
+}
+
+
+/* Parse input coming from a file.  Return error code. */
+
+int
+parsefile(fp, g, start, ps1, ps2, n_ret)
+	FILE *fp;
+	grammar *g;
+	int start;
+	char *ps1, *ps2;
+	node **n_ret;
+{
+	struct tok_state *tok = tok_setupf(fp, ps1, ps2);
+	int ret;
+	
+	if (tok == NULL) {
+		fprintf(stderr, "no mem for tok_setupf\n");
+		return E_NOMEM;
+	}
+	ret = parsetok(tok, g, start, n_ret);
+	if (ret == E_TOKEN || ret == E_SYNTAX) {
+		char *p;
+		fprintf(stderr, "Parsing error at line %d:\n",
+			tok->lineno);
+		*tok->inp = '\0';
+		if (tok->inp > tok->buf && tok->inp[-1] == '\n')
+			tok->inp[-1] = '\0';
+		fprintf(stderr, "%s\n", tok->buf);
+		for (p = tok->buf; p < tok->cur; p++) {
+			if (*p == '\t')
+				putc('\t', stderr);
+			else
+				putc(' ', stderr);
+		}
+		fprintf(stderr, "^\n");
+	}
+	tok_free(tok);
+	return ret;
+}
diff --git a/Parser/pgen.c b/Parser/pgen.c
new file mode 100644
index 0000000..34d9b71
--- /dev/null
+++ b/Parser/pgen.c
@@ -0,0 +1,729 @@
+/* Parser generator */
+
+/* For a description, see the comments at end of this file */
+
+#include <stdio.h>
+#include "assert.h"
+
+#include "PROTO.h"
+#include "malloc.h"
+#include "token.h"
+#include "node.h"
+#include "grammar.h"
+#include "metagrammar.h"
+#include "pgen.h"
+
+extern int debugging;
+
+
+/* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */
+
+typedef struct _nfaarc {
+	int	ar_label;
+	int	ar_arrow;
+} nfaarc;
+
+typedef struct _nfastate {
+	int	st_narcs;
+	nfaarc	*st_arc;
+} nfastate;
+
+typedef struct _nfa {
+	int		nf_type;
+	char		*nf_name;
+	int		nf_nstates;
+	nfastate	*nf_state;
+	int		nf_start, nf_finish;
+} nfa;
+
+static int
+addnfastate(nf)
+	nfa *nf;
+{
+	nfastate *st;
+	
+	RESIZE(nf->nf_state, nfastate, nf->nf_nstates + 1);
+	if (nf->nf_state == NULL)
+		fatal("out of mem");
+	st = &nf->nf_state[nf->nf_nstates++];
+	st->st_narcs = 0;
+	st->st_arc = NULL;
+	return st - nf->nf_state;
+}
+
+static void
+addnfaarc(nf, from, to, lbl)
+	nfa *nf;
+	int from, to, lbl;
+{
+	nfastate *st;
+	nfaarc *ar;
+	
+	st = &nf->nf_state[from];
+	RESIZE(st->st_arc, nfaarc, st->st_narcs + 1);
+	if (st->st_arc == NULL)
+		fatal("out of mem");
+	ar = &st->st_arc[st->st_narcs++];
+	ar->ar_label = lbl;
+	ar->ar_arrow = to;
+}
+
+static nfa *
+newnfa(name)
+	char *name;
+{
+	nfa *nf;
+	static type = NT_OFFSET; /* All types will be disjunct */
+	
+	nf = NEW(nfa, 1);
+	if (nf == NULL)
+		fatal("no mem for new nfa");
+	nf->nf_type = type++;
+	nf->nf_name = name; /* XXX strdup(name) ??? */
+	nf->nf_nstates = 0;
+	nf->nf_state = NULL;
+	nf->nf_start = nf->nf_finish = -1;
+	return nf;
+}
+
+typedef struct _nfagrammar {
+	int		gr_nnfas;
+	nfa		**gr_nfa;
+	labellist	gr_ll;
+} nfagrammar;
+
+static nfagrammar *
+newnfagrammar()
+{
+	nfagrammar *gr;
+	
+	gr = NEW(nfagrammar, 1);
+	if (gr == NULL)
+		fatal("no mem for new nfa grammar");
+	gr->gr_nnfas = 0;
+	gr->gr_nfa = NULL;
+	gr->gr_ll.ll_nlabels = 0;
+	gr->gr_ll.ll_label = NULL;
+	addlabel(&gr->gr_ll, ENDMARKER, "EMPTY");
+	return gr;
+}
+
+static nfa *
+addnfa(gr, name)
+	nfagrammar *gr;
+	char *name;
+{
+	nfa *nf;
+	
+	nf = newnfa(name);
+	RESIZE(gr->gr_nfa, nfa *, gr->gr_nnfas + 1);
+	if (gr->gr_nfa == NULL)
+		fatal("out of mem");
+	gr->gr_nfa[gr->gr_nnfas++] = nf;
+	addlabel(&gr->gr_ll, NAME, nf->nf_name);
+	return nf;
+}
+
+#ifdef DEBUG
+
+static char REQNFMT[] = "metacompile: less than %d children\n";
+
+#define REQN(i, count) \
+ 	if (i < count) { \
+		fprintf(stderr, REQNFMT, count); \
+		abort(); \
+	} else
+
+#else
+#define REQN(i, count)	/* empty */
+#endif
+
+static nfagrammar *
+metacompile(n)
+	node *n;
+{
+	nfagrammar *gr;
+	int i;
+	
+	printf("Compiling (meta-) parse tree into NFA grammar\n");
+	gr = newnfagrammar();
+	REQ(n, MSTART);
+	i = n->n_nchildren - 1; /* Last child is ENDMARKER */
+	n = n->n_child;
+	for (; --i >= 0; n++) {
+		if (n->n_type != NEWLINE)
+			compile_rule(gr, n);
+	}
+	return gr;
+}
+
+static
+compile_rule(gr, n)
+	nfagrammar *gr;
+	node *n;
+{
+	nfa *nf;
+	
+	REQ(n, RULE);
+	REQN(n->n_nchildren, 4);
+	n = n->n_child;
+	REQ(n, NAME);
+	nf = addnfa(gr, n->n_str);
+	n++;
+	REQ(n, COLON);
+	n++;
+	REQ(n, RHS);
+	compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish);
+	n++;
+	REQ(n, NEWLINE);
+}
+
+static
+compile_rhs(ll, nf, n, pa, pb)
+	labellist *ll;
+	nfa *nf;
+	node *n;
+	int *pa, *pb;
+{
+	int i;
+	int a, b;
+	
+	REQ(n, RHS);
+	i = n->n_nchildren;
+	REQN(i, 1);
+	n = n->n_child;
+	REQ(n, ALT);
+	compile_alt(ll, nf, n, pa, pb);
+	if (--i <= 0)
+		return;
+	n++;
+	a = *pa;
+	b = *pb;
+	*pa = addnfastate(nf);
+	*pb = addnfastate(nf);
+	addnfaarc(nf, *pa, a, EMPTY);
+	addnfaarc(nf, b, *pb, EMPTY);
+	for (; --i >= 0; n++) {
+		REQ(n, VBAR);
+		REQN(i, 1);
+		--i;
+		n++;
+		REQ(n, ALT);
+		compile_alt(ll, nf, n, &a, &b);
+		addnfaarc(nf, *pa, a, EMPTY);
+		addnfaarc(nf, b, *pb, EMPTY);
+	}
+}
+
+static
+compile_alt(ll, nf, n, pa, pb)
+	labellist *ll;
+	nfa *nf;
+	node *n;
+	int *pa, *pb;
+{
+	int i;
+	int a, b;
+	
+	REQ(n, ALT);
+	i = n->n_nchildren;
+	REQN(i, 1);
+	n = n->n_child;
+	REQ(n, ITEM);
+	compile_item(ll, nf, n, pa, pb);
+	--i;
+	n++;
+	for (; --i >= 0; n++) {
+		if (n->n_type == COMMA) { /* XXX Temporary */
+			REQN(i, 1);
+			--i;
+			n++;
+		}
+		REQ(n, ITEM);
+		compile_item(ll, nf, n, &a, &b);
+		addnfaarc(nf, *pb, a, EMPTY);
+		*pb = b;
+	}
+}
+
+static
+compile_item(ll, nf, n, pa, pb)
+	labellist *ll;
+	nfa *nf;
+	node *n;
+	int *pa, *pb;
+{
+	int i;
+	int a, b;
+	
+	REQ(n, ITEM);
+	i = n->n_nchildren;
+	REQN(i, 1);
+	n = n->n_child;
+	if (n->n_type == LSQB) {
+		REQN(i, 3);
+		n++;
+		REQ(n, RHS);
+		*pa = addnfastate(nf);
+		*pb = addnfastate(nf);
+		addnfaarc(nf, *pa, *pb, EMPTY);
+		compile_rhs(ll, nf, n, &a, &b);
+		addnfaarc(nf, *pa, a, EMPTY);
+		addnfaarc(nf, b, *pb, EMPTY);
+		REQN(i, 1);
+		n++;
+		REQ(n, RSQB);
+	}
+	else {
+		compile_atom(ll, nf, n, pa, pb);
+		if (--i <= 0)
+			return;
+		n++;
+		addnfaarc(nf, *pb, *pa, EMPTY);
+		if (n->n_type == STAR)
+			*pb = *pa;
+		else
+			REQ(n, PLUS);
+	}
+}
+
+static
+compile_atom(ll, nf, n, pa, pb)
+	labellist *ll;
+	nfa *nf;
+	node *n;
+	int *pa, *pb;
+{
+	int i;
+	
+	REQ(n, ATOM);
+	i = n->n_nchildren;
+	REQN(i, 1);
+	n = n->n_child;
+	if (n->n_type == LPAR) {
+		REQN(i, 3);
+		n++;
+		REQ(n, RHS);
+		compile_rhs(ll, nf, n, pa, pb);
+		n++;
+		REQ(n, RPAR);
+	}
+	else if (n->n_type == NAME || n->n_type == STRING) {
+		*pa = addnfastate(nf);
+		*pb = addnfastate(nf);
+		addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str));
+	}
+	else
+		REQ(n, NAME);
+}
+
+static void
+dumpstate(ll, nf, istate)
+	labellist *ll;
+	nfa *nf;
+	int istate;
+{
+	nfastate *st;
+	int i;
+	nfaarc *ar;
+	
+	printf("%c%2d%c",
+		istate == nf->nf_start ? '*' : ' ',
+		istate,
+		istate == nf->nf_finish ? '.' : ' ');
+	st = &nf->nf_state[istate];
+	ar = st->st_arc;
+	for (i = 0; i < st->st_narcs; i++) {
+		if (i > 0)
+			printf("\n    ");
+		printf("-> %2d  %s", ar->ar_arrow,
+			labelrepr(&ll->ll_label[ar->ar_label]));
+		ar++;
+	}
+	printf("\n");
+}
+
+static void
+dumpnfa(ll, nf)
+	labellist *ll;
+	nfa *nf;
+{
+	int i;
+	
+	printf("NFA '%s' has %d states; start %d, finish %d\n",
+		nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish);
+	for (i = 0; i < nf->nf_nstates; i++)
+		dumpstate(ll, nf, i);
+}
+
+
+/* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */
+
+static int
+addclosure(ss, nf, istate)
+	bitset ss;
+	nfa *nf;
+	int istate;
+{
+	if (addbit(ss, istate)) {
+		nfastate *st = &nf->nf_state[istate];
+		nfaarc *ar = st->st_arc;
+		int i;
+		
+		for (i = st->st_narcs; --i >= 0; ) {
+			if (ar->ar_label == EMPTY)
+				addclosure(ss, nf, ar->ar_arrow);
+			ar++;
+		}
+	}
+}
+
+typedef struct _ss_arc {
+	bitset	sa_bitset;
+	int	sa_arrow;
+	int	sa_label;
+} ss_arc;
+
+typedef struct _ss_state {
+	bitset	ss_ss;
+	int	ss_narcs;
+	ss_arc	*ss_arc;
+	int	ss_deleted;
+	int	ss_finish;
+	int	ss_rename;
+} ss_state;
+
+typedef struct _ss_dfa {
+	int	sd_nstates;
+	ss_state *sd_state;
+} ss_dfa;
+
+static
+makedfa(gr, nf, d)
+	nfagrammar *gr;
+	nfa *nf;
+	dfa *d;
+{
+	int nbits = nf->nf_nstates;
+	bitset ss;
+	int xx_nstates;
+	ss_state *xx_state, *yy;
+	ss_arc *zz;
+	int istate, jstate, iarc, jarc, ibit;
+	nfastate *st;
+	nfaarc *ar;
+	
+	ss = newbitset(nbits);
+	addclosure(ss, nf, nf->nf_start);
+	xx_state = NEW(ss_state, 1);
+	if (xx_state == NULL)
+		fatal("no mem for xx_state in makedfa");
+	xx_nstates = 1;
+	yy = &xx_state[0];
+	yy->ss_ss = ss;
+	yy->ss_narcs = 0;
+	yy->ss_arc = NULL;
+	yy->ss_deleted = 0;
+	yy->ss_finish = testbit(ss, nf->nf_finish);
+	if (yy->ss_finish)
+		printf("Error: nonterminal '%s' may produce empty.\n",
+			nf->nf_name);
+	
+	/* This algorithm is from a book written before
+	   the invention of structured programming... */
+
+	/* For each unmarked state... */
+	for (istate = 0; istate < xx_nstates; ++istate) {
+		yy = &xx_state[istate];
+		ss = yy->ss_ss;
+		/* For all its states... */
+		for (ibit = 0; ibit < nf->nf_nstates; ++ibit) {
+			if (!testbit(ss, ibit))
+				continue;
+			st = &nf->nf_state[ibit];
+			/* For all non-empty arcs from this state... */
+			for (iarc = 0; iarc < st->st_narcs; iarc++) {
+				ar = &st->st_arc[iarc];
+				if (ar->ar_label == EMPTY)
+					continue;
+				/* Look up in list of arcs from this state */
+				for (jarc = 0; jarc < yy->ss_narcs; ++jarc) {
+					zz = &yy->ss_arc[jarc];
+					if (ar->ar_label == zz->sa_label)
+						goto found;
+				}
+				/* Add new arc for this state */
+				RESIZE(yy->ss_arc, ss_arc, yy->ss_narcs + 1);
+				if (yy->ss_arc == NULL)
+					fatal("out of mem");
+				zz = &yy->ss_arc[yy->ss_narcs++];
+				zz->sa_label = ar->ar_label;
+				zz->sa_bitset = newbitset(nbits);
+				zz->sa_arrow = -1;
+			 found:	;
+				/* Add destination */
+				addclosure(zz->sa_bitset, nf, ar->ar_arrow);
+			}
+		}
+		/* Now look up all the arrow states */
+		for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) {
+			zz = &xx_state[istate].ss_arc[jarc];
+			for (jstate = 0; jstate < xx_nstates; jstate++) {
+				if (samebitset(zz->sa_bitset,
+					xx_state[jstate].ss_ss, nbits)) {
+					zz->sa_arrow = jstate;
+					goto done;
+				}
+			}
+			RESIZE(xx_state, ss_state, xx_nstates + 1);
+			if (xx_state == NULL)
+				fatal("out of mem");
+			zz->sa_arrow = xx_nstates;
+			yy = &xx_state[xx_nstates++];
+			yy->ss_ss = zz->sa_bitset;
+			yy->ss_narcs = 0;
+			yy->ss_arc = NULL;
+			yy->ss_deleted = 0;
+			yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish);
+		 done:	;
+		}
+	}
+	
+	if (debugging)
+		printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
+						"before minimizing");
+	
+	simplify(xx_nstates, xx_state);
+	
+	if (debugging)
+		printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
+						"after minimizing");
+	
+	convert(d, xx_nstates, xx_state);
+	
+	/* XXX cleanup */
+}
+
+static
+printssdfa(xx_nstates, xx_state, nbits, ll, msg)
+	int xx_nstates;
+	ss_state *xx_state;
+	int nbits;
+	labellist *ll;
+	char *msg;
+{
+	int i, ibit, iarc;
+	ss_state *yy;
+	ss_arc *zz;
+	
+	printf("Subset DFA %s\n", msg);
+	for (i = 0; i < xx_nstates; i++) {
+		yy = &xx_state[i];
+		if (yy->ss_deleted)
+			continue;
+		printf(" Subset %d", i);
+		if (yy->ss_finish)
+			printf(" (finish)");
+		printf(" { ");
+		for (ibit = 0; ibit < nbits; ibit++) {
+			if (testbit(yy->ss_ss, ibit))
+				printf("%d ", ibit);
+		}
+		printf("}\n");
+		for (iarc = 0; iarc < yy->ss_narcs; iarc++) {
+			zz = &yy->ss_arc[iarc];
+			printf("  Arc to state %d, label %s\n",
+				zz->sa_arrow,
+				labelrepr(&ll->ll_label[zz->sa_label]));
+		}
+	}
+}
+
+
+/* PART THREE -- SIMPLIFY DFA */
+
+/* Simplify the DFA by repeatedly eliminating states that are
+   equivalent to another oner.  This is NOT Algorithm 3.3 from
+   [Aho&Ullman 77].  It does not always finds the minimal DFA,
+   but it does usually make a much smaller one...  (For an example
+   of sub-optimal behaviour, try S: x a b+ | y a b+.)
+*/
+
+static int
+samestate(s1, s2)
+	ss_state *s1, *s2;
+{
+	int i;
+	
+	if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish)
+		return 0;
+	for (i = 0; i < s1->ss_narcs; i++) {
+		if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow ||
+			s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label)
+			return 0;
+	}
+	return 1;
+}
+
+static void
+renamestates(xx_nstates, xx_state, from, to)
+	int xx_nstates;
+	ss_state *xx_state;
+	int from, to;
+{
+	int i, j;
+	
+	if (debugging)
+		printf("Rename state %d to %d.\n", from, to);
+	for (i = 0; i < xx_nstates; i++) {
+		if (xx_state[i].ss_deleted)
+			continue;
+		for (j = 0; j < xx_state[i].ss_narcs; j++) {
+			if (xx_state[i].ss_arc[j].sa_arrow == from)
+				xx_state[i].ss_arc[j].sa_arrow = to;
+		}
+	}
+}
+
+static
+simplify(xx_nstates, xx_state)
+	int xx_nstates;
+	ss_state *xx_state;
+{
+	int changes;
+	int i, j, k;
+	
+	do {
+		changes = 0;
+		for (i = 1; i < xx_nstates; i++) {
+			if (xx_state[i].ss_deleted)
+				continue;
+			for (j = 0; j < i; j++) {
+				if (xx_state[j].ss_deleted)
+					continue;
+				if (samestate(&xx_state[i], &xx_state[j])) {
+					xx_state[i].ss_deleted++;
+					renamestates(xx_nstates, xx_state, i, j);
+					changes++;
+					break;
+				}
+			}
+		}
+	} while (changes);
+}
+
+
+/* PART FOUR -- GENERATE PARSING TABLES */
+
+/* Convert the DFA into a grammar that can be used by our parser */
+
+static
+convert(d, xx_nstates, xx_state)
+	dfa *d;
+	int xx_nstates;
+	ss_state *xx_state;
+{
+	int i, j;
+	ss_state *yy;
+	ss_arc *zz;
+	
+	for (i = 0; i < xx_nstates; i++) {
+		yy = &xx_state[i];
+		if (yy->ss_deleted)
+			continue;
+		yy->ss_rename = addstate(d);
+	}
+	
+	for (i = 0; i < xx_nstates; i++) {
+		yy = &xx_state[i];
+		if (yy->ss_deleted)
+			continue;
+		for (j = 0; j < yy->ss_narcs; j++) {
+			zz = &yy->ss_arc[j];
+			addarc(d, yy->ss_rename,
+				xx_state[zz->sa_arrow].ss_rename,
+				zz->sa_label);
+		}
+		if (yy->ss_finish)
+			addarc(d, yy->ss_rename, yy->ss_rename, 0);
+	}
+	
+	d->d_initial = 0;
+}
+
+
+/* PART FIVE -- GLUE IT ALL TOGETHER */
+
+static grammar *
+maketables(gr)
+	nfagrammar *gr;
+{
+	int i;
+	nfa *nf;
+	dfa *d;
+	grammar *g;
+	
+	if (gr->gr_nnfas == 0)
+		return NULL;
+	g = newgrammar(gr->gr_nfa[0]->nf_type);
+			/* XXX first rule must be start rule */
+	g->g_ll = gr->gr_ll;
+	
+	for (i = 0; i < gr->gr_nnfas; i++) {
+		nf = gr->gr_nfa[i];
+		if (debugging) {
+			printf("Dump of NFA for '%s' ...\n", nf->nf_name);
+			dumpnfa(&gr->gr_ll, nf);
+		}
+		printf("Making DFA for '%s' ...\n", nf->nf_name);
+		d = adddfa(g, nf->nf_type, nf->nf_name);
+		makedfa(gr, gr->gr_nfa[i], d);
+	}
+	
+	return g;
+}
+
+grammar *
+pgen(n)
+	node *n;
+{
+	nfagrammar *gr;
+	grammar *g;
+	
+	gr = metacompile(n);
+	g = maketables(gr);
+	translatelabels(g);
+	addfirstsets(g);
+	return g;
+}
+
+
+/*
+
+Description
+-----------
+
+Input is a grammar in extended BNF (using * for repetition, + for
+at-least-once repetition, [] for optional parts, | for alternatives and
+() for grouping).  This has already been parsed and turned into a parse
+tree.
+
+Each rule is considered as a regular expression in its own right.
+It is turned into a Non-deterministic Finite Automaton (NFA), which
+is then turned into a Deterministic Finite Automaton (DFA), which is then
+optimized to reduce the number of states.  See [Aho&Ullman 77] chapter 3,
+or similar compiler books (this technique is more often used for lexical
+analyzers).
+
+The DFA's are used by the parser as parsing tables in a special way
+that's probably unique.  Before they are usable, the FIRST sets of all
+non-terminals are computed.
+
+Reference
+---------
+
+[Aho&Ullman 77]
+	Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
+	(first edition)
+
+*/
diff --git a/Parser/pgen.h b/Parser/pgen.h
new file mode 100644
index 0000000..7fcf277
--- /dev/null
+++ b/Parser/pgen.h
@@ -0,0 +1,6 @@
+/* Parser generator interface */
+
+extern grammar gram;
+
+extern grammar *meta_grammar PROTO((void));
+extern grammar *pgen PROTO((node *));
diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c
new file mode 100644
index 0000000..678be5d
--- /dev/null
+++ b/Parser/pgenmain.c
@@ -0,0 +1,111 @@
+/* Parser generator main program */
+
+#include <stdio.h>
+
+#include "PROTO.h"
+#include "grammar.h"
+#include "node.h"
+#include "parsetok.h"
+#include "pgen.h"
+
+int debugging;
+
+#ifdef THINK_C
+char *
+askfile()
+{
+	char buf[256];
+	static char name[256];
+	printf("Input file name: ");
+	if (fgets(buf, sizeof buf, stdin) == NULL) {
+		printf("EOF\n");
+		exit(1);
+	}
+	if (sscanf(buf, " %s ", name) != 1) {
+		printf("No file\n");
+		exit(1);
+	}
+	return name;
+}
+#endif
+
+grammar *
+getgrammar(filename)
+	char *filename;
+{
+	FILE *fp;
+	node *n;
+	grammar *g0, *g;
+	
+	fp = fopen(filename, "r");
+	if (fp == NULL) {
+		perror(filename);
+		exit(1);
+	}
+	g0 = meta_grammar();
+	n = NULL;
+	parsefile(fp, g0, g0->g_start, (char *)NULL, (char *)NULL, &n);
+	fclose(fp);
+	if (n == NULL) {
+		fprintf(stderr, "Parsing error.\n");
+		exit(1);
+	}
+	g = pgen(n);
+	if (g == NULL) {
+		printf("Bad grammar.\n");
+		exit(1);
+	}
+	return g;
+}
+
+main(argc, argv)
+	int argc;
+	char **argv;
+{
+	grammar *g;
+	node *n;
+	FILE *fp;
+	char *filename;
+	
+#ifdef THINK_C
+	filename = askfile();
+#else
+	if (argc != 2) {
+		fprintf(stderr, "usage: %s grammar\n", argv[0]);
+		exit(2);
+	}
+	filename = argv[1];
+#endif
+	g = getgrammar(filename);
+	fp = fopen("graminit.c", "w");
+	if (fp == NULL) {
+		perror("graminit.c");
+		exit(1);
+	}
+	printf("Writing graminit.c ...\n");
+	printgrammar(g, fp);
+	fclose(fp);
+	fp = fopen("graminit.h", "w");
+	if (fp == NULL) {
+		perror("graminit.h");
+		exit(1);
+	}
+	printf("Writing graminit.h ...\n");
+	printnonterminals(g, fp);
+	fclose(fp);
+	exit(0);
+}
+
+void
+fatal(msg)
+	char *msg;
+{
+	fprintf(stderr, "pgen: FATAL ERROR: %s\n", msg);
+	exit(1);
+}
+
+/*	TO DO:
+
+	- improve user interface
+	- check for duplicate definitions of names (instead of fatal err)
+*/
diff --git a/Parser/printgrammar.c b/Parser/printgrammar.c
new file mode 100644
index 0000000..f6aa2cc
--- /dev/null
+++ b/Parser/printgrammar.c
@@ -0,0 +1,121 @@
+/* Print a bunch of C initializers that represent a grammar */
+
+#include <stdio.h>
+
+#include "PROTO.h"
+#include "grammar.h"
+
+static void
+printarcs(i, d, fp)
+	int i;
+	dfa *d;
+	FILE *fp;
+{
+	arc *a;
+	state *s;
+	int j, k;
+	
+	s = d->d_state;
+	for (j = 0; j < d->d_nstates; j++, s++) {
+		fprintf(fp, "static arc arcs_%d_%d[%d] = {\n",
+			i, j, s->s_narcs);
+		a = s->s_arc;
+		for (k = 0; k < s->s_narcs; k++, a++)
+			fprintf(fp, "\t{%d, %d},\n", a->a_lbl, a->a_arrow);
+		fprintf(fp, "};\n");
+	}
+}
+
+static void
+printstates(g, fp)
+	grammar *g;
+	FILE *fp;
+{
+	state *s;
+	dfa *d;
+	int i, j;
+	
+	d = g->g_dfa;
+	for (i = 0; i < g->g_ndfas; i++, d++) {
+		printarcs(i, d, fp);
+		fprintf(fp, "static state states_%d[%d] = {\n",
+			i, d->d_nstates);
+		s = d->d_state;
+		for (j = 0; j < d->d_nstates; j++, s++)
+			fprintf(fp, "\t{%d, arcs_%d_%d},\n",
+				s->s_narcs, i, j);
+		fprintf(fp, "};\n");
+	}
+}
+
+static void
+printdfas(g, fp)
+	grammar *g;
+	FILE *fp;
+{
+	dfa *d;
+	int i, j;
+	
+	printstates(g, fp);
+	fprintf(fp, "static dfa dfas[%d] = {\n", g->g_ndfas);
+	d = g->g_dfa;
+	for (i = 0; i < g->g_ndfas; i++, d++) {
+		fprintf(fp, "\t{%d, \"%s\", %d, %d, states_%d,\n",
+			d->d_type, d->d_name, d->d_initial, d->d_nstates, i);
+		fprintf(fp, "\t \"");
+		for (j = 0; j < NBYTES(g->g_ll.ll_nlabels); j++)
+			fprintf(fp, "\\%03o", d->d_first[j] & 0xff);
+		fprintf(fp, "\"},\n");
+	}
+	fprintf(fp, "};\n");
+}
+
+static void
+printlabels(g, fp)
+	grammar *g;
+	FILE *fp;
+{
+	label *l;
+	int i;
+	
+	fprintf(fp, "static label labels[%d] = {\n", g->g_ll.ll_nlabels);
+	l = g->g_ll.ll_label;
+	for (i = g->g_ll.ll_nlabels; --i >= 0; l++) {
+		if (l->lb_str == NULL)
+			fprintf(fp, "\t{%d, 0},\n", l->lb_type);
+		else
+			fprintf(fp, "\t{%d, \"%s\"},\n",
+				l->lb_type, l->lb_str);
+	}
+	fprintf(fp, "};\n");
+}
+
+void
+printgrammar(g, fp)
+	grammar *g;
+	FILE *fp;
+{
+	fprintf(fp, "#include \"PROTO.h\"\n");
+	fprintf(fp, "#include \"grammar.h\"\n");
+	printdfas(g, fp);
+	printlabels(g, fp);
+	fprintf(fp, "grammar gram = {\n");
+	fprintf(fp, "\t%d,\n", g->g_ndfas);
+	fprintf(fp, "\tdfas,\n");
+	fprintf(fp, "\t{%d, labels},\n", g->g_ll.ll_nlabels);
+	fprintf(fp, "\t%d\n", g->g_start);
+	fprintf(fp, "};\n");
+}
+
+void
+printnonterminals(g, fp)
+	grammar *g;
+	FILE *fp;
+{
+	dfa *d;
+	int i;
+	
+	d = g->g_dfa;
+	for (i = g->g_ndfas; --i >= 0; d++)
+		fprintf(fp, "#define %s %d\n", d->d_name, d->d_type);
+}
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
new file mode 100644
index 0000000..38f76ed
--- /dev/null
+++ b/Parser/tokenizer.c
@@ -0,0 +1,490 @@
+/* Tokenizer implementation */
+
+/* XXX This is rather old, should be restructured perhaps */
+/* XXX Need a better interface to report errors than writing to stderr */
+
+#include <stdio.h>
+#include <ctype.h>
+#include "string.h"
+
+#include "PROTO.h"
+#include "malloc.h"
+#include "tokenizer.h"
+#include "errcode.h"
+
+#ifdef THINK_C
+#define TABSIZE 4
+#endif
+
+#ifndef TABSIZE
+#define TABSIZE 8
+#endif
+
+/* Token names */
+
+char *tok_name[] = {
+	"ENDMARKER",
+	"NAME",
+	"NUMBER",
+	"STRING",
+	"NEWLINE",
+	"INDENT",
+	"DEDENT",
+	"LPAR",
+	"RPAR",
+	"LSQB",
+	"RSQB",
+	"COLON",
+	"COMMA",
+	"SEMI",
+	"PLUS",
+	"MINUS",
+	"STAR",
+	"SLASH",
+	"VBAR",
+	"AMPER",
+	"LESS",
+	"GREATER",
+	"EQUAL",
+	"DOT",
+	"PERCENT",
+	"BACKQUOTE",
+	"LBRACE",
+	"RBRACE",
+	"OP",
+	"<ERRORTOKEN>",
+	"<N_TOKENS>"
+};
+
+
+/* Create and initialize a new tok_state structure */
+
+static struct tok_state *
+tok_new()
+{
+	struct tok_state *tok = NEW(struct tok_state, 1);
+	if (tok == NULL)
+		return NULL;
+	tok->buf = tok->cur = tok->end = tok->inp = NULL;
+	tok->done = E_OK;
+	tok->fp = NULL;
+	tok->tabsize = TABSIZE;
+	tok->indent = 0;
+	tok->indstack[0] = 0;
+	tok->atbol = 1;
+	tok->pendin = 0;
+	tok->prompt = tok->nextprompt = NULL;
+	tok->lineno = 0;
+	return tok;
+}
+
+
+/* Set up tokenizer for string */
+
+struct tok_state *
+tok_setups(str)
+	char *str;
+{
+	struct tok_state *tok = tok_new();
+	if (tok == NULL)
+		return NULL;
+	tok->buf = tok->cur = str;
+	tok->end = tok->inp = strchr(str, '\0');
+	return tok;
+}
+
+
+/* Set up tokenizer for string */
+
+struct tok_state *
+tok_setupf(fp, ps1, ps2)
+	FILE *fp;
+	char *ps1, *ps2;
+{
+	struct tok_state *tok = tok_new();
+	if (tok == NULL)
+		return NULL;
+	if ((tok->buf = NEW(char, BUFSIZ)) == NULL) {
+		DEL(tok);
+		return NULL;
+	}
+	tok->cur = tok->inp = tok->buf;
+	tok->end = tok->buf + BUFSIZ;
+	tok->fp = fp;
+	tok->prompt = ps1;
+	tok->nextprompt = ps2;
+	return tok;
+}
+
+
+/* Free a tok_state structure */
+
+void
+tok_free(tok)
+	struct tok_state *tok;
+{
+	/* XXX really need a separate flag to say 'my buffer' */
+	if (tok->fp != NULL && tok->buf != NULL)
+		DEL(tok->buf);
+	DEL(tok);
+}
+
+
+/* Get next char, updating state; error code goes into tok->done */
+
+static int
+tok_nextc(tok)
+	register struct tok_state *tok;
+{
+	if (tok->done != E_OK)
+		return EOF;
+	
+	for (;;) {
+		if (tok->cur < tok->inp)
+			return *tok->cur++;
+		if (tok->fp == NULL) {
+			tok->done = E_EOF;
+			return EOF;
+		}
+		if (tok->inp > tok->buf && tok->inp[-1] == '\n')
+			tok->inp = tok->buf;
+		if (tok->inp == tok->end) {
+			int n = tok->end - tok->buf;
+			char *new = tok->buf;
+			RESIZE(new, char, n+n);
+			if (new == NULL) {
+				fprintf(stderr, "tokenizer out of mem\n");
+				tok->done = E_NOMEM;
+				return EOF;
+			}
+			tok->buf = new;
+			tok->inp = tok->buf + n;
+			tok->end = tok->inp + n;
+		}
+#ifdef USE_READLINE
+		if (tok->prompt != NULL) {
+			extern char *readline PROTO((char *prompt));
+			static int been_here;
+			if (!been_here) {
+				/* Force rebind of TAB to insert-tab */
+				extern int rl_insert();
+				rl_bind_key('\t', rl_insert);
+				been_here++;
+			}
+			if (tok->buf != NULL)
+				free(tok->buf);
+			tok->buf = readline(tok->prompt);
+			(void) intrcheck(); /* Clear pending interrupt */
+			if (tok->nextprompt != NULL)
+				tok->prompt = tok->nextprompt;
+				/* XXX different semantics w/o readline()! */
+			if (tok->buf == NULL) {
+				tok->done = E_EOF;
+			}
+			else {
+				unsigned int n = strlen(tok->buf);
+				if (n > 0)
+					add_history(tok->buf);
+				/* Append the '\n' that readline()
+				   doesn't give us, for the tokenizer... */
+				tok->buf = realloc(tok->buf, n+2);
+				if (tok->buf == NULL)
+					tok->done = E_NOMEM;
+				else {
+					tok->end = tok->buf + n;
+					*tok->end++ = '\n';
+					*tok->end = '\0';
+					tok->inp = tok->end;
+					tok->cur = tok->buf;
+				}
+			}
+		}
+		else
+#endif
+		{
+			tok->cur = tok->inp;
+			if (tok->prompt != NULL && tok->inp == tok->buf) {
+				fprintf(stderr, "%s", tok->prompt);
+				tok->prompt = tok->nextprompt;
+			}
+			tok->done = fgets_intr(tok->inp,
+				(int)(tok->end - tok->inp), tok->fp);
+		}
+		if (tok->done != E_OK) {
+			if (tok->prompt != NULL)
+				fprintf(stderr, "\n");
+			return EOF;
+		}
+		tok->inp = strchr(tok->inp, '\0');
+	}
+}
+
+
+/* Back-up one character */
+
+static void
+tok_backup(tok, c)
+	register struct tok_state *tok;
+	register int c;
+{
+	if (c != EOF) {
+		if (--tok->cur < tok->buf) {
+			fprintf(stderr, "tok_backup: begin of buffer\n");
+			abort();
+		}
+		if (*tok->cur != c)
+			*tok->cur = c;
+	}
+}
+
+
+/* Return the token corresponding to a single character */
+
+int
+tok_1char(c)
+	int c;
+{
+	switch (c) {
+	case '(':	return LPAR;
+	case ')':	return RPAR;
+	case '[':	return LSQB;
+	case ']':	return RSQB;
+	case ':':	return COLON;
+	case ',':	return COMMA;
+	case ';':	return SEMI;
+	case '+':	return PLUS;
+	case '-':	return MINUS;
+	case '*':	return STAR;
+	case '/':	return SLASH;
+	case '|':	return VBAR;
+	case '&':	return AMPER;
+	case '<':	return LESS;
+	case '>':	return GREATER;
+	case '=':	return EQUAL;
+	case '.':	return DOT;
+	case '%':	return PERCENT;
+	case '`':	return BACKQUOTE;
+	case '{':	return LBRACE;
+	case '}':	return RBRACE;
+	default:	return OP;
+	}
+}
+
+
+/* Get next token, after space stripping etc. */
+
+int
+tok_get(tok, p_start, p_end)
+	register struct tok_state *tok; /* In/out: tokenizer state */
+	char **p_start, **p_end; /* Out: point to start/end of token */
+{
+	register int c;
+	
+	/* Get indentation level */
+	if (tok->atbol) {
+		register int col = 0;
+		tok->atbol = 0;
+		tok->lineno++;
+		for (;;) {
+			c = tok_nextc(tok);
+			if (c == ' ')
+				col++;
+			else if (c == '\t')
+				col = (col/tok->tabsize + 1) * tok->tabsize;
+			else
+				break;
+		}
+		tok_backup(tok, c);
+		if (col == tok->indstack[tok->indent]) {
+			/* No change */
+		}
+		else if (col > tok->indstack[tok->indent]) {
+			/* Indent -- always one */
+			if (tok->indent+1 >= MAXINDENT) {
+				fprintf(stderr, "excessive indent\n");
+				tok->done = E_TOKEN;
+				return ERRORTOKEN;
+			}
+			tok->pendin++;
+			tok->indstack[++tok->indent] = col;
+		}
+		else /* col < tok->indstack[tok->indent] */ {
+			/* Dedent -- any number, must be consistent */
+			while (tok->indent > 0 &&
+				col < tok->indstack[tok->indent]) {
+				tok->indent--;
+				tok->pendin--;
+			}
+			if (col != tok->indstack[tok->indent]) {
+				fprintf(stderr, "inconsistent dedent\n");
+				tok->done = E_TOKEN;
+				return ERRORTOKEN;
+			}
+		}
+	}
+	
+	*p_start = *p_end = tok->cur;
+	
+	/* Return pending indents/dedents */
+	if (tok->pendin != 0) {
+		if (tok->pendin < 0) {
+			tok->pendin++;
+			return DEDENT;
+		}
+		else {
+			tok->pendin--;
+			return INDENT;
+		}
+	}
+	
+ again:
+	/* Skip spaces */
+	do {
+		c = tok_nextc(tok);
+	} while (c == ' ' || c == '\t');
+	
+	/* Set start of current token */
+	*p_start = tok->cur - 1;
+	
+	/* Skip comment */
+	if (c == '#') {
+		/* Hack to allow overriding the tabsize in the file.
+		   This is also recognized by vi, when it occurs near the
+		   beginning or end of the file.  (Will vi never die...?) */
+		int x;
+		if (sscanf(tok->cur, " vi:set tabsize=%d:", &x) == 1 &&
+						x >= 1 && x <= 40) {
+			fprintf(stderr, "# vi:set tabsize=%d:\n", x);
+			tok->tabsize = x;
+		}
+		do {
+			c = tok_nextc(tok);
+		} while (c != EOF && c != '\n');
+	}
+	
+	/* Check for EOF and errors now */
+	if (c == EOF)
+		return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
+	
+	/* Identifier (most frequent token!) */
+	if (isalpha(c) || c == '_') {
+		do {
+			c = tok_nextc(tok);
+		} while (isalnum(c) || c == '_');
+		tok_backup(tok, c);
+		*p_end = tok->cur;
+		return NAME;
+	}
+	
+	/* Newline */
+	if (c == '\n') {
+		tok->atbol = 1;
+		*p_end = tok->cur - 1; /* Leave '\n' out of the string */
+		return NEWLINE;
+	}
+	
+	/* Number */
+	if (isdigit(c)) {
+		if (c == '0') {
+			/* Hex or octal */
+			c = tok_nextc(tok);
+			if (c == '.')
+				goto fraction;
+			if (c == 'x' || c == 'X') {
+				/* Hex */
+				do {
+					c = tok_nextc(tok);
+				} while (isxdigit(c));
+			}
+			else {
+				/* Octal; c is first char of it */
+				/* There's no 'isoctdigit' macro, sigh */
+				while ('0' <= c && c < '8') {
+					c = tok_nextc(tok);
+				}
+			}
+		}
+		else {
+			/* Decimal */
+			do {
+				c = tok_nextc(tok);
+			} while (isdigit(c));
+			/* Accept floating point numbers.
+			   XXX This accepts incomplete things like 12e or 1e+;
+			       worry about that at run-time.
+			   XXX Doesn't accept numbers starting with a dot */
+			if (c == '.') {
+	fraction:
+				/* Fraction */
+				do {
+					c = tok_nextc(tok);
+				} while (isdigit(c));
+			}
+			if (c == 'e' || c == 'E') {
+				/* Exponent part */
+				c = tok_nextc(tok);
+				if (c == '+' || c == '-')
+					c = tok_nextc(tok);
+				while (isdigit(c)) {
+					c = tok_nextc(tok);
+				}
+			}
+		}
+		tok_backup(tok, c);
+		*p_end = tok->cur;
+		return NUMBER;
+	}
+	
+	/* String */
+	if (c == '\'') {
+		for (;;) {
+			c = tok_nextc(tok);
+			if (c == '\n' || c == EOF) {
+				tok->done = E_TOKEN;
+				return ERRORTOKEN;
+			}
+			if (c == '\\') {
+				c = tok_nextc(tok);
+				*p_end = tok->cur;
+				if (c == '\n' || c == EOF) {
+					tok->done = E_TOKEN;
+					return ERRORTOKEN;
+				}
+				continue;
+			}
+			if (c == '\'')
+				break;
+		}
+		*p_end = tok->cur;
+		return STRING;
+	}
+	
+	/* Line continuation */
+	if (c == '\\') {
+		c = tok_nextc(tok);
+		if (c != '\n') {
+			tok->done = E_TOKEN;
+			return ERRORTOKEN;
+		}
+		goto again; /* Read next line */
+	}
+	
+	/* Punctuation character */
+	*p_end = tok->cur;
+	return tok_1char(c);
+}
+
+
+#ifdef DEBUG
+
+void
+tok_dump(type, start, end)
+	int type;
+	char *start, *end;
+{
+	printf("%s", tok_name[type]);
+	if (type == NAME || type == NUMBER || type == STRING || type == OP)
+		printf("(%.*s)", (int)(end - start), start);
+}
+
+#endif
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
new file mode 100644
index 0000000..8950c62
--- /dev/null
+++ b/Parser/tokenizer.h
@@ -0,0 +1,29 @@
+/* Tokenizer interface */
+
+#include "token.h"	/* For token types */
+
+#define MAXINDENT 100	/* Max indentation level */
+
+/* Tokenizer state */
+struct tok_state {
+	/* Input state; buf <= cur <= inp <= end */
+	/* NB an entire token must fit in the buffer */
+	char *buf;	/* Input buffer */
+	char *cur;	/* Next character in buffer */
+	char *inp;	/* End of data in buffer */
+	char *end;	/* End of input buffer */
+	int done;	/* 0 normally, 1 at EOF, -1 after error */
+	FILE *fp;	/* Rest of input; NULL if tokenizing a string */
+	int tabsize;	/* Tab spacing */
+	int indent;	/* Current indentation index */
+	int indstack[MAXINDENT];	/* Stack of indents */
+	int atbol;	/* Nonzero if at begin of new line */
+	int pendin;	/* Pending indents (if > 0) or dedents (if < 0) */
+	char *prompt, *nextprompt;	/* For interactive prompting */
+	int lineno;	/* Current line number */
+};
+
+extern struct tok_state *tok_setups PROTO((char *));
+extern struct tok_state *tok_setupf PROTO((FILE *, char *ps1, char *ps2));
+extern void tok_free PROTO((struct tok_state *));
+extern int tok_get PROTO((struct tok_state *, char **, char **));
author	Guido van Rossum <guido@python.org>	1990-10-14 12:07:46 (GMT)
committer	Guido van Rossum <guido@python.org>	1990-10-14 12:07:46 (GMT)
commit	85a5fbbdfea617f6cc8fae82c9e8c2b5c424436d (patch)
tree	a1bf57db1c75e2a7029c8f2fad5f8dba4b9ba25c /Parser
parent	c636014c430620325f8d213e9ba10d925991b8d7 (diff)
download	cpython-85a5fbbdfea617f6cc8fae82c9e8c2b5c424436d.zip cpython-85a5fbbdfea617f6cc8fae82c9e8c2b5c424436d.tar.gz cpython-85a5fbbdfea617f6cc8fae82c9e8c2b5c424436d.tar.bz2