summaryrefslogtreecommitdiffstats
path: root/generic/regcomp.c
diff options
context:
space:
mode:
authorstanton <stanton>1998-11-11 01:44:46 (GMT)
committerstanton <stanton>1998-11-11 01:44:46 (GMT)
commit0a41c61107c36da0a8e4ca0fc259149e3bc1956d (patch)
tree37f7fe5f0b8a64e08aae1446bb8cdd4516256a01 /generic/regcomp.c
parent3774776e7bc507091c0793c14cfd8fb45484e054 (diff)
downloadtcl-0a41c61107c36da0a8e4ca0fc259149e3bc1956d.zip
tcl-0a41c61107c36da0a8e4ca0fc259149e3bc1956d.tar.gz
tcl-0a41c61107c36da0a8e4ca0fc259149e3bc1956d.tar.bz2
integrated latest regexp updates from Henry Spencer, includes new
regexp switches "-line", "-lineanchor", and "-linestop" for controlling newline behavior
Diffstat (limited to 'generic/regcomp.c')
-rw-r--r--generic/regcomp.c1705
1 files changed, 788 insertions, 917 deletions
diff --git a/generic/regcomp.c b/generic/regcomp.c
index b8c2119..620dc1c 100644
--- a/generic/regcomp.c
+++ b/generic/regcomp.c
@@ -14,7 +14,11 @@
int compile _ANSI_ARGS_((regex_t *, CONST chr *, size_t, int));
static VOID moresubs _ANSI_ARGS_((struct vars *, int));
static int freev _ANSI_ARGS_((struct vars *, int));
-static struct rtree *parse _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, int));
+static struct subre *parse _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *));
+static struct subre *parsebranch _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, int));
+static VOID parseqatom _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, struct subre *));
+static VOID nonword _ANSI_ARGS_((struct vars *, int, struct state *, struct state *));
+static VOID word _ANSI_ARGS_((struct vars *, int, struct state *, struct state *));
static int scannum _ANSI_ARGS_((struct vars *));
static VOID repeat _ANSI_ARGS_((struct vars *, struct state *, struct state *, int, int));
static VOID bracket _ANSI_ARGS_((struct vars *, struct state *, struct state *));
@@ -24,24 +28,23 @@ static chr *scanplain _ANSI_ARGS_((struct vars *));
static VOID leaders _ANSI_ARGS_((struct vars *, struct cvec *));
static VOID onechr _ANSI_ARGS_((struct vars *, pchr, struct state *, struct state *));
static VOID dovec _ANSI_ARGS_((struct vars *, struct cvec *, struct state *, struct state *));
-static color nlcolor _ANSI_ARGS_((struct vars *));
+static celt nextleader _ANSI_ARGS_((struct vars *, pchr, pchr));
static VOID wordchrs _ANSI_ARGS_((struct vars *));
-static struct subre subre _ANSI_ARGS_((struct state *, struct state *, int, int, struct rtree *));
-static struct rtree *newrt _ANSI_ARGS_((struct vars *));
-static VOID freert _ANSI_ARGS_((struct vars *, struct rtree *));
-static VOID freertnode _ANSI_ARGS_((struct vars *, struct rtree *));
-static VOID optrt _ANSI_ARGS_((struct vars *, struct rtree *));
-static int numrt _ANSI_ARGS_((struct rtree *, int));
-static VOID markrt _ANSI_ARGS_((struct rtree *));
-static VOID cleanrt _ANSI_ARGS_((struct vars *));
-static VOID nfatree _ANSI_ARGS_((struct vars *, struct rtree *, FILE *));
-static VOID nfanode _ANSI_ARGS_((struct vars *, struct subre *, FILE *));
+static struct subre *subre _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *));
+static VOID freesubre _ANSI_ARGS_((struct vars *, struct subre *));
+static VOID freesrnode _ANSI_ARGS_((struct vars *, struct subre *));
+static VOID optst _ANSI_ARGS_((struct vars *, struct subre *));
+static int numst _ANSI_ARGS_((struct subre *, int));
+static VOID markst _ANSI_ARGS_((struct subre *));
+static VOID cleanst _ANSI_ARGS_((struct vars *));
+static int nfatree _ANSI_ARGS_((struct vars *, struct subre *, FILE *));
+static int nfanode _ANSI_ARGS_((struct vars *, struct subre *, FILE *));
static int newlacon _ANSI_ARGS_((struct vars *, struct state *, struct state *, int));
static VOID freelacons _ANSI_ARGS_((struct subre *, int));
static VOID rfree _ANSI_ARGS_((regex_t *));
static VOID dump _ANSI_ARGS_((regex_t *, FILE *));
-static VOID dumprt _ANSI_ARGS_((struct rtree *, FILE *, int));
-static VOID rtdump _ANSI_ARGS_((struct rtree *, FILE *, int, int));
+static VOID dumpst _ANSI_ARGS_((struct subre *, FILE *, int));
+static VOID stdump _ANSI_ARGS_((struct subre *, FILE *, int, int));
/* === regc_lex.c === */
static VOID lexstart _ANSI_ARGS_((struct vars *));
static VOID prefixes _ANSI_ARGS_((struct vars *));
@@ -56,17 +59,18 @@ static chr newline _ANSI_ARGS_((NOPARMS));
static chr *ch _ANSI_ARGS_((NOPARMS));
static chr chrnamed _ANSI_ARGS_((struct vars *, chr *, chr *, pchr));
/* === regc_color.c === */
-static struct colormap *newcm _ANSI_ARGS_((struct vars *));
+static VOID initcm _ANSI_ARGS_((struct vars *, struct colormap *));
static VOID freecm _ANSI_ARGS_((struct colormap *));
static VOID cmtreefree _ANSI_ARGS_((struct colormap *, union tree *, int));
-static VOID fillcm _ANSI_ARGS_((struct colormap *));
-static VOID cmtreefill _ANSI_ARGS_((struct colormap *, union tree *, int));
-static color getcolor _ANSI_ARGS_((struct colormap *, pchr));
static color setcolor _ANSI_ARGS_((struct colormap *, pchr, pcolor));
static color maxcolor _ANSI_ARGS_((struct colormap *));
static color newcolor _ANSI_ARGS_((struct colormap *));
+static VOID freecolor _ANSI_ARGS_((struct colormap *, pcolor));
static color pseudocolor _ANSI_ARGS_((struct colormap *));
static color subcolor _ANSI_ARGS_((struct colormap *, pchr c));
+static color newsub _ANSI_ARGS_((struct colormap *, pcolor));
+static VOID subrange _ANSI_ARGS_((struct vars *, pchr, pchr, struct state *, struct state *));
+static VOID subblock _ANSI_ARGS_((struct vars *, pchr, struct state *, struct state *));
static VOID okcolors _ANSI_ARGS_((struct nfa *, struct colormap *));
static VOID colorchain _ANSI_ARGS_((struct colormap *, struct arc *));
static VOID uncolorchain _ANSI_ARGS_((struct colormap *, struct arc *));
@@ -115,10 +119,9 @@ static VOID cleanup _ANSI_ARGS_((struct nfa *));
static VOID markreachable _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *));
static VOID markcanreach _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *));
static int analyze _ANSI_ARGS_((struct nfa *));
-static int isempty _ANSI_ARGS_((struct state *, struct state *));
static VOID compact _ANSI_ARGS_((struct nfa *, struct cnfa *));
static VOID carcsort _ANSI_ARGS_((struct carc *, struct carc *));
-static VOID freecnfa _ANSI_ARGS_((struct cnfa *, int));
+static VOID freecnfa _ANSI_ARGS_((struct cnfa *));
static VOID dumpnfa _ANSI_ARGS_((struct nfa *, FILE *));
static VOID dumpstate _ANSI_ARGS_((struct state *, FILE *));
static VOID dumparcs _ANSI_ARGS_((struct state *, FILE *));
@@ -173,9 +176,9 @@ struct vars {
struct colormap *cm; /* character color map */
color nlcolor; /* color of newline */
struct state *wordchrs; /* state in nfa holding word-char outarcs */
- struct rtree *tree; /* subexpression tree */
- struct rtree *treechain; /* all tree nodes allocated */
- struct rtree *treefree; /* any free tree nodes */
+ struct subre *tree; /* subexpression tree */
+ struct subre *treechain; /* all tree nodes allocated */
+ struct subre *treefree; /* any free tree nodes */
int ntree; /* number of tree nodes */
struct cvec *cv; /* interface cvec */
struct cvec *cv2; /* utility cvec */
@@ -186,6 +189,7 @@ struct vars {
struct subre *lacons; /* lookahead-constraint vector */
int nlacons; /* size of lacons */
int usedshorter; /* used short-preferring quantifiers */
+ int unmatchable; /* can never match */
};
/* parsing macros; most know that `v' is the struct vars pointer */
@@ -199,6 +203,7 @@ struct vars {
#define ERR(e) VERR(v, e) /* record an error */
#define NOERR() {if (ISERR()) return;} /* if error seen, return */
#define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */
+#define NOERRZ() {if (ISERR()) return 0;} /* NOERR with retval */
#define INSIST(c, e) ((c) ? 0 : ERR(e)) /* if condition false, error */
#define NOTE(b) (v->re->re_info |= (b)) /* note visible condition */
#define EMPTYARC(x, y) newarc(v->nfa, EMPTY, 0, x, y)
@@ -259,7 +264,6 @@ int flags;
if (re == NULL || string == NULL)
return REG_INVARG;
- assert(REG_ADVANCED == (REG_EXTENDED|REG_ADVF));
if ((flags&REG_QUOTE) &&
(flags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)))
return REG_INVARG;
@@ -297,19 +301,18 @@ int flags;
re->re_fns = VS(&functions);
/* more complex setup, malloced things */
- v->cm = newcm(v);
- CNOERR();
- v->nfa = newnfa(v, v->cm, (struct nfa *)NULL);
- CNOERR();
re->re_guts = VS(MALLOC(sizeof(struct guts)));
if (re->re_guts == NULL)
return freev(v, REG_ESPACE);
g = (struct guts *)re->re_guts;
- ZAPCNFA(g->cnfa);
g->tree = NULL;
- g->cm = NULL;
+ initcm(v, &g->cmap);
+ v->cm = &g->cmap;
g->lacons = NULL;
g->nlacons = 0;
+ ZAPCNFA(g->search);
+ v->nfa = newnfa(v, v->cm, (struct nfa *)NULL);
+ CNOERR();
v->cv = newcvec(100, 20, 10);
if (v->cv == NULL)
return freev(v, REG_ESPACE);
@@ -324,53 +327,56 @@ int flags;
/* parsing */
lexstart(v); /* also handles prefixes */
- v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final, NONEYET);
+ if ((v->cflags&REG_NLSTOP) || (v->cflags&REG_NLANCH)) {
+ /* assign newline a unique color */
+ v->nlcolor = subcolor(v->cm, newline());
+ okcolors(v->nfa, v->cm);
+ }
+ CNOERR();
+ v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final);
assert(SEE(EOS)); /* even if error; ISERR() => SEE(EOS) */
CNOERR();
+ assert(v->tree != NULL);
/* finish setup of nfa and its subre tree */
specialcolors(v->nfa);
CNOERR();
if (debug != NULL) {
dumpnfa(v->nfa, debug);
- dumprt(v->tree, debug, 1);
+ dumpst(v->tree, debug, 1);
}
v->usedshorter = 0;
- optrt(v, v->tree);
- if (v->tree != NULL) {
- v->ntree = numrt(v->tree, 1);
- markrt(v->tree);
- } else
- v->ntree = 0;
- cleanrt(v);
+ v->unmatchable = 0;
+ optst(v, v->tree);
+ v->ntree = numst(v->tree, 1);
+ markst(v->tree);
+ cleanst(v);
if (debug != NULL) {
fprintf(debug, "-->\n");
- dumprt(v->tree, debug, 1);
+ dumpst(v->tree, debug, 1);
}
- /* build compacted NFAs for tree, lacons, main nfa */
- nfatree(v, v->tree, debug);
+ /* build compacted NFAs for tree, lacons, fast search */
+ re->re_info |= nfatree(v, v->tree, debug);
if (debug != NULL) {
fprintf(debug, "---->\n");
- dumprt(v->tree, debug, 1);
+ dumpst(v->tree, debug, 1);
}
CNOERR();
+ if (re->re_info&REG_UIMPOSSIBLE)
+ v->unmatchable = 1;
assert(v->nlacons == 0 || v->lacons != NULL);
for (i = 1; i < v->nlacons; i++)
nfanode(v, &v->lacons[i], debug);
CNOERR();
- re->re_info |= optimize(v->nfa, debug);
+ rainbow(v->nfa, v->cm, PLAIN, COLORLESS, v->nfa->pre, v->nfa->pre);
+ newarc(v->nfa, PLAIN, v->nfa->bos[0], v->nfa->pre, v->nfa->pre);
+ newarc(v->nfa, PLAIN, v->nfa->bos[1], v->nfa->pre, v->nfa->pre);
+ newarc(v->nfa, PLAIN, v->nfa->eos[0], v->nfa->pre, v->nfa->pre);
+ newarc(v->nfa, PLAIN, v->nfa->eos[1], v->nfa->pre, v->nfa->pre);
+ (DISCARD)optimize(v->nfa, debug);
CNOERR();
- if (v->nfa->post->nins <= 0)
- return freev(v, REG_IMPOSS); /* end unreachable! */
- assert(v->nfa->pre->nouts > 0);
- compact(v->nfa, &g->cnfa);
- CNOERR();
- freenfa(v->nfa);
- v->nfa = NULL;
-
- /* fill color map */
- fillcm(v->cm);
+ compact(v->nfa, &g->search);
CNOERR();
/* looks okay, package it up */
@@ -380,8 +386,6 @@ int flags;
g->cflags = v->cflags;
g->info = re->re_info;
g->nsub = re->re_nsub;
- g->cm = v->cm;
- v->cm = NULL;
g->tree = v->tree;
v->tree = NULL;
g->ntree = v->ntree;
@@ -390,6 +394,7 @@ int flags;
v->lacons = NULL;
g->nlacons = v->nlacons;
g->usedshorter = v->usedshorter;
+ g->unmatchable = v->unmatchable;
if (flags&REG_DUMP)
dump(re, stdout);
@@ -447,12 +452,10 @@ int err;
FREE(v->subs);
if (v->nfa != NULL)
freenfa(v->nfa);
- if (v->cm != NULL)
- freecm(v->cm);
if (v->tree != NULL)
- freert(v, v->tree);
+ freesubre(v, v->tree);
if (v->treechain != NULL)
- cleanrt(v);
+ cleanst(v);
if (v->cv != NULL)
freecvec(v->cv);
if (v->cv2 != NULL)
@@ -468,541 +471,562 @@ int err;
/*
- parse - parse an RE
- * Arguably this is too big and too complex and ought to be divided up.
- * However, the code is somewhat intertwined...
- *
- * Note that it is no longer necessary to be rigorous about freeing tree
- * nodes on error exits, as the tree machinery keeps track of them.
- ^ static struct rtree *parse(struct vars *, int, int, struct state *,
- ^ struct state *, int);
+ * This is actually just the top level, which parses a bunch of branches
+ * tied together with '|'. They appear in the tree as the left children
+ * of a chain of '|' subres.
+ ^ static struct subre *parse(struct vars *, int, int, struct state *,
+ ^ struct state *);
*/
-static struct rtree * /* NULL if no interesting substructure */
-parse(v, stopper, type, init, final, pprefer)
+static struct subre *
+parse(v, stopper, type, init, final)
struct vars *v;
int stopper; /* EOS or ')' */
int type; /* LACON (lookahead subRE) or PLAIN */
struct state *init; /* initial state */
struct state *final; /* final state */
-int pprefer; /* parent's short/long preference */
{
struct state *left; /* scaffolding for branch */
struct state *right;
- struct state *lp; /* scaffolding for current construct */
- struct state *rp;
- struct state *s; /* temporaries for new states */
- struct state *s2;
-# define ARCV(t, val) newarc(v->nfa, t, val, lp, rp)
- int m, n;
- int emptybranch; /* is there anything in this branch yet? */
- struct rtree *branches; /* top level */
- struct rtree *branch; /* current branch */
- struct subre *now; /* current subtree's top */
- struct subre sub; /* communication variable */
- struct rtree *rt1; /* temporaries */
- struct rtree *rt2;
- struct subre *t; /* work pointer, top of interesting subtree */
+ struct subre *branches; /* top level */
+ struct subre *branch; /* current branch */
+ struct subre *t; /* temporary */
int firstbranch; /* is this the first branch? */
- int capture; /* any capturing parens within this? */
- int constraint; /* is the current atom a constraint? */
assert(stopper == ')' || stopper == EOS);
- capture = 0;
- branches = newrt(v);
+ branches = subre(v, '|', LONGER, init, final);
+ NOERRN();
branch = branches;
- rt1 = NULL; /* shut up lint */
firstbranch = 1;
- NOERRN();
- do {
- /* a branch */
- emptybranch = 1; /* tentatively */
- left = newstate(v->nfa);
- right = newstate(v->nfa);
- NOERRN();
+ do { /* a branch */
if (!firstbranch) {
- rt1 = newrt(v);
+ /* need a place to hang it */
+ branch->right = subre(v, '|', LONGER, init, final);
NOERRN();
- branch->next = rt1;
- branch = rt1;
+ branch = branch->right;
}
+ firstbranch = 0;
+ left = newstate(v->nfa);
+ right = newstate(v->nfa);
+ NOERRN();
EMPTYARC(init, left);
EMPTYARC(right, final);
- lp = left;
- rp = right;
- branch->op = '|';
- now = &branch->left;
- *now = subre(left, right, NONEYET, 0, (struct rtree *)NULL);
- firstbranch = 0;
NOERRN();
+ branch->left = parsebranch(v, stopper, type, left, right, 0);
+ NOERRN();
+ branch->flags |= UP(branch->flags | branch->left->flags);
+ if ((branch->flags &~ branches->flags) != 0) /* new flags */
+ for (t = branches; t != branch; t = t->right)
+ t->flags |= branch->flags;
+ } while (EAT('|'));
+ assert(SEE(stopper) || SEE(EOS));
- while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) {
- /* initial bookkeeping */
- sub.begin = NULL; /* no substructure seen yet */
- sub.subno = 0;
- sub.prefer = NONEYET;
- constraint = 0;
- if (emptybranch) /* first of the branch */
- emptybranch = 0;
- else { /* implicit concat operator */
- lp = newstate(v->nfa);
- NOERRN();
- moveins(v->nfa, rp, lp);
- }
- assert(lp->nouts == 0); /* must string new code */
- assert(rp->nins == 0); /* between lp and rp */
-
- /* an atom... */
- switch (v->nexttype) {
- case '(': /* value flags as capturing or non */
- m = (type == LACON) ? 0 : v->nextvalue;
- if (m) {
- v->nsubexp++;
- sub.subno = v->nsubexp;
- if ((size_t)sub.subno >= v->nsubs)
- moresubs(v, sub.subno);
- assert((size_t)sub.subno < v->nsubs);
- } else
- sub.subno = 0;
- NEXT();
- sub.begin = lp; /* NB, substructure seen */
- sub.end = rp;
- /* use now->tree as temporary, so */
- /* things get freed on error returns */
- assert(now->tree == NULL);
- now->tree = parse(v, ')', PLAIN, lp, rp,
- now->prefer);
- assert(SEE(')') || ISERR());
- NEXT();
- NOERRN();
- if (!m && now->tree == NULL) {
- /* actually no relevant substructure */
- sub.begin = NULL;
- }
- if (now->tree != NULL) {
- if (now->tree->op == '|')
- sub.prefer = LONGER;
- else
- sub.prefer =
- now->tree->left.prefer;
- }
- /* must postpone other processing until we */
- /* know about any {0,0} quantifier */
- break;
- case BACKREF: /* the Feature From The Black Lagoon */
- INSIST(type != LACON, REG_ESUBREG);
- INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);
- INSIST(v->subs[v->nextvalue] != NULL,
- REG_ESUBREG);
- NOERRN();
- assert(v->nextvalue > 0);
- sub.subno = -v->nextvalue;
- sub.begin = lp; /* NB, substructure seen */
- sub.end = rp;
- EMPTYARC(lp, rp); /* temporarily */
- assert(now->tree == NULL);
- NEXT();
- break;
- case LACON: /* lookahead constraint */
- m = v->nextvalue; /* is positive? */
- NEXT();
- s = newstate(v->nfa);
- s2 = newstate(v->nfa);
- NOERRN();
- rt1 = parse(v, ')', LACON, s, s2, NONEYET);
- assert(SEE(')') || ISERR());
- NEXT();
- m = newlacon(v, s, s2, m);
- freert(v, rt1);
- NOERRN();
- ARCV(LACON, m);
- constraint = 1;
- break;
- case PREFER: /* length preference */
- sub.prefer = (v->nextvalue) ? LONGER : SHORTER;
- NEXT();
- sub.begin = lp; /* NB, substructure seen */
- sub.end = rp;
- /* use now->tree as temporary, so */
- /* things get freed on error returns */
- assert(now->tree == NULL);
- now->tree = parse(v, ')', PLAIN, lp, rp,
- sub.prefer);
- assert(SEE(')') || ISERR());
- NEXT();
- NOERRN();
- if (now->prefer == NONEYET)
- now->prefer = sub.prefer;
- if (sub.prefer == now->prefer &&
- now->tree == NULL) {
- /* actually no relevant substructure */
- sub.begin = NULL;
- }
- break;
- case '[':
- if (v->nextvalue == 1)
- bracket(v, lp, rp);
- else
- cbracket(v, lp, rp);
- assert(SEE(']') || ISERR());
- NEXT();
- break;
- case '.':
- rainbow(v->nfa, v->cm, PLAIN,
- (v->cflags&REG_NLSTOP) ?
- nlcolor(v) : COLORLESS,
- lp, rp);
- NEXT();
- break;
- case '^':
- ARCV('^', 1);
- if (v->cflags&REG_NLANCH)
- ARCV(BEHIND, nlcolor(v));
- NEXT();
- constraint = 1;
- break;
- case '$':
- ARCV('$', 1);
- if (v->cflags&REG_NLANCH)
- ARCV(AHEAD, nlcolor(v));
- NEXT();
- constraint = 1;
- break;
- case SBEGIN:
- ARCV('^', 1); /* BOL */
- ARCV('^', 0); /* or BOS */
- NEXT();
- constraint = 1;
- break;
- case SEND:
- ARCV('$', 1); /* EOL */
- ARCV('$', 0); /* or EOS */
- NEXT();
- constraint = 1;
- break;
- case '<':
- wordchrs(v); /* does NEXT() */
- s = newstate(v->nfa);
- NOERRN();
- /* needs BOL, BOS, or nonword to left... */
- newarc(v->nfa, '^', 1, lp, s);
- newarc(v->nfa, '^', 0, lp, s);
- colorcomplement(v->nfa, v->cm, BEHIND,
- v->wordchrs, lp, s);
- /* ... and word to right */
- cloneouts(v->nfa, v->wordchrs, s, rp, AHEAD);
- /* (no need for special attention to \n) */
- constraint = 1;
- break;
- case '>':
- wordchrs(v); /* does NEXT() */
- s = newstate(v->nfa);
- NOERRN();
- /* needs word to left... */
- cloneouts(v->nfa, v->wordchrs, lp, s, BEHIND);
- /* ... and EOL, EOS, or nonword to right */
- newarc(v->nfa, '$', 1, s, rp);
- newarc(v->nfa, '$', 0, s, rp);
- colorcomplement(v->nfa, v->cm, AHEAD,
- v->wordchrs, s, rp);
- /* (no need for special attention to \n) */
- constraint = 1;
- break;
- case WBDRY:
- wordchrs(v); /* does NEXT() */
- s = newstate(v->nfa);
- NOERRN();
- /* needs BOL, BOS, or nonword to left... */
- newarc(v->nfa, '^', 1, lp, s);
- newarc(v->nfa, '^', 0, lp, s);
- colorcomplement(v->nfa, v->cm, BEHIND,
- v->wordchrs, lp, s);
- /* ... and word to right... */
- cloneouts(v->nfa, v->wordchrs, s, rp, AHEAD);
- /* ...or... */
- s = newstate(v->nfa);
- NOERRN();
- /* ...needs word to left... */
- cloneouts(v->nfa, v->wordchrs, lp, s, BEHIND);
- /* ... and EOL, EOS, or nonword to right */
- newarc(v->nfa, '$', 1, s, rp);
- newarc(v->nfa, '$', 0, s, rp);
- colorcomplement(v->nfa, v->cm, AHEAD,
- v->wordchrs, s, rp);
- /* (no need for special attention to \n) */
- constraint = 1;
- break;
- case NWBDRY:
- wordchrs(v); /* does NEXT() */
- s = newstate(v->nfa);
- NOERRN();
- /* needs word to both left and right... */
- cloneouts(v->nfa, v->wordchrs, lp, s, BEHIND);
- cloneouts(v->nfa, v->wordchrs, s, rp, AHEAD);
- /* ...or... */
- s = newstate(v->nfa);
- NOERRN();
- /* ...BOL, BOS, or nonword to left... */
- newarc(v->nfa, '^', 1, lp, s);
- newarc(v->nfa, '^', 0, lp, s);
- colorcomplement(v->nfa, v->cm, BEHIND,
- v->wordchrs, lp, s);
- /* ... and EOL, EOS, or nonword to right */
- newarc(v->nfa, '$', 1, s, rp);
- newarc(v->nfa, '$', 0, s, rp);
- colorcomplement(v->nfa, v->cm, AHEAD,
- v->wordchrs, s, rp);
- /* (no need for special attention to \n) */
- constraint = 1;
- break;
- case ')': /* unbalanced paren */
-#ifdef POSIX_MISTAKE
- if (!(v->cflags&REG_EXTENDED) ||
- (v->cflags&REG_ADVF)) {
- ERR(REG_EPAREN);
- return NULL;
- }
- NOTE(REG_UPBOTCH);
- /* fallthrough into case PLAIN */
-#else
- ERR(REG_EPAREN);
- return NULL;
- break;
-#endif
- case PLAIN:
- onechr(v, v->nextvalue, lp, rp);
- okcolors(v->nfa, v->cm);
- NOERRN();
- NEXT();
- break;
- case '*':
- case '+':
- case '?':
- case '{':
- ERR(REG_BADRPT);
- return NULL;
- break;
- default:
- ERR(REG_ASSERT);
- return NULL;
- break;
- }
-
- /* ...possibly followed by a quantifier */
- switch (v->nexttype) {
- case '*':
- m = 0;
- n = INFINITY;
- sub.prefer = (v->nextvalue) ? LONGER : SHORTER;
- NEXT();
- break;
- case '+':
- m = 1;
- n = INFINITY;
- sub.prefer = (v->nextvalue) ? LONGER : SHORTER;
- NEXT();
- break;
- case '?':
- m = 0;
- n = 1;
- sub.prefer = (v->nextvalue) ? LONGER : SHORTER;
- NEXT();
- break;
- case '{':
- NEXT();
- m = scannum(v);
- if (EAT(',')) {
- if (SEE(DIGIT))
- n = scannum(v);
- else
- n = INFINITY;
- if (m > n) {
- ERR(REG_BADBR);
- return NULL;
- }
- } else
- n = m;
- if (!SEE('}')) { /* gets errors too */
- ERR(REG_BADBR);
- return NULL;
- }
- if (m != n)
- sub.prefer = (v->nextvalue) ? LONGER :
- SHORTER;
- NEXT();
- break;
- default: /* no quantifier */
- m = n = 1;
- constraint = 0;
- break;
- }
+ if (!SEE(stopper)) {
+ assert(stopper == ')' && SEE(EOS));
+ ERR(REG_EPAREN);
+ }
- /* constraints may not be quantified */
- if (constraint) {
- ERR(REG_BADRPT);
- return NULL;
- }
+ /* optimize out simple cases */
+ if (branch == branches) { /* only one branch */
+ assert(branch->right == NULL);
+ t = branch->left;
+ branch->left = NULL;
+ freesubre(v, branches);
+ branches = t;
+ } else if (!MESSY(branches->flags)) { /* no interesting innards */
+ freesubre(v, branches->left);
+ branches->left = NULL;
+ freesubre(v, branches->right);
+ branches->right = NULL;
+ branches->op = '=';
+ }
- /* annoying special case: {0,0} cancels everything */
- if (m == 0 && n == 0 && sub.begin != NULL) {
- freert(v, now->tree);
- now->tree = NULL;
- sub.begin = NULL; /* no substructure */
- sub.prefer = NONEYET;
- /* the repeat() below will do the rest */
- }
+ return branches;
+}
- /* if no substructure, avoid hard part */
- if (now->prefer == NONEYET)
- now->prefer = sub.prefer;
- if (sub.begin == NULL && (sub.prefer == NONEYET ||
- sub.prefer == now->prefer)) {
- assert(sub.subno >= 0 || (m == 0 && n == 0));
- if (!(m == 1 && n == 1))
- repeat(v, lp, rp, m, n);
- continue; /* NOTE CONTINUE */
- }
+/*
+ - parsebranch - parse one branch of an RE
+ * This mostly manages concatenation, working closely with parseqatom().
+ * Concatenated things are bundled up as much as possible, with separate
+ * ',' nodes introduced only when necessary due to substructure.
+ ^ static struct subre *parsebranch(struct vars *, int, int, struct state *,
+ ^ struct state *, int);
+ */
+static struct subre *
+parsebranch(v, stopper, type, left, right, partial)
+struct vars *v;
+int stopper; /* EOS or ')' */
+int type; /* LACON (lookahead subRE) or PLAIN */
+struct state *left; /* leftmost state */
+struct state *right; /* rightmost state */
+int partial; /* is this only part of a branch? */
+{
+ struct state *lp; /* left end of current construct */
+ int seencontent; /* is there anything in this branch yet? */
+ struct subre *t;
- /* hard part: something messy seen */
- /* break subRE into pre, x{...}, post-to-be */
- capture = 1; /* upper levels will care */
- rt1 = newrt(v);
- rt2 = newrt(v);
- s = newstate(v->nfa); /* between x and post-to-be */
+ lp = left;
+ seencontent = 0;
+ t = subre(v, '=', 0, left, right); /* op '=' is tentative */
+ NOERRN();
+ while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) {
+ if (seencontent) { /* implicit concat operator */
+ lp = newstate(v->nfa);
NOERRN();
- moveins(v->nfa, rp, s);
- EMPTYARC(s, rp);
- rt1->op = ',';
- rt1->left = subre(now->begin, lp, now->prefer, 0,
- (struct rtree *)NULL);
- assert(now->end == rp);
- rt1->right = subre(lp, rp, sub.prefer, 0, rt2);
- rt2->op = ',';
- rt2->left = subre(lp, s, sub.prefer, 0, now->tree);
- rt2->right = subre(s, rp, NONEYET, 0,
- (struct rtree *)NULL);
- now->tree = rt1;
- now = &rt2->right; /* future elaborations here */
- t = &rt2->left; /* current activity here */
-
- /* if it's a backref, time to replicate the subNFA */
- if (sub.subno < 0) {
- assert(lp->nouts == 1); /* just the EMPTY */
- delsub(v->nfa, lp, s);
- assert(v->subs[-sub.subno] != NULL);
- dupnfa(v->nfa, v->subs[-sub.subno]->begin,
- v->subs[-sub.subno]->end, lp, s);
- NOERRN();
- }
+ moveins(v->nfa, right, lp);
+ }
+ seencontent = 1;
- /* if no/vacuous quantifier and not backref, done */
- if (m == 1 && n == 1 && sub.subno >= 0) {
- t->subno = sub.subno;
- if (sub.subno > 0)
- v->subs[sub.subno] = t;
- continue; /* NOTE CONTINUE */
- }
+ /* NB, recursion in parseqatom() may swallow rest of branch */
+ parseqatom(v, stopper, type, lp, right, t);
+ }
- /* really sticky part, quantified capturer/backref */
- /* first, turn x{0,...} into x{1,...}| */
- if (m == 0) {
- s = newstate(v->nfa);
- s2 = newstate(v->nfa);
- rt1 = newrt(v);
- rt2 = newrt(v);
- NOERRN();
- moveouts(v->nfa, t->begin, s);
- EMPTYARC(t->begin, s);
- EMPTYARC(t->begin, s2);
- EMPTYARC(s2, t->end);
- rt1->op = rt2->op = '|';
- rt1->left = subre(s, t->end, sub.prefer, 0,
- t->tree);
- rt1->next = rt2;
- rt2->left = subre(s2, t->end, sub.prefer, 0,
- (struct rtree *)NULL);
- t->tree = rt1;
- t = &rt1->left;
- m = 1;
- }
+ if (!seencontent) { /* empty branch */
+ if (!partial)
+ NOTE(REG_UUNSPEC);
+ assert(lp == left);
+ EMPTYARC(left, right);
+ }
- /* second, x{1,1} is just x */
- if (m == 1 && n == 1 && sub.subno >= 0) {
- t->subno = sub.subno;
- if (sub.subno > 0)
- v->subs[sub.subno] = t;
- continue; /* NOTE CONTINUE */
- }
+ return t;
+}
- /* backrefs get special treatment */
- if (sub.subno < 0) {
- repeat(v, t->begin, t->end, m, n);
- rt1 = newrt(v);
- NOERRN();
- assert(t->tree == NULL);
- t->tree = rt1;
- rt1->op = 'b';
- rt1->left.subno = sub.subno;
- rt1->left.min = (short)m;
- rt1->left.max = (short)n;
- rt1->left.prefer = sub.prefer;
- continue; /* NOTE CONTINUE */
- }
+/*
+ - parseqatom - parse one quantified atom or constraint of an RE
+ * The bookkeeping near the end cooperates very closely with parsebranch();
+ * in particular, it contains a recursion that can involve parsing the rest
+ * of the branch, making this function's name somewhat inaccurate.
+ ^ static VOID parseqatom(struct vars *, int, int, struct state *,
+ ^ struct state *, struct subre *);
+ */
+static VOID
+parseqatom(v, stopper, type, lp, rp, top)
+struct vars *v;
+int stopper; /* EOS or ')' */
+int type; /* LACON (lookahead subRE) or PLAIN */
+struct state *lp; /* left state to hang it on */
+struct state *rp; /* right state to hang it on */
+struct subre *top; /* subtree top */
+{
+ struct state *s; /* temporaries for new states */
+ struct state *s2;
+# define ARCV(t, val) newarc(v->nfa, t, val, lp, rp)
+ int m, n;
+ struct subre *atom; /* atom's subtree */
+ struct subre *t;
+ int cap; /* capturing parens? */
+ int pos; /* positive lookahead? */
+ int subno; /* capturing-parens or backref number */
+ int atomtype;
+ int qprefer; /* quantifier short/long preference */
+ int f;
+ struct subre **atomp; /* where the pointer to atom is */
+
+ /* initial bookkeeping */
+ atom = NULL;
+ assert(lp->nouts == 0); /* must string new code */
+ assert(rp->nins == 0); /* between lp and rp */
+ subno = 0; /* just to shut lint up */
+
+ /* an atom or constraint... */
+ atomtype = v->nexttype;
+ switch (atomtype) {
+ /* first, constraints, which end by returning */
+ case '^':
+ ARCV('^', 1);
+ if (v->cflags&REG_NLANCH)
+ ARCV(BEHIND, v->nlcolor);
+ NEXT();
+ return;
+ break;
+ case '$':
+ ARCV('$', 1);
+ if (v->cflags&REG_NLANCH)
+ ARCV(AHEAD, v->nlcolor);
+ NEXT();
+ return;
+ break;
+ case SBEGIN:
+ ARCV('^', 1); /* BOL */
+ ARCV('^', 0); /* or BOS */
+ NEXT();
+ return;
+ break;
+ case SEND:
+ ARCV('$', 1); /* EOL */
+ ARCV('$', 0); /* or EOS */
+ NEXT();
+ return;
+ break;
+ case '<':
+ wordchrs(v); /* does NEXT() */
+ s = newstate(v->nfa);
+ NOERR();
+ nonword(v, BEHIND, lp, s);
+ word(v, AHEAD, s, rp);
+ return;
+ break;
+ case '>':
+ wordchrs(v); /* does NEXT() */
+ s = newstate(v->nfa);
+ NOERR();
+ word(v, BEHIND, lp, s);
+ nonword(v, AHEAD, s, rp);
+ return;
+ break;
+ case WBDRY:
+ wordchrs(v); /* does NEXT() */
+ s = newstate(v->nfa);
+ NOERR();
+ nonword(v, BEHIND, lp, s);
+ word(v, AHEAD, s, rp);
+ s = newstate(v->nfa);
+ NOERR();
+ word(v, BEHIND, lp, s);
+ nonword(v, AHEAD, s, rp);
+ return;
+ break;
+ case NWBDRY:
+ wordchrs(v); /* does NEXT() */
+ s = newstate(v->nfa);
+ NOERR();
+ word(v, BEHIND, lp, s);
+ word(v, AHEAD, s, rp);
+ s = newstate(v->nfa);
+ NOERR();
+ nonword(v, BEHIND, lp, s);
+ nonword(v, AHEAD, s, rp);
+ return;
+ break;
+ case LACON: /* lookahead constraint */
+ pos = v->nextvalue;
+ NEXT();
+ s = newstate(v->nfa);
+ s2 = newstate(v->nfa);
+ NOERR();
+ t = parse(v, ')', LACON, s, s2);
+ freesubre(v, t); /* internal structure irrelevant */
+ assert(SEE(')') || ISERR());
+ NEXT();
+ n = newlacon(v, s, s2, pos);
+ NOERR();
+ ARCV(LACON, n);
+ return;
+ break;
+ /* then errors, to get them out of the way */
+ case '*':
+ case '+':
+ case '?':
+ case '{':
+ ERR(REG_BADRPT);
+ return;
+ break;
+ default:
+ ERR(REG_ASSERT);
+ return;
+ break;
+ /* then plain characters, and minor variants on that theme */
+ case ')': /* unbalanced paren */
+ if ((v->cflags&REG_ADVANCED) != REG_EXTENDED) {
+ ERR(REG_EPAREN);
+ return;
+ }
+ /* legal in EREs due to specification botch */
+ NOTE(REG_UPBOTCH);
+ /* fallthrough into case PLAIN */
+ case PLAIN:
+ onechr(v, v->nextvalue, lp, rp);
+ okcolors(v->nfa, v->cm);
+ NOERR();
+ NEXT();
+ break;
+ case '[':
+ if (v->nextvalue == 1)
+ bracket(v, lp, rp);
+ else
+ cbracket(v, lp, rp);
+ assert(SEE(']') || ISERR());
+ NEXT();
+ break;
+ case '.':
+ rainbow(v->nfa, v->cm, PLAIN,
+ (v->cflags&REG_NLSTOP) ? v->nlcolor : COLORLESS,
+ lp, rp);
+ NEXT();
+ break;
+ /* and finally the ugly stuff */
+ case '(': /* value flags as capturing or non */
+ cap = (type == LACON) ? 0 : v->nextvalue;
+ if (cap) {
+ v->nsubexp++;
+ subno = v->nsubexp;
+ if ((size_t)subno >= v->nsubs)
+ moresubs(v, subno);
+ assert((size_t)subno < v->nsubs);
+ } else
+ atomtype = PLAIN; /* something that's not '(' */
+ NEXT();
+ /* need new endpoints because tree will contain pointers */
+ s = newstate(v->nfa);
+ s2 = newstate(v->nfa);
+ NOERR();
+ EMPTYARC(lp, s);
+ EMPTYARC(s2, rp);
+ NOERR();
+ atom = parse(v, ')', PLAIN, s, s2);
+ assert(SEE(')') || ISERR());
+ NEXT();
+ NOERR();
+ if (cap) {
+ v->subs[subno] = atom;
+ t = subre(v, '(', atom->flags|CAP, lp, rp);
+ NOERR();
+ t->subno = subno;
+ t->left = atom;
+ atom = t;
+ }
+ /* postpone everything else pending possible {0} */
+ break;
+ case BACKREF: /* the Feature From The Black Lagoon */
+ INSIST(type != LACON, REG_ESUBREG);
+ INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);
+ INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG);
+ NOERR();
+ assert(v->nextvalue > 0);
+ atom = subre(v, 'b', BACKR, lp, rp);
+ subno = v->nextvalue;
+ atom->subno = subno;
+ EMPTYARC(lp, rp); /* temporarily, so there's something */
+ NEXT();
+ break;
+ }
- /* turn x{m,n} into x{m-1,n-1}x, with capturing */
- /* parens in only second x */
- s = newstate(v->nfa);
- NOERRN();
- moveouts(v->nfa, t->begin, s);
- dupnfa(v->nfa, s, t->end, t->begin, s);
- assert(m >= 1 && m != INFINITY && n >= 1);
- repeat(v, t->begin, s, m-1, (n == INFINITY) ? n : n-1);
- rt1 = newrt(v);
- NOERRN();
- rt1->op = ',';
- rt1->left = subre(t->begin, s, sub.prefer, 0,
- (struct rtree *)NULL);
- /* sub.prefer not really right, but doesn't matter */
- rt1->right = subre(s, t->end, sub.prefer, sub.subno,
- t->tree);
- if (sub.subno > 0)
- v->subs[sub.subno] = &rt1->right;
- t->tree = rt1;
+ /* ...and an atom may be followed by a quantifier */
+ switch (v->nexttype) {
+ case '*':
+ m = 0;
+ n = INFINITY;
+ qprefer = (v->nextvalue) ? LONGER : SHORTER;
+ NEXT();
+ break;
+ case '+':
+ m = 1;
+ n = INFINITY;
+ qprefer = (v->nextvalue) ? LONGER : SHORTER;
+ NEXT();
+ break;
+ case '?':
+ m = 0;
+ n = 1;
+ qprefer = (v->nextvalue) ? LONGER : SHORTER;
+ NEXT();
+ break;
+ case '{':
+ NEXT();
+ m = scannum(v);
+ if (EAT(',')) {
+ if (SEE(DIGIT))
+ n = scannum(v);
+ else
+ n = INFINITY;
+ if (m > n) {
+ ERR(REG_BADBR);
+ return;
+ }
+ /* {m,n} exercises preference, even if it's {m,m} */
+ qprefer = (v->nextvalue) ? LONGER : SHORTER;
+ } else {
+ n = m;
+ /* {m} passes operand's preference through */
+ qprefer = 0;
}
- if (emptybranch) {
- NOTE(REG_UUNSPEC);
- EMPTYARC(lp, rp);
+ if (!SEE('}')) { /* catches errors too */
+ ERR(REG_BADBR);
+ return;
}
- } while (EAT('|'));
- assert(SEE(stopper) || SEE(EOS));
+ NEXT();
+ break;
+ default: /* no quantifier */
+ m = n = 1;
+ qprefer = 0;
+ break;
+ }
- if (!SEE(stopper)) {
- assert(stopper == ')' && SEE(EOS));
- ERR(REG_EPAREN);
+ /* annoying special case: {0} or {0,0} cancels everything */
+ if (m == 0 && n == 0) {
+ if (atom != NULL)
+ freesubre(v, atom);
+ if (atomtype == '(')
+ v->subs[subno] = NULL;
+ delsub(v->nfa, lp, rp);
+ EMPTYARC(lp, rp);
+ return;
}
- /* higher levels care about our preference in certain situations */
- if (branch != branches) { /* >1 branch */
- if (pprefer != LONGER)
- capture = 1;
- } else if (branches->left.prefer != pprefer)
- capture = 1;
-
- /* optimize out vacuous alternation */
- if (branch == branches) {
- assert(branch->next == NULL && branch->right.begin == NULL);
- assert(branch->left.subno == 0);
- if (capture && branch->left.tree == NULL)
- branch->op = ',';
- else {
- branches = branch->left.tree; /* might be NULL */
- freertnode(v, branch);
- }
+ /* if not a messy case, avoid hard part */
+ assert(!MESSY(top->flags));
+ f = top->flags | qprefer | ((atom != NULL) ? atom->flags : 0);
+ if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f))) {
+ if (!(m == 1 && n == 1))
+ repeat(v, lp, rp, m, n);
+ if (atom != NULL)
+ freesubre(v, atom);
+ top->flags = f;
+ return;
+ }
+
+ /*
+ * hard part: something messy
+ * That is, capturing parens, back reference, short/long clash, or
+ * an atom with substructure containing one of those.
+ */
+
+ /* now we'll need a subre for the contents even if they're boring */
+ if (atom == NULL) {
+ atom = subre(v, '=', 0, lp, rp);
+ NOERR();
+ }
+
+ /*
+ * prepare a general-purpose state skeleton
+ *
+ * ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp]
+ * / /
+ * [lp] ----> [s2] ----bypass---------------------
+ *
+ * where bypass is an empty, and prefix is some repetitions of atom
+ */
+ s = newstate(v->nfa); /* first, new endpoints for the atom */
+ s2 = newstate(v->nfa);
+ NOERR();
+ moveouts(v->nfa, lp, s);
+ moveins(v->nfa, rp, s2);
+ NOERR();
+ atom->begin = s;
+ atom->end = s2;
+ s = newstate(v->nfa); /* and spots for prefix and bypass */
+ s2 = newstate(v->nfa);
+ NOERR();
+ EMPTYARC(lp, s);
+ EMPTYARC(lp, s2);
+ NOERR();
+
+ /* break remaining subRE into x{...} and what follows */
+ t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp);
+ t->left = atom;
+ atomp = &t->left;
+ /* here we should recurse... but we must postpone that to the end */
+
+ /* split top into prefix and remaining */
+ assert(top->op == '=' && top->left == NULL && top->right == NULL);
+ top->left = subre(v, '=', top->flags, top->begin, lp);
+ top->op = '.';
+ top->right = t;
+
+ /* if it's a backref, now is the time to replicate the subNFA */
+ if (atomtype == BACKREF) {
+ assert(atom->begin->nouts == 1); /* just the EMPTY */
+ delsub(v->nfa, atom->begin, atom->end);
+ assert(v->subs[subno] != NULL);
+ /* and here's why the recursion got postponed: it must */
+ /* wait until the skeleton is filled in, because it may */
+ /* hit a backref that wants to copy the filled-in skeleton */
+ dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end,
+ atom->begin, atom->end);
+ NOERR();
+ }
+
+ /* it's quantifier time; first, turn x{0,...} into x{1,...}|empty */
+ if (m == 0) {
+ EMPTYARC(s2, atom->end); /* the bypass */
+ assert(PREF(qprefer) != 0);
+ f = COMBINE(qprefer, atom->flags);
+ t = subre(v, '|', f, lp, atom->end);
+ NOERR();
+ t->left = atom;
+ t->right = subre(v, '|', PREF(f), s2, atom->end);
+ NOERR();
+ t->right->left = subre(v, '=', 0, s2, atom->end);
+ NOERR();
+ *atomp = t;
+ atomp = &t->left;
+ m = 1;
}
- if (capture) /* actually a catchall flag */
- return branches;
- freert(v, branches);
- return NULL;
+ /* deal with the rest of the quantifier */
+ if (atomtype == BACKREF) {
+ /* special case: backrefs have internal quantifiers */
+ EMPTYARC(s, atom->begin); /* empty prefix */
+ /* just stuff everything into atom */
+ repeat(v, atom->begin, atom->end, m, n);
+ atom->min = (short)m;
+ atom->max = (short)n;
+ atom->flags |= COMBINE(qprefer, atom->flags);
+ } else if (m == 1 && n == 1) {
+ /* no/vacuous quantifier: done */
+ EMPTYARC(s, atom->begin); /* empty prefix */
+ } else {
+ /* turn x{m,n} into x{m-1,n-1}x, with capturing */
+ /* parens in only second x */
+ dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin);
+ assert(m >= 1 && m != INFINITY && n >= 1);
+ repeat(v, s, atom->begin, m-1, (n == INFINITY) ? n : n-1);
+ f = COMBINE(qprefer, atom->flags);
+ t = subre(v, '.', f, s, atom->end); /* prefix and atom */
+ NOERR();
+ t->left = subre(v, '=', PREF(f), s, atom->begin);
+ NOERR();
+ t->right = atom;
+ *atomp = t;
+ }
+
+ /* and finally, look after that postponed recursion */
+ t = top->right;
+ if (!(SEE('|') || SEE(stopper) || SEE(EOS)))
+ t->right = parsebranch(v, stopper, type, atom->end, rp, 1);
+ else {
+ EMPTYARC(atom->end, rp);
+ t->right = subre(v, '=', 0, atom->end, rp);
+ }
+ assert(SEE('|') || SEE(stopper) || SEE(EOS));
+ t->flags |= COMBINE(t->flags, t->right->flags);
+ top->flags |= COMBINE(top->flags, t->flags);
+}
+
+/*
+ - nonword - generate arcs for non-word-character ahead or behind
+ ^ static VOID nonword(struct vars *, int, struct state *, struct state *);
+ */
+static VOID
+nonword(v, dir, lp, rp)
+struct vars *v;
+int dir; /* AHEAD or BEHIND */
+struct state *lp;
+struct state *rp;
+{
+ int anchor = (dir == AHEAD) ? '$' : '^';
+
+ assert(dir == AHEAD || dir == BEHIND);
+ newarc(v->nfa, anchor, 1, lp, rp);
+ newarc(v->nfa, anchor, 0, lp, rp);
+ colorcomplement(v->nfa, v->cm, dir, v->wordchrs, lp, rp);
+ /* (no need for special attention to \n) */
+}
+
+/*
+ - word - generate arcs for word character ahead or behind
+ ^ static VOID word(struct vars *, int, struct state *, struct state *);
+ */
+static VOID
+word(v, dir, lp, rp)
+struct vars *v;
+int dir; /* AHEAD or BEHIND */
+struct state *lp;
+struct state *rp;
+{
+ assert(dir == AHEAD || dir == BEHIND);
+ cloneouts(v->nfa, v->wordchrs, lp, rp, dir);
+ /* (no need for special attention to \n) */
}
/*
@@ -1163,7 +1187,7 @@ struct state *rp;
NOERR();
bracket(v, left, right);
if (v->cflags&REG_NLSTOP)
- newarc(v->nfa, PLAIN, nlcolor(v), left, right);
+ newarc(v->nfa, PLAIN, v->nlcolor, left, right);
NOERR();
assert(lp->nouts == 0); /* all outarcs will be ours */
@@ -1181,7 +1205,7 @@ struct state *rp;
/* but complementing gets messy in the presence of MCCEs... */
NOTE(REG_ULOCALE);
for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) {
- co = getcolor(v->cm, *p);
+ co = GETCOLOR(v->cm, *p);
a = findarc(lp, PLAIN, co);
ba = findarc(left, PLAIN, co);
if (ba == NULL) {
@@ -1391,7 +1415,7 @@ struct cvec *cv;
okcolors(v->nfa, v->cm);
} else {
a = findarc(v->mccepbegin, PLAIN,
- getcolor(v->cm, leader));
+ GETCOLOR(v->cm, leader));
assert(a != NULL);
s = a->to;
assert(s != v->mccepend);
@@ -1438,6 +1462,7 @@ struct state *lp;
struct state *rp;
{
chr ch, from, to;
+ celt ce;
chr *p;
int i;
color co;
@@ -1478,16 +1503,17 @@ struct state *rp;
for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) {
from = *p;
to = *(p+1);
- for (ch = from; ch <= to; ch++)
- if (!ISCELEADER(v, ch))
- newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp,
- rp);
- else {
- assert(singleton(v->cm, ch));
- assert(leads != NULL);
- if (!haschr(leads, ch))
- addchr(leads, ch);
- }
+ while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) {
+ if (from < ce)
+ subrange(v, from, ce - 1, lp, rp);
+ assert(singleton(v->cm, ce));
+ assert(leads != NULL);
+ if (!haschr(leads, ce))
+ addchr(leads, ce);
+ from = ce + 1;
+ }
+ if (from <= to)
+ subrange(v, from, to, lp, rp);
}
if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0)
@@ -1496,7 +1522,7 @@ struct state *rp;
/* deal with the MCCE leaders */
NOTE(REG_ULOCALE);
for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) {
- co = getcolor(v->cm, *p);
+ co = GETCOLOR(v->cm, *p);
a = findarc(lp, PLAIN, co);
if (a != NULL)
s = a->to;
@@ -1519,7 +1545,8 @@ struct state *rp;
for (i = 0; i < cv->nmcces; i++) {
p = cv->mcces[i];
assert(singleton(v->cm, *p));
- co = getcolor(v->cm, *p++);
+ ch = *p++;
+ co = GETCOLOR(v->cm, ch);
a = findarc(lp, PLAIN, co);
if (a != NULL)
s = a->to;
@@ -1531,7 +1558,8 @@ struct state *rp;
}
assert(*p != 0); /* at least two chars */
assert(singleton(v->cm, *p));
- co = getcolor(v->cm, *p++);
+ ch = *p++;
+ co = GETCOLOR(v->cm, ch);
assert(*p == 0); /* and only two, for now */
newarc(v->nfa, PLAIN, co, s, rp);
NOERR();
@@ -1539,20 +1567,30 @@ struct state *rp;
}
/*
- - nlcolor - assign newline a unique color, if it doesn't have one already
- * Restriction: can't be called when there are subcolors open. (Maybe
- * this should be enforced...)
- ^ static color nlcolor(struct vars *);
+ - nextleader - find next MCCE leader within range
+ ^ static celt nextleader(struct vars *, pchr, pchr);
*/
-static color
-nlcolor(v)
+static celt /* NOCELT means none */
+nextleader(v, from, to)
struct vars *v;
+pchr from;
+pchr to;
{
- if (v->nlcolor == COLORLESS) {
- v->nlcolor = subcolor(v->cm, newline());
- okcolors(v->nfa, v->cm);
+ int i;
+ chr *p;
+ chr ch;
+ celt it = NOCELT;
+
+ if (v->mcces == NULL)
+ return it;
+
+ for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) {
+ ch = *p;
+ if (from <= ch && ch <= to)
+ if (it == NOCELT || ch < it)
+ it = ch;
}
- return v->nlcolor;
+ return it;
}
/*
@@ -1579,6 +1617,7 @@ struct vars *v;
left = newstate(v->nfa);
right = newstate(v->nfa);
NOERR();
+ /* fine point: implemented with [::], and lexer will set REG_ULOCALE */
lexword(v);
NEXT();
assert(v->savenow != NULL && SEE('['));
@@ -1590,273 +1629,169 @@ struct vars *v;
}
/*
- - subre - construct a subre struct
- ^ static struct subre subre(struct state *, struct state *, int, int,
- ^ struct rtree *);
+ - subre - allocate a subre
+ ^ static struct subre *subre(struct vars *, int, int, struct state *,
+ ^ struct state *);
*/
-static struct subre
-subre(begin, end, prefer, subno, tree)
+static struct subre *
+subre(v, op, flags, begin, end)
+struct vars *v;
+int op;
+int flags;
struct state *begin;
struct state *end;
-int prefer;
-int subno;
-struct rtree *tree;
-{
- struct subre ret;
-
- ret.begin = begin;
- ret.end = end;
- ret.prefer = prefer;
- ret.subno = subno;
- ret.min = ret.max = 1;
- ret.tree = tree;
- ZAPCNFA(ret.cnfa);
- return ret;
-}
-
-/*
- - newrt - allocate subRE-tree node
- ^ static struct rtree *newrt(struct vars *);
- */
-static struct rtree *
-newrt(v)
-struct vars *v;
{
- struct rtree *rt;
+ struct subre *ret;
- rt = v->treefree;
- if (rt != NULL)
- v->treefree = rt->next;
+ ret = v->treefree;
+ if (ret != NULL)
+ v->treefree = ret->left;
else {
- rt = (struct rtree *)MALLOC(sizeof(struct rtree));
- if (rt == NULL) {
+ ret = (struct subre *)MALLOC(sizeof(struct subre));
+ if (ret == NULL) {
ERR(REG_ESPACE);
return NULL;
}
- rt->chain = v->treechain;
- v->treechain = rt;
+ ret->chain = v->treechain;
+ v->treechain = ret;
}
- rt->op = '?'; /* invalid */
- rt->flags = 0;
- rt->no = 0;
- rt->left.begin = NULL;
- rt->left.end = NULL;
- rt->left.prefer = NONEYET;
- rt->left.subno = 0;
- rt->left.min = rt->left.max = 1;
- rt->left.tree = NULL;
- ZAPCNFA(rt->left.cnfa);
- rt->right.begin = NULL;
- rt->right.end = NULL;
- rt->right.prefer = NONEYET;
- rt->right.subno = 0;
- rt->right.min = rt->right.max = 1;
- rt->right.tree = NULL;
- ZAPCNFA(rt->right.cnfa);
- rt->next = NULL;
-
- return rt;
+ assert(strchr("|.b(=", op) != NULL);
+
+ ret->op = op;
+ ret->flags = flags;
+ ret->retry = 0;
+ ret->subno = 0;
+ ret->min = ret->max = 1;
+ ret->left = NULL;
+ ret->right = NULL;
+ ret->begin = begin;
+ ret->end = end;
+ ZAPCNFA(ret->cnfa);
+
+ return ret;
}
/*
- - freert - free a subRE subtree
- ^ static VOID freert(struct vars *, struct rtree *);
+ - freesubre - free a subRE subtree
+ ^ static VOID freesubre(struct vars *, struct subre *);
*/
static VOID
-freert(v, rt)
+freesubre(v, sr)
struct vars *v; /* might be NULL */
-struct rtree *rt;
+struct subre *sr;
{
- if (rt == NULL)
+ if (sr == NULL)
return;
- if (rt->left.tree != NULL)
- freert(v, rt->left.tree);
- if (rt->right.tree != NULL)
- freert(v, rt->right.tree);
- if (rt->next != NULL)
- freert(v, rt->next);
+ if (sr->left != NULL)
+ freesubre(v, sr->left);
+ if (sr->right != NULL)
+ freesubre(v, sr->right);
- freertnode(v, rt);
+ freesrnode(v, sr);
}
/*
- - freertnode - free one node in a subRE subtree
- ^ static VOID freertnode(struct vars *, struct rtree *);
+ - freesrnode - free one node in a subRE subtree
+ ^ static VOID freesrnode(struct vars *, struct subre *);
*/
static VOID
-freertnode(v, rt)
+freesrnode(v, sr)
struct vars *v; /* might be NULL */
-struct rtree *rt;
+struct subre *sr;
{
- if (rt == NULL)
+ if (sr == NULL)
return;
- if (!NULLCNFA(rt->left.cnfa))
- freecnfa(&rt->left.cnfa, 0);
- if (!NULLCNFA(rt->right.cnfa))
- freecnfa(&rt->right.cnfa, 0);
- rt->flags = 0;
+ if (!NULLCNFA(sr->cnfa))
+ freecnfa(&sr->cnfa);
+ sr->flags = 0;
if (v != NULL) {
- rt->next = v->treefree;
- v->treefree = rt;
+ sr->left = v->treefree;
+ v->treefree = sr;
} else
- FREE(rt);
+ FREE(sr);
}
/*
- - optrt - optimize a subRE subtree
- ^ static VOID optrt(struct vars *, struct rtree *);
+ - optst - optimize a subRE subtree
+ ^ static VOID optst(struct vars *, struct subre *);
*/
static VOID
-optrt(v, rt)
+optst(v, t)
struct vars *v;
-struct rtree *rt;
+struct subre *t;
{
- struct rtree *t;
- int subno;
-
- if (rt == NULL)
+ if (t == NULL)
return;
- assert(rt->op != 'b');
-
- /* pull up subtrees if possible */
- if (rt->left.begin != NULL && rt->left.tree != NULL &&
- rt->left.tree->op != 'b') {
- t = rt->left.tree;
- optrt(v, t);
- if (t->right.begin == NULL && t->next == NULL &&
- (rt->left.prefer == NONEYET ||
- t->left.prefer == rt->left.prefer) &&
- (rt->left.subno == 0 || t->left.subno == 0)) {
- subno = rt->left.subno;
- rt->left = t->left;
- assert(NULLCNFA(t->left.cnfa));
- freertnode(v, t);
- if (subno != 0) {
- assert(rt->left.subno == 0 && subno > 0);
- rt->left.subno = subno;
- }
- }
- }
- if (rt->right.begin != NULL && rt->right.tree != NULL &&
- rt->right.tree->op != 'b') {
- t = rt->right.tree;
- optrt(v, t);
- if (t->right.begin == NULL && t->next == NULL &&
- (rt->right.prefer == NONEYET ||
- t->right.prefer == rt->right.prefer) &&
- (rt->right.subno == 0 || t->right.subno == 0)) {
- subno = rt->right.subno;
- rt->right = t->left;
- assert(NULLCNFA(t->right.cnfa));
- freertnode(v, t);
- if (subno != 0) {
- assert(rt->right.subno == 0 && subno > 0);
- rt->right.subno = subno;
- }
- }
- }
-
- /* simplify empties */
- if (rt->left.begin != NULL && isempty(rt->left.begin, rt->left.end))
- rt->left.end = rt->left.begin;
- if (rt->right.begin != NULL && isempty(rt->right.begin, rt->right.end))
- rt->right.end = rt->right.begin;
-
- /* if left subtree vacuous and right non-empty, move right over */
- if (rt->left.begin != NULL && rt->left.begin == rt->left.end &&
- rt->left.subno == 0 && rt->left.tree == NULL &&
- rt->right.begin != NULL) {
- rt->left = rt->right;
- rt->right.begin = NULL;
- rt->right.tree = NULL;
- }
-
- /* if right subtree vacuous, clear it out */
- if (rt->right.begin != NULL && rt->right.begin == rt->right.end &&
- rt->right.subno == 0 && rt->right.tree == NULL) {
- rt->right.begin = NULL;
- rt->right.tree = NULL;
- }
/* preference cleanup and analysis */
- if (rt->left.prefer == NONEYET)
- rt->left.prefer = LONGER;
- if (rt->left.prefer == SHORTER)
+ if (t->flags&SHORTER)
v->usedshorter = 1;
- if (rt->right.begin != NULL) {
- if (rt->right.prefer == NONEYET)
- rt->right.prefer = LONGER;
- if (rt->right.prefer == SHORTER)
- v->usedshorter = 1;
- }
- /* recurse through alternatives */
- if (rt->next != NULL)
- optrt(v, rt->next);
+ /* recurse through children */
+ if (t->left != NULL)
+ optst(v, t->left);
+ if (t->right != NULL)
+ optst(v, t->right);
}
/*
- - numrt - number tree nodes
- ^ static int numrt(struct rtree *, int);
+ - numst - number tree nodes (assigning retry indexes)
+ ^ static int numst(struct subre *, int);
*/
static int /* next number */
-numrt(rt, start)
-struct rtree *rt;
+numst(t, start)
+struct subre *t;
int start; /* starting point for subtree numbers */
{
int i;
- assert(rt != NULL);
+ assert(t != NULL);
i = start;
- rt->no = (short)i++;
- if (rt->left.tree != NULL)
- i = numrt(rt->left.tree, i);
- if (rt->right.tree != NULL)
- i = numrt(rt->right.tree, i);
- if (rt->next != NULL)
- i = numrt(rt->next, i);
+ t->retry = (short)i++;
+ if (t->left != NULL)
+ i = numst(t->left, i);
+ if (t->right != NULL)
+ i = numst(t->right, i);
return i;
}
/*
- - markrt - mark tree nodes as INUSE
- ^ static VOID markrt(struct rtree *);
+ - markst - mark tree nodes as INUSE
+ ^ static VOID markst(struct subre *);
*/
static VOID
-markrt(rt)
-struct rtree *rt;
+markst(t)
+struct subre *t;
{
- assert(rt != NULL);
-
- rt->flags |= INUSE;
- if (rt->left.tree != NULL)
- markrt(rt->left.tree);
- if (rt->right.tree != NULL)
- markrt(rt->right.tree);
- if (rt->next != NULL)
- markrt(rt->next);
+ assert(t != NULL);
+
+ t->flags |= INUSE;
+ if (t->left != NULL)
+ markst(t->left);
+ if (t->right != NULL)
+ markst(t->right);
}
/*
- - cleanrt - free any tree nodes not marked INUSE
- ^ static VOID cleanrt(struct vars *);
+ - cleanst - free any tree nodes not marked INUSE
+ ^ static VOID cleanst(struct vars *);
*/
static VOID
-cleanrt(v)
+cleanst(v)
struct vars *v;
{
- struct rtree *rt;
- struct rtree *next;
+ struct subre *t;
+ struct subre *next;
- for (rt = v->treechain; rt != NULL; rt = next) {
- next = rt->next;
- if (!(rt->flags&INUSE))
- FREE(rt);
+ for (t = v->treechain; t != NULL; t = next) {
+ next = t->chain;
+ if (!(t->flags&INUSE))
+ FREE(t);
}
v->treechain = NULL;
v->treefree = NULL; /* just on general principles */
@@ -1864,56 +1799,51 @@ struct vars *v;
/*
- nfatree - turn a subRE subtree into a tree of compacted NFAs
- ^ static VOID nfatree(struct vars *, struct rtree *, FILE *);
+ ^ static int nfatree(struct vars *, struct subre *, FILE *);
*/
-static VOID
-nfatree(v, rt, f)
+static int /* optimize results from top node */
+nfatree(v, t, f)
struct vars *v;
-struct rtree *rt;
+struct subre *t;
FILE *f; /* for debug output */
{
- if (rt == NULL)
- return;
+ assert(t != NULL && t->begin != NULL);
- if (rt->left.begin != NULL)
- nfanode(v, &rt->left, f);
- if (rt->left.tree != NULL)
- nfatree(v, rt->left.tree, f);
+ if (t->left != NULL)
+ (DISCARD)nfatree(v, t->left, f);
+ if (t->right != NULL)
+ (DISCARD)nfatree(v, t->right, f);
- if (rt->right.begin != NULL)
- nfanode(v, &rt->right, f);
- if (rt->right.tree != NULL)
- nfatree(v, rt->right.tree, f);
-
- if (rt->next != NULL)
- nfatree(v, rt->next, f);
+ return nfanode(v, t, f);
}
/*
- nfanode - do one NFA for nfatree
- ^ static VOID nfanode(struct vars *, struct subre *, FILE *);
+ ^ static int nfanode(struct vars *, struct subre *, FILE *);
*/
-static VOID
-nfanode(v, sub, f)
+static int /* optimize results */
+nfanode(v, t, f)
struct vars *v;
-struct subre *sub;
+struct subre *t;
FILE *f; /* for debug output */
{
struct nfa *nfa;
+ int ret = 0;
- if (sub->begin == NULL)
- return;
+ assert(t->begin != NULL);
nfa = newnfa(v, v->cm, v->nfa);
- NOERR();
- dupnfa(nfa, sub->begin, sub->end, nfa->init, nfa->final);
+ NOERRZ();
+ dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final);
if (!ISERR()) {
specialcolors(nfa);
- (DISCARD) optimize(nfa, f);
+ ret = optimize(nfa, f);
}
if (!ISERR())
- compact(nfa, &sub->cnfa);
+ compact(nfa, &t->cnfa);
+
freenfa(nfa);
+ return ret;
}
/*
@@ -1964,9 +1894,9 @@ int n;
int i;
assert(n > 0);
- for (sub = subs + 1, i = n - 1; i > 0; sub++, i--)
+ for (sub = subs + 1, i = n - 1; i > 0; sub++, i--) /* no 0th */
if (!NULLCNFA(sub->cnfa))
- freecnfa(&sub->cnfa, 0);
+ freecnfa(&sub->cnfa);
FREE(subs);
}
@@ -1988,14 +1918,13 @@ regex_t *re;
re->re_guts = NULL;
re->re_fns = NULL;
g->magic = 0;
- if (!NULLCNFA(g->cnfa))
- freecnfa(&g->cnfa, 0);
- if (g->cm != NULL)
- freecm(g->cm);
+ freecm(&g->cmap);
if (g->tree != NULL)
- freert((struct vars *)NULL, g->tree);
+ freesubre((struct vars *)NULL, g->tree);
if (g->lacons != NULL)
freelacons(g->lacons, g->nlacons);
+ if (!NULLCNFA(g->search))
+ freecnfa(&g->search);
FREE(g);
}
@@ -2028,41 +1957,44 @@ FILE *f;
re->re_nsub, re->re_info, re->re_csize, g->ntree,
g->usedshorter);
- dumpcolors(g->cm, f);
- dumpcnfa(&g->cnfa, f);
+ dumpcolors(&g->cmap, f);
+ if (!NULLCNFA(g->search)) {
+ printf("search:\n");
+ dumpcnfa(&g->search, f);
+ }
for (i = 1; i < g->nlacons; i++) {
fprintf(f, "la%d (%s):\n", i,
(g->lacons[i].subno) ? "positive" : "negative");
dumpcnfa(&g->lacons[i].cnfa, f);
}
- dumprt(g->tree, f, 0);
+ dumpst(g->tree, f, 0);
#endif
}
/*
- - dumprt - dump a subRE tree
- ^ static VOID dumprt(struct rtree *, FILE *, int);
+ - dumpst - dump a subRE tree
+ ^ static VOID dumpst(struct subre *, FILE *, int);
*/
static VOID
-dumprt(rt, f, nfapresent)
-struct rtree *rt;
+dumpst(t, f, nfapresent)
+struct subre *t;
FILE *f;
int nfapresent; /* is the original NFA still around? */
{
- if (rt == NULL)
+ if (t == NULL)
fprintf(f, "null tree\n");
else
- rtdump(rt, f, nfapresent, 0);
+ stdump(t, f, nfapresent, 0);
fflush(f);
}
/*
- - rtdump - recursive guts of dumprt
- ^ static VOID rtdump(struct rtree *, FILE *, int, int);
+ - stdump - recursive guts of dumpst
+ ^ static VOID stdump(struct subre *, FILE *, int, int);
*/
static VOID
-rtdump(rt, f, nfapresent, level)
-struct rtree *rt;
+stdump(t, f, nfapresent, level)
+struct subre *t;
FILE *f;
int nfapresent; /* is the original NFA still around? */
int level;
@@ -2072,102 +2004,41 @@ int level;
for (i = 0; i < level; i++)
fprintf(f, RTSEP);
- fprintf(f, "%c (n%d) {\n", rt->op, rt->no);
- if (rt->left.begin != NULL) {
- for (i = 0; i < level+1; i++)
- fprintf(f, RTSEP);
- fprintf(f, "L");
- fprintf(f, "%s", (rt->left.prefer == NONEYET) ? "-" :
- ((rt->left.prefer == LONGER) ? ">" : "<"));
- if (nfapresent)
- fprintf(f, "%ld-%ld", (long)rt->left.begin->no,
- (long)rt->left.end->no);
- if (rt->left.subno > 0)
- fprintf(f, " (%d)", rt->left.subno);
- else if (rt->left.subno < 0) {
- fprintf(f, " \\%d", -rt->left.subno);
- if (rt->left.min != 1 || rt->left.max != 1) {
- fprintf(f, "{%d-", (int)rt->left.min);
- if (rt->left.max != INFINITY)
- fprintf(f, "%d", (int)rt->left.max);
- fprintf(f, "}");
- }
- if (rt->left.tree != NULL)
- fprintf(f, "(nonNULL tree!!)");
- }
- if (rt->left.tree != NULL || !NULLCNFA(rt->left.cnfa))
- fprintf(f, ":");
- fprintf(f, "\n");
- if (!NULLCNFA(rt->left.cnfa))
- dumpcnfa(&rt->left.cnfa, f);
- if (rt->left.tree != NULL)
- rtdump(rt->left.tree, f, nfapresent, level+1);
- } else if (rt->op == 'b') {
- for (i = 0; i < level+1; i++)
- fprintf(f, RTSEP);
+ fprintf(f, "%c (", t->op);
+ if (t->flags&LONGER)
fprintf(f, "L");
- fprintf(f, "%s", (rt->left.prefer == NONEYET) ? "-" :
- ((rt->left.prefer == LONGER) ? ">" : "<"));
- assert(rt->left.subno < 0);
- fprintf(f, " \\%d", -rt->left.subno);
- if (rt->left.min != 1 || rt->left.max != 1) {
- fprintf(f, "{%d-", (int)rt->left.min);
- if (rt->left.max != INFINITY)
- fprintf(f, "%d", (int)rt->left.max);
- fprintf(f, "}");
- }
- if (rt->left.tree != NULL)
- fprintf(f, "(nonNULL tree!!)");
- fprintf(f, "\n");
- }
-
- if (rt->right.begin != NULL) {
- if (rt->op != ',')
- fprintf(f, "op %c has non-NULL right tree\n", rt->op);
- for (i = 0; i < level+1; i++)
- fprintf(f, RTSEP);
- fprintf(f, "R");
- fprintf(f, "%s", (rt->right.prefer == NONEYET) ? "-" :
- ((rt->right.prefer == LONGER) ? ">" : "<"));
- if (nfapresent)
- fprintf(f, "%ld-%ld", (long)rt->right.begin->no,
- (long)rt->right.end->no);
- if (rt->right.subno > 0)
- fprintf(f, " (%d)", rt->right.subno);
- else if (rt->right.subno < 0) {
- fprintf(f, " \\%d", -rt->right.subno);
- if (rt->right.min != 1 || rt->right.max != 1) {
- fprintf(f, "{%d-", (int)rt->right.min);
- if (rt->right.max != INFINITY)
- fprintf(f, "%d", (int)rt->right.max);
- fprintf(f, "}");
- }
- if (rt->right.tree != NULL)
- fprintf(f, "(nonNULL tree!!)");
- }
- if (rt->right.tree != NULL || !NULLCNFA(rt->right.cnfa))
- fprintf(f, ":");
- fprintf(f, "\n");
- if (!NULLCNFA(rt->right.cnfa))
- dumpcnfa(&rt->right.cnfa, f);
- if (rt->right.tree != NULL)
- rtdump(rt->right.tree, f, nfapresent, level+1);
- }
- for (i = 0; i < level; i++)
- fprintf(f, RTSEP);
- fprintf(f, "}\n");
-
- if (rt->next != NULL) {
- if (rt->op != '|')
- fprintf(f, "op %c has non-NULL next\n", rt->op);
- if (rt->next->op != rt->op)
- fprintf(f, "next op %c, expecting %c\n", rt->next->op,
- rt->op);
- rtdump(rt->next, f, nfapresent, level);
+ if (t->flags&SHORTER)
+ fprintf(f, "S");
+ if (t->flags&MIXED)
+ fprintf(f, "M");
+ if (t->flags&CAP)
+ fprintf(f, "c");
+ if (t->flags&BACKR)
+ fprintf(f, "b");
+ if (!(t->flags&INUSE))
+ fprintf(f, "!u");
+ fprintf(f, ") r%d", t->retry);
+ if (t->subno != 0)
+ fprintf(f, " #%d", t->subno);
+ if (t->min != 1 || t->max != 1) {
+ fprintf(f, "{%d,", t->min);
+ if (t->max != INFINITY)
+ fprintf(f, "%d", t->max);
+ fprintf(f, "}");
}
+ if (nfapresent)
+ fprintf(f, " %ld-%ld", (long)t->begin->no, (long)t->end->no);
+ if (!NULLCNFA(t->cnfa))
+ fprintf(f, ":");
+ fprintf(f, "\n");
+ if (t->left != NULL)
+ stdump(t->left, f, nfapresent, level+1);
+ if (!NULLCNFA(t->cnfa))
+ dumpcnfa(&t->cnfa, f);
+ if (t->right != NULL)
+ stdump(t->right, f, nfapresent, level+1);
}
-#define COMPILE 1
#include "regc_lex.c"
#include "regc_color.c"
#include "regc_nfa.c"