/* * NFA utilities. * This file is #included by regcomp.c. * * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author * thanks all of them. * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * * * One or two things that technically ought to be in here * are actually in color.c, thanks to some incestuous relationships in * the color chains. */ #define NISERR() VISERR(nfa->v) #define NERR(e) VERR(nfa->v, (e)) /* - newnfa - set up an NFA ^ static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *); */ static struct nfa * /* the NFA, or NULL */ newnfa(v, cm, parent) struct vars *v; struct colormap *cm; struct nfa *parent; /* NULL if primary NFA */ { struct nfa *nfa; nfa = (struct nfa *)MALLOC(sizeof(struct nfa)); if (nfa == NULL) return NULL; nfa->states = NULL; nfa->slast = NULL; nfa->free = NULL; nfa->nstates = 0; nfa->cm = cm; nfa->v = v; nfa->size = 0; nfa->bos[0] = nfa->bos[1] = COLORLESS; nfa->eos[0] = nfa->eos[1] = COLORLESS; nfa->parent = parent; nfa->post = newfstate(nfa, '@'); /* number 0 */ nfa->pre = newfstate(nfa, '>'); /* number 1 */ nfa->init = newstate(nfa); /* may become invalid later */ nfa->final = newstate(nfa); if (ISERR()) { freenfa(nfa); return NULL; } rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init); newarc(nfa, '^', 1, nfa->pre, nfa->init); newarc(nfa, '^', 0, nfa->pre, nfa->init); rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post); newarc(nfa, '$', 1, nfa->final, nfa->post); newarc(nfa, '$', 0, nfa->final, nfa->post); if (ISERR()) { freenfa(nfa); return NULL; } return nfa; } /* - too_many_states - checks if the max states exceeds the compile-time value ^ static int too_many_states(struct nfa *); */ static int too_many_states(nfa) struct nfa *nfa; { struct nfa *parent = nfa->parent; size_t sz = nfa->size; while (parent != NULL) { sz = parent->size; parent = parent->parent; } if (sz > REG_MAX_STATES) return 1; return 0; } /* - increment_size - increases the tracked size of the NFA and its parents. ^ static void increment_size(struct nfa *); */ static void increment_size(nfa) struct nfa *nfa; { struct nfa *parent = nfa->parent; nfa->size++; while (parent != NULL) { parent->size++; parent = parent->parent; } } /* - decrement_size - increases the tracked size of the NFA and its parents. ^ static void decrement_size(struct nfa *); */ static void decrement_size(nfa) struct nfa *nfa; { struct nfa *parent = nfa->parent; nfa->size--; while (parent != NULL) { parent->size--; parent = parent->parent; } } /* - freenfa - free an entire NFA ^ static VOID freenfa(struct nfa *); */ static VOID freenfa(nfa) struct nfa *nfa; { struct state *s; while ((s = nfa->states) != NULL) { s->nins = s->nouts = 0; /* don't worry about arcs */ freestate(nfa, s); } while ((s = nfa->free) != NULL) { nfa->free = s->next; destroystate(nfa, s); } nfa->slast = NULL; nfa->nstates = -1; nfa->pre = NULL; nfa->post = NULL; FREE(nfa); } /* - newstate - allocate an NFA state, with zero flag value ^ static struct state *newstate(struct nfa *); */ static struct state * /* NULL on error */ newstate(nfa) struct nfa *nfa; { struct state *s; if (too_many_states(nfa)) { /* XXX: add specific error for this */ NERR(REG_ETOOBIG); return NULL; } if (nfa->free != NULL) { s = nfa->free; nfa->free = s->next; } else { s = (struct state *)MALLOC(sizeof(struct state)); if (s == NULL) { NERR(REG_ESPACE); return NULL; } s->oas.next = NULL; s->free = NULL; s->noas = 0; } assert(nfa->nstates >= 0); s->no = nfa->nstates++; s->flag = 0; if (nfa->states == NULL) nfa->states = s; s->nins = 0; s->ins = NULL; s->nouts = 0; s->outs = NULL; s->tmp = NULL; s->next = NULL; if (nfa->slast != NULL) { assert(nfa->slast->next == NULL); nfa->slast->next = s; } s->prev = nfa->slast; nfa->slast = s; /* Track the current size and the parent size */ increment_size(nfa); return s; } /* - newfstate - allocate an NFA state with a specified flag value ^ static struct state *newfstate(struct nfa *, int flag); */ static struct state * /* NULL on error */ newfstate(nfa, flag) struct nfa *nfa; int flag; { struct state *s; s = newstate(nfa); if (s != NULL) s->flag = (char)flag; return s; } /* - dropstate - delete a state's inarcs and outarcs and free it ^ static VOID dropstate(struct nfa *, struct state *); */ static VOID dropstate(nfa, s) struct nfa *nfa; struct state *s; { struct arc *a; while ((a = s->ins) != NULL) freearc(nfa, a); while ((a = s->outs) != NULL) freearc(nfa, a); freestate(nfa, s); } /* - freestate - free a state, which has no in-arcs or out-arcs ^ static VOID freestate(struct nfa *, struct state *); */ static VOID freestate(nfa, s) struct nfa *nfa; struct state *s; { assert(s != NULL); assert(s->nins == 0 && s->nouts == 0); s->no = FREESTATE; s->flag = 0; if (s->next != NULL) s->next->prev = s->prev; else { assert(s == nfa->slast); nfa->slast = s->prev; } if (s->prev != NULL) s->prev->next = s->next; else { assert(s == nfa->states); nfa->states = s->next; } s->prev = NULL; s->next = nfa->free; /* don't delete it, put it on the free list */ nfa->free = s; decrement_size(nfa); } /* - destroystate - really get rid of an already-freed state ^ static VOID destroystate(struct nfa *, struct state *); */ static VOID destroystate(nfa, s) struct nfa *nfa; struct state *s; { struct arcbatch *ab; struct arcbatch *abnext; assert(s->no == FREESTATE); for (ab = s->oas.next; ab != NULL; ab = abnext) { abnext = ab->next; FREE(ab); } s->ins = NULL; s->outs = NULL; s->next = NULL; FREE(s); } /* - newarc - set up a new arc within an NFA ^ static VOID newarc(struct nfa *, int, pcolor, struct state *, ^ struct state *); */ static VOID newarc(nfa, t, co, from, to) struct nfa *nfa; int t; pcolor co; struct state *from; struct state *to; { struct arc *a; assert(from != NULL && to != NULL); /* check for duplicates */ for (a = from->outs; a != NULL; a = a->outchain) if (a->to == to && a->co == co && a->type == t) return; a = allocarc(nfa, from); if (NISERR()) return; assert(a != NULL); a->type = t; a->co = (color)co; a->to = to; a->from = from; /* * Put the new arc on the beginning, not the end, of the chains. * Not only is this easier, it has the very useful side effect that * deleting the most-recently-added arc is the cheapest case rather * than the most expensive one. */ a->inchain = to->ins; to->ins = a; a->outchain = from->outs; from->outs = a; from->nouts++; to->nins++; if (COLORED(a) && nfa->parent == NULL) colorchain(nfa->cm, a); return; } /* - allocarc - allocate a new out-arc within a state ^ static struct arc *allocarc(struct nfa *, struct state *); */ static struct arc * /* NULL for failure */ allocarc(nfa, s) struct nfa *nfa; struct state *s; { struct arc *a; struct arcbatch *new; int i; /* shortcut */ if (s->free == NULL && s->noas < ABSIZE) { a = &s->oas.a[s->noas]; s->noas++; return a; } /* if none at hand, get more */ if (s->free == NULL) { new = (struct arcbatch *)MALLOC(sizeof(struct arcbatch)); if (new == NULL) { NERR(REG_ESPACE); return NULL; } new->next = s->oas.next; s->oas.next = new; for (i = 0; i < ABSIZE; i++) { new->a[i].type = 0; new->a[i].freechain = &new->a[i+1]; } new->a[ABSIZE-1].freechain = NULL; s->free = &new->a[0]; } assert(s->free != NULL); a = s->free; s->free = a->freechain; return a; } /* - freearc - free an arc ^ static VOID freearc(struct nfa *, struct arc *); */ static VOID freearc(nfa, victim) struct nfa *nfa; struct arc *victim; { struct state *from = victim->from; struct state *to = victim->to; struct arc *a; assert(victim->type != 0); /* take it off color chain if necessary */ if (COLORED(victim) && nfa->parent == NULL) uncolorchain(nfa->cm, victim); /* take it off source's out-chain */ assert(from != NULL); assert(from->outs != NULL); a = from->outs; if (a == victim) /* simple case: first in chain */ from->outs = victim->outchain; else { for (; a != NULL && a->outchain != victim; a = a->outchain) continue; assert(a != NULL); a->outchain = victim->outchain; } from->nouts--; /* take it off target's in-chain */ assert(to != NULL); assert(to->ins != NULL); a = to->ins; if (a == victim) /* simple case: first in chain */ to->ins = victim->inchain; else { for (; a != NULL && a->inchain != victim; a = a->inchain) continue; assert(a != NULL); a->inchain = victim->inchain; } to->nins--; /* clean up and place on free list */ victim->type = 0; victim->from = NULL; /* precautions... */ victim->to = NULL; victim->inchain = NULL; victim->outchain = NULL; victim->freechain = from->free; from->free = victim; } /* - nonemptyouts - count non-EMPTY out arcs of a state ^ static int nonemptyouts(struct state *); */ static int nonemptyouts(s) struct state *s; { int n = 0; struct arc *a; for (a = s->outs; a != NULL; a = a->outchain) { if (a->type != EMPTY) n++; } return n; } /* - nonemptyins - count non-EMPTY in arcs of a state ^ static int nonemptyins(struct state *); */ static int nonemptyins(s) struct state *s; { int n = 0; struct arc *a; for (a = s->ins; a != NULL; a = a->inchain) { if (a->type != EMPTY) n++; } return n; } /* - findarc - find arc, if any, from given source with given type and color * If there is more than one such arc, the result is random. ^ static struct arc *findarc(struct state *, int, pcolor); */ static struct arc * findarc(s, type, co) struct state *s; int type; pcolor co; { struct arc *a; for (a = s->outs; a != NULL; a = a->outchain) if (a->type == type && a->co == co) return a; return NULL; } /* - cparc - allocate a new arc within an NFA, copying details from old one ^ static VOID cparc(struct nfa *, struct arc *, struct state *, ^ struct state *); */ static VOID cparc(nfa, oa, from, to) struct nfa *nfa; struct arc *oa; struct state *from; struct state *to; { newarc(nfa, oa->type, oa->co, from, to); } /* - moveins - move all in arcs of a state to another state * You might think this could be done better by just updating the * existing arcs, and you would be right if it weren't for the desire * for duplicate suppression, which makes it easier to just make new * ones to exploit the suppression built into newarc. ^ static VOID moveins(struct nfa *, struct state *, struct state *); */ static VOID moveins(nfa, old, new) struct nfa *nfa; struct state *old; struct state *new; { struct arc *a; assert(old != new); while ((a = old->ins) != NULL) { cparc(nfa, a, a->from, new); freearc(nfa, a); } assert(old->nins == 0); assert(old->ins == NULL); } /* - copyins - copy all in arcs of a state to another state ^ static VOID copyins(struct nfa *, struct state *, struct state *); */ static VOID copyins(nfa, old, new) struct nfa *nfa; struct state *old; struct state *new; { struct arc *a; assert(old != new); for (a = old->ins; a != NULL; a = a->inchain) cparc(nfa, a, a->from, new); } /* - copynonemptyins - as above, but ignore empty arcs ^ static void copynonemptyins(struct nfa *, struct state *, struct state *); */ static VOID copynonemptyins(nfa, oldState, newState) struct nfa *nfa; struct state *oldState; struct state *newState; { struct arc *a; assert(oldState != newState); for (a=oldState->ins ; a!=NULL ; a=a->inchain) { if (a->type != EMPTY) cparc(nfa, a, a->from, newState); } } /* - moveouts - move all out arcs of a state to another state ^ static VOID moveouts(struct nfa *, struct state *, struct state *); */ static VOID moveouts(nfa, old, new) struct nfa *nfa; struct state *old; struct state *new; { struct arc *a; assert(old != new); while ((a = old->outs) != NULL) { cparc(nfa, a, new, a->to); freearc(nfa, a); } } /* - copyouts - copy all out arcs of a state to another state ^ static VOID copyouts(struct nfa *, struct state *, struct state *); */ static VOID copyouts(nfa, old, new) struct nfa *nfa; struct state *old; struct state *new; { struct arc *a; assert(old != new); for (a = old->outs; a != NULL; a = a->outchain) cparc(nfa, a, new, a->to); } /* - copynonemptyouts - as above, but ignore empty arcs ^ static void copynonemptyouts(struct nfa *, struct state *, struct state *); */ static VOID copynonemptyouts(nfa, oldState, newState) struct nfa *nfa; struct state *oldState; struct state *newState; { struct arc *a; assert(oldState != newState); for (a=oldState->outs ; a!=NULL ; a=a->outchain) { if (a->type != EMPTY) cparc(nfa, a, newState, a->to); } } /* - cloneouts - copy out arcs of a state to another state pair, modifying type ^ static VOID cloneouts(struct nfa *, struct state *, struct state *, ^ struct state *, int); */ static VOID cloneouts(nfa, old, from, to, type) struct nfa *nfa; struct state *old; struct state *from; struct state *to; int type; { struct arc *a; assert(old != from); for (a = old->outs; a != NULL; a = a->outchain) newarc(nfa, type, a->co, from, to); } /* - delsub - delete a sub-NFA, updating subre pointers if necessary * This uses a recursive traversal of the sub-NFA, marking already-seen * states using their tmp pointer. ^ static VOID delsub(struct nfa *, struct state *, struct state *); */ static VOID delsub(nfa, lp, rp) struct nfa *nfa; struct state *lp; /* the sub-NFA goes from here... */ struct state *rp; /* ...to here, *not* inclusive */ { assert(lp != rp); rp->tmp = rp; /* mark end */ deltraverse(nfa, lp, lp); assert(lp->nouts == 0 && rp->nins == 0); /* did the job */ assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */ rp->tmp = NULL; /* unmark end */ lp->tmp = NULL; /* and begin, marked by deltraverse */ } /* - deltraverse - the recursive heart of delsub * This routine's basic job is to destroy all out-arcs of the state. ^ static VOID deltraverse(struct nfa *, struct state *, struct state *); */ static VOID deltraverse(nfa, leftend, s) struct nfa *nfa; struct state *leftend; struct state *s; { struct arc *a; struct state *to; if (s->nouts == 0) return; /* nothing to do */ if (s->tmp != NULL) return; /* already in progress */ s->tmp = s; /* mark as in progress */ while ((a = s->outs) != NULL) { to = a->to; deltraverse(nfa, leftend, to); assert(to->nouts == 0 || to->tmp != NULL); freearc(nfa, a); if (to->nins == 0 && to->tmp == NULL) { assert(to->nouts == 0); freestate(nfa, to); } } assert(s->no != FREESTATE); /* we're still here */ assert(s == leftend || s->nins != 0); /* and still reachable */ assert(s->nouts == 0); /* but have no outarcs */ s->tmp = NULL; /* we're done here */ } /* - dupnfa - duplicate sub-NFA * Another recursive traversal, this time using tmp to point to duplicates * as well as mark already-seen states. (You knew there was a reason why * it's a state pointer, didn't you? :-)) ^ static VOID dupnfa(struct nfa *, struct state *, struct state *, ^ struct state *, struct state *); */ static VOID dupnfa(nfa, start, stop, from, to) struct nfa *nfa; struct state *start; /* duplicate of subNFA starting here */ struct state *stop; /* and stopping here */ struct state *from; /* stringing duplicate from here */ struct state *to; /* to here */ { if (start == stop) { newarc(nfa, EMPTY, 0, from, to); return; } stop->tmp = to; duptraverse(nfa, start, from); /* done, except for clearing out the tmp pointers */ stop->tmp = NULL; cleartraverse(nfa, start); } /* - duptraverse - recursive heart of dupnfa ^ static VOID duptraverse(struct nfa *, struct state *, struct state *); */ static VOID duptraverse(nfa, s, stmp) struct nfa *nfa; struct state *s; struct state *stmp; /* s's duplicate, or NULL */ { struct arc *a; if (s->tmp != NULL) return; /* already done */ s->tmp = (stmp == NULL) ? newstate(nfa) : stmp; if (s->tmp == NULL) { assert(NISERR()); return; } for (a = s->outs; a != NULL && !NISERR(); a = a->outchain) { duptraverse(nfa, a->to, (struct state *)NULL); if (NISERR()) break; assert(a->to->tmp != NULL); cparc(nfa, a, s->tmp, a->to->tmp); } } /* - cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set ^ static VOID cleartraverse(struct nfa *, struct state *); */ static VOID cleartraverse(nfa, s) struct nfa *nfa; struct state *s; { struct arc *a; if (s->tmp == NULL) return; s->tmp = NULL; for (a = s->outs; a != NULL; a = a->outchain) cleartraverse(nfa, a->to); } /* - specialcolors - fill in special colors for an NFA ^ static VOID specialcolors(struct nfa *); */ static VOID specialcolors(nfa) struct nfa *nfa; { /* false colors for BOS, BOL, EOS, EOL */ if (nfa->parent == NULL) { nfa->bos[0] = pseudocolor(nfa->cm); nfa->bos[1] = pseudocolor(nfa->cm); nfa->eos[0] = pseudocolor(nfa->cm); nfa->eos[1] = pseudocolor(nfa->cm); } else { assert(nfa->parent->bos[0] != COLORLESS); nfa->bos[0] = nfa->parent->bos[0]; assert(nfa->parent->bos[1] != COLORLESS); nfa->bos[1] = nfa->parent->bos[1]; assert(nfa->parent->eos[0] != COLORLESS); nfa->eos[0] = nfa->parent->eos[0]; assert(nfa->parent->eos[1] != COLORLESS); nfa->eos[1] = nfa->parent->eos[1]; } } /* - optimize - optimize an NFA ^ static long optimize(struct nfa *, FILE *); */ static long /* re_info bits */ optimize(nfa, f) struct nfa *nfa; FILE *f; /* for debug output; NULL none */ { int verbose = (f != NULL) ? 1 : 0; if (verbose) fprintf(f, "\ninitial cleanup:\n"); cleanup(nfa); /* may simplify situation */ if (verbose) dumpnfa(nfa, f); if (verbose) fprintf(f, "\nempties:\n"); fixempties(nfa, f); /* get rid of EMPTY arcs */ if (verbose) fprintf(f, "\nconstraints:\n"); pullback(nfa, f); /* pull back constraints backward */ pushfwd(nfa, f); /* push fwd constraints forward */ if (verbose) fprintf(f, "\nfinal cleanup:\n"); cleanup(nfa); /* final tidying */ return analyze(nfa); /* and analysis */ } /* - pullback - pull back constraints backward to (with luck) eliminate them ^ static VOID pullback(struct nfa *, FILE *); */ static VOID pullback(nfa, f) struct nfa *nfa; FILE *f; /* for debug output; NULL none */ { struct state *s; struct state *nexts; struct arc *a; struct arc *nexta; int progress; /* find and pull until there are no more */ do { progress = 0; for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; for (a = s->outs; a != NULL && !NISERR(); a = nexta) { nexta = a->outchain; if (a->type == '^' || a->type == BEHIND) if (pull(nfa, a)) progress = 1; assert(nexta == NULL || s->no != FREESTATE); } } if (progress && f != NULL) dumpnfa(nfa, f); } while (progress && !NISERR()); if (NISERR()) return; for (a = nfa->pre->outs; a != NULL; a = nexta) { nexta = a->outchain; if (a->type == '^') { assert(a->co == 0 || a->co == 1); newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to); freearc(nfa, a); } } } /* - pull - pull a back constraint backward past its source state * A significant property of this function is that it deletes at most * one state -- the constraint's from state -- and only if the constraint * was that state's last outarc. ^ static int pull(struct nfa *, struct arc *); */ static int /* 0 couldn't, 1 could */ pull(nfa, con) struct nfa *nfa; struct arc *con; { struct state *from = con->from; struct state *to = con->to; struct arc *a; struct arc *nexta; struct state *s; if (from == to) { /* circular constraint is pointless */ freearc(nfa, con); return 1; } if (from->flag) /* can't pull back beyond start */ return 0; if (from->nins == 0) { /* unreachable */ freearc(nfa, con); return 1; } /* * DGP 2007-11-15: Cloning a state with a circular constraint on its * list of outs can lead to trouble [Bug 1810038], so get rid of them * first. */ for (a = from->outs; a != NULL; a = nexta) { nexta = a->outchain; switch (a->type) { case '^': case '$': case BEHIND: case AHEAD: if (from == a->to) { freearc(nfa, a); } break; } } /* first, clone from state if necessary to avoid other outarcs */ if (from->nouts > 1) { s = newstate(nfa); if (NISERR()) return 0; assert(to != from); /* con is not an inarc */ copyins(nfa, from, s); /* duplicate inarcs */ cparc(nfa, con, s, to); /* move constraint arc */ freearc(nfa, con); from = s; con = from->outs; } assert(from->nouts == 1); /* propagate the constraint into the from state's inarcs */ for (a = from->ins; a != NULL; a = nexta) { nexta = a->inchain; switch (combine(con, a)) { case INCOMPATIBLE: /* destroy the arc */ freearc(nfa, a); break; case SATISFIED: /* no action needed */ break; case COMPATIBLE: /* swap the two arcs, more or less */ s = newstate(nfa); if (NISERR()) return 0; cparc(nfa, a, s, to); /* anticipate move */ cparc(nfa, con, a->from, s); if (NISERR()) return 0; freearc(nfa, a); break; default: assert(NOTREACHED); break; } } /* remaining inarcs, if any, incorporate the constraint */ moveins(nfa, from, to); dropstate(nfa, from); /* will free the constraint */ return 1; } /* - pushfwd - push forward constraints forward to (with luck) eliminate them ^ static VOID pushfwd(struct nfa *, FILE *); */ static VOID pushfwd(nfa, f) struct nfa *nfa; FILE *f; /* for debug output; NULL none */ { struct state *s; struct state *nexts; struct arc *a; struct arc *nexta; int progress; /* find and push until there are no more */ do { progress = 0; for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; for (a = s->ins; a != NULL && !NISERR(); a = nexta) { nexta = a->inchain; if (a->type == '$' || a->type == AHEAD) if (push(nfa, a)) progress = 1; assert(nexta == NULL || s->no != FREESTATE); } } if (progress && f != NULL) dumpnfa(nfa, f); } while (progress && !NISERR()); if (NISERR()) return; for (a = nfa->post->ins; a != NULL; a = nexta) { nexta = a->inchain; if (a->type == '$') { assert(a->co == 0 || a->co == 1); newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to); freearc(nfa, a); } } } /* - push - push a forward constraint forward past its destination state * A significant property of this function is that it deletes at most * one state -- the constraint's to state -- and only if the constraint * was that state's last inarc. ^ static int push(struct nfa *, struct arc *); */ static int /* 0 couldn't, 1 could */ push(nfa, con) struct nfa *nfa; struct arc *con; { struct state *from = con->from; struct state *to = con->to; struct arc *a; struct arc *nexta; struct state *s; if (to == from) { /* circular constraint is pointless */ freearc(nfa, con); return 1; } if (to->flag) /* can't push forward beyond end */ return 0; if (to->nouts == 0) { /* dead end */ freearc(nfa, con); return 1; } /* * DGP 2007-11-15: Here we duplicate the same protections as appear * in pull() above to avoid troubles with cloning a state with a * circular constraint on its list of ins. It is not clear whether * this is necessary, or is protecting against a "can't happen". * Any test case that actually leads to a freearc() call here would * be a welcome addition to the test suite. */ for (a = to->ins; a != NULL; a = nexta) { nexta = a->inchain; switch (a->type) { case '^': case '$': case BEHIND: case AHEAD: if (a->from == to) { freearc(nfa, a); } break; } } /* first, clone to state if necessary to avoid other inarcs */ if (to->nins > 1) { s = newstate(nfa); if (NISERR()) return 0; copyouts(nfa, to, s); /* duplicate outarcs */ cparc(nfa, con, from, s); /* move constraint */ freearc(nfa, con); to = s; con = to->ins; } assert(to->nins == 1); /* propagate the constraint into the to state's outarcs */ for (a = to->outs; a != NULL; a = nexta) { nexta = a->outchain; switch (combine(con, a)) { case INCOMPATIBLE: /* destroy the arc */ freearc(nfa, a); break; case SATISFIED: /* no action needed */ break; case COMPATIBLE: /* swap the two arcs, more or less */ s = newstate(nfa); if (NISERR()) return 0; cparc(nfa, con, s, a->to); /* anticipate move */ cparc(nfa, a, from, s); if (NISERR()) return 0; freearc(nfa, a); break; default: assert(NOTREACHED); break; } } /* remaining outarcs, if any, incorporate the constraint */ moveouts(nfa, to, from); dropstate(nfa, to); /* will free the constraint */ return 1; } /* - combine - constraint lands on an arc, what happens? ^ #def INCOMPATIBLE 1 // destroys arc ^ #def SATISFIED 2 // constraint satisfied ^ #def COMPATIBLE 3 // compatible but not satisfied yet ^ static int combine(struct arc *, struct arc *); */ static int combine(con, a) struct arc *con; struct arc *a; { # define CA(ct,at) (((ct)<type, a->type)) { case CA('^', PLAIN): /* newlines are handled separately */ case CA('$', PLAIN): return INCOMPATIBLE; break; case CA(AHEAD, PLAIN): /* color constraints meet colors */ case CA(BEHIND, PLAIN): if (con->co == a->co) return SATISFIED; return INCOMPATIBLE; break; case CA('^', '^'): /* collision, similar constraints */ case CA('$', '$'): case CA(AHEAD, AHEAD): case CA(BEHIND, BEHIND): if (con->co == a->co) /* true duplication */ return SATISFIED; return INCOMPATIBLE; break; case CA('^', BEHIND): /* collision, dissimilar constraints */ case CA(BEHIND, '^'): case CA('$', AHEAD): case CA(AHEAD, '$'): return INCOMPATIBLE; break; case CA('^', '$'): /* constraints passing each other */ case CA('^', AHEAD): case CA(BEHIND, '$'): case CA(BEHIND, AHEAD): case CA('$', '^'): case CA('$', BEHIND): case CA(AHEAD, '^'): case CA(AHEAD, BEHIND): case CA('^', LACON): case CA(BEHIND, LACON): case CA('$', LACON): case CA(AHEAD, LACON): return COMPATIBLE; break; } assert(NOTREACHED); return INCOMPATIBLE; /* for benefit of blind compilers */ } /* - fixempties - get rid of EMPTY arcs ^ static VOID fixempties(struct nfa *, FILE *); */ static VOID fixempties(nfa, f) struct nfa *nfa; FILE *f; /* for debug output; NULL none */ { struct state *s; struct state *s2; struct state *nexts; struct arc *a; struct arc *nexta; /* * First, get rid of any states whose sole out-arc is an EMPTY, since * they're basically just aliases for their successor. The parsing * algorithm creates enough of these that it's worth special-casing this. */ for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; if (s->nouts == 1 && !s->flag) { a = s->outs; assert(a != NULL && a->outchain == NULL); if (a->type == EMPTY) { if (s != a->to) moveins(nfa, s, a->to); dropstate(nfa, s); } } } /* * Similarly, get rid of any state with a single EMPTY in-arc, by folding * it into its predecessor. */ for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; /* while we're at it, ensure tmp fields are clear for next step */ s->tmp = NULL; if (s->nins == 1 && !s->flag) { a = s->ins; assert(a != NULL && a->inchain == NULL); if (a->type == EMPTY) { if (s != a->from) moveouts(nfa, s, a->from); dropstate(nfa, s); } } } /* * For each remaining NFA state, find all other states that are reachable * from it by a chain of one or more EMPTY arcs. Then generate new arcs * that eliminate the need for each such chain. * * If we just do this straightforwardly, the algorithm gets slow in * complex graphs, because the same arcs get copied to all intermediate * states of an EMPTY chain, and then uselessly pushed repeatedly to the * chain's final state; we waste a lot of time in newarc's duplicate * checking. To improve matters, we decree that any state with only EMPTY * out-arcs is "doomed" and will not be part of the final NFA. That can be * ensured by not adding any new out-arcs to such a state. Having ensured * that, we need not update the state's in-arcs list either; all arcs that * might have gotten pushed forward to it will just get pushed directly to * successor states. This eliminates most of the useless duplicate arcs. */ for (s = nfa->states; s != NULL && !NISERR(); s = s->next) { for (s2 = emptyreachable(s, s); s2 != s && !NISERR(); s2 = nexts) { /* * If s2 is doomed, we decide that (1) we will always push arcs * forward to it, not pull them back to s; and (2) we can optimize * away the push-forward, per comment above. So do nothing. */ if (s2->flag || nonemptyouts(s2) > 0) replaceempty(nfa, s, s2); /* Reset the tmp fields as we walk back */ nexts = s2->tmp; s2->tmp = NULL; } s->tmp = NULL; } /* * Now remove all the EMPTY arcs, since we don't need them anymore. */ for (s = nfa->states; s != NULL && !NISERR(); s = s->next) { for (a = s->outs; a != NULL; a = nexta) { nexta = a->outchain; if (a->type == EMPTY) freearc(nfa, a); } } /* * And remove any states that have become useless. (This cleanup is not * very thorough, and would be even less so if we tried to combine it with * the previous step; but cleanup() will take care of anything we miss.) */ for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; if ((s->nins == 0 || s->nouts == 0) && !s->flag) dropstate(nfa, s); } if (f != NULL && !NISERR()) dumpnfa(nfa, f); } /* - emptyreachable - recursively find all states reachable from s by EMPTY arcs * The return value is the last such state found. Its tmp field links back * to the next-to-last such state, and so on back to s, so that all these * states can be located without searching the whole NFA. * The maximum recursion depth here is equal to the length of the longest * loop-free chain of EMPTY arcs, which is surely no more than the size of * the NFA, and in practice will be a lot less than that. ^ static struct state *emptyreachable(struct state *, struct state *); */ static struct state * emptyreachable(s, lastfound) struct state *s; struct state *lastfound; { struct arc *a; s->tmp = lastfound; lastfound = s; for (a = s->outs; a != NULL; a = a->outchain) { if (a->type == EMPTY && a->to->tmp == NULL) lastfound = emptyreachable(a->to, lastfound); } return lastfound; } /* - replaceempty - replace an EMPTY arc chain with some non-empty arcs * The EMPTY arc(s) should be deleted later, but we can't do it here because * they may still be needed to identify other arc chains during fixempties(). ^ static void replaceempty(struct nfa *, struct state *, struct state *); */ static VOID replaceempty(nfa, from, to) struct nfa *nfa; struct state *from; struct state *to; { int fromouts; int toins; assert(from != to); /* * Create replacement arcs that bypass the need for the EMPTY chain. We * can do this either by pushing arcs forward (linking directly from * "from"'s predecessors to "to") or by pulling them back (linking * directly from "from" to "to"'s successors). In general, we choose * whichever way creates greater fan-out or fan-in, so as to improve the * odds of reducing the other state to zero in-arcs or out-arcs and * thereby being able to delete it. However, if "from" is doomed (has no * non-EMPTY out-arcs), we must keep it so, so always push forward in that * case. * * The fan-out/fan-in comparison should count only non-EMPTY arcs. If * "from" is doomed, we can skip counting "to"'s arcs, since we want to * force taking the copynonemptyins path in that case. */ fromouts = nonemptyouts(from); toins = (fromouts == 0) ? 1 : nonemptyins(to); if (fromouts > toins) { copynonemptyouts(nfa, to, from); return; } if (fromouts < toins) { copynonemptyins(nfa, from, to); return; } /* * fromouts == toins. Decide on secondary issue: copy fewest arcs. * * Doesn't seem to be worth the trouble to exclude empties from these * comparisons; that takes extra time and doesn't seem to improve the * resulting graph much. */ if (from->nins > to->nouts) { copynonemptyouts(nfa, to, from); return; } else { copynonemptyins(nfa, from, to); return; } } /* - cleanup - clean up NFA after optimizations ^ static VOID cleanup(struct nfa *); */ static VOID cleanup(nfa) struct nfa *nfa; { struct state *s; struct state *nexts; int n; /* clear out unreachable or dead-end states */ /* use pre to mark reachable, then post to mark can-reach-post */ markreachable(nfa, nfa->pre, (struct state *)NULL, nfa->pre); markcanreach(nfa, nfa->post, nfa->pre, nfa->post); for (s = nfa->states; s != NULL; s = nexts) { nexts = s->next; if (s->tmp != nfa->post && !s->flag) dropstate(nfa, s); } assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post); cleartraverse(nfa, nfa->pre); assert(nfa->post->nins == 0 || nfa->post->tmp == NULL); /* the nins==0 (final unreachable) case will be caught later */ /* renumber surviving states */ n = 0; for (s = nfa->states; s != NULL; s = s->next) s->no = n++; nfa->nstates = n; } /* - markreachable - recursive marking of reachable states ^ static VOID markreachable(struct nfa *, struct state *, struct state *, ^ struct state *); */ static VOID markreachable(nfa, s, okay, mark) struct nfa *nfa; struct state *s; struct state *okay; /* consider only states with this mark */ struct state *mark; /* the value to mark with */ { struct arc *a; if (s->tmp != okay) return; s->tmp = mark; for (a = s->outs; a != NULL; a = a->outchain) markreachable(nfa, a->to, okay, mark); } /* - markcanreach - recursive marking of states which can reach here ^ static VOID markcanreach(struct nfa *, struct state *, struct state *, ^ struct state *); */ static VOID markcanreach(nfa, s, okay, mark) struct nfa *nfa; struct state *s; struct state *okay; /* consider only states with this mark */ struct state *mark; /* the value to mark with */ { struct arc *a; if (s->tmp != okay) return; s->tmp = mark; for (a = s->ins; a != NULL; a = a->inchain) markcanreach(nfa, a->from, okay, mark); } /* - analyze - ascertain potentially-useful facts about an optimized NFA ^ static long analyze(struct nfa *); */ static long /* re_info bits to be ORed in */ analyze(nfa) struct nfa *nfa; { struct arc *a; struct arc *aa; if (nfa->pre->outs == NULL) return REG_UIMPOSSIBLE; for (a = nfa->pre->outs; a != NULL; a = a->outchain) for (aa = a->to->outs; aa != NULL; aa = aa->outchain) if (aa->to == nfa->post) return REG_UEMPTYMATCH; return 0; } /* - compact - compact an NFA ^ static VOID compact(struct nfa *, struct cnfa *); */ static VOID compact(nfa, cnfa) struct nfa *nfa; struct cnfa *cnfa; { struct state *s; struct arc *a; size_t nstates; size_t narcs; struct carc *ca; struct carc *first; assert (!NISERR()); nstates = 0; narcs = 0; for (s = nfa->states; s != NULL; s = s->next) { nstates++; narcs += 1 + s->nouts + 1; /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */ } cnfa->states = (struct carc **)MALLOC(nstates * sizeof(struct carc *)); cnfa->arcs = (struct carc *)MALLOC(narcs * sizeof(struct carc)); if (cnfa->states == NULL || cnfa->arcs == NULL) { if (cnfa->states != NULL) FREE(cnfa->states); if (cnfa->arcs != NULL) FREE(cnfa->arcs); NERR(REG_ESPACE); return; } cnfa->nstates = nstates; cnfa->pre = nfa->pre->no; cnfa->post = nfa->post->no; cnfa->bos[0] = nfa->bos[0]; cnfa->bos[1] = nfa->bos[1]; cnfa->eos[0] = nfa->eos[0]; cnfa->eos[1] = nfa->eos[1]; cnfa->ncolors = maxcolor(nfa->cm) + 1; cnfa->flags = 0; ca = cnfa->arcs; for (s = nfa->states; s != NULL; s = s->next) { assert((size_t)s->no < nstates); cnfa->states[s->no] = ca; ca->co = 0; /* clear and skip flags "arc" */ ca++; first = ca; for (a = s->outs; a != NULL; a = a->outchain) switch (a->type) { case PLAIN: ca->co = a->co; ca->to = a->to->no; ca++; break; case LACON: assert(s->no != cnfa->pre); ca->co = (color)(cnfa->ncolors + a->co); ca->to = a->to->no; ca++; cnfa->flags |= HASLACONS; break; default: assert(NOTREACHED); break; } carcsort(first, ca-1); ca->co = COLORLESS; ca->to = 0; ca++; } assert(ca == &cnfa->arcs[narcs]); assert(cnfa->nstates != 0); /* mark no-progress states */ for (a = nfa->pre->outs; a != NULL; a = a->outchain) cnfa->states[a->to->no]->co = 1; cnfa->states[nfa->pre->no]->co = 1; } /* - carcsort - sort compacted-NFA arcs by color * Really dumb algorithm, but if the list is long enough for that to matter, * you're in real trouble anyway. ^ static VOID carcsort(struct carc *, struct carc *); */ static VOID carcsort(first, last) struct carc *first; struct carc *last; { struct carc *p; struct carc *q; struct carc tmp; if (last - first <= 1) return; for (p = first; p <= last; p++) for (q = p; q <= last; q++) if (p->co > q->co || (p->co == q->co && p->to > q->to)) { assert(p != q); tmp = *p; *p = *q; *q = tmp; } } /* - freecnfa - free a compacted NFA ^ static VOID freecnfa(struct cnfa *); */ static VOID freecnfa(cnfa) struct cnfa *cnfa; { assert(cnfa->nstates != 0); /* not empty already */ cnfa->nstates = 0; FREE(cnfa->states); FREE(cnfa->arcs); } /* - dumpnfa - dump an NFA in human-readable form ^ static VOID dumpnfa(struct nfa *, FILE *); */ static VOID dumpnfa(nfa, f) struct nfa *nfa; FILE *f; { #ifdef REG_DEBUG struct state *s; fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no); if (nfa->bos[0] != COLORLESS) fprintf(f, ", bos [%ld]", (long)nfa->bos[0]); if (nfa->bos[1] != COLORLESS) fprintf(f, ", bol [%ld]", (long)nfa->bos[1]); if (nfa->eos[0] != COLORLESS) fprintf(f, ", eos [%ld]", (long)nfa->eos[0]); if (nfa->eos[1] != COLORLESS) fprintf(f, ", eol [%ld]", (long)nfa->eos[1]); fprintf(f, "\n"); for (s = nfa->states; s != NULL; s = s->next) dumpstate(s, f); if (nfa->parent == NULL) dumpcolors(nfa->cm, f); fflush(f); #endif } #ifdef REG_DEBUG /* subordinates of dumpnfa */ /* ^ #ifdef REG_DEBUG */ /* - dumpstate - dump an NFA state in human-readable form ^ static VOID dumpstate(struct state *, FILE *); */ static VOID dumpstate(s, f) struct state *s; FILE *f; { struct arc *a; fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "", (s->flag) ? s->flag : '.'); if (s->prev != NULL && s->prev->next != s) fprintf(f, "\tstate chain bad\n"); if (s->nouts == 0) fprintf(f, "\tno out arcs\n"); else dumparcs(s, f); fflush(f); for (a = s->ins; a != NULL; a = a->inchain) { if (a->to != s) fprintf(f, "\tlink from %d to %d on %d's in-chain\n", a->from->no, a->to->no, s->no); } } /* - dumparcs - dump out-arcs in human-readable form ^ static VOID dumparcs(struct state *, FILE *); */ static VOID dumparcs(s, f) struct state *s; FILE *f; { int pos; assert(s->nouts > 0); /* printing arcs in reverse order is usually clearer */ pos = dumprarcs(s->outs, s, f, 1); if (pos != 1) fprintf(f, "\n"); } /* - dumprarcs - dump remaining outarcs, recursively, in reverse order ^ static int dumprarcs(struct arc *, struct state *, FILE *, int); */ static int /* resulting print position */ dumprarcs(a, s, f, pos) struct arc *a; struct state *s; FILE *f; int pos; /* initial print position */ { if (a->outchain != NULL) pos = dumprarcs(a->outchain, s, f, pos); dumparc(a, s, f); if (pos == 5) { fprintf(f, "\n"); pos = 1; } else pos++; return pos; } /* - dumparc - dump one outarc in readable form, including prefixing tab ^ static VOID dumparc(struct arc *, struct state *, FILE *); */ static VOID dumparc(a, s, f) struct arc *a; struct state *s; FILE *f; { struct arc *aa; struct arcbatch *ab; fprintf(f, "\t"); switch (a->type) { case PLAIN: fprintf(f, "[%ld]", (long)a->co); break; case AHEAD: fprintf(f, ">%ld>", (long)a->co); break; case BEHIND: fprintf(f, "<%ld<", (long)a->co); break; case LACON: fprintf(f, ":%ld:", (long)a->co); break; case '^': case '$': fprintf(f, "%c%d", a->type, (int)a->co); break; case EMPTY: break; default: fprintf(f, "0x%x/0%lo", a->type, (long)a->co); break; } if (a->from != s) fprintf(f, "?%d?", a->from->no); for (ab = &a->from->oas; ab != NULL; ab = ab->next) { for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++) if (aa == a) break; /* NOTE BREAK OUT */ if (aa < &ab->a[ABSIZE]) /* propagate break */ break; /* NOTE BREAK OUT */ } if (ab == NULL) fprintf(f, "?!?"); /* not in allocated space */ fprintf(f, "->"); if (a->to == NULL) { fprintf(f, "NULL"); return; } fprintf(f, "%d", a->to->no); for (aa = a->to->ins; aa != NULL; aa = aa->inchain) if (aa == a) break; /* NOTE BREAK OUT */ if (aa == NULL) fprintf(f, "?!?"); /* missing from in-chain */ } /* ^ #endif */ #endif /* ifdef REG_DEBUG */ /* - dumpcnfa - dump a compacted NFA in human-readable form ^ static VOID dumpcnfa(struct cnfa *, FILE *); */ static VOID dumpcnfa(cnfa, f) struct cnfa *cnfa; FILE *f; { #ifdef REG_DEBUG int st; fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post); if (cnfa->bos[0] != COLORLESS) fprintf(f, ", bos [%ld]", (long)cnfa->bos[0]); if (cnfa->bos[1] != COLORLESS) fprintf(f, ", bol [%ld]", (long)cnfa->bos[1]); if (cnfa->eos[0] != COLORLESS) fprintf(f, ", eos [%ld]", (long)cnfa->eos[0]); if (cnfa->eos[1] != COLORLESS) fprintf(f, ", eol [%ld]", (long)cnfa->eos[1]); if (cnfa->flags&HASLACONS) fprintf(f, ", haslacons"); fprintf(f, "\n"); for (st = 0; st < cnfa->nstates; st++) dumpcstate(st, cnfa->states[st], cnfa, f); fflush(f); #endif } #ifdef REG_DEBUG /* subordinates of dumpcnfa */ /* ^ #ifdef REG_DEBUG */ /* - dumpcstate - dump a compacted-NFA state in human-readable form ^ static VOID dumpcstate(int, struct carc *, struct cnfa *, FILE *); */ static VOID dumpcstate(st, ca, cnfa, f) int st; struct carc *ca; struct cnfa *cnfa; FILE *f; { int i; int pos; fprintf(f, "%d%s", st, (ca[0].co) ? ":" : "."); pos = 1; for (i = 1; ca[i].co != COLORLESS; i++) { if (ca[i].co < cnfa->ncolors) fprintf(f, "\t[%ld]->%d", (long)ca[i].co, ca[i].to); else fprintf(f, "\t:%ld:->%d", (long)ca[i].co-cnfa->ncolors, ca[i].to); if (pos == 5) { fprintf(f, "\n"); pos = 1; } else pos++; } if (i == 1 || pos != 1) fprintf(f, "\n"); fflush(f); } /* ^ #endif */ #endif /* ifdef REG_DEBUG */