diff options
author | stanton <stanton> | 1999-08-05 01:16:57 (GMT) |
---|---|---|
committer | stanton <stanton> | 1999-08-05 01:16:57 (GMT) |
commit | 144c26987f8a6f6285efd2f4e878e844b9c13d33 (patch) | |
tree | 6ba27c1270de462a013ecbff3d320551d574c2c5 /generic/regcomp.c | |
parent | 97320151634cf8e22126e5ab7c59b1aa9931c002 (diff) | |
download | tcl-144c26987f8a6f6285efd2f4e878e844b9c13d33.zip tcl-144c26987f8a6f6285efd2f4e878e844b9c13d33.tar.gz tcl-144c26987f8a6f6285efd2f4e878e844b9c13d33.tar.bz2 |
* generic/regc_nfa.c:
* generic/regcomp.c:
* generic/rege_dfa.c:
* generic/regexec.c:
* generic/regguts.h: Applied patches supplied by Henry Spencer to
greatly enhance the performance of certain classes of regular
expressions. [Bug: 2440, 2447]
Diffstat (limited to 'generic/regcomp.c')
-rw-r--r-- | generic/regcomp.c | 128 |
1 files changed, 77 insertions, 51 deletions
diff --git a/generic/regcomp.c b/generic/regcomp.c index b292012..85639a1 100644 --- a/generic/regcomp.c +++ b/generic/regcomp.c @@ -41,7 +41,7 @@ int compile _ANSI_ARGS_((regex_t *, CONST chr *, size_t, int)); static VOID moresubs _ANSI_ARGS_((struct vars *, int)); static int freev _ANSI_ARGS_((struct vars *, int)); -static VOID makescan _ANSI_ARGS_((struct vars *, struct nfa *)); +static VOID makesearch _ANSI_ARGS_((struct vars *, struct nfa *)); static struct subre *parse _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *)); static struct subre *parsebranch _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, int)); static VOID parseqatom _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, struct subre *)); @@ -65,14 +65,15 @@ static VOID optst _ANSI_ARGS_((struct vars *, struct subre *)); static int numst _ANSI_ARGS_((struct subre *, int)); static VOID markst _ANSI_ARGS_((struct subre *)); static VOID cleanst _ANSI_ARGS_((struct vars *)); -static int nfatree _ANSI_ARGS_((struct vars *, struct subre *, FILE *)); -static int nfanode _ANSI_ARGS_((struct vars *, struct subre *, FILE *)); +static long nfatree _ANSI_ARGS_((struct vars *, struct subre *, FILE *)); +static long nfanode _ANSI_ARGS_((struct vars *, struct subre *, FILE *)); static int newlacon _ANSI_ARGS_((struct vars *, struct state *, struct state *, int)); static VOID freelacons _ANSI_ARGS_((struct subre *, int)); static VOID rfree _ANSI_ARGS_((regex_t *)); static VOID dump _ANSI_ARGS_((regex_t *, FILE *)); static VOID dumpst _ANSI_ARGS_((struct subre *, FILE *, int)); -static VOID stdump _ANSI_ARGS_((struct subre *, FILE *, int, int)); +static VOID stdump _ANSI_ARGS_((struct subre *, FILE *, int)); +static char *stid _ANSI_ARGS_((struct subre *, char *, size_t)); /* === regc_lex.c === */ static VOID lexstart _ANSI_ARGS_((struct vars *)); static VOID prefixes _ANSI_ARGS_((struct vars *)); @@ -138,7 +139,7 @@ static VOID dupnfa _ANSI_ARGS_((struct nfa *, struct state *, struct state *, st static VOID duptraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); static VOID cleartraverse _ANSI_ARGS_((struct nfa *, struct state *)); static VOID specialcolors _ANSI_ARGS_((struct nfa *)); -static int optimize _ANSI_ARGS_((struct nfa *, FILE *)); +static long optimize _ANSI_ARGS_((struct nfa *, FILE *)); static VOID pullback _ANSI_ARGS_((struct nfa *, FILE *)); static int pull _ANSI_ARGS_((struct nfa *, struct arc *)); static VOID pushfwd _ANSI_ARGS_((struct nfa *, FILE *)); @@ -152,7 +153,7 @@ static int unempty _ANSI_ARGS_((struct nfa *, struct arc *)); static VOID cleanup _ANSI_ARGS_((struct nfa *)); static VOID markreachable _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *)); static VOID markcanreach _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *)); -static int analyze _ANSI_ARGS_((struct nfa *)); +static long analyze _ANSI_ARGS_((struct nfa *)); static VOID compact _ANSI_ARGS_((struct nfa *, struct cnfa *)); static VOID carcsort _ANSI_ARGS_((struct carc *, struct carc *)); static VOID freecnfa _ANSI_ARGS_((struct cnfa *)); @@ -229,7 +230,6 @@ struct vars { struct subre *lacons; /* lookahead-constraint vector */ int nlacons; /* size of lacons */ int usedshorter; /* used short-preferring quantifiers */ - int unmatchable; /* can never match */ }; /* parsing macros; most know that `v' is the struct vars pointer */ @@ -382,36 +382,37 @@ int flags; specialcolors(v->nfa); CNOERR(); if (debug != NULL) { + fprintf(debug, "\n\n\n========= RAW ==========\n"); dumpnfa(v->nfa, debug); dumpst(v->tree, debug, 1); } v->usedshorter = 0; - v->unmatchable = 0; optst(v, v->tree); v->ntree = numst(v->tree, 1); markst(v->tree); cleanst(v); if (debug != NULL) { + fprintf(debug, "\n\n\n========= TREE FIXED ==========\n"); fprintf(debug, "-->\n"); dumpst(v->tree, debug, 1); } /* build compacted NFAs for tree, lacons, fast search */ re->re_info |= nfatree(v, v->tree, debug); - if (debug != NULL) { - fprintf(debug, "---->\n"); - dumpst(v->tree, debug, 1); - } CNOERR(); - if (re->re_info®_UIMPOSSIBLE) - v->unmatchable = 1; assert(v->nlacons == 0 || v->lacons != NULL); - for (i = 1; i < v->nlacons; i++) + for (i = 1; i < v->nlacons; i++) { + if (debug != NULL) + fprintf(debug, "\n\n\n========= LA%d ==========\n", i); nfanode(v, &v->lacons[i], debug); + } CNOERR(); + if (debug != NULL) + fprintf(debug, "\n\n\n========= SEARCH ==========\n"); + /* can sacrifice main NFA now, so use it as work area */ (DISCARD)optimize(v->nfa, debug); CNOERR(); - makescan(v, v->nfa); + makesearch(v, v->nfa); CNOERR(); compact(v->nfa, &g->search); CNOERR(); @@ -431,7 +432,6 @@ int flags; v->lacons = NULL; g->nlacons = v->nlacons; g->usedshorter = v->usedshorter; - g->unmatchable = v->unmatchable; if (flags®_DUMP) dump(re, stdout); @@ -507,12 +507,12 @@ int err; } /* - - makescan - turn an NFA into a fast-scan NFA (implicit prepend of .*?) + - makesearch - turn an NFA into a fast-scan NFA (implicit prepend of .*?) * NFA must have been optimize()d already. - ^ static VOID makescan(struct vars *, struct nfa *); + ^ static VOID makesearch(struct vars *, struct nfa *); */ static VOID -makescan(v, nfa) +makesearch(v, nfa) struct vars *v; struct nfa *nfa; { @@ -1907,9 +1907,9 @@ struct vars *v; /* - nfatree - turn a subRE subtree into a tree of compacted NFAs - ^ static int nfatree(struct vars *, struct subre *, FILE *); + ^ static long nfatree(struct vars *, struct subre *, FILE *); */ -static int /* optimize results from top node */ +static long /* optimize results from top node */ nfatree(v, t, f) struct vars *v; struct subre *t; @@ -1927,19 +1927,23 @@ FILE *f; /* for debug output */ /* - nfanode - do one NFA for nfatree - ^ static int nfanode(struct vars *, struct subre *, FILE *); + ^ static long nfanode(struct vars *, struct subre *, FILE *); */ -static int /* optimize results */ +static long /* optimize results */ nfanode(v, t, f) struct vars *v; struct subre *t; FILE *f; /* for debug output */ { struct nfa *nfa; - int ret = 0; + long ret = 0; + char idbuf[50]; assert(t->begin != NULL); + if (f != NULL) + fprintf(f, "\n\n\n========= TREE NODE %s ==========\n", + stid(t, idbuf, sizeof(idbuf))); nfa = newnfa(v, v->cm, v->nfa); NOERRZ(); dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final); @@ -2061,20 +2065,22 @@ FILE *f; fprintf(f, "bad guts magic number (0x%x not 0x%x)\n", g->magic, GUTSMAGIC); - fprintf(f, "nsub %d, info 0%o, csize %d, ntree %d, usedshort %d\n", + fprintf(f, "\n\n\n========= DUMP ==========\n"); + fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d, usedshort %d\n", re->re_nsub, re->re_info, re->re_csize, g->ntree, g->usedshorter); dumpcolors(&g->cmap, f); if (!NULLCNFA(g->search)) { - printf("search:\n"); + printf("\nsearch:\n"); dumpcnfa(&g->search, f); } for (i = 1; i < g->nlacons; i++) { - fprintf(f, "la%d (%s):\n", i, + fprintf(f, "\nla%d (%s):\n", i, (g->lacons[i].subno) ? "positive" : "negative"); dumpcnfa(&g->lacons[i].cnfa, f); } + fprintf(f, "\n"); dumpst(g->tree, f, 0); #endif } @@ -2092,59 +2098,79 @@ int nfapresent; /* is the original NFA still around? */ if (t == NULL) fprintf(f, "null tree\n"); else - stdump(t, f, nfapresent, 0); + stdump(t, f, nfapresent); fflush(f); } /* - stdump - recursive guts of dumpst - ^ static VOID stdump(struct subre *, FILE *, int, int); + ^ static VOID stdump(struct subre *, FILE *, int); */ static VOID -stdump(t, f, nfapresent, level) +stdump(t, f, nfapresent) struct subre *t; FILE *f; int nfapresent; /* is the original NFA still around? */ -int level; { int i; -# define RTSEP " " + char idbuf[50]; - for (i = 0; i < level; i++) - fprintf(f, RTSEP); - fprintf(f, "%c (", t->op); + fprintf(f, "%s. `%c'", stid(t, idbuf, sizeof(idbuf)), t->op); if (t->flags&LONGER) - fprintf(f, "L"); + fprintf(f, " longest"); if (t->flags&SHORTER) - fprintf(f, "S"); + fprintf(f, " shortest"); if (t->flags&MIXED) - fprintf(f, "M"); + fprintf(f, " hasmixed"); if (t->flags&CAP) - fprintf(f, "c"); + fprintf(f, " hascapture"); if (t->flags&BACKR) - fprintf(f, "b"); + fprintf(f, " hasbackref"); if (!(t->flags&INUSE)) - fprintf(f, "!u"); - fprintf(f, ") r%d", t->retry); + fprintf(f, " UNUSED"); if (t->subno != 0) - fprintf(f, " #%d", t->subno); + fprintf(f, " (#%d)", t->subno); if (t->min != 1 || t->max != 1) { - fprintf(f, "{%d,", t->min); + fprintf(f, " {%d,", t->min); if (t->max != INFINITY) fprintf(f, "%d", t->max); fprintf(f, "}"); } if (nfapresent) fprintf(f, " %ld-%ld", (long)t->begin->no, (long)t->end->no); - if (!NULLCNFA(t->cnfa)) - fprintf(f, ":"); - fprintf(f, "\n"); if (t->left != NULL) - stdump(t->left, f, nfapresent, level+1); - if (!NULLCNFA(t->cnfa)) + fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf))); + if (t->right != NULL) + fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf))); + if (!NULLCNFA(t->cnfa)) { + fprintf(f, "\n"); dumpcnfa(&t->cnfa, f); + fprintf(f, "\n"); + } + if (t->left != NULL) + stdump(t->left, f, nfapresent); if (t->right != NULL) - stdump(t->right, f, nfapresent, level+1); + stdump(t->right, f, nfapresent); +} + +/* + - stid - identify a subtree node for dumping + ^ static char *stid(struct subre *, char *, size_t); + */ +static char * /* points to buf or constant string */ +stid(t, buf, bufsize) +struct subre *t; +char *buf; +size_t bufsize; +{ + /* big enough for hex int or decimal t->retry? */ + if (bufsize < sizeof(int)*2 + 3 || bufsize < sizeof(t->retry)*3 + 1) + return "unable"; + if (t->retry != 0) + sprintf(buf, "%d", t->retry); + else + sprintf(buf, "0x%x", (int)t); /* may lose bits, that's okay */ + return buf; } #include "regc_lex.c" |