diff options
author | stanton <stanton> | 1999-06-02 01:53:29 (GMT) |
---|---|---|
committer | stanton <stanton> | 1999-06-02 01:53:29 (GMT) |
commit | 21c3baed52eba908b3e56f33f9d0f9541495616e (patch) | |
tree | 02591dac73223fda831014e215800dee4b75d811 /generic | |
parent | a045db4812c23138a2f998a7f827f46a30558a79 (diff) | |
download | tcl-21c3baed52eba908b3e56f33f9d0f9541495616e.zip tcl-21c3baed52eba908b3e56f33f9d0f9541495616e.tar.gz tcl-21c3baed52eba908b3e56f33f9d0f9541495616e.tar.bz2 |
* tests/reg.test:
* generic/regc_color.c:
* generic/regc_cvec.c:
* generic/regc_lex.c:
* generic/regc_locale.c:
* generic/regc_nfa.c:
* generic/regcomp.c:
* generic/regcustom.h:
* generic/rege_dfa.c:
* generic/regerror.c:
* generic/regerrs.h:
* generic/regex.h:
* generic/regexec.c:
* generic/regfree.c:
* generic/regfronts.c:
* generic/regguts.h:
* generic/tclCmdMZ.c:
* generic/tclRegexp.c:
* generic/tclRegexp.h:
* generic/tclTest.c: Applied Henry Spencer's latest regexp patches
that fix an infinite loop bug and add support for testing whether
a string could match with additional input. [Bug: 2117]
Diffstat (limited to 'generic')
-rw-r--r-- | generic/regc_color.c | 52 | ||||
-rw-r--r-- | generic/regc_cvec.c | 33 | ||||
-rw-r--r-- | generic/regc_lex.c | 44 | ||||
-rw-r--r-- | generic/regc_locale.c | 2 | ||||
-rw-r--r-- | generic/regc_nfa.c | 81 | ||||
-rw-r--r-- | generic/regcomp.c | 60 | ||||
-rw-r--r-- | generic/regcustom.h | 28 | ||||
-rw-r--r-- | generic/rege_dfa.c | 93 | ||||
-rw-r--r-- | generic/regerror.c | 27 | ||||
-rw-r--r-- | generic/regerrs.h | 2 | ||||
-rw-r--r-- | generic/regex.h | 33 | ||||
-rw-r--r-- | generic/regexec.c | 231 | ||||
-rw-r--r-- | generic/regfree.c | 28 | ||||
-rw-r--r-- | generic/regfronts.c | 27 | ||||
-rw-r--r-- | generic/regguts.h | 33 | ||||
-rw-r--r-- | generic/tclCmdMZ.c | 6 | ||||
-rw-r--r-- | generic/tclRegexp.c | 22 | ||||
-rw-r--r-- | generic/tclRegexp.h | 27 | ||||
-rw-r--r-- | generic/tclTest.c | 43 |
19 files changed, 666 insertions, 206 deletions
diff --git a/generic/regc_color.c b/generic/regc_color.c index e86fea0..503bca2 100644 --- a/generic/regc_color.c +++ b/generic/regc_color.c @@ -2,6 +2,34 @@ * colorings of characters * This file is #included by regcomp.c. * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * * Note that there are some incestuous relationships between this code and * NFA arc maintenance, which perhaps ought to be cleaned up sometime. */ @@ -271,7 +299,7 @@ pcolor co; if ((size_t)co == cm->max) { while (cm->max > WHITE && UNUSEDCOLOR(&cm->cd[cm->max])) cm->max--; - assert(cm->max >= 0); + assert(cm->free >= 0); while ((size_t)cm->free > cm->max) cm->free = cm->cd[cm->free].sub; if (cm->free > 0) { @@ -327,9 +355,10 @@ pchr c; co = GETCOLOR(cm, c); sco = newsub(cm, co); - if (sco == COLORLESS) { + if (CISERR()) return COLORLESS; - } + assert(sco != COLORLESS); + if (co == sco) /* already in an open subcolor */ return co; /* rest is redundant */ cm->cd[co].nchrs--; @@ -354,8 +383,10 @@ pcolor co; if (cm->cd[co].nchrs == 1) /* optimization */ return co; sco = newcolor(cm); /* must create subcolor */ - if (sco == COLORLESS) + if (sco == COLORLESS) { + assert(CISERR()); return COLORLESS; + } cm->cd[co].sub = sco; cm->cd[sco].sub = sco; /* open subcolor points to self */ } @@ -583,11 +614,12 @@ struct arc *a; a->colorchain = NULL; /* paranoia */ } +#ifdef NOTDEF /* This isn't used currently. */ + /* - singleton - is this character in its own color? ^ static int singleton(struct colormap *, pchr c); */ -#if 0 static int /* predicate */ singleton(cm, c) struct colormap *cm; @@ -600,7 +632,9 @@ pchr c; return 1; return 0; } -#endif + +#endif /* NOTDEF */ + /* - rainbow - add arcs of all full colors (but one) between specified states ^ static VOID rainbow(struct nfa *, struct colormap *, int, pcolor, @@ -654,6 +688,9 @@ struct state *to; #ifdef REG_DEBUG +/* + ^ #ifdef REG_DEBUG + */ /* - dumpcolors - debugging output @@ -739,4 +776,7 @@ FILE *f; fprintf(f, "\\u%04lx", (long)c); } +/* + ^ #endif + */ #endif /* ifdef REG_DEBUG */ diff --git a/generic/regc_cvec.c b/generic/regc_cvec.c index c79e741..73ccc56 100644 --- a/generic/regc_cvec.c +++ b/generic/regc_cvec.c @@ -1,6 +1,33 @@ /* * Utility functions for handling cvecs * This file is #included by regcomp.c. + * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * */ /* @@ -83,7 +110,8 @@ pchr to; cv->nranges++; } -#ifdef USE_MCCE +#ifdef NOTDEF /* This isn't used currently. */ + /* - addmcce - add an MCCE to a cvec ^ static VOID addmcce(struct cvec *, chr *, chr *); @@ -110,7 +138,8 @@ chr *endp; /* just past end of text */ assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]); cv->nmccechrs += n + 1; } -#endif + +#endif /* NOTDEF */ /* - haschr - does a cvec contain this chr? diff --git a/generic/regc_lex.c b/generic/regc_lex.c index 5b93e0b..d07fe72 100644 --- a/generic/regc_lex.c +++ b/generic/regc_lex.c @@ -1,6 +1,33 @@ /* * lexical analyzer * This file is #included by regcomp.c. + * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * */ /* scanning macros (know about v) */ @@ -977,6 +1004,23 @@ newline() } /* + - ch - return the chr sequence for locale.c's fake collating element ch + * This helps confine use of CHR to this source file. + ^ #ifdef REG_DEBUG + ^ static chr *ch(NOPARMS); + ^ #endif + */ +#ifdef REG_DEBUG +static chr * +ch() +{ + static chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') }; + + return chstr; +} +#endif + +/* - chrnamed - return the chr known by a given (chr string) name * The code is a bit clumsy, but this routine gets only such specialized * use that it hardly matters. diff --git a/generic/regc_locale.c b/generic/regc_locale.c index 82e83e2..98ea392 100644 --- a/generic/regc_locale.c +++ b/generic/regc_locale.c @@ -9,7 +9,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: regc_locale.c,v 1.2 1999/04/16 00:46:37 stanton Exp $ + * RCS: @(#) $Id: regc_locale.c,v 1.3 1999/06/02 01:53:29 stanton Exp $ */ /* ASCII character-name table */ diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index 80e9ac7..897b3eb 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -2,6 +2,34 @@ * NFA utilities. * This file is #included by regcomp.c. * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * * One or two things that technically ought to be in here * are actually in color.c, thanks to some incestuous relationships in * the color chains. @@ -86,16 +114,14 @@ struct nfa *nfa; } /* - - newfstate - allocate an NFA state, with specified flag value - ^ static struct state *newfstate(struct nfa *, int flag); + - newstate - allocate an NFA state, with zero flag value + ^ static struct state *newstate(struct nfa *); */ static struct state * /* NULL on error */ -newfstate(nfa, flag) +newstate(nfa) struct nfa *nfa; -int flag; { struct state *s; - int i; if (nfa->free != NULL) { s = nfa->free; @@ -107,17 +133,13 @@ int flag; return NULL; } s->oas.next = NULL; - s->free = &s->oas.a[0]; - for (i = 0; i < ABSIZE; i++) { - s->oas.a[i].type = 0; - s->oas.a[i].freechain = &s->oas.a[i+1]; - } - s->oas.a[ABSIZE-1].freechain = NULL; + s->free = NULL; + s->noas = 0; } assert(nfa->nstates >= 0); s->no = nfa->nstates++; - s->flag = (char)flag; + s->flag = 0; if (nfa->states == NULL) nfa->states = s; s->nins = 0; @@ -136,14 +158,20 @@ int flag; } /* - - newstate - allocate an ordinary NFA state - ^ static struct state *newstate(struct nfa *); + - newfstate - allocate an NFA state with a specified flag value + ^ static struct state *newfstate(struct nfa *, int flag); */ static struct state * /* NULL on error */ -newstate(nfa) +newfstate(nfa, flag) struct nfa *nfa; +int flag; { - return newfstate(nfa, 0); + struct state *s; + + s = newstate(nfa); + if (s != NULL) + s->flag = (char)flag; + return s; } /* @@ -237,7 +265,7 @@ struct state *to; /* check for duplicates */ for (a = from->outs; a != NULL; a = a->outchain) - if (a->type == t && a->co == co && a->to == to) + if (a->to == to && a->co == co && a->type == t) return; a = allocarc(nfa, from); @@ -283,6 +311,13 @@ struct state *s; struct arcbatch *new; int i; + /* shortcut */ + if (s->free == NULL && s->noas < ABSIZE) { + a = &s->oas.a[s->noas]; + s->noas++; + return a; + } + /* if none at hand, get more */ if (s->free == NULL) { new = (struct arcbatch *)MALLOC(sizeof(struct arcbatch)); @@ -1329,6 +1364,9 @@ FILE *f; } #ifdef REG_DEBUG /* subordinates of dumpnfa */ +/* + ^ #ifdef REG_DEBUG + */ /* - dumpstate - dump an NFA state in human-readable form @@ -1458,6 +1496,9 @@ FILE *f; fprintf(f, "?!?"); /* missing from in-chain */ } +/* + ^ #endif + */ #endif /* ifdef REG_DEBUG */ /* @@ -1491,6 +1532,9 @@ FILE *f; } #ifdef REG_DEBUG /* subordinates of dumpcnfa */ +/* + ^ #ifdef REG_DEBUG + */ /* - dumpcstate - dump a compacted-NFA state in human-readable form @@ -1525,4 +1569,7 @@ FILE *f; fflush(f); } +/* + ^ #endif + */ #endif /* ifdef REG_DEBUG */ diff --git a/generic/regcomp.c b/generic/regcomp.c index 8e1b61c..26b937e 100644 --- a/generic/regcomp.c +++ b/generic/regcomp.c @@ -1,6 +1,33 @@ /* * re_*comp and friends - compile REs * This file #includes several others (see the bottom). + * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * */ #include "regguts.h" @@ -57,6 +84,9 @@ static chr lexdigits _ANSI_ARGS_((struct vars *, int, int, int)); static int brenext _ANSI_ARGS_((struct vars *, pchr)); static VOID skip _ANSI_ARGS_((struct vars *)); static chr newline _ANSI_ARGS_((NOPARMS)); +#ifdef REG_DEBUG +static chr *ch _ANSI_ARGS_((NOPARMS)); +#endif static chr chrnamed _ANSI_ARGS_((struct vars *, chr *, chr *, pchr)); /* === regc_color.c === */ static VOID initcm _ANSI_ARGS_((struct vars *, struct colormap *)); @@ -74,7 +104,7 @@ static VOID subblock _ANSI_ARGS_((struct vars *, pchr, struct state *, struct st static VOID okcolors _ANSI_ARGS_((struct nfa *, struct colormap *)); static VOID colorchain _ANSI_ARGS_((struct colormap *, struct arc *)); static VOID uncolorchain _ANSI_ARGS_((struct colormap *, struct arc *)); -#if 0 +#ifdef NOTDEF /* Avoid compiler warnings. */ static int singleton _ANSI_ARGS_((struct colormap *, pchr c)); #endif static VOID rainbow _ANSI_ARGS_((struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *)); @@ -87,8 +117,8 @@ static VOID dumpchr _ANSI_ARGS_((pchr, FILE *)); /* === regc_nfa.c === */ static struct nfa *newnfa _ANSI_ARGS_((struct vars *, struct colormap *, struct nfa *)); static VOID freenfa _ANSI_ARGS_((struct nfa *)); -static struct state *newfstate _ANSI_ARGS_((struct nfa *, int flag)); static struct state *newstate _ANSI_ARGS_((struct nfa *)); +static struct state *newfstate _ANSI_ARGS_((struct nfa *, int flag)); static VOID dropstate _ANSI_ARGS_((struct nfa *, struct state *)); static VOID freestate _ANSI_ARGS_((struct nfa *, struct state *)); static VOID destroystate _ANSI_ARGS_((struct nfa *, struct state *)); @@ -142,7 +172,7 @@ static struct cvec *newcvec _ANSI_ARGS_((int, int, int)); static struct cvec *clearcvec _ANSI_ARGS_((struct cvec *)); static VOID addchr _ANSI_ARGS_((struct cvec *, pchr)); static VOID addrange _ANSI_ARGS_((struct cvec *, pchr, pchr)); -#ifdef USE_MCCE +#ifdef NOTDEF /* Avoid compiler warnings. */ static VOID addmcce _ANSI_ARGS_((struct cvec *, chr *, chr *)); #endif static int haschr _ANSI_ARGS_((struct cvec *, pchr)); @@ -430,7 +460,7 @@ int wanted; /* want enough room for this one */ memcpy(VS(p), VS(v->subs), v->nsubs * sizeof(struct subre *)); } else - p = (struct subre**)REALLOC(v->subs, n*sizeof(struct subre *)); + p = (struct subre **)REALLOC(v->subs, n*sizeof(struct subre *)); if (p == NULL) { ERR(REG_ESPACE); return; @@ -490,26 +520,26 @@ struct nfa *nfa; struct arc *b; struct state *pre = nfa->pre; struct state *s; - struct state *s2; - struct state *slist; - - /* no loops are needed if it's anchored */ - for (a = pre->outs; a != NULL; a = a->outchain) { + struct state *s2; + struct state *slist; + + /* no loops are needed if it's anchored */ + for (a = pre->outs; a != NULL; a = a->outchain) { assert(a->type == PLAIN); if (a->co != nfa->bos[0] && a->co != nfa->bos[1]) break; - } - if (a != NULL) { + } + if (a != NULL) { /* add implicit .* in front */ rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre); /* and ^* and \Z* too -- not always necessary, but harmless */ newarc(nfa, PLAIN, nfa->bos[0], pre, pre); newarc(nfa, PLAIN, nfa->bos[1], pre, pre); - } - - /* - * Now here's the subtle part. Because many REs have no lookback + } + + /* + * Now here's the subtle part. Because many REs have no lookback * constraints, often knowing when you were in the pre state tells * you little; it's the next state(s) that are informative. But * some of them may have other inarcs, i.e. it may be possible to diff --git a/generic/regcustom.h b/generic/regcustom.h index b1d53a9..8770ac7 100644 --- a/generic/regcustom.h +++ b/generic/regcustom.h @@ -1,3 +1,31 @@ +/* + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + /* headers (which also pick up the standard ones, or equivalents) */ #include "tclInt.h" diff --git a/generic/rege_dfa.c b/generic/rege_dfa.c index eb31ffc6..a13dfa4 100644 --- a/generic/rege_dfa.c +++ b/generic/rege_dfa.c @@ -1,18 +1,46 @@ /* * DFA routines * This file is #included by regexec.c. + * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * */ /* - longest - longest-preferred matching engine - ^ static chr *longest(struct vars *, struct dfa *, chr *, chr *); + ^ static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *); */ static chr * /* endpoint, or NULL */ -longest(v, d, start, stop) +longest(v, d, start, stop, hitstopp) struct vars *v; /* used only for debug and exec flags */ struct dfa *d; chr *start; /* where the match should start */ chr *stop; /* match must end at or before here */ +int *hitstopp; /* record whether hit v->stop, if non-NULL */ { chr *cp; chr *realstop = (stop == v->stop) ? stop : stop + 1; @@ -26,6 +54,8 @@ chr *stop; /* match must end at or before here */ /* initialize */ css = initialize(v, d, start); cp = start; + if (hitstopp != NULL) + *hitstopp = 0; /* startup */ FDEBUG(("+++ startup +++\n")); @@ -74,6 +104,8 @@ chr *stop; /* match must end at or before here */ /* shutdown */ FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets)); if (cp == v->stop && stop == v->stop) { + if (hitstopp != NULL) + *hitstopp = 1; co = d->cnfa->eos[(v->eflags®_NOTEOL) ? 0 : 1]; FDEBUG(("color %ld\n", (long)co)); ss = miss(v, d, css, co, cp, start); @@ -86,7 +118,7 @@ chr *stop; /* match must end at or before here */ /* find last match, if any */ post = d->lastpost; - for (ss = d->ssets, i = 0; i < d->nssused; ss++, i++) + for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) if ((ss->flags&POSTSTATE) && post != ss->lastseen && (post == NULL || post < ss->lastseen)) post = ss->lastseen; @@ -99,16 +131,17 @@ chr *stop; /* match must end at or before here */ /* - shortest - shortest-preferred matching engine ^ static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, - ^ chr **); + ^ chr **, int *); */ static chr * /* endpoint, or NULL */ -shortest(v, d, start, min, max, coldp) -struct vars *v; /* used only for debug and exec flags */ +shortest(v, d, start, min, max, coldp, hitstopp) +struct vars *v; struct dfa *d; chr *start; /* where the match should start */ chr *min; /* match must end at or after here */ chr *max; /* match must end at or before here */ chr **coldp; /* store coldstart pointer here, if nonNULL */ +int *hitstopp; /* record whether hit v->stop, if non-NULL */ { chr *cp; chr *realmin = (min == v->stop) ? min : min + 1; @@ -117,12 +150,12 @@ chr **coldp; /* store coldstart pointer here, if nonNULL */ struct sset *css; struct sset *ss; struct colormap *cm = d->cm; - chr *nopr; - int i; /* initialize */ css = initialize(v, d, start); cp = start; + if (hitstopp != NULL) + *hitstopp = 0; /* startup */ FDEBUG(("--- startup ---\n")); @@ -175,7 +208,11 @@ chr **coldp; /* store coldstart pointer here, if nonNULL */ if (ss == NULL) return NULL; - else if (ss->flags&POSTSTATE) { + + if (coldp != NULL) /* report last no-progress state set, if any */ + *coldp = lastcold(v, d); + + if ((ss->flags&POSTSTATE) && cp > min) { assert(cp >= realmin); cp--; } else if (cp == v->stop && max == v->stop) { @@ -183,21 +220,36 @@ chr **coldp; /* store coldstart pointer here, if nonNULL */ FDEBUG(("color %ld\n", (long)co)); ss = miss(v, d, css, co, cp, start); /* match might have ended at eol */ + if ((ss == NULL || !(ss->flags&POSTSTATE)) && hitstopp != NULL) + *hitstopp = 1; } if (ss == NULL || !(ss->flags&POSTSTATE)) return NULL; - /* find last no-progress state set, if any */ + return cp; +} + +/* + - lastcold - determine last point at which no progress had been made + ^ static chr *lastcold(struct vars *, struct dfa *); + */ +static chr * /* endpoint, or NULL */ +lastcold(v, d) +struct vars *v; +struct dfa *d; +{ + struct sset *ss; + chr *nopr; + int i; + nopr = d->lastnopr; - for (ss = d->ssets, i = 0; i < d->nssused; ss++, i++) - if ((ss->flags&NOPROGRESS) && nopr != ss->lastseen && - (nopr == NULL || nopr < ss->lastseen)) + if (nopr == NULL) + nopr = v->start; + for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) + if ((ss->flags&NOPROGRESS) && nopr < ss->lastseen) nopr = ss->lastseen; - assert(nopr != NULL); - if (coldp != NULL) - *coldp = (nopr == v->start) ? nopr : nopr-1; - return cp; + return nopr; } /* @@ -433,11 +485,6 @@ chr *start; /* where the attempt got started */ /* next, is that in the cache? */ for (p = d->ssets, i = d->nssused; i > 0; p++, i--) if (HIT(h, d->work, p, d->wordsper)) { -#ifndef xxx -p->hash == h && -memcmp(VS(d->work), VS(p->states), - d->wordsper*sizeof(unsigned)) == 0) { -#endif FDEBUG(("cached c%d\n", p - d->ssets)); break; /* NOTE BREAK OUT */ } @@ -488,7 +535,7 @@ pcolor co; /* "color" of the lookahead constraint */ ERR(REG_ESPACE); return 0; } - end = longest(v, d, cp, v->stop); + end = longest(v, d, cp, v->stop, (int *)NULL); freedfa(d); FDEBUG(("=== lacon %d match %d\n", n, (end != NULL))); return (sub->subno) ? (end != NULL) : (end == NULL); diff --git a/generic/regerror.c b/generic/regerror.c index 6779e51..aca13aa 100644 --- a/generic/regerror.c +++ b/generic/regerror.c @@ -1,5 +1,32 @@ /* * regerror - error-code expansion + * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * */ #include "regguts.h" diff --git a/generic/regerrs.h b/generic/regerrs.h index 1b6552c..34c1e68 100644 --- a/generic/regerrs.h +++ b/generic/regerrs.h @@ -1,6 +1,6 @@ { REG_OKAY, "REG_OKAY", "no errors detected" }, { REG_NOMATCH, "REG_NOMATCH", "failed to match" }, -{ REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.2)" }, +{ REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.4)" }, { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" }, { REG_ECTYPE, "REG_ECTYPE", "invalid character class" }, { REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence" }, diff --git a/generic/regex.h b/generic/regex.h index 2f3ebfa..26be33c 100644 --- a/generic/regex.h +++ b/generic/regex.h @@ -3,6 +3,34 @@ /* * regular expressions * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * * Prototypes etc. marked with "^" within comments get gathered up (and * possibly edited) by the regfwd program and inserted near the bottom of * this file. @@ -162,9 +190,9 @@ typedef struct { regoff_t rm_eo; /* end of substring */ } regmatch_t; -/* supplementary control and reporting (placeholder for later work) */ +/* supplementary control and reporting */ typedef struct { - int rm_dummy; + regmatch_t rm_extend; /* see REG_EXPECT */ } rm_detail_t; @@ -194,6 +222,7 @@ typedef struct { #define REG_NLANCH 000200 /* ^ matches after \n, $ before */ #define REG_NEWLINE 000300 /* newlines are line terminators */ #define REG_PEND 000400 /* ugh -- backward-compatibility hack */ +#define REG_EXPECT 001000 /* report details on partial/limited matches */ #define REG_DUMP 004000 /* none of your business :-) */ #define REG_FAKEEC 010000 /* none of your business :-) */ #define REG_PROGRESS 020000 /* none of your business :-) */ diff --git a/generic/regexec.c b/generic/regexec.c index 088d12b..9424385 100644 --- a/generic/regexec.c +++ b/generic/regexec.c @@ -1,5 +1,32 @@ /* * re_*exec and friends - match REs + * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * */ #include "regguts.h" @@ -13,6 +40,7 @@ struct vars { int eflags; /* copies of arguments */ size_t nmatch; regmatch_t *pmatch; + rm_detail_t *details; chr *start; /* start of string */ chr *stop; /* just past end of string */ int err; /* error code if any (0 none) */ @@ -108,8 +136,9 @@ static int crevdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *) static int cbrdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); static int caltdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); /* === rege_dfa.c === */ -static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *)); -static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **)); +static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, int *)); +static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)); +static chr *lastcold _ANSI_ARGS_((struct vars *, struct dfa *)); static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)); static VOID freedfa _ANSI_ARGS_((struct dfa *)); static unsigned hash _ANSI_ARGS_((unsigned *, int)); @@ -133,7 +162,7 @@ exec(re, string, len, details, nmatch, pmatch, flags) regex_t *re; CONST chr *string; size_t len; -rm_detail_t *details; /* hook for future elaboration */ +rm_detail_t *details; size_t nmatch; regmatch_t pmatch[]; int flags; @@ -157,6 +186,8 @@ int flags; /* setup */ v->re = re; v->g = (struct guts *)re->re_guts; + if ((v->g->cflags®_EXPECT) && details == NULL) + return REG_INVARG; if (v->g->unmatchable) return REG_NOMATCH; complications = (v->g->info®_UBACKREF) ? 1 : 0; @@ -178,6 +209,7 @@ int flags; v->nmatch = v->g->nsub + 1; } else v->pmatch = pmatch; + v->details = details; v->start = (chr *)string; v->stop = (chr *)string + len; v->err = 0; @@ -234,55 +266,77 @@ struct colormap *cm; struct dfa *s = newdfa(v, &v->g->search, cm, &sa); chr *begin; chr *end; + chr *cold; chr *open; /* open and close of range of possible starts */ chr *close; + int hitend; - if (d == NULL) - return v->err; - if (s == NULL) { - freedfa(d); + if (ISERR()) { /* should be newdfa() failure */ + if (d != NULL) + freedfa(d); + if (s != NULL) + freedfa(s); return v->err; } - close = v->start; - do { - MDEBUG(("\nsearch at %ld\n", LOFF(close))); - close = shortest(v, s, close, close, v->stop, &open); - if (close == NULL) - break; /* NOTE BREAK */ - if (v->nmatch == 0) { - /* don't need exact location */ - freedfa(d); - freedfa(s); - return REG_OKAY; - } - MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close))); - for (begin = open; begin <= close; begin++) { - MDEBUG(("\nfind trying at %ld\n", LOFF(begin))); - end = longest(v, d, begin, v->stop); - if (end != NULL) { - assert(v->nmatch > 0); - v->pmatch[0].rm_so = OFF(begin); - v->pmatch[0].rm_eo = OFF(end); - freedfa(d); - freedfa(s); - if (v->nmatch > 1) { - zapsubs(v->pmatch, v->nmatch); - return dissect(v, v->g->tree, begin, - end); - } - if (ISERR()) - return v->err; - return REG_OKAY; - } - } - } while (close < v->stop); + /* first, a shot with the search RE */ + MDEBUG(("\nsearch at %ld\n", LOFF(v->start))); + cold = NULL; + close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *)NULL); + freedfa(s); + if (v->g->cflags®_EXPECT) { + assert(v->details != NULL); + if (cold != NULL) + v->details->rm_extend.rm_so = OFF(cold); + else + v->details->rm_extend.rm_so = OFF(v->stop); + v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ + } + if (close == NULL) { /* not found */ + freedfa(d); + if (ISERR()) + return v->err; + return REG_NOMATCH; + } + if (v->nmatch == 0) { /* found, don't need exact location */ + freedfa(d); + return REG_OKAY; + } + /* find starting point */ + assert(cold != NULL); + open = cold; + cold = NULL; + MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close))); + for (begin = open; begin <= close; begin++) { + MDEBUG(("\nfind trying at %ld\n", LOFF(begin))); + end = longest(v, d, begin, v->stop, &hitend); + if (hitend && cold == NULL) + cold = begin; + if (end != NULL) + break; /* NOTE BREAK OUT */ + } + assert(end != NULL); /* search RE succeeded so loop should */ freedfa(d); - freedfa(s); + + /* and pin down details */ + assert(v->nmatch > 0); + v->pmatch[0].rm_so = OFF(begin); + v->pmatch[0].rm_eo = OFF(end); + if (v->g->cflags®_EXPECT) { + if (cold != NULL) + v->details->rm_extend.rm_so = OFF(cold); + else + v->details->rm_extend.rm_eo = OFF(v->stop); + v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ + } + if (v->nmatch > 1) { + zapsubs(v->pmatch, v->nmatch); + return dissect(v, v->g->tree, begin, end); + } if (ISERR()) return v->err; - return REG_NOMATCH; + return REG_OKAY; } /* @@ -301,12 +355,15 @@ struct colormap *cm; struct dfa *s = newdfa(v, &v->g->search, cm, &sa); chr *begin; chr *end; + chr *cold; chr *open; /* open and close of range of possible starts */ chr *close; chr *estart; chr *estop; int er; int shorter = v->g->tree->flags&SHORTER; + int ret = REG_NOMATCH; + int hitend; if (d == NULL) return v->err; @@ -315,12 +372,16 @@ struct colormap *cm; return v->err; } + cold = NULL; close = v->start; do { MDEBUG(("\ncsearch at %ld\n", LOFF(close))); - close = shortest(v, s, close, close, v->stop, &open); + close = shortest(v, s, close, close, v->stop, &cold, (int *)NULL); if (close == NULL) break; /* NOTE BREAK */ + assert(cold != NULL); + open = cold; + cold = NULL; MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close))); for (begin = open; begin <= close; begin++) { MDEBUG(("\ncfind trying at %ld\n", LOFF(begin))); @@ -329,47 +390,39 @@ struct colormap *cm; for (;;) { if (shorter) end = shortest(v, d, begin, estart, - estop, (chr **)NULL); + estop, (chr **)NULL, &hitend); else - end = longest(v, d, begin, estop); + end = longest(v, d, begin, estop, + &hitend); + if (hitend && cold == NULL) + cold = begin; if (end == NULL) break; /* NOTE BREAK OUT */ MDEBUG(("tentative end %ld\n", LOFF(end))); zapsubs(v->pmatch, v->nmatch); zapmem(v, v->g->tree); er = cdissect(v, v->g->tree, begin, end); - switch (er) { - case REG_OKAY: + if (er == REG_OKAY) { if (v->nmatch > 0) { v->pmatch[0].rm_so = OFF(begin); v->pmatch[0].rm_eo = OFF(end); } - freedfa(d); - freedfa(s); - if (ISERR()) - return v->err; - return REG_OKAY; - break; - case REG_NOMATCH: - /* go around and try again */ - if ((shorter) ? end == estop : - end == begin) { - /* no point in trying again */ - freedfa(s); - freedfa(d); - return REG_NOMATCH; - } - if (shorter) - estart = end + 1; - else - estop = end - 1; - break; - default: - freedfa(d); - freedfa(s); - return er; - break; + ret = REG_OKAY; + break; /* NOTE BREAK OUT */ + } + if (er != REG_NOMATCH) { + ERR(er); + break; /* NOTE BREAK OUT */ } + if ((shorter) ? end == estop : end == begin) { + /* no point in trying again */ + break; /* NOTE BREAK OUT */ + } + /* go around and try again */ + if (shorter) + estart = end + 1; + else + estop = end - 1; } } } while (close < v->stop); @@ -378,7 +431,15 @@ struct colormap *cm; freedfa(s); if (ISERR()) return v->err; - return REG_NOMATCH; + if (v->g->cflags®_EXPECT) { + assert(v->details != NULL); + if (cold != NULL) + v->details->rm_extend.rm_so = OFF(cold); + else + v->details->rm_extend.rm_so = OFF(v->stop); + v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ + } + return ret; } /* @@ -520,7 +581,7 @@ chr *end; /* end of same */ } /* pick a tentative midpoint */ - mid = longest(v, d, begin, end); + mid = longest(v, d, begin, end, (int *)NULL); if (mid == NULL) { freedfa(d); freedfa(d2); @@ -529,7 +590,7 @@ chr *end; /* end of same */ MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); /* iterate until satisfaction or failure */ - while (longest(v, d2, mid, end) != end) { + while (longest(v, d2, mid, end, (int *)NULL) != end) { /* that midpoint didn't work, find a new one */ if (mid == begin) { /* all possibilities exhausted! */ @@ -538,7 +599,7 @@ chr *end; /* end of same */ freedfa(d2); return REG_ASSERT; } - mid = longest(v, d, begin, mid-1); + mid = longest(v, d, begin, mid-1, (int *)NULL); if (mid == NULL) { /* failed to find a new one! */ MDEBUG(("failed midpoint!\n")); @@ -583,7 +644,7 @@ chr *end; /* end of same */ d = newdfa(v, &t->left->cnfa, &v->g->cmap, &da); if (ISERR()) return v->err; - if (longest(v, d, begin, end) == end) { + if (longest(v, d, begin, end, (int *)NULL) == end) { MDEBUG(("success\n")); freedfa(d); return dissect(v, t->left, begin, end); @@ -681,7 +742,7 @@ chr *end; /* end of same */ /* pick a tentative midpoint */ if (v->mem[t->retry] == 0) { - mid = longest(v, d, begin, end); + mid = longest(v, d, begin, end, (int *)NULL); if (mid == NULL) { freedfa(d); freedfa(d2); @@ -698,7 +759,8 @@ chr *end; /* end of same */ for (;;) { /* try this midpoint on for size */ er = cdissect(v, t->left, begin, mid); - if (er == REG_OKAY && longest(v, d2, mid, end) == end && + if (er == REG_OKAY && + longest(v, d2, mid, end, (int *)NULL) == end && (er = cdissect(v, t->right, mid, end)) == REG_OKAY) break; /* NOTE BREAK OUT */ @@ -716,7 +778,7 @@ chr *end; /* end of same */ freedfa(d2); return REG_NOMATCH; } - mid = longest(v, d, begin, mid-1); + mid = longest(v, d, begin, mid-1, (int *)NULL); if (mid == NULL) { /* failed to find a new one */ MDEBUG(("%d failed midpoint\n", t->retry)); @@ -775,7 +837,7 @@ chr *end; /* end of same */ /* pick a tentative midpoint */ if (v->mem[t->retry] == 0) { - mid = shortest(v, d, begin, begin, end, (chr **)NULL); + mid = shortest(v, d, begin, begin, end, (chr **)NULL, (int *)NULL); if (mid == NULL) { freedfa(d); freedfa(d2); @@ -792,7 +854,8 @@ chr *end; /* end of same */ for (;;) { /* try this midpoint on for size */ er = cdissect(v, t->left, begin, mid); - if (er == REG_OKAY && longest(v, d2, mid, end) == end && + if (er == REG_OKAY && + longest(v, d2, mid, end, (int *)NULL) == end && (er = cdissect(v, t->right, mid, end)) == REG_OKAY) break; /* NOTE BREAK OUT */ @@ -810,7 +873,7 @@ chr *end; /* end of same */ freedfa(d2); return REG_NOMATCH; } - mid = shortest(v, d, begin, mid+1, end, (chr **)NULL); + mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, (int *)NULL); if (mid == NULL) { /* failed to find a new one */ MDEBUG(("%d failed midpoint\n", t->retry)); @@ -929,7 +992,7 @@ chr *end; /* end of same */ d = newdfa(v, &t->left->cnfa, &v->g->cmap, &da); if (ISERR()) return v->err; - if (longest(v, d, begin, end) != end) { + if (longest(v, d, begin, end, (int *)NULL) != end) { freedfa(d); v->mem[t->retry] = TRIED; return caltdissect(v, t->right, begin, end); diff --git a/generic/regfree.c b/generic/regfree.c index a5c3f0b..17a7389 100644 --- a/generic/regfree.c +++ b/generic/regfree.c @@ -1,6 +1,34 @@ /* * regfree - free an RE * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * * You might think that this could be incorporated into regcomp.c, and * that would be a reasonable idea... except that this is a generic * function (with a generic name), applicable to all compiled REs diff --git a/generic/regfronts.c b/generic/regfronts.c index a9bd556..82f48e2 100644 --- a/generic/regfronts.c +++ b/generic/regfronts.c @@ -3,6 +3,33 @@ * * Mostly for implementation of backward-compatibility kludges. Note * that these routines exist ONLY in char versions. + * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * */ #include "regguts.h" diff --git a/generic/regguts.h b/generic/regguts.h index badd8d4..29b1721 100644 --- a/generic/regguts.h +++ b/generic/regguts.h @@ -1,5 +1,31 @@ /* * Internal interface definitions, etc., for the regex package + * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ @@ -28,11 +54,11 @@ /* assertions */ #ifndef assert -#ifndef REG_DEBUG -#define NDEBUG +# ifndef REG_DEBUG +# define NDEBUG /* no assertions */ +# endif #include <assert.h> #endif -#endif /* voids */ #ifndef VOID @@ -278,6 +304,7 @@ struct state { struct state *next; /* chain for traversing all */ struct state *prev; /* back chain */ struct arcbatch oas; /* first arcbatch, avoid malloc in easy case */ + int noas; /* number of arcs used in first arcbatch */ }; struct nfa { diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index c41976a..fdc89b7 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -13,7 +13,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.10 1999/05/25 01:00:25 stanton Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.11 1999/06/02 01:53:31 stanton Exp $ */ #include "tclInt.h" @@ -107,9 +107,7 @@ Tcl_PwdObjCmd(dummy, interp, objc, objv) * Tcl_RegexpObjCmd -- * * This procedure is invoked to process the "regexp" Tcl command. - * See the user documentation for details on what it does. The - * REGEXP_TEST stuff is to minimize code differences between this - * and the "testregexp" command. + * See the user documentation for details on what it does. * * Results: * A standard Tcl result. diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c index 22e1db0..2318780 100644 --- a/generic/tclRegexp.c +++ b/generic/tclRegexp.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclRegexp.c,v 1.5 1999/05/22 01:20:13 stanton Exp $ + * RCS: @(#) $Id: tclRegexp.c,v 1.6 1999/06/02 01:53:31 stanton Exp $ */ #include "tclInt.h" @@ -287,13 +287,14 @@ TclRegExpExecUniChar(interp, re, wString, numChars, nmatches, flags) { int status; TclRegexp *regexpPtr = (TclRegexp *) re; - size_t nm = regexpPtr->re.re_nsub + 1; + size_t last = regexpPtr->re.re_nsub + 1; + size_t nm = last; if (nmatches >= 0 && (size_t) nmatches < nm) nm = (size_t) nmatches; status = TclReExec(®expPtr->re, wString, (size_t) numChars, - (rm_detail_t *)NULL, nm, regexpPtr->matches, flags); + ®expPtr->details, nm, regexpPtr->matches, flags); /* * Check for errors. @@ -318,12 +319,13 @@ TclRegExpExecUniChar(interp, re, wString, numChars, nmatches, flags) * TclRegExpRangeUniChar -- * * Returns pointers describing the range of a regular expression match, - * or one of the subranges within the match. + * or one of the subranges within the match, or the hypothetical range + * represented by the rm_extend field of the rm_detail_t. * * Results: * The variables at *startPtr and *endPtr are modified to hold the - * addresses of the endpoints of the range given by index. If the - * specified range doesn't exist then NULLs are returned. + * offsets of the endpoints of the range given by index. If the + * specified range doesn't exist then -1s are supplied. * * Side effects: * None. @@ -337,7 +339,8 @@ TclRegExpRangeUniChar(re, index, startPtr, endPtr) * been passed to Tcl_RegExpExec. */ int index; /* 0 means give the range of the entire * match, > 0 means give the range of - * a matching subrange. */ + * a matching subrange, -1 means the + * range of the rm_extend field. */ int *startPtr; /* Store address of first character in * (sub-) range here. */ int *endPtr; /* Store address of character just after last @@ -345,7 +348,10 @@ TclRegExpRangeUniChar(re, index, startPtr, endPtr) { TclRegexp *regexpPtr = (TclRegexp *) re; - if ((size_t) index > regexpPtr->re.re_nsub) { + if ((regexpPtr->flags®_EXPECT) && index == -1) { + *startPtr = regexpPtr->details.rm_extend.rm_so; + *endPtr = regexpPtr->details.rm_extend.rm_eo; + } else if ((size_t) index > regexpPtr->re.re_nsub) { *startPtr = -1; *endPtr = -1; } else { diff --git a/generic/tclRegexp.h b/generic/tclRegexp.h index 2745900..7a6fb2a 100644 --- a/generic/tclRegexp.h +++ b/generic/tclRegexp.h @@ -4,36 +4,13 @@ * This file contains definitions used internally by Henry * Spencer's regular expression code. * - * Copyright (c) 1998 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and - * Scriptics Corporation, none of whom are responsible for the results. - * The author thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * * Copyright (c) 1998 by Sun Microsystems, Inc. * Copyright (c) 1998-1999 by Scriptics Corporation. * * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclRegexp.h,v 1.7 1999/05/14 00:20:02 stanton Exp $ + * RCS: @(#) $Id: tclRegexp.h,v 1.8 1999/06/02 01:53:31 stanton Exp $ */ #ifndef _TCLREGEXP @@ -64,6 +41,8 @@ typedef struct TclRegexp { * representation of the last string matched * with this regexp to indicate the location * of subexpressions. */ + rm_detail_t details; /* Detailed information on match (currently + * used only for REG_EXPECT). */ int refCount; /* Count of number of references to this * compiled regexp. */ } TclRegexp; diff --git a/generic/tclTest.c b/generic/tclTest.c index 6506285..efc2520 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -13,7 +13,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclTest.c,v 1.12 1999/04/21 21:50:28 rjohnson Exp $ + * RCS: @(#) $Id: tclTest.c,v 1.13 1999/06/02 01:53:31 stanton Exp $ */ #define TCL_TEST @@ -2508,8 +2508,8 @@ TestparsevarnameObjCmd(clientData, interp, objc, objv) * * This procedure implements the "testregexp" command. It is * used to give a direct interface for regexp flags. It's identical - * to Tcl_RegexpObjCmd except for the REGEXP_TEST define, which - * enables the -xflags option. + * to Tcl_RegexpObjCmd except for the -xflags option, and the + * consequences thereof (including the REG_EXPECT kludge). * * Results: * A standard Tcl result. @@ -2528,33 +2528,24 @@ TestregexpObjCmd(dummy, interp, objc, objv) int objc; /* Number of arguments. */ Tcl_Obj *CONST objv[]; /* Argument objects. */ { - int i, result, indices, stringLength, wLen, match, about; + int i, ii, result, indices, stringLength, wLen, match, about; int hasxflags, cflags, eflags; Tcl_RegExp regExpr; char *string; Tcl_DString stringBuffer, valueBuffer; Tcl_UniChar *wStart; -# define REGEXP_TEST /* yes */ static char *options[] = { "-indices", "-nocase", "-about", "-expanded", "-line", "-linestop", "-lineanchor", -#ifdef REGEXP_TEST "-xflags", -#endif "--", (char *) NULL }; enum options { REGEXP_INDICES, REGEXP_NOCASE, REGEXP_ABOUT, REGEXP_EXPANDED, REGEXP_MULTI, REGEXP_NOCROSS, REGEXP_NEWL, -#ifdef REGEXP_TEST REGEXP_XFLAGS, -#endif REGEXP_LAST }; -#ifndef REGEXP_TEST -# define REGEXP_XFLAGS -1 /* impossible value */ -# define TestregexpXflags(a,b,c,d) /* do nothing */ -#endif indices = 0; about = 0; @@ -2662,6 +2653,21 @@ TestregexpObjCmd(dummy, interp, objc, objv) */ Tcl_SetIntObj(Tcl_GetObjResult(interp), 0); + if (objc > 2 && (cflags®_EXPECT) && indices) { + char *varName, *value; + int start, end; + char info[TCL_INTEGER_SPACE * 2]; + + varName = Tcl_GetString(objv[2]); + TclRegExpRangeUniChar(regExpr, -1, &start, &end); + sprintf(info, "%d %d", start, end-1); + value = Tcl_SetVar(interp, varName, info, 0); + if (value == NULL) { + Tcl_AppendResult(interp, "couldn't set variable \"", + varName, "\"", (char *) NULL); + result = TCL_ERROR; + } + } goto done; } @@ -2679,7 +2685,8 @@ TestregexpObjCmd(dummy, interp, objc, objv) varName = Tcl_GetString(objv[i]); - TclRegExpRangeUniChar(regExpr, i, &start, &end); + ii = ((cflags®_EXPECT) && i == objc-1) ? -1 : i; + TclRegExpRangeUniChar(regExpr, ii, &start, &end); if (start < 0) { if (indices) { value = Tcl_SetVar(interp, varName, "-1 -1", 0); @@ -2689,7 +2696,7 @@ TestregexpObjCmd(dummy, interp, objc, objv) } else { if (indices) { char info[TCL_INTEGER_SPACE * 2]; - + sprintf(info, "%d %d", start, end - 1); value = Tcl_SetVar(interp, varName, info, 0); } else { @@ -2736,7 +2743,7 @@ TestregexpObjCmd(dummy, interp, objc, objv) *---------------------------------------------------------------------- */ -static void +static VOID TestregexpXflags(string, length, cflagsPtr, eflagsPtr) char *string; /* The string of flags. */ int length; /* The length of the string in bytes. */ @@ -2801,6 +2808,10 @@ TestregexpXflags(string, length, cflagsPtr, eflagsPtr) eflags |= REG_NOTEOL; break; } + case '?': { + cflags |= REG_EXPECT; + break; + } case '%': { eflags |= REG_SMALL; break; |