summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorstanton <stanton>1999-06-02 01:53:29 (GMT)
committerstanton <stanton>1999-06-02 01:53:29 (GMT)
commit21c3baed52eba908b3e56f33f9d0f9541495616e (patch)
tree02591dac73223fda831014e215800dee4b75d811
parenta045db4812c23138a2f998a7f827f46a30558a79 (diff)
downloadtcl-21c3baed52eba908b3e56f33f9d0f9541495616e.zip
tcl-21c3baed52eba908b3e56f33f9d0f9541495616e.tar.gz
tcl-21c3baed52eba908b3e56f33f9d0f9541495616e.tar.bz2
* tests/reg.test:
* generic/regc_color.c: * generic/regc_cvec.c: * generic/regc_lex.c: * generic/regc_locale.c: * generic/regc_nfa.c: * generic/regcomp.c: * generic/regcustom.h: * generic/rege_dfa.c: * generic/regerror.c: * generic/regerrs.h: * generic/regex.h: * generic/regexec.c: * generic/regfree.c: * generic/regfronts.c: * generic/regguts.h: * generic/tclCmdMZ.c: * generic/tclRegexp.c: * generic/tclRegexp.h: * generic/tclTest.c: Applied Henry Spencer's latest regexp patches that fix an infinite loop bug and add support for testing whether a string could match with additional input. [Bug: 2117]
-rw-r--r--generic/regc_color.c52
-rw-r--r--generic/regc_cvec.c33
-rw-r--r--generic/regc_lex.c44
-rw-r--r--generic/regc_locale.c2
-rw-r--r--generic/regc_nfa.c81
-rw-r--r--generic/regcomp.c60
-rw-r--r--generic/regcustom.h28
-rw-r--r--generic/rege_dfa.c93
-rw-r--r--generic/regerror.c27
-rw-r--r--generic/regerrs.h2
-rw-r--r--generic/regex.h33
-rw-r--r--generic/regexec.c231
-rw-r--r--generic/regfree.c28
-rw-r--r--generic/regfronts.c27
-rw-r--r--generic/regguts.h33
-rw-r--r--generic/tclCmdMZ.c6
-rw-r--r--generic/tclRegexp.c22
-rw-r--r--generic/tclRegexp.h27
-rw-r--r--generic/tclTest.c43
-rw-r--r--tests/reg.test61
20 files changed, 716 insertions, 217 deletions
diff --git a/generic/regc_color.c b/generic/regc_color.c
index e86fea0..503bca2 100644
--- a/generic/regc_color.c
+++ b/generic/regc_color.c
@@ -2,6 +2,34 @@
* colorings of characters
* This file is #included by regcomp.c.
*
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ *
* Note that there are some incestuous relationships between this code and
* NFA arc maintenance, which perhaps ought to be cleaned up sometime.
*/
@@ -271,7 +299,7 @@ pcolor co;
if ((size_t)co == cm->max) {
while (cm->max > WHITE && UNUSEDCOLOR(&cm->cd[cm->max]))
cm->max--;
- assert(cm->max >= 0);
+ assert(cm->free >= 0);
while ((size_t)cm->free > cm->max)
cm->free = cm->cd[cm->free].sub;
if (cm->free > 0) {
@@ -327,9 +355,10 @@ pchr c;
co = GETCOLOR(cm, c);
sco = newsub(cm, co);
- if (sco == COLORLESS) {
+ if (CISERR())
return COLORLESS;
- }
+ assert(sco != COLORLESS);
+
if (co == sco) /* already in an open subcolor */
return co; /* rest is redundant */
cm->cd[co].nchrs--;
@@ -354,8 +383,10 @@ pcolor co;
if (cm->cd[co].nchrs == 1) /* optimization */
return co;
sco = newcolor(cm); /* must create subcolor */
- if (sco == COLORLESS)
+ if (sco == COLORLESS) {
+ assert(CISERR());
return COLORLESS;
+ }
cm->cd[co].sub = sco;
cm->cd[sco].sub = sco; /* open subcolor points to self */
}
@@ -583,11 +614,12 @@ struct arc *a;
a->colorchain = NULL; /* paranoia */
}
+#ifdef NOTDEF /* This isn't used currently. */
+
/*
- singleton - is this character in its own color?
^ static int singleton(struct colormap *, pchr c);
*/
-#if 0
static int /* predicate */
singleton(cm, c)
struct colormap *cm;
@@ -600,7 +632,9 @@ pchr c;
return 1;
return 0;
}
-#endif
+
+#endif /* NOTDEF */
+
/*
- rainbow - add arcs of all full colors (but one) between specified states
^ static VOID rainbow(struct nfa *, struct colormap *, int, pcolor,
@@ -654,6 +688,9 @@ struct state *to;
#ifdef REG_DEBUG
+/*
+ ^ #ifdef REG_DEBUG
+ */
/*
- dumpcolors - debugging output
@@ -739,4 +776,7 @@ FILE *f;
fprintf(f, "\\u%04lx", (long)c);
}
+/*
+ ^ #endif
+ */
#endif /* ifdef REG_DEBUG */
diff --git a/generic/regc_cvec.c b/generic/regc_cvec.c
index c79e741..73ccc56 100644
--- a/generic/regc_cvec.c
+++ b/generic/regc_cvec.c
@@ -1,6 +1,33 @@
/*
* Utility functions for handling cvecs
* This file is #included by regcomp.c.
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
*/
/*
@@ -83,7 +110,8 @@ pchr to;
cv->nranges++;
}
-#ifdef USE_MCCE
+#ifdef NOTDEF /* This isn't used currently. */
+
/*
- addmcce - add an MCCE to a cvec
^ static VOID addmcce(struct cvec *, chr *, chr *);
@@ -110,7 +138,8 @@ chr *endp; /* just past end of text */
assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]);
cv->nmccechrs += n + 1;
}
-#endif
+
+#endif /* NOTDEF */
/*
- haschr - does a cvec contain this chr?
diff --git a/generic/regc_lex.c b/generic/regc_lex.c
index 5b93e0b..d07fe72 100644
--- a/generic/regc_lex.c
+++ b/generic/regc_lex.c
@@ -1,6 +1,33 @@
/*
* lexical analyzer
* This file is #included by regcomp.c.
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
*/
/* scanning macros (know about v) */
@@ -977,6 +1004,23 @@ newline()
}
/*
+ - ch - return the chr sequence for locale.c's fake collating element ch
+ * This helps confine use of CHR to this source file.
+ ^ #ifdef REG_DEBUG
+ ^ static chr *ch(NOPARMS);
+ ^ #endif
+ */
+#ifdef REG_DEBUG
+static chr *
+ch()
+{
+ static chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') };
+
+ return chstr;
+}
+#endif
+
+/*
- chrnamed - return the chr known by a given (chr string) name
* The code is a bit clumsy, but this routine gets only such specialized
* use that it hardly matters.
diff --git a/generic/regc_locale.c b/generic/regc_locale.c
index 82e83e2..98ea392 100644
--- a/generic/regc_locale.c
+++ b/generic/regc_locale.c
@@ -9,7 +9,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: regc_locale.c,v 1.2 1999/04/16 00:46:37 stanton Exp $
+ * RCS: @(#) $Id: regc_locale.c,v 1.3 1999/06/02 01:53:29 stanton Exp $
*/
/* ASCII character-name table */
diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c
index 80e9ac7..897b3eb 100644
--- a/generic/regc_nfa.c
+++ b/generic/regc_nfa.c
@@ -2,6 +2,34 @@
* NFA utilities.
* This file is #included by regcomp.c.
*
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ *
* One or two things that technically ought to be in here
* are actually in color.c, thanks to some incestuous relationships in
* the color chains.
@@ -86,16 +114,14 @@ struct nfa *nfa;
}
/*
- - newfstate - allocate an NFA state, with specified flag value
- ^ static struct state *newfstate(struct nfa *, int flag);
+ - newstate - allocate an NFA state, with zero flag value
+ ^ static struct state *newstate(struct nfa *);
*/
static struct state * /* NULL on error */
-newfstate(nfa, flag)
+newstate(nfa)
struct nfa *nfa;
-int flag;
{
struct state *s;
- int i;
if (nfa->free != NULL) {
s = nfa->free;
@@ -107,17 +133,13 @@ int flag;
return NULL;
}
s->oas.next = NULL;
- s->free = &s->oas.a[0];
- for (i = 0; i < ABSIZE; i++) {
- s->oas.a[i].type = 0;
- s->oas.a[i].freechain = &s->oas.a[i+1];
- }
- s->oas.a[ABSIZE-1].freechain = NULL;
+ s->free = NULL;
+ s->noas = 0;
}
assert(nfa->nstates >= 0);
s->no = nfa->nstates++;
- s->flag = (char)flag;
+ s->flag = 0;
if (nfa->states == NULL)
nfa->states = s;
s->nins = 0;
@@ -136,14 +158,20 @@ int flag;
}
/*
- - newstate - allocate an ordinary NFA state
- ^ static struct state *newstate(struct nfa *);
+ - newfstate - allocate an NFA state with a specified flag value
+ ^ static struct state *newfstate(struct nfa *, int flag);
*/
static struct state * /* NULL on error */
-newstate(nfa)
+newfstate(nfa, flag)
struct nfa *nfa;
+int flag;
{
- return newfstate(nfa, 0);
+ struct state *s;
+
+ s = newstate(nfa);
+ if (s != NULL)
+ s->flag = (char)flag;
+ return s;
}
/*
@@ -237,7 +265,7 @@ struct state *to;
/* check for duplicates */
for (a = from->outs; a != NULL; a = a->outchain)
- if (a->type == t && a->co == co && a->to == to)
+ if (a->to == to && a->co == co && a->type == t)
return;
a = allocarc(nfa, from);
@@ -283,6 +311,13 @@ struct state *s;
struct arcbatch *new;
int i;
+ /* shortcut */
+ if (s->free == NULL && s->noas < ABSIZE) {
+ a = &s->oas.a[s->noas];
+ s->noas++;
+ return a;
+ }
+
/* if none at hand, get more */
if (s->free == NULL) {
new = (struct arcbatch *)MALLOC(sizeof(struct arcbatch));
@@ -1329,6 +1364,9 @@ FILE *f;
}
#ifdef REG_DEBUG /* subordinates of dumpnfa */
+/*
+ ^ #ifdef REG_DEBUG
+ */
/*
- dumpstate - dump an NFA state in human-readable form
@@ -1458,6 +1496,9 @@ FILE *f;
fprintf(f, "?!?"); /* missing from in-chain */
}
+/*
+ ^ #endif
+ */
#endif /* ifdef REG_DEBUG */
/*
@@ -1491,6 +1532,9 @@ FILE *f;
}
#ifdef REG_DEBUG /* subordinates of dumpcnfa */
+/*
+ ^ #ifdef REG_DEBUG
+ */
/*
- dumpcstate - dump a compacted-NFA state in human-readable form
@@ -1525,4 +1569,7 @@ FILE *f;
fflush(f);
}
+/*
+ ^ #endif
+ */
#endif /* ifdef REG_DEBUG */
diff --git a/generic/regcomp.c b/generic/regcomp.c
index 8e1b61c..26b937e 100644
--- a/generic/regcomp.c
+++ b/generic/regcomp.c
@@ -1,6 +1,33 @@
/*
* re_*comp and friends - compile REs
* This file #includes several others (see the bottom).
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
*/
#include "regguts.h"
@@ -57,6 +84,9 @@ static chr lexdigits _ANSI_ARGS_((struct vars *, int, int, int));
static int brenext _ANSI_ARGS_((struct vars *, pchr));
static VOID skip _ANSI_ARGS_((struct vars *));
static chr newline _ANSI_ARGS_((NOPARMS));
+#ifdef REG_DEBUG
+static chr *ch _ANSI_ARGS_((NOPARMS));
+#endif
static chr chrnamed _ANSI_ARGS_((struct vars *, chr *, chr *, pchr));
/* === regc_color.c === */
static VOID initcm _ANSI_ARGS_((struct vars *, struct colormap *));
@@ -74,7 +104,7 @@ static VOID subblock _ANSI_ARGS_((struct vars *, pchr, struct state *, struct st
static VOID okcolors _ANSI_ARGS_((struct nfa *, struct colormap *));
static VOID colorchain _ANSI_ARGS_((struct colormap *, struct arc *));
static VOID uncolorchain _ANSI_ARGS_((struct colormap *, struct arc *));
-#if 0
+#ifdef NOTDEF /* Avoid compiler warnings. */
static int singleton _ANSI_ARGS_((struct colormap *, pchr c));
#endif
static VOID rainbow _ANSI_ARGS_((struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *));
@@ -87,8 +117,8 @@ static VOID dumpchr _ANSI_ARGS_((pchr, FILE *));
/* === regc_nfa.c === */
static struct nfa *newnfa _ANSI_ARGS_((struct vars *, struct colormap *, struct nfa *));
static VOID freenfa _ANSI_ARGS_((struct nfa *));
-static struct state *newfstate _ANSI_ARGS_((struct nfa *, int flag));
static struct state *newstate _ANSI_ARGS_((struct nfa *));
+static struct state *newfstate _ANSI_ARGS_((struct nfa *, int flag));
static VOID dropstate _ANSI_ARGS_((struct nfa *, struct state *));
static VOID freestate _ANSI_ARGS_((struct nfa *, struct state *));
static VOID destroystate _ANSI_ARGS_((struct nfa *, struct state *));
@@ -142,7 +172,7 @@ static struct cvec *newcvec _ANSI_ARGS_((int, int, int));
static struct cvec *clearcvec _ANSI_ARGS_((struct cvec *));
static VOID addchr _ANSI_ARGS_((struct cvec *, pchr));
static VOID addrange _ANSI_ARGS_((struct cvec *, pchr, pchr));
-#ifdef USE_MCCE
+#ifdef NOTDEF /* Avoid compiler warnings. */
static VOID addmcce _ANSI_ARGS_((struct cvec *, chr *, chr *));
#endif
static int haschr _ANSI_ARGS_((struct cvec *, pchr));
@@ -430,7 +460,7 @@ int wanted; /* want enough room for this one */
memcpy(VS(p), VS(v->subs),
v->nsubs * sizeof(struct subre *));
} else
- p = (struct subre**)REALLOC(v->subs, n*sizeof(struct subre *));
+ p = (struct subre **)REALLOC(v->subs, n*sizeof(struct subre *));
if (p == NULL) {
ERR(REG_ESPACE);
return;
@@ -490,26 +520,26 @@ struct nfa *nfa;
struct arc *b;
struct state *pre = nfa->pre;
struct state *s;
- struct state *s2;
- struct state *slist;
-
- /* no loops are needed if it's anchored */
- for (a = pre->outs; a != NULL; a = a->outchain) {
+ struct state *s2;
+ struct state *slist;
+
+ /* no loops are needed if it's anchored */
+ for (a = pre->outs; a != NULL; a = a->outchain) {
assert(a->type == PLAIN);
if (a->co != nfa->bos[0] && a->co != nfa->bos[1])
break;
- }
- if (a != NULL) {
+ }
+ if (a != NULL) {
/* add implicit .* in front */
rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre);
/* and ^* and \Z* too -- not always necessary, but harmless */
newarc(nfa, PLAIN, nfa->bos[0], pre, pre);
newarc(nfa, PLAIN, nfa->bos[1], pre, pre);
- }
-
- /*
- * Now here's the subtle part. Because many REs have no lookback
+ }
+
+ /*
+ * Now here's the subtle part. Because many REs have no lookback
* constraints, often knowing when you were in the pre state tells
* you little; it's the next state(s) that are informative. But
* some of them may have other inarcs, i.e. it may be possible to
diff --git a/generic/regcustom.h b/generic/regcustom.h
index b1d53a9..8770ac7 100644
--- a/generic/regcustom.h
+++ b/generic/regcustom.h
@@ -1,3 +1,31 @@
+/*
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
/* headers (which also pick up the standard ones, or equivalents) */
#include "tclInt.h"
diff --git a/generic/rege_dfa.c b/generic/rege_dfa.c
index eb31ffc6..a13dfa4 100644
--- a/generic/rege_dfa.c
+++ b/generic/rege_dfa.c
@@ -1,18 +1,46 @@
/*
* DFA routines
* This file is #included by regexec.c.
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
*/
/*
- longest - longest-preferred matching engine
- ^ static chr *longest(struct vars *, struct dfa *, chr *, chr *);
+ ^ static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *);
*/
static chr * /* endpoint, or NULL */
-longest(v, d, start, stop)
+longest(v, d, start, stop, hitstopp)
struct vars *v; /* used only for debug and exec flags */
struct dfa *d;
chr *start; /* where the match should start */
chr *stop; /* match must end at or before here */
+int *hitstopp; /* record whether hit v->stop, if non-NULL */
{
chr *cp;
chr *realstop = (stop == v->stop) ? stop : stop + 1;
@@ -26,6 +54,8 @@ chr *stop; /* match must end at or before here */
/* initialize */
css = initialize(v, d, start);
cp = start;
+ if (hitstopp != NULL)
+ *hitstopp = 0;
/* startup */
FDEBUG(("+++ startup +++\n"));
@@ -74,6 +104,8 @@ chr *stop; /* match must end at or before here */
/* shutdown */
FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets));
if (cp == v->stop && stop == v->stop) {
+ if (hitstopp != NULL)
+ *hitstopp = 1;
co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1];
FDEBUG(("color %ld\n", (long)co));
ss = miss(v, d, css, co, cp, start);
@@ -86,7 +118,7 @@ chr *stop; /* match must end at or before here */
/* find last match, if any */
post = d->lastpost;
- for (ss = d->ssets, i = 0; i < d->nssused; ss++, i++)
+ for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
if ((ss->flags&POSTSTATE) && post != ss->lastseen &&
(post == NULL || post < ss->lastseen))
post = ss->lastseen;
@@ -99,16 +131,17 @@ chr *stop; /* match must end at or before here */
/*
- shortest - shortest-preferred matching engine
^ static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *,
- ^ chr **);
+ ^ chr **, int *);
*/
static chr * /* endpoint, or NULL */
-shortest(v, d, start, min, max, coldp)
-struct vars *v; /* used only for debug and exec flags */
+shortest(v, d, start, min, max, coldp, hitstopp)
+struct vars *v;
struct dfa *d;
chr *start; /* where the match should start */
chr *min; /* match must end at or after here */
chr *max; /* match must end at or before here */
chr **coldp; /* store coldstart pointer here, if nonNULL */
+int *hitstopp; /* record whether hit v->stop, if non-NULL */
{
chr *cp;
chr *realmin = (min == v->stop) ? min : min + 1;
@@ -117,12 +150,12 @@ chr **coldp; /* store coldstart pointer here, if nonNULL */
struct sset *css;
struct sset *ss;
struct colormap *cm = d->cm;
- chr *nopr;
- int i;
/* initialize */
css = initialize(v, d, start);
cp = start;
+ if (hitstopp != NULL)
+ *hitstopp = 0;
/* startup */
FDEBUG(("--- startup ---\n"));
@@ -175,7 +208,11 @@ chr **coldp; /* store coldstart pointer here, if nonNULL */
if (ss == NULL)
return NULL;
- else if (ss->flags&POSTSTATE) {
+
+ if (coldp != NULL) /* report last no-progress state set, if any */
+ *coldp = lastcold(v, d);
+
+ if ((ss->flags&POSTSTATE) && cp > min) {
assert(cp >= realmin);
cp--;
} else if (cp == v->stop && max == v->stop) {
@@ -183,21 +220,36 @@ chr **coldp; /* store coldstart pointer here, if nonNULL */
FDEBUG(("color %ld\n", (long)co));
ss = miss(v, d, css, co, cp, start);
/* match might have ended at eol */
+ if ((ss == NULL || !(ss->flags&POSTSTATE)) && hitstopp != NULL)
+ *hitstopp = 1;
}
if (ss == NULL || !(ss->flags&POSTSTATE))
return NULL;
- /* find last no-progress state set, if any */
+ return cp;
+}
+
+/*
+ - lastcold - determine last point at which no progress had been made
+ ^ static chr *lastcold(struct vars *, struct dfa *);
+ */
+static chr * /* endpoint, or NULL */
+lastcold(v, d)
+struct vars *v;
+struct dfa *d;
+{
+ struct sset *ss;
+ chr *nopr;
+ int i;
+
nopr = d->lastnopr;
- for (ss = d->ssets, i = 0; i < d->nssused; ss++, i++)
- if ((ss->flags&NOPROGRESS) && nopr != ss->lastseen &&
- (nopr == NULL || nopr < ss->lastseen))
+ if (nopr == NULL)
+ nopr = v->start;
+ for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
+ if ((ss->flags&NOPROGRESS) && nopr < ss->lastseen)
nopr = ss->lastseen;
- assert(nopr != NULL);
- if (coldp != NULL)
- *coldp = (nopr == v->start) ? nopr : nopr-1;
- return cp;
+ return nopr;
}
/*
@@ -433,11 +485,6 @@ chr *start; /* where the attempt got started */
/* next, is that in the cache? */
for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
if (HIT(h, d->work, p, d->wordsper)) {
-#ifndef xxx
-p->hash == h &&
-memcmp(VS(d->work), VS(p->states),
- d->wordsper*sizeof(unsigned)) == 0) {
-#endif
FDEBUG(("cached c%d\n", p - d->ssets));
break; /* NOTE BREAK OUT */
}
@@ -488,7 +535,7 @@ pcolor co; /* "color" of the lookahead constraint */
ERR(REG_ESPACE);
return 0;
}
- end = longest(v, d, cp, v->stop);
+ end = longest(v, d, cp, v->stop, (int *)NULL);
freedfa(d);
FDEBUG(("=== lacon %d match %d\n", n, (end != NULL)));
return (sub->subno) ? (end != NULL) : (end == NULL);
diff --git a/generic/regerror.c b/generic/regerror.c
index 6779e51..aca13aa 100644
--- a/generic/regerror.c
+++ b/generic/regerror.c
@@ -1,5 +1,32 @@
/*
* regerror - error-code expansion
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
*/
#include "regguts.h"
diff --git a/generic/regerrs.h b/generic/regerrs.h
index 1b6552c..34c1e68 100644
--- a/generic/regerrs.h
+++ b/generic/regerrs.h
@@ -1,6 +1,6 @@
{ REG_OKAY, "REG_OKAY", "no errors detected" },
{ REG_NOMATCH, "REG_NOMATCH", "failed to match" },
-{ REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.2)" },
+{ REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.4)" },
{ REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
{ REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
{ REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence" },
diff --git a/generic/regex.h b/generic/regex.h
index 2f3ebfa..26be33c 100644
--- a/generic/regex.h
+++ b/generic/regex.h
@@ -3,6 +3,34 @@
/*
* regular expressions
*
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ *
* Prototypes etc. marked with "^" within comments get gathered up (and
* possibly edited) by the regfwd program and inserted near the bottom of
* this file.
@@ -162,9 +190,9 @@ typedef struct {
regoff_t rm_eo; /* end of substring */
} regmatch_t;
-/* supplementary control and reporting (placeholder for later work) */
+/* supplementary control and reporting */
typedef struct {
- int rm_dummy;
+ regmatch_t rm_extend; /* see REG_EXPECT */
} rm_detail_t;
@@ -194,6 +222,7 @@ typedef struct {
#define REG_NLANCH 000200 /* ^ matches after \n, $ before */
#define REG_NEWLINE 000300 /* newlines are line terminators */
#define REG_PEND 000400 /* ugh -- backward-compatibility hack */
+#define REG_EXPECT 001000 /* report details on partial/limited matches */
#define REG_DUMP 004000 /* none of your business :-) */
#define REG_FAKEEC 010000 /* none of your business :-) */
#define REG_PROGRESS 020000 /* none of your business :-) */
diff --git a/generic/regexec.c b/generic/regexec.c
index 088d12b..9424385 100644
--- a/generic/regexec.c
+++ b/generic/regexec.c
@@ -1,5 +1,32 @@
/*
* re_*exec and friends - match REs
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
*/
#include "regguts.h"
@@ -13,6 +40,7 @@ struct vars {
int eflags; /* copies of arguments */
size_t nmatch;
regmatch_t *pmatch;
+ rm_detail_t *details;
chr *start; /* start of string */
chr *stop; /* just past end of string */
int err; /* error code if any (0 none) */
@@ -108,8 +136,9 @@ static int crevdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)
static int cbrdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
static int caltdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
/* === rege_dfa.c === */
-static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
-static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **));
+static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, int *));
+static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *));
+static chr *lastcold _ANSI_ARGS_((struct vars *, struct dfa *));
static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct smalldfa *));
static VOID freedfa _ANSI_ARGS_((struct dfa *));
static unsigned hash _ANSI_ARGS_((unsigned *, int));
@@ -133,7 +162,7 @@ exec(re, string, len, details, nmatch, pmatch, flags)
regex_t *re;
CONST chr *string;
size_t len;
-rm_detail_t *details; /* hook for future elaboration */
+rm_detail_t *details;
size_t nmatch;
regmatch_t pmatch[];
int flags;
@@ -157,6 +186,8 @@ int flags;
/* setup */
v->re = re;
v->g = (struct guts *)re->re_guts;
+ if ((v->g->cflags&REG_EXPECT) && details == NULL)
+ return REG_INVARG;
if (v->g->unmatchable)
return REG_NOMATCH;
complications = (v->g->info&REG_UBACKREF) ? 1 : 0;
@@ -178,6 +209,7 @@ int flags;
v->nmatch = v->g->nsub + 1;
} else
v->pmatch = pmatch;
+ v->details = details;
v->start = (chr *)string;
v->stop = (chr *)string + len;
v->err = 0;
@@ -234,55 +266,77 @@ struct colormap *cm;
struct dfa *s = newdfa(v, &v->g->search, cm, &sa);
chr *begin;
chr *end;
+ chr *cold;
chr *open; /* open and close of range of possible starts */
chr *close;
+ int hitend;
- if (d == NULL)
- return v->err;
- if (s == NULL) {
- freedfa(d);
+ if (ISERR()) { /* should be newdfa() failure */
+ if (d != NULL)
+ freedfa(d);
+ if (s != NULL)
+ freedfa(s);
return v->err;
}
- close = v->start;
- do {
- MDEBUG(("\nsearch at %ld\n", LOFF(close)));
- close = shortest(v, s, close, close, v->stop, &open);
- if (close == NULL)
- break; /* NOTE BREAK */
- if (v->nmatch == 0) {
- /* don't need exact location */
- freedfa(d);
- freedfa(s);
- return REG_OKAY;
- }
- MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
- for (begin = open; begin <= close; begin++) {
- MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
- end = longest(v, d, begin, v->stop);
- if (end != NULL) {
- assert(v->nmatch > 0);
- v->pmatch[0].rm_so = OFF(begin);
- v->pmatch[0].rm_eo = OFF(end);
- freedfa(d);
- freedfa(s);
- if (v->nmatch > 1) {
- zapsubs(v->pmatch, v->nmatch);
- return dissect(v, v->g->tree, begin,
- end);
- }
- if (ISERR())
- return v->err;
- return REG_OKAY;
- }
- }
- } while (close < v->stop);
+ /* first, a shot with the search RE */
+ MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
+ cold = NULL;
+ close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *)NULL);
+ freedfa(s);
+ if (v->g->cflags&REG_EXPECT) {
+ assert(v->details != NULL);
+ if (cold != NULL)
+ v->details->rm_extend.rm_so = OFF(cold);
+ else
+ v->details->rm_extend.rm_so = OFF(v->stop);
+ v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
+ }
+ if (close == NULL) { /* not found */
+ freedfa(d);
+ if (ISERR())
+ return v->err;
+ return REG_NOMATCH;
+ }
+ if (v->nmatch == 0) { /* found, don't need exact location */
+ freedfa(d);
+ return REG_OKAY;
+ }
+ /* find starting point */
+ assert(cold != NULL);
+ open = cold;
+ cold = NULL;
+ MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
+ for (begin = open; begin <= close; begin++) {
+ MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
+ end = longest(v, d, begin, v->stop, &hitend);
+ if (hitend && cold == NULL)
+ cold = begin;
+ if (end != NULL)
+ break; /* NOTE BREAK OUT */
+ }
+ assert(end != NULL); /* search RE succeeded so loop should */
freedfa(d);
- freedfa(s);
+
+ /* and pin down details */
+ assert(v->nmatch > 0);
+ v->pmatch[0].rm_so = OFF(begin);
+ v->pmatch[0].rm_eo = OFF(end);
+ if (v->g->cflags&REG_EXPECT) {
+ if (cold != NULL)
+ v->details->rm_extend.rm_so = OFF(cold);
+ else
+ v->details->rm_extend.rm_eo = OFF(v->stop);
+ v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
+ }
+ if (v->nmatch > 1) {
+ zapsubs(v->pmatch, v->nmatch);
+ return dissect(v, v->g->tree, begin, end);
+ }
if (ISERR())
return v->err;
- return REG_NOMATCH;
+ return REG_OKAY;
}
/*
@@ -301,12 +355,15 @@ struct colormap *cm;
struct dfa *s = newdfa(v, &v->g->search, cm, &sa);
chr *begin;
chr *end;
+ chr *cold;
chr *open; /* open and close of range of possible starts */
chr *close;
chr *estart;
chr *estop;
int er;
int shorter = v->g->tree->flags&SHORTER;
+ int ret = REG_NOMATCH;
+ int hitend;
if (d == NULL)
return v->err;
@@ -315,12 +372,16 @@ struct colormap *cm;
return v->err;
}
+ cold = NULL;
close = v->start;
do {
MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
- close = shortest(v, s, close, close, v->stop, &open);
+ close = shortest(v, s, close, close, v->stop, &cold, (int *)NULL);
if (close == NULL)
break; /* NOTE BREAK */
+ assert(cold != NULL);
+ open = cold;
+ cold = NULL;
MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
for (begin = open; begin <= close; begin++) {
MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
@@ -329,47 +390,39 @@ struct colormap *cm;
for (;;) {
if (shorter)
end = shortest(v, d, begin, estart,
- estop, (chr **)NULL);
+ estop, (chr **)NULL, &hitend);
else
- end = longest(v, d, begin, estop);
+ end = longest(v, d, begin, estop,
+ &hitend);
+ if (hitend && cold == NULL)
+ cold = begin;
if (end == NULL)
break; /* NOTE BREAK OUT */
MDEBUG(("tentative end %ld\n", LOFF(end)));
zapsubs(v->pmatch, v->nmatch);
zapmem(v, v->g->tree);
er = cdissect(v, v->g->tree, begin, end);
- switch (er) {
- case REG_OKAY:
+ if (er == REG_OKAY) {
if (v->nmatch > 0) {
v->pmatch[0].rm_so = OFF(begin);
v->pmatch[0].rm_eo = OFF(end);
}
- freedfa(d);
- freedfa(s);
- if (ISERR())
- return v->err;
- return REG_OKAY;
- break;
- case REG_NOMATCH:
- /* go around and try again */
- if ((shorter) ? end == estop :
- end == begin) {
- /* no point in trying again */
- freedfa(s);
- freedfa(d);
- return REG_NOMATCH;
- }
- if (shorter)
- estart = end + 1;
- else
- estop = end - 1;
- break;
- default:
- freedfa(d);
- freedfa(s);
- return er;
- break;
+ ret = REG_OKAY;
+ break; /* NOTE BREAK OUT */
+ }
+ if (er != REG_NOMATCH) {
+ ERR(er);
+ break; /* NOTE BREAK OUT */
}
+ if ((shorter) ? end == estop : end == begin) {
+ /* no point in trying again */
+ break; /* NOTE BREAK OUT */
+ }
+ /* go around and try again */
+ if (shorter)
+ estart = end + 1;
+ else
+ estop = end - 1;
}
}
} while (close < v->stop);
@@ -378,7 +431,15 @@ struct colormap *cm;
freedfa(s);
if (ISERR())
return v->err;
- return REG_NOMATCH;
+ if (v->g->cflags&REG_EXPECT) {
+ assert(v->details != NULL);
+ if (cold != NULL)
+ v->details->rm_extend.rm_so = OFF(cold);
+ else
+ v->details->rm_extend.rm_so = OFF(v->stop);
+ v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
+ }
+ return ret;
}
/*
@@ -520,7 +581,7 @@ chr *end; /* end of same */
}
/* pick a tentative midpoint */
- mid = longest(v, d, begin, end);
+ mid = longest(v, d, begin, end, (int *)NULL);
if (mid == NULL) {
freedfa(d);
freedfa(d2);
@@ -529,7 +590,7 @@ chr *end; /* end of same */
MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
/* iterate until satisfaction or failure */
- while (longest(v, d2, mid, end) != end) {
+ while (longest(v, d2, mid, end, (int *)NULL) != end) {
/* that midpoint didn't work, find a new one */
if (mid == begin) {
/* all possibilities exhausted! */
@@ -538,7 +599,7 @@ chr *end; /* end of same */
freedfa(d2);
return REG_ASSERT;
}
- mid = longest(v, d, begin, mid-1);
+ mid = longest(v, d, begin, mid-1, (int *)NULL);
if (mid == NULL) {
/* failed to find a new one! */
MDEBUG(("failed midpoint!\n"));
@@ -583,7 +644,7 @@ chr *end; /* end of same */
d = newdfa(v, &t->left->cnfa, &v->g->cmap, &da);
if (ISERR())
return v->err;
- if (longest(v, d, begin, end) == end) {
+ if (longest(v, d, begin, end, (int *)NULL) == end) {
MDEBUG(("success\n"));
freedfa(d);
return dissect(v, t->left, begin, end);
@@ -681,7 +742,7 @@ chr *end; /* end of same */
/* pick a tentative midpoint */
if (v->mem[t->retry] == 0) {
- mid = longest(v, d, begin, end);
+ mid = longest(v, d, begin, end, (int *)NULL);
if (mid == NULL) {
freedfa(d);
freedfa(d2);
@@ -698,7 +759,8 @@ chr *end; /* end of same */
for (;;) {
/* try this midpoint on for size */
er = cdissect(v, t->left, begin, mid);
- if (er == REG_OKAY && longest(v, d2, mid, end) == end &&
+ if (er == REG_OKAY &&
+ longest(v, d2, mid, end, (int *)NULL) == end &&
(er = cdissect(v, t->right, mid, end)) ==
REG_OKAY)
break; /* NOTE BREAK OUT */
@@ -716,7 +778,7 @@ chr *end; /* end of same */
freedfa(d2);
return REG_NOMATCH;
}
- mid = longest(v, d, begin, mid-1);
+ mid = longest(v, d, begin, mid-1, (int *)NULL);
if (mid == NULL) {
/* failed to find a new one */
MDEBUG(("%d failed midpoint\n", t->retry));
@@ -775,7 +837,7 @@ chr *end; /* end of same */
/* pick a tentative midpoint */
if (v->mem[t->retry] == 0) {
- mid = shortest(v, d, begin, begin, end, (chr **)NULL);
+ mid = shortest(v, d, begin, begin, end, (chr **)NULL, (int *)NULL);
if (mid == NULL) {
freedfa(d);
freedfa(d2);
@@ -792,7 +854,8 @@ chr *end; /* end of same */
for (;;) {
/* try this midpoint on for size */
er = cdissect(v, t->left, begin, mid);
- if (er == REG_OKAY && longest(v, d2, mid, end) == end &&
+ if (er == REG_OKAY &&
+ longest(v, d2, mid, end, (int *)NULL) == end &&
(er = cdissect(v, t->right, mid, end)) ==
REG_OKAY)
break; /* NOTE BREAK OUT */
@@ -810,7 +873,7 @@ chr *end; /* end of same */
freedfa(d2);
return REG_NOMATCH;
}
- mid = shortest(v, d, begin, mid+1, end, (chr **)NULL);
+ mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, (int *)NULL);
if (mid == NULL) {
/* failed to find a new one */
MDEBUG(("%d failed midpoint\n", t->retry));
@@ -929,7 +992,7 @@ chr *end; /* end of same */
d = newdfa(v, &t->left->cnfa, &v->g->cmap, &da);
if (ISERR())
return v->err;
- if (longest(v, d, begin, end) != end) {
+ if (longest(v, d, begin, end, (int *)NULL) != end) {
freedfa(d);
v->mem[t->retry] = TRIED;
return caltdissect(v, t->right, begin, end);
diff --git a/generic/regfree.c b/generic/regfree.c
index a5c3f0b..17a7389 100644
--- a/generic/regfree.c
+++ b/generic/regfree.c
@@ -1,6 +1,34 @@
/*
* regfree - free an RE
*
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ *
* You might think that this could be incorporated into regcomp.c, and
* that would be a reasonable idea... except that this is a generic
* function (with a generic name), applicable to all compiled REs
diff --git a/generic/regfronts.c b/generic/regfronts.c
index a9bd556..82f48e2 100644
--- a/generic/regfronts.c
+++ b/generic/regfronts.c
@@ -3,6 +3,33 @@
*
* Mostly for implementation of backward-compatibility kludges. Note
* that these routines exist ONLY in char versions.
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
*/
#include "regguts.h"
diff --git a/generic/regguts.h b/generic/regguts.h
index badd8d4..29b1721 100644
--- a/generic/regguts.h
+++ b/generic/regguts.h
@@ -1,5 +1,31 @@
/*
* Internal interface definitions, etc., for the regex package
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
@@ -28,11 +54,11 @@
/* assertions */
#ifndef assert
-#ifndef REG_DEBUG
-#define NDEBUG
+# ifndef REG_DEBUG
+# define NDEBUG /* no assertions */
+# endif
#include <assert.h>
#endif
-#endif
/* voids */
#ifndef VOID
@@ -278,6 +304,7 @@ struct state {
struct state *next; /* chain for traversing all */
struct state *prev; /* back chain */
struct arcbatch oas; /* first arcbatch, avoid malloc in easy case */
+ int noas; /* number of arcs used in first arcbatch */
};
struct nfa {
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index c41976a..fdc89b7 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -13,7 +13,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.10 1999/05/25 01:00:25 stanton Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.11 1999/06/02 01:53:31 stanton Exp $
*/
#include "tclInt.h"
@@ -107,9 +107,7 @@ Tcl_PwdObjCmd(dummy, interp, objc, objv)
* Tcl_RegexpObjCmd --
*
* This procedure is invoked to process the "regexp" Tcl command.
- * See the user documentation for details on what it does. The
- * REGEXP_TEST stuff is to minimize code differences between this
- * and the "testregexp" command.
+ * See the user documentation for details on what it does.
*
* Results:
* A standard Tcl result.
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index 22e1db0..2318780 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclRegexp.c,v 1.5 1999/05/22 01:20:13 stanton Exp $
+ * RCS: @(#) $Id: tclRegexp.c,v 1.6 1999/06/02 01:53:31 stanton Exp $
*/
#include "tclInt.h"
@@ -287,13 +287,14 @@ TclRegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
{
int status;
TclRegexp *regexpPtr = (TclRegexp *) re;
- size_t nm = regexpPtr->re.re_nsub + 1;
+ size_t last = regexpPtr->re.re_nsub + 1;
+ size_t nm = last;
if (nmatches >= 0 && (size_t) nmatches < nm)
nm = (size_t) nmatches;
status = TclReExec(&regexpPtr->re, wString, (size_t) numChars,
- (rm_detail_t *)NULL, nm, regexpPtr->matches, flags);
+ &regexpPtr->details, nm, regexpPtr->matches, flags);
/*
* Check for errors.
@@ -318,12 +319,13 @@ TclRegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
* TclRegExpRangeUniChar --
*
* Returns pointers describing the range of a regular expression match,
- * or one of the subranges within the match.
+ * or one of the subranges within the match, or the hypothetical range
+ * represented by the rm_extend field of the rm_detail_t.
*
* Results:
* The variables at *startPtr and *endPtr are modified to hold the
- * addresses of the endpoints of the range given by index. If the
- * specified range doesn't exist then NULLs are returned.
+ * offsets of the endpoints of the range given by index. If the
+ * specified range doesn't exist then -1s are supplied.
*
* Side effects:
* None.
@@ -337,7 +339,8 @@ TclRegExpRangeUniChar(re, index, startPtr, endPtr)
* been passed to Tcl_RegExpExec. */
int index; /* 0 means give the range of the entire
* match, > 0 means give the range of
- * a matching subrange. */
+ * a matching subrange, -1 means the
+ * range of the rm_extend field. */
int *startPtr; /* Store address of first character in
* (sub-) range here. */
int *endPtr; /* Store address of character just after last
@@ -345,7 +348,10 @@ TclRegExpRangeUniChar(re, index, startPtr, endPtr)
{
TclRegexp *regexpPtr = (TclRegexp *) re;
- if ((size_t) index > regexpPtr->re.re_nsub) {
+ if ((regexpPtr->flags&REG_EXPECT) && index == -1) {
+ *startPtr = regexpPtr->details.rm_extend.rm_so;
+ *endPtr = regexpPtr->details.rm_extend.rm_eo;
+ } else if ((size_t) index > regexpPtr->re.re_nsub) {
*startPtr = -1;
*endPtr = -1;
} else {
diff --git a/generic/tclRegexp.h b/generic/tclRegexp.h
index 2745900..7a6fb2a 100644
--- a/generic/tclRegexp.h
+++ b/generic/tclRegexp.h
@@ -4,36 +4,13 @@
* This file contains definitions used internally by Henry
* Spencer's regular expression code.
*
- * Copyright (c) 1998 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., Sun Microsystems Inc., and
- * Scriptics Corporation, none of whom are responsible for the results.
- * The author thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
* Copyright (c) 1998 by Sun Microsystems, Inc.
* Copyright (c) 1998-1999 by Scriptics Corporation.
*
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclRegexp.h,v 1.7 1999/05/14 00:20:02 stanton Exp $
+ * RCS: @(#) $Id: tclRegexp.h,v 1.8 1999/06/02 01:53:31 stanton Exp $
*/
#ifndef _TCLREGEXP
@@ -64,6 +41,8 @@ typedef struct TclRegexp {
* representation of the last string matched
* with this regexp to indicate the location
* of subexpressions. */
+ rm_detail_t details; /* Detailed information on match (currently
+ * used only for REG_EXPECT). */
int refCount; /* Count of number of references to this
* compiled regexp. */
} TclRegexp;
diff --git a/generic/tclTest.c b/generic/tclTest.c
index 6506285..efc2520 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -13,7 +13,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclTest.c,v 1.12 1999/04/21 21:50:28 rjohnson Exp $
+ * RCS: @(#) $Id: tclTest.c,v 1.13 1999/06/02 01:53:31 stanton Exp $
*/
#define TCL_TEST
@@ -2508,8 +2508,8 @@ TestparsevarnameObjCmd(clientData, interp, objc, objv)
*
* This procedure implements the "testregexp" command. It is
* used to give a direct interface for regexp flags. It's identical
- * to Tcl_RegexpObjCmd except for the REGEXP_TEST define, which
- * enables the -xflags option.
+ * to Tcl_RegexpObjCmd except for the -xflags option, and the
+ * consequences thereof (including the REG_EXPECT kludge).
*
* Results:
* A standard Tcl result.
@@ -2528,33 +2528,24 @@ TestregexpObjCmd(dummy, interp, objc, objv)
int objc; /* Number of arguments. */
Tcl_Obj *CONST objv[]; /* Argument objects. */
{
- int i, result, indices, stringLength, wLen, match, about;
+ int i, ii, result, indices, stringLength, wLen, match, about;
int hasxflags, cflags, eflags;
Tcl_RegExp regExpr;
char *string;
Tcl_DString stringBuffer, valueBuffer;
Tcl_UniChar *wStart;
-# define REGEXP_TEST /* yes */
static char *options[] = {
"-indices", "-nocase", "-about", "-expanded",
"-line", "-linestop", "-lineanchor",
-#ifdef REGEXP_TEST
"-xflags",
-#endif
"--", (char *) NULL
};
enum options {
REGEXP_INDICES, REGEXP_NOCASE, REGEXP_ABOUT, REGEXP_EXPANDED,
REGEXP_MULTI, REGEXP_NOCROSS, REGEXP_NEWL,
-#ifdef REGEXP_TEST
REGEXP_XFLAGS,
-#endif
REGEXP_LAST
};
-#ifndef REGEXP_TEST
-# define REGEXP_XFLAGS -1 /* impossible value */
-# define TestregexpXflags(a,b,c,d) /* do nothing */
-#endif
indices = 0;
about = 0;
@@ -2662,6 +2653,21 @@ TestregexpObjCmd(dummy, interp, objc, objv)
*/
Tcl_SetIntObj(Tcl_GetObjResult(interp), 0);
+ if (objc > 2 && (cflags&REG_EXPECT) && indices) {
+ char *varName, *value;
+ int start, end;
+ char info[TCL_INTEGER_SPACE * 2];
+
+ varName = Tcl_GetString(objv[2]);
+ TclRegExpRangeUniChar(regExpr, -1, &start, &end);
+ sprintf(info, "%d %d", start, end-1);
+ value = Tcl_SetVar(interp, varName, info, 0);
+ if (value == NULL) {
+ Tcl_AppendResult(interp, "couldn't set variable \"",
+ varName, "\"", (char *) NULL);
+ result = TCL_ERROR;
+ }
+ }
goto done;
}
@@ -2679,7 +2685,8 @@ TestregexpObjCmd(dummy, interp, objc, objv)
varName = Tcl_GetString(objv[i]);
- TclRegExpRangeUniChar(regExpr, i, &start, &end);
+ ii = ((cflags&REG_EXPECT) && i == objc-1) ? -1 : i;
+ TclRegExpRangeUniChar(regExpr, ii, &start, &end);
if (start < 0) {
if (indices) {
value = Tcl_SetVar(interp, varName, "-1 -1", 0);
@@ -2689,7 +2696,7 @@ TestregexpObjCmd(dummy, interp, objc, objv)
} else {
if (indices) {
char info[TCL_INTEGER_SPACE * 2];
-
+
sprintf(info, "%d %d", start, end - 1);
value = Tcl_SetVar(interp, varName, info, 0);
} else {
@@ -2736,7 +2743,7 @@ TestregexpObjCmd(dummy, interp, objc, objv)
*----------------------------------------------------------------------
*/
-static void
+static VOID
TestregexpXflags(string, length, cflagsPtr, eflagsPtr)
char *string; /* The string of flags. */
int length; /* The length of the string in bytes. */
@@ -2801,6 +2808,10 @@ TestregexpXflags(string, length, cflagsPtr, eflagsPtr)
eflags |= REG_NOTEOL;
break;
}
+ case '?': {
+ cflags |= REG_EXPECT;
+ break;
+ }
case '%': {
eflags |= REG_SMALL;
break;
diff --git a/tests/reg.test b/tests/reg.test
index de20e33..40efbdb 100644
--- a/tests/reg.test
+++ b/tests/reg.test
@@ -4,10 +4,9 @@
# built-in commands. Sourcing this file into Tcl runs the tests and
# generates output for errors. No output means no errors were found.
#
-# Copyright (c) 1998-1999 by Scriptics Corporation.
-# All rights reserved.
+# Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
#
-# RCS: @(#) $Id: reg.test,v 1.2 1999/04/16 00:47:33 stanton Exp $
+# RCS: @(#) $Id: reg.test,v 1.3 1999/06/02 01:53:32 stanton Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
source [file join [pwd] [file dirname [info script]] defs.tcl]
@@ -55,6 +54,7 @@ set ::tcltest::testConfig(testregexp) \
# % force small state-set cache in matcher (to test cache replace)
# ^ beginning of string is not beginning of line
# $ end of string is not end of line
+# ? report information on partial and limited matches
#
# & test as both ARE and BRE
# b BRE
@@ -117,7 +117,7 @@ proc doing {major desc} {
if {$testbypassed != 0} {
puts stdout "!!! bypassed $testbypassed tests in\
- $major, `$description'"
+ $prefix, `$description'"
}
set prefix reg-$major
@@ -234,6 +234,8 @@ proc f {testid flags re target args} {
# match expected, internal routine that does the work
# parameters like the "real" routines except they don't have "opts",
# which is a possibly-empty list of switches for the regexp match attempt
+# The ! flag is used to indicate expected match failure (for REG_EXPECT,
+# which wants argument testing even in the event of failure).
proc matchexpected {opts testid flags re target args} {
global prefix description ask
@@ -270,9 +272,16 @@ proc matchexpected {opts testid flags re target args} {
if {[string first "o" $flags] >= 0} { ;# REG_NOSUB
set nsub 0 ;# unsigned value cannot be -1
}
+ if {[string first "?" $flags] >= 0} { ;# REG_EXPECT
+ incr nsub -1 ;# the extra does not count
+ }
set ecmd [concat $ecmd $names]
set erun "list \[$ecmd\] $refs"
- set result [concat [list 1] $args]
+ set retcode [list 1]
+ if {[string first "!" $flags] >= 0} {
+ set retcode [list 0]
+ }
+ set result [concat $retcode $args]
set info [list $nsub $infoflags]
lappend testid "compile"
@@ -293,6 +302,15 @@ proc i {args} {
eval matchexpected [linsert $args 0 [list "-indices"]]
}
+# partial match expected
+# p testno flags re target mat "" ...
+# Quirk: number of ""s must be one more than number of subREs.
+proc p {args} {
+ set f [lindex $args 1] ;# add ! flag
+ set args [lreplace $args 1 1 "!$f"]
+ eval matchexpected [linsert $args 0 [list "-indices"]]
+}
+
# test temporarily unimplemented
proc xx {args} {
global testbypassed
@@ -309,7 +327,7 @@ proc xx {args} {
# support functions and preliminary misc.
# This is sensitive to changes in message wording, but we really have to
# test the code->message expansion at least once.
-test regexp-0.1 "regexp error reporting" {
+test reg-0.1 "regexp error reporting" {
list [catch {regexp (*) ign} msg] $msg
} {1 {couldn't compile regular expression pattern: quantifier operand invalid}}
@@ -431,8 +449,8 @@ e 7 - "a{1" EBRACE
e 8 - "a{1n}" BADBR
m 9 BS "a{b" "a\{b" "a\{b"
m 10 BS "a{" "a\{" "a\{"
-m 11 bQ {a\{0,1\}b} cb b
-e 12 b {a\{0,1} EBRACE
+m 11 bQ "a\\{0,1\\}b" cb b
+e 12 b "a\\{0,1" EBRACE
e 13 - "a{0,1\\" BADBR
m 14 Q "a{0}b" ab b
m 15 Q "a{0,0}b" ab b
@@ -836,8 +854,11 @@ m 8 PQ "ab{2,4}?c" abbbbc abbbbc
doing 25 "mixed quantifiers"
-xx to be done, actually
-xx should include |
+# should include |
+m 1 PN {^(.*?)(a*)$} xyza xyza xyz a
+m 2 PN {^(.*?)(a*)$} xyzaa xyzaa xyz aa
+m 3 PN {^(.*?)(a*)$} xyz xyz xyz ""
+xx lots more to be done
@@ -886,7 +907,25 @@ i 12 %LP {\w+(abcdefghijklmnopqrst)?} xyzabcdefghijklmnopqrs \
-doing 29 "misc. oddities and old bugs"
+doing 29 "incomplete matches"
+p 1 ? def abc {3 2} ""
+p 2 ? bcd abc {1 2} ""
+p 3 ? abc abab {0 3} ""
+p 4 ? abc abdab {3 4} ""
+i 5 ? abc abc {0 2} {0 2}
+i 6 ? abc xyabc {2 4} {2 4}
+p 7 ? abc+ xyab {2 3} ""
+i 8 ? abc+ xyabc {2 4} {2 4}
+p 9 ?P abc+? xyab {2 3} ""
+# the retain numbers in these two may look wrong, but they aren't
+i 10 ?P abc+? xyabc {2 4} {5 4}
+i 11 ?P abc+? xyabcc {2 4} {6 5}
+i 12 ?P abc+? xyabcd {2 4} {6 5}
+i 13 ? abcd|bc xyabc {3 4} {2 4}
+
+
+
+doing 30 "misc. oddities and old bugs"
e 1 & *** BADRPT
m 2 N a?b* abb abb
m 3 N a?b* bb bb