From 7e7056e21d0a0d9fa39bdfd742e82b101a6c4b7c Mon Sep 17 00:00:00 2001 From: stanton Date: Wed, 21 Oct 1998 20:39:57 +0000 Subject: Integrated latest regexp changes from Henry Spencer. Moved regexp related declarations out of tclInt.h and into tclRegexp.h. Added "encoding" command. --- changes | 6 +- doc/encoding.n | 79 ++ generic/chr.h | 48 - generic/color.c | 605 --------- generic/compile.c | 2089 ------------------------------- generic/exec.c | 1754 -------------------------- generic/guts.h | 233 ---- generic/lex.c | 938 -------------- generic/locale.c | 675 ---------- generic/nfa.c | 1368 -------------------- generic/regc_color.c | 619 ++++++++++ generic/regc_cvec.c | 143 +++ generic/regc_lex.c | 1022 +++++++++++++++ generic/regc_locale.c | 426 +++++++ generic/regc_nfa.c | 1552 +++++++++++++++++++++++ generic/regcomp.c | 2137 ++++++++++++++++++++++++++++++++ generic/regcustom.h | 90 ++ generic/regerror.c | 82 ++ generic/regerrs.h | 19 + generic/regex.h | 299 +++++ generic/regexec.c | 1675 +++++++++++++++++++++++++ generic/regfree.c | 25 + generic/regfronts.c | 56 + generic/regguts.h | 385 ++++++ generic/tclBasic.c | 4 +- generic/tclCmdAH.c | 121 +- generic/tclCmdIL.c | 3 +- generic/tclCmdMZ.c | 51 +- generic/tclEncoding.c | 60 +- generic/tclFileName.c | 3 +- generic/tclInt.h | 46 +- generic/tclRegexp.c | 283 +++-- generic/tclRegexp.h | 219 +--- generic/tclTest.c | 334 +---- tests/cmdAH.test | 633 ++++++---- tests/encoding.test | 314 +++-- tests/regexp.test | 10 +- tests/regexp2.test | 3176 ----------------------------------------------- tests/regexp3.test | 3295 ------------------------------------------------- tests/regtests.test | 867 +++++++++++++ unix/Makefile.in | 28 +- win/makefile.bc | 8 +- win/makefile.vc | 8 +- 43 files changed, 10510 insertions(+), 15278 deletions(-) create mode 100644 doc/encoding.n delete mode 100644 generic/chr.h delete mode 100644 generic/color.c delete mode 100644 generic/compile.c delete mode 100644 generic/exec.c delete mode 100644 generic/guts.h delete mode 100644 generic/lex.c delete mode 100644 generic/locale.c delete mode 100644 generic/nfa.c create mode 100644 generic/regc_color.c create mode 100644 generic/regc_cvec.c create mode 100644 generic/regc_lex.c create mode 100644 generic/regc_locale.c create mode 100644 generic/regc_nfa.c create mode 100644 generic/regcomp.c create mode 100644 generic/regcustom.h create mode 100644 generic/regerror.c create mode 100644 generic/regerrs.h create mode 100644 generic/regex.h create mode 100644 generic/regexec.c create mode 100644 generic/regfree.c create mode 100644 generic/regfronts.c create mode 100644 generic/regguts.h delete mode 100644 tests/regexp2.test delete mode 100644 tests/regexp3.test create mode 100644 tests/regtests.test diff --git a/changes b/changes index 3afdf02..d9ff620 100644 --- a/changes +++ b/changes @@ -1,6 +1,6 @@ Recent user-visible changes to Tcl: -RCS: @(#) $Id: changes,v 1.1.2.6 1998/10/16 01:28:25 stanton Exp $ +RCS: @(#) $Id: changes,v 1.1.2.7 1998/10/21 20:39:57 stanton Exp $ 1. No more [command1] [command2] construct for grouping multiple commands on a single command line. @@ -3925,3 +3925,7 @@ and lowercase all of the other characters. (stanton) 10/15/98 (bug fix) Changed regexp and string commands to properly handle case folding according to the Unicode character tables. (stanton) + +10/21/98 (new feature) Added an "encoding" command to facilitate +translations of strings between different character encodings. See +the encoding.n manual entry for more details. (stanton) diff --git a/doc/encoding.n b/doc/encoding.n new file mode 100644 index 0000000..f63f4b2 --- /dev/null +++ b/doc/encoding.n @@ -0,0 +1,79 @@ +'\" +'\" Copyright (c) 1998 by Scriptics Corporation. +'\" +'\" See the file "license.terms" for information on usage and redistribution +'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. +'\" +'\" RCS: @(#) $Id: encoding.n,v 1.1.2.1 1998/10/21 20:39:58 stanton Exp $ +'\" +.so man.macros +.TH encoding n "8.1" Tcl "Tcl Built-In Commands" +.BS +.SH NAME +encoding \- Manipulate encodings +.SH SYNOPSIS +\fBencoding \fIoption\fR ?\fIarg arg ...\fR? +.BE + +.SH INTRODUCTION +.PP +Strings in Tcl are encoded using 16-bit Unicode characters. Different +operating system interfaces or applications may generate strings in +other encodings such as Shift-JIS. The \fBencoding\fR command helps +to bridge the gap between Unicode and these other formats. + +.SH DESCRIPTION +.PP +Performs one of several encoding related operations, depending on +\fIoption\fR. The legal \fIoption\fRs are: +.TP +\fBencoding convertfrom ?\fIencoding\fR? \fIdata\fR +Convert \fIdata\fR to Unicode from the specified \fIencoding\fR. The +characters in \fIdata\fR are treated as binary data where the lower +8-bits of each character is taken as a single byte. The resulting +sequence of bytes is treated as a string in the specified +\fIencoding\fR. If \fIencoding\fR is not specified, the current +system encoding is used. +.TP +\fBencoding convertto ?\fIencoding\fR? \fIstring\fR +Convert \fIstring\fR from Unicode to the specified \fIencoding\fR. +The result is a sequence of bytes that represents the converted +string. Each byte is stored in the lower 8-bits of a Unicode +character. If \fIencoding\fR is not specified, the current +system encoding is used. +.TP +\fBencoding names\fR +Returns a list containing the names of all of the encodings that are +currently available. +.TP +\fBencoding system\fR ?\fIencoding\fR? +Set the system encoding to \fIencoding\fR. If \fIencoding\fR is +omitted then the command returns the current system encoding. The +system encoding is used whenever Tcl passes strings to system calls. + +.SH EXAMPLE +.PP +It is common practice to write script files using a text editor that +produces output in the euc-jp encoding, which represents the ASCII +characters as singe bytes and Japanese characters as two bytes. This +makes it easy to embed literal strings that correspond to non-ASCII +characters by simply typing the strings in place in the script. +However, because the \fBsource\fR command always reads files using the +ISO8859-1 encoding, Tcl will treat each byte in the file as a separate +character that maps to the 00 page in Unicode. The +resulting Tcl strings will not contain the expected Japanese +characters. Instead, they will contain a sequence of Latin-1 +characters that correspond to the bytes of the original string. The +\fBencoding\fR command can be used to convert this string to the +expected Japanese Unicode characters. For example, +.CS + set s [encoding convertfrom euc-jp "\\xA4\\xCF"] +.CE +would return the Unicode string "\\u306F", which is the Hiragana +letter HA. + +.SH "SEE ALSO" +Tcl_GetEncoding + +.SH KEYWORDS +encoding diff --git a/generic/chr.h b/generic/chr.h deleted file mode 100644 index 6a21159..0000000 --- a/generic/chr.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * chr.h -- - * - * Regexp package file: Unichar version of stuff related to the - * nature of a character. - * - * Copyright (c) 1998 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., and Sun Microsystems Inc., none of - * whom are responsible for the results. The author thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Copyright (c) 1998 by Sun Microsystems, Inc. - * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: chr.h,v 1.1.2.2 1998/10/03 01:56:40 stanton Exp $ - */ - -typedef Tcl_UniChar chr; /* internal character type */ -typedef int pchr; /* what it promotes to */ -typedef unsigned uchr; /* unsigned type big enough to hold a chr */ -#define CHRBITS (sizeof(Tcl_UniChar) * CHAR_BIT) /* bits in a chr */ -#define CHR(c) (UCHAR(c)) /* turn a char literal into a chr literal */ -#define DIGITVAL(c) ((c)-'0') /* turn a chr digit into its value */ - -/* - * char names for the externally-visible functions - */ -#define compile re_ucomp -#define exec re_uexec diff --git a/generic/color.c b/generic/color.c deleted file mode 100644 index fa640f9..0000000 --- a/generic/color.c +++ /dev/null @@ -1,605 +0,0 @@ -/* - * color.c -- - * - * Regexp package file: colorings of characters. - * Note that there are some incestuous relationships between this code and - * NFA arc maintenance, which perhaps ought to be cleaned up sometime. - * - * Copyright (c) 1998 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., and Sun Microsystems Inc., none of - * whom are responsible for the results. The author thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Copyright (c) 1998 by Sun Microsystems, Inc. - * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: color.c,v 1.1.2.2 1998/10/03 01:56:40 stanton Exp $ - */ - -/* - * The innards. - */ -struct colors { - color ccolor[BYTTAB]; -}; -struct ptrs { - union tree *pptr[BYTTAB]; -}; -union tree { - struct colors colors; - struct ptrs ptrs; -}; -#define tcolor colors.ccolor -#define tptr ptrs.pptr -/* - * Some of the function prototypes need this. - ^ union tree; - */ - -struct colordesc { - uchr nchrs; /* number of chars of this color */ - color sub; /* open subcolor of this one, or NOSUB */ -# define NOSUB COLORLESS - struct arc *arcs; /* color chain */ -# define UNUSEDCOLOR(cd) ((cd)->nchrs == 0 && (cd)->sub == NOSUB) - int flags; -# define PSEUDO 1 /* pseudocolor, no real chars */ -}; - -struct colormap { - int magic; -# define CMMAGIC 0x876 - struct vars *v; /* for error reporting */ - color rest; - int filled; /* has it been filled? */ - int ncds; /* number of colordescs */ - struct colordesc *cd; -# define CDEND(cm) (&(cm)->cd[(cm)->ncds]) -# define NINLINECDS 10 - struct colordesc cds[NINLINECDS]; - union tree tree[NBYTS]; /* tree top, plus fill blocks */ -}; - -#ifdef COMPILE - -/* - - newcm - get new colormap - ^ static struct colormap *newcm(struct vars *); - */ -static struct colormap * /* NULL for allocation failure */ -newcm(v) -struct vars *v; -{ - struct colormap *cm; - int i; - int j; - union tree *t; - union tree *nextt; - struct colordesc *cd; - - cm = (struct colormap *)ckalloc(sizeof(struct colormap)); - if (cm == NULL) { - ERR(REG_ESPACE); - return NULL; - } - cm->magic = CMMAGIC; - cm->v = v; - cm->rest = WHITE; - cm->filled = 0; - - cm->ncds = NINLINECDS; - cm->cd = cm->cds; - for (cd = cm->cd; cd < CDEND(cm); cd++) { - cd->nchrs = 0; - cd->sub = NOSUB; - cd->arcs = NULL; - cd->flags = 0; - } - cm->cd[WHITE].nchrs = WCHAR_MAX - WCHAR_MIN; - - /* treetop starts as NULLs if there are lower levels */ - t = cm->tree; - if (NBYTS > 1) { - for (i = BYTTAB-1; i >= 0; i--) - t->tptr[i] = NULL; - } - - /* if no lower levels, treetop and last fill block are the same */ - - /* fill blocks point to next fill block... */ - for (t = &cm->tree[1], j = NBYTS-2; j > 0; t = nextt, j--) { - nextt = t + 1; - for (i = BYTTAB-1; i >= 0; i--) - t->tptr[i] = t + 1; - } - /* ...except last which is solid white */ - t = &cm->tree[NBYTS-1]; - for (i = BYTTAB-1; i >= 0; i--) - t->tcolor[i] = WHITE; - - - return cm; -} - -/* - - freecm - free a colormap - ^ static VOID freecm(struct colormap *); - */ -static VOID -freecm(cm) -struct colormap *cm; -{ - cm->magic = 0; - if (NBYTS > 1) { - cmtreefree(cm, cm->tree, 0); - } - if (cm->cd != cm->cds) { - ckfree((char *)cm->cd); - } - ckfree((char *) cm); /* mem leak (CCS). */ -} - -/* - - cmtreefree - free a non-terminal part of a colormap tree - ^ static VOID cmtreefree(struct colormap *, union tree *, int); - */ -static VOID -cmtreefree(cm, tree, level) -struct colormap *cm; -union tree *tree; -int level; /* level number (top == 0) of this block */ -{ - int i; - union tree *t; - union tree *fillt = &cm->tree[level+1]; - - assert(level < NBYTS-1); /* this level has pointers */ - for (i = BYTTAB-1; i >= 0; i--) { - t = tree->tptr[i]; - if (t != NULL && t != fillt) { - if ((int) level < (int) NBYTS-2) { /* more pointer blocks below */ - cmtreefree(cm, t, level+1); - } - ckfree((char *) t); - } - } -} - -/* - - fillcm - fill in a colormap, so no NULLs remain - * The point of this is that the tree traversal can then be a fixed set - * of table lookups with no conditional branching. It might be better - * to do reallocation for a more compacted structure, on the order of - * what's done for NFAs, but the colormap can be quite large and a total - * rebuild of it could be costly. - ^ static VOID fillcm(struct colormap *); - */ -static VOID -fillcm(cm) -struct colormap *cm; -{ - if (!cm->filled && NBYTS > 1) - cmtreefill(cm, cm->tree, 0); - cm->filled = 1; -} - -/* - - cmtreefill - fill a non-terminal part of a colormap tree - ^ static VOID cmtreefill(struct colormap *, union tree *, int); - */ -static VOID -cmtreefill(cm, tree, level) -struct colormap *cm; -union tree *tree; -int level; /* level number (top == 0) of this block */ -{ - int i; - union tree *t; - union tree *fillt = &cm->tree[level+1]; - - assert(level < NBYTS-1); /* this level has pointers */ - for (i = BYTTAB-1; i >= 0; i--) { - t = tree->tptr[i]; - if (t == fillt) /* oops */ - {} - else if (t == NULL) { - tree->tptr[i] = fillt; - } - else if ((int) level < (int) NBYTS-2) {/* more pointer blocks below */ - cmtreefill(cm, t, level+1); - } - } -} - -#endif /* ifdef COMPILE */ - -/* - - getcolor - get the color of a character from a colormap - ^ static color getcolor(struct colormap *, pchr); - */ -static color -getcolor(cm, c) -struct colormap *cm; -pchr c; -{ - uchr uc = c; - int shift; - int b; - union tree *t; - - assert(cm->magic == CMMAGIC); - - t = cm->tree; - for (shift = BYTBITS * (NBYTS - 1); t != NULL; shift -= BYTBITS) { - b = (uc >> shift) & BYTMASK; - if (shift == 0) /* reached the bottom */ - return t->tcolor[b]; - t = t->tptr[b]; - } - - /* we fell off an incomplete part of the tree */ - assert(!cm->filled); - return cm->rest; -} - -#ifdef COMPILE - -/* - - setcolor - set the color of a character in a colormap - ^ static color setcolor(struct colormap *, pchr, pcolor); - */ -static color /* previous color */ -setcolor(cm, c, co) -struct colormap *cm; -pchr c; -pcolor co; -{ - uchr uc = c; - int shift; - int i; - int b; - int bottom; - union tree *t; - union tree *lastt; - color prev; - - assert(cm->magic == CMMAGIC); - if (VISERR(cm->v) || co == COLORLESS) - return COLORLESS; - - t = cm->tree; - for (shift = BYTBITS * (NBYTS - 1); shift > 0; shift -= BYTBITS) { - b = (uc >> shift) & BYTMASK; - lastt = t; - t = t->tptr[b]; - if (t == NULL) { /* fell off an incomplete part */ - bottom = (shift <= BYTBITS) ? 1 : 0; - t = (union tree *)ckalloc((bottom) ? - sizeof(struct colors) : sizeof(struct ptrs)); - if (t == NULL) { - VERR(cm->v, REG_ESPACE); - return COLORLESS; - } - if (bottom) - for (i = BYTTAB-1; i >= 0; i--) - t->tcolor[i] = cm->rest; - else - for (i = BYTTAB-1; i >= 0; i--) - t->tptr[i] = NULL; - lastt->tptr[b] = t; - } - } - assert(shift == 0 && t != NULL); /* we hit bottom; it's there */ - - b = uc & BYTMASK; - prev = t->tcolor[b]; - t->tcolor[b] = (color) co; - return prev; -} - -/* - - maxcolor - report largest color number in use - ^ static color maxcolor(struct colormap *); - */ -static color -maxcolor(cm) -struct colormap *cm; -{ - struct colordesc *cd; - struct colordesc *end; - struct colordesc *lastused; - - if (VISERR(cm->v)) - return COLORLESS; - - lastused = NULL; - end = CDEND(cm); - for (cd = cm->cd; cd < end; cd++) - if (!UNUSEDCOLOR(cd)) - lastused = cd; - assert(lastused != NULL); - return (color) (lastused - cm->cd); -} - -/* - - newcolor - find a new color (must be subject of setcolor at once) - * Beware: may relocate the colordescs. - ^ static color newcolor(struct colormap *); - */ -static color /* COLORLESS for error */ -newcolor(cm) -struct colormap *cm; -{ - struct colordesc *cd; - struct colordesc *end; - struct colordesc *firstnew; - int n; - - if (VISERR(cm->v)) - return COLORLESS; - - end = CDEND(cm); - for (cd = cm->cd; cd < end; cd++) - if (UNUSEDCOLOR(cd)) { - assert(cd->arcs == NULL); - return (color) (cd - cm->cd); - } - - /* oops, must allocate more */ - n = cm->ncds * 2; - if (cm->cd == cm->cds) { - cd = (struct colordesc *)ckalloc(sizeof(struct colordesc) * n); - if (cd != NULL) - memcpy((VOID *)cd, (VOID *)cm->cds, cm->ncds * - sizeof(struct colordesc)); - } else { - cd = (struct colordesc *)ckrealloc((VOID *)cm->cd, - sizeof(struct colordesc) * n); - } - if (cd == NULL) { - VERR(cm->v, REG_ESPACE); - return COLORLESS; - } - cm->cd = cd; - firstnew = CDEND(cm); - cm->ncds = n; - end = CDEND(cm); - for (cd = firstnew; cd < end; cd++) { - cd->nchrs = 0; - cd->sub = NOSUB; - cd->arcs = NULL; - cd->flags = 0; - } - assert(firstnew < CDEND(cm) && UNUSEDCOLOR(firstnew)); - return (color) (firstnew - cm->cd); -} - -/* - - pseudocolor - allocate a false color, to be managed by other means - ^ static color pseudocolor(struct colormap *); - */ -static color -pseudocolor(cm) -struct colormap *cm; -{ - color co; - - co = newcolor(cm); - if (VISERR(cm->v)) - return COLORLESS; - cm->cd[co].nchrs = 1; - cm->cd[co].flags = PSEUDO; - return co; -} - -/* - - subcolor - allocate a new subcolor (if necessary) to this chr - ^ static color subcolor(struct colormap *, pchr c); - */ -static color -subcolor(cm, c) -struct colormap *cm; -pchr c; -{ - color co; /* current color of c */ - color sco; /* new subcolor */ - - co = getcolor(cm, c); - sco = cm->cd[co].sub; - if (sco == NOSUB) { /* must create subcolor */ - if (cm->cd[co].nchrs == 1) /* shortcut */ - return co; - sco = newcolor(cm); - if (sco == COLORLESS) - return COLORLESS; - cm->cd[co].sub = sco; - cm->cd[sco].sub = sco; /* self-referential subcolor ptr */ - } - - if (co == sco) /* repeated character */ - return co; /* no further action needed */ - cm->cd[co].nchrs--; - cm->cd[sco].nchrs++; - setcolor(cm, c, sco); - return sco; -} - -/* - - okcolors - promote subcolors to full colors - ^ static VOID okcolors(struct nfa *, struct colormap *); - */ -static VOID -okcolors(nfa, cm) -struct nfa *nfa; -struct colormap *cm; -{ - struct colordesc *cd; - struct colordesc *end = CDEND(cm); - struct colordesc *scd; - struct arc *a; - color co; - color sco; - - for (cd = cm->cd, co = 0; cd < end; cd++, co++) { - sco = cd->sub; - if (sco == NOSUB) { - /* has no subcolor, no further action */ - } else if (sco == co) { - /* is subcolor, let parent deal with it */ - } else if (cd->nchrs == 0) { - /* parent empty, its arcs change color to subcolor */ - cd->sub = NOSUB; - scd = &cm->cd[sco]; - assert(scd->nchrs > 0); - assert(scd->sub == sco); - scd->sub = NOSUB; - while ((a = cd->arcs) != NULL) { - assert(a->co == co); - /* uncolorchain(cm, a); */ - cd->arcs = a->colorchain; - a->co = sco; - /* colorchain(cm, a); */ - a->colorchain = scd->arcs; - scd->arcs = a; - } - } else { - /* parent's arcs must gain parallel subcolor arcs */ - cd->sub = NOSUB; - scd = &cm->cd[sco]; - assert(scd->nchrs > 0); - assert(scd->sub == sco); - scd->sub = NOSUB; - for (a = cd->arcs; a != NULL; a = a->colorchain) { - assert(a->co == co); - newarc(nfa, a->type, sco, a->from, a->to); - } - } - } -} - -/* - - colorchain - add this arc to the color chain of its color - ^ static VOID colorchain(struct colormap *, struct arc *); - */ -static VOID -colorchain(cm, a) -struct colormap *cm; -struct arc *a; -{ - struct colordesc *cd = &cm->cd[a->co]; - - a->colorchain = cd->arcs; - cd->arcs = a; -} - -/* - - uncolorchain - delete this arc from the color chain of its color - ^ static VOID uncolorchain(struct colormap *, struct arc *); - */ -static VOID -uncolorchain(cm, a) -struct colormap *cm; -struct arc *a; -{ - struct colordesc *cd = &cm->cd[a->co]; - struct arc *aa; - - aa = cd->arcs; - if (aa == a) /* easy case */ - cd->arcs = a->colorchain; - else { - for (; aa != NULL && aa->colorchain != a; aa = aa->colorchain) - continue; - assert(aa != NULL); - aa->colorchain = a->colorchain; - } - a->colorchain = NULL; /* paranoia */ -} - -/* - - singleton - is this character in its own color? - ^ static int singleton(struct colormap *, pchr c); - */ -static int /* predicate */ -singleton(cm, c) -struct colormap *cm; -pchr c; -{ - color co; /* color of c */ - - co = getcolor(cm, c); - if (cm->cd[co].nchrs == 1 && cm->cd[co].sub == NOSUB) - return 1; - return 0; -} - -/* - - rainbow - add arcs of all full colors (but one) between specified states - ^ static VOID rainbow(struct nfa *, struct colormap *, int, pcolor, - ^ struct state *, struct state *); - */ -static VOID -rainbow(nfa, cm, type, exc, from, to) -struct nfa *nfa; -struct colormap *cm; -int type; -pcolor exc; /* COLORLESS if no exceptions */ -struct state *from; -struct state *to; -{ - struct colordesc *cd; - struct colordesc *end = CDEND(cm); - color co; - - for (cd = cm->cd, co = 0; cd < end && !VISERR(nfa->v); cd++, co++) - if (!UNUSEDCOLOR(cd) && cd->sub != co && co != exc && - !(cd->flags&PSEUDO)) - newarc(nfa, type, co, from, to); -} - -/* - - colorcomplement - add arcs of complementary colors - * The calling sequence ought to be reconciled with cloneouts(). - ^ static VOID colorcomplement(struct nfa *, struct colormap *, int, - ^ struct state *, struct state *, struct state *); - */ -static VOID -colorcomplement(nfa, cm, type, of, from, to) -struct nfa *nfa; -struct colormap *cm; -int type; -struct state *of; /* complements of this guy's PLAIN outarcs */ -struct state *from; -struct state *to; -{ - struct colordesc *cd; - struct colordesc *end = CDEND(cm); - color co; - - assert(of != from); - for (cd = cm->cd, co = 0; cd < end && !VISERR(nfa->v); cd++, co++) - if (!UNUSEDCOLOR(cd) && !(cd->flags&PSEUDO)) - if (findarc(of, PLAIN, co) == NULL) - newarc(nfa, type, co, from, to); -} - -#endif /* ifdef COMPILE */ diff --git a/generic/compile.c b/generic/compile.c deleted file mode 100644 index ee12d04..0000000 --- a/generic/compile.c +++ /dev/null @@ -1,2089 +0,0 @@ -/* - * compile.c -- - * - * Regexp package file: re_*comp and friends - compile REs - * - * Copyright (c) 1998 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., and Sun Microsystems Inc., none of - * whom are responsible for the results. The author thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Copyright (c) 1998 by Sun Microsystems, Inc. - * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: compile.c,v 1.1.2.2 1998/10/03 01:56:40 stanton Exp $ - */ - -#include "tclInt.h" -#include -#include "tclPort.h" -#include "tclRegexp.h" -#include "chr.h" -#include "guts.h" - -/* - * forward declarations, up here so forward datatypes etc. are defined early - */ -/* =====^!^===== begin forwards =====^!^===== */ -/* automatically gathered by fwd; do not hand-edit */ -/* === compile.c === */ -int compile _ANSI_ARGS_((regex_t *, CONST chr *, size_t, int)); -static VOID moresubs _ANSI_ARGS_((struct vars *, int)); -static int freev _ANSI_ARGS_((struct vars *, int)); -static struct rtree *parse _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, int)); -static int scannum _ANSI_ARGS_((struct vars *)); -static VOID repeat _ANSI_ARGS_((struct vars *, struct state *, struct state *, int, int)); -static VOID bracket _ANSI_ARGS_((struct vars *, struct state *, struct state *)); -static VOID cbracket _ANSI_ARGS_((struct vars *, struct state *, struct state *)); -static VOID brackpart _ANSI_ARGS_((struct vars *, struct state *, struct state *)); -static chr *scanplain _ANSI_ARGS_((struct vars *)); -static VOID leaders _ANSI_ARGS_((struct vars *, struct cvec *)); -static VOID onechr _ANSI_ARGS_((struct vars *, pchr, struct state *, struct state *)); -static VOID dovec _ANSI_ARGS_((struct vars *, struct cvec *, struct state *, struct state *)); -static color nlcolor _ANSI_ARGS_((struct vars *)); -static VOID wordchrs _ANSI_ARGS_((struct vars *)); -static struct subre subre _ANSI_ARGS_((struct state *, struct state *, int, int, struct rtree *)); -static struct rtree *newrt _ANSI_ARGS_((struct vars *)); -static VOID freert _ANSI_ARGS_((struct rtree *)); -static VOID freertnode _ANSI_ARGS_((struct rtree *)); -static VOID optrt _ANSI_ARGS_((struct vars *, struct rtree *)); -static int numrt _ANSI_ARGS_((struct rtree *, int)); -static VOID nfatree _ANSI_ARGS_((struct vars *, struct rtree *)); -static VOID nfanode _ANSI_ARGS_((struct vars *, struct subre *)); -static int newlacon _ANSI_ARGS_((struct vars *, struct state *, struct state *, int)); -static VOID freelacons _ANSI_ARGS_((struct subre *, int)); -static VOID rfree _ANSI_ARGS_((regex_t *)); -static VOID dump _ANSI_ARGS_((regex_t *, FILE *)); -static VOID dumprt _ANSI_ARGS_((struct rtree *, FILE *, int)); -static VOID rtdump _ANSI_ARGS_((struct rtree *, FILE *, int, int)); -/* === lex.c === */ -static VOID lexstart _ANSI_ARGS_((struct vars *)); -static VOID prefixes _ANSI_ARGS_((struct vars *)); -static VOID lexnest _ANSI_ARGS_((struct vars *, chr *)); -static VOID lexword _ANSI_ARGS_((struct vars *)); -static int next _ANSI_ARGS_((struct vars *)); -static int lexescape _ANSI_ARGS_((struct vars *)); -static chr lexdigits _ANSI_ARGS_((struct vars *, int, int, int)); -static int brenext _ANSI_ARGS_((struct vars *, pchr)); -static VOID skip _ANSI_ARGS_((struct vars *)); -static chr newline _ANSI_ARGS_((VOID)); -static chr *ch _ANSI_ARGS_((VOID)); -static chr chrnamed _ANSI_ARGS_((struct vars *, chr *, pchr)); -/* === locale.c === */ -#define MAXCE 2 /* longest CE code is prepared to handle */ -typedef wint_t celt; /* type holding distinct codes for all chrs, all CEs */ -static int nces _ANSI_ARGS_((struct vars *)); -static int nleaders _ANSI_ARGS_((struct vars *)); -static struct cvec *allces _ANSI_ARGS_((struct vars *, struct cvec *)); -static celt element _ANSI_ARGS_((struct vars *, chr *, chr *)); -static struct cvec *range _ANSI_ARGS_((struct vars *, celt, celt, int)); -static int before _ANSI_ARGS_((celt, celt)); -static struct cvec *eclass _ANSI_ARGS_((struct vars *, celt, int)); -static struct cvec *cclass _ANSI_ARGS_((struct vars *, chr *, chr *, int)); -static struct cvec *allcases _ANSI_ARGS_((struct vars *, pchr)); -static int sncmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t)); -static struct cvec *newcvec _ANSI_ARGS_((int, int)); -static struct cvec *clearcvec _ANSI_ARGS_((struct cvec *)); -static VOID addchr _ANSI_ARGS_((struct cvec *, pchr)); -static VOID addce _ANSI_ARGS_((struct cvec *, chr *)); -static int haschr _ANSI_ARGS_((struct cvec *, pchr)); -static struct cvec *getcvec _ANSI_ARGS_((struct vars *, int, int)); -static VOID freecvec _ANSI_ARGS_((struct cvec *)); -/* === color.c === */ -union tree; -static struct colormap *newcm _ANSI_ARGS_((struct vars *)); -static VOID freecm _ANSI_ARGS_((struct colormap *)); -static VOID cmtreefree _ANSI_ARGS_((struct colormap *, union tree *, int)); -static VOID fillcm _ANSI_ARGS_((struct colormap *)); -static VOID cmtreefill _ANSI_ARGS_((struct colormap *, union tree *, int)); -static color getcolor _ANSI_ARGS_((struct colormap *, pchr)); -static color setcolor _ANSI_ARGS_((struct colormap *, pchr, pcolor)); -static color maxcolor _ANSI_ARGS_((struct colormap *)); -static color newcolor _ANSI_ARGS_((struct colormap *)); -static color pseudocolor _ANSI_ARGS_((struct colormap *)); -static color subcolor _ANSI_ARGS_((struct colormap *, pchr c)); -static VOID okcolors _ANSI_ARGS_((struct nfa *, struct colormap *)); -static VOID colorchain _ANSI_ARGS_((struct colormap *, struct arc *)); -static VOID uncolorchain _ANSI_ARGS_((struct colormap *, struct arc *)); -static int singleton _ANSI_ARGS_((struct colormap *, pchr c)); -static VOID rainbow _ANSI_ARGS_((struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *)); -static VOID colorcomplement _ANSI_ARGS_((struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *)); -/* === nfa.c === */ -static struct nfa *newnfa _ANSI_ARGS_((struct vars *, struct nfa *)); -static VOID freenfa _ANSI_ARGS_((struct nfa *)); -static struct state *newfstate _ANSI_ARGS_((struct nfa *, int flag)); -static struct state *newstate _ANSI_ARGS_((struct nfa *)); -static VOID dropstate _ANSI_ARGS_((struct nfa *, struct state *)); -static VOID freestate _ANSI_ARGS_((struct nfa *, struct state *)); -static VOID destroystate _ANSI_ARGS_((struct nfa *, struct state *)); -static VOID newarc _ANSI_ARGS_((struct nfa *, int, pcolor, struct state *, struct state *)); -static struct arc *allocarc _ANSI_ARGS_((struct nfa *, struct state *)); -static VOID freearc _ANSI_ARGS_((struct nfa *, struct arc *)); -static struct arc *findarc _ANSI_ARGS_((struct state *, int, pcolor)); -static VOID cparc _ANSI_ARGS_((struct nfa *, struct arc *, struct state *, struct state *)); -static VOID moveins _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); -static VOID copyins _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); -static VOID moveouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); -static VOID copyouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); -static VOID cloneouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *, int)); -static VOID delsub _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); -static VOID deltraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); -static VOID dupnfa _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *, struct state *)); -static VOID duptraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); -static VOID cleartraverse _ANSI_ARGS_((struct nfa *, struct state *)); -static VOID specialcolors _ANSI_ARGS_((struct nfa *)); -static VOID optimize _ANSI_ARGS_((struct nfa *)); -static VOID pullback _ANSI_ARGS_((struct nfa *)); -static int pull _ANSI_ARGS_((struct nfa *, struct arc *)); -static VOID pushfwd _ANSI_ARGS_((struct nfa *)); -static int push _ANSI_ARGS_((struct nfa *, struct arc *)); -#define INCOMPATIBLE 1 /* destroys arc */ -#define SATISFIED 2 /* constraint satisfied */ -#define COMPATIBLE 3 /* compatible but not satisfied yet */ -static int combine _ANSI_ARGS_((struct arc *, struct arc *)); -static VOID fixempties _ANSI_ARGS_((struct nfa *)); -static int unempty _ANSI_ARGS_((struct nfa *, struct arc *)); -static VOID cleanup _ANSI_ARGS_((struct nfa *)); -static VOID markreachable _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *)); -static VOID markcanreach _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *)); -static int analyze _ANSI_ARGS_((struct vars *, struct nfa *)); -static int isempty _ANSI_ARGS_((struct state *, struct state *)); -static VOID compact _ANSI_ARGS_((struct vars *, struct nfa *, struct cnfa *)); -static VOID carcsort _ANSI_ARGS_((struct carc *, struct carc *)); -static VOID freecnfa _ANSI_ARGS_((struct cnfa *, int)); -static VOID dumpnfa _ANSI_ARGS_((struct nfa *, FILE *)); -static VOID dumpcnfa _ANSI_ARGS_((struct cnfa *, FILE *)); -/* automatically gathered by fwd; do not hand-edit */ -/* =====^!^===== end forwards =====^!^===== */ - - - -/* internal variables, bundled for easy passing around */ -struct vars { - regex_t *re; - chr *now; /* scan pointer into string */ - chr *stop; /* end of string */ - chr *savenow; /* saved now and stop for "subroutine call" */ - chr *savestop; - int err; /* error code (0 if none) */ - int cflags; /* copy of compile flags */ - int lasttype; /* type of previous token */ - int nexttype; /* type of next token */ - chr nextvalue; /* value (if any) of next token */ - int lexcon; /* lexical context type (see lex.c) */ - int nsubexp; /* subexpression count */ - struct subre **subs; /* subRE pointer vector */ - size_t nsubs; /* length of vector */ - struct subre *sub10[10]; /* initial vector, enough for most */ - struct nfa *nfa; /* the NFA */ - struct colormap *cm; /* character color map */ - color nlcolor; /* color of newline */ - struct state *wordchrs; /* state in nfa holding word-char outarcs */ - struct rtree *tree; /* subexpression tree */ - int ntree; /* number of tree nodes */ - struct cvec *cv; /* utility cvec */ - struct cvec *ces; /* collating-element information */ -# define ISCELEADER(v,c) (v->ces != NULL && haschr(v->ces, (c))) - struct state *cepbegin; /* state in nfa, start of CE prototypes */ - struct state *cepend; /* state in nfa, end of CE prototypes */ - struct subre *lacons; /* lookahead-constraint vector */ - int nlacons; /* size of lacons */ - int usedshorter; /* used short-preferring quantifiers */ -}; - -/* parsing macros; most know that `v' is the struct vars pointer */ -#define NEXT() (next(v)) /* advance by one token */ -#define SEE(t) (v->nexttype == (t)) /* is next token this? */ -#define EAT(t) (SEE(t) && next(v)) /* if next is this, swallow it */ -#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ -#define ISERR() VISERR(v) -#define VERR(vv,e) ((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err :\ - ((vv)->err = (e))) -#define ERR(e) VERR(v, e) /* record an error */ -#define NOERR() {if (ISERR()) return;} /* if error seen, return */ -#define NOERRN() {if (ISERR()) goto end;} /* NOERR with retval */ -#define INSIST(c, e) ((c) ? 0 : ERR(e)) /* if condition false, error */ -#define NOTE(b) (v->re->re_info |= (b)) /* note visible condition */ -#define EMPTYARC(x, y) newarc(v->nfa, EMPTY, 0, x, y) - -/* token type codes, some also used as NFA arc types */ -#define EMPTY 'n' /* no token present */ -#define EOS 'e' /* end of string */ -#define PLAIN 'p' /* ordinary character */ -#define DIGIT 'd' /* digit (in bound) */ -#define BACKREF 'b' /* back reference */ -#define COLLEL 'I' /* start of [. */ -#define ECLASS 'E' /* start of [= */ -#define CCLASS 'C' /* start of [: */ -#define END 'X' /* end of [. [= [: */ -#define RANGE 'R' /* - within [] which might be range delim. */ -#define LACON 'L' /* lookahead constraint subRE */ -#define AHEAD 'a' /* color-lookahead arc */ -#define BEHIND 'r' /* color-lookbehind arc */ -#define WBDRY 'w' /* word boundary constraint */ -#define NWBDRY 'W' /* non-word-boundary constraint */ -#define SBEGIN 'A' /* beginning of string (even if not BOL) */ -#define SEND 'Z' /* end of string (even if not EOL) */ -#define PREFER 'P' /* length preference */ - -/* is an arc colored, and hence on a color chain? */ -#define COLORED(a) ((a)->type == PLAIN || (a)->type == AHEAD || \ - (a)->type == BEHIND) - - - -/* static function list */ -static struct fns functions = { - rfree, /* regfree insides */ -}; - - - -/* - - regfree - free an RE (actually, just overall coordination) - */ -VOID -regfree(re) -regex_t *re; -{ - if (re == NULL || re->re_magic != REMAGIC) - return; /* no way we can report it, really */ - - /* free it, calling internal routine that knows details */ - (*((struct fns *)re->re_fns)->free)(re); - - re->re_magic = 0; -} - -/* - - compile - compile regular expression - ^ int compile(regex_t *, CONST chr *, size_t, int); - */ -int -compile(re, string, len, flags) -regex_t *re; -CONST chr *string; -size_t len; -int flags; -{ - struct vars var; - struct vars *v = &var; - struct guts *g; - int i; -# define CNOERR() { if (ISERR()) return freev(v, v->err); } - - if (re == NULL) { - return REG_INVARG; - } - - /* - * Init re to known state, because we will try to free it if - * compilation fails. - */ - - re->re_magic = 0; - - /* sanity checks */ - if (string == NULL || - ((flags®_EXTENDED) && (flags®_QUOTE)) || - (!(flags®_EXTENDED) && (flags®_ADVF))) { - return REG_INVARG; - } - - /* initial setup (after which freev() is callable) */ - v->re = re; - v->now = (chr *)string; - v->stop = v->now + len; - v->savenow = v->savestop = NULL; - v->err = 0; - v->cflags = flags; - v->nsubexp = 0; - v->subs = v->sub10; - v->nsubs = 10; - for (i = 0; (size_t) i < v->nsubs; i++) - v->subs[i] = NULL; - v->nfa = NULL; - v->cm = NULL; - v->nlcolor = COLORLESS; - v->wordchrs = NULL; - v->tree = NULL; - v->cv = NULL; - v->ces = NULL; - v->lacons = NULL; - v->nlacons = 0; - re->re_info = 0; /* bits get set during parse */ - re->re_guts = NULL; - re->re_fns = NULL; - - /* more complex setup, malloced things */ - v->cm = newcm(v); /* colormap must precede nfa... */ - CNOERR(); - v->nfa = newnfa(v, (struct nfa *)NULL); /* ...newnfa() uses it */ - CNOERR(); - re->re_guts = ckalloc(sizeof(struct guts)); - if (re->re_guts == NULL) - return freev(v, REG_ESPACE); - g = (struct guts *)re->re_guts; - ZAPCNFA(g->cnfa); - g->tree = NULL; - g->cm = NULL; - g->lacons = NULL; - g->nlacons = 0; - v->cv = newcvec(100, 10); - if (v->cv == NULL) - return freev(v, REG_ESPACE); - i = nces(v); - if (i > 0) { - v->ces = newcvec(nleaders(v), i); - CNOERR(); - v->ces = allces(v, v->ces); - leaders(v, v->ces); - } - CNOERR(); - - /* parsing */ - lexstart(v); /* also handles prefixes */ - if (SEE(EOS)) /* empty RE is illegal */ - return freev(v, REG_EMPTY); - v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final, NONEYET); - assert(SEE(EOS)); /* even if error; ISERR() => SEE(EOS) */ - CNOERR(); - - /* finish setup of nfa and its subre tree */ - specialcolors(v->nfa); - CNOERR(); - if (flags®_PROGRESS) { - dumpnfa(v->nfa, stdout); - dumprt(v->tree, stdout, 1); - } - v->usedshorter = 0; - optrt(v, v->tree); - if (v->tree != NULL) - v->ntree = numrt(v->tree, 1); - else - v->ntree = 0; - if (flags®_PROGRESS) { - printf("-->\n"); - dumprt(v->tree, stdout, 1); - } - - /* build compacted NFAs for tree, lacons, main nfa */ - nfatree(v, v->tree); - if (flags®_PROGRESS) { - printf("---->\n"); - dumprt(v->tree, stdout, 1); - } - CNOERR(); - assert(v->nlacons == 0 || v->lacons != NULL); - for (i = 1; i < v->nlacons; i++) - nfanode(v, &v->lacons[i]); - CNOERR(); - optimize(v->nfa); /* removes unreachable states */ - CNOERR(); - if (v->nfa->post->nins <= 0) - return freev(v, REG_IMPOSS); /* end unreachable! */ - assert(v->nfa->pre->nouts > 0); - compact(v, v->nfa, &g->cnfa); - CNOERR(); - freenfa(v->nfa); - v->nfa = NULL; - - /* fill color map */ - fillcm(v->cm); - CNOERR(); - - /* looks okay, package it up */ - re->re_magic = REMAGIC; - re->re_nsub = v->nsubexp; - /* re_info is already set */ - re->re_csize = sizeof(chr); - re->re_guts = (VOID *)g; - re->re_fns = (VOID *)&functions; - v->re = NULL; - g->magic = GUTSMAGIC; - g->cflags = v->cflags; - g->info = re->re_info; - g->nsub = re->re_nsub; - g->cm = v->cm; - v->cm = NULL; - g->tree = v->tree; - v->tree = NULL; - g->ntree = v->ntree; - g->compare = (v->cflags®_ICASE) ? sncmp : wcsncmp; - g->lacons = v->lacons; - v->lacons = NULL; - g->nlacons = v->nlacons; - g->usedshorter = v->usedshorter; - - if (flags®_DUMP) - dump(re, stdout); - - assert(v->err == 0); - return freev(v, 0); -} - -/* - - moresubs - enlarge subRE vector - ^ static VOID moresubs(struct vars *, int); - */ -static VOID -moresubs(v, wanted) -struct vars *v; -int wanted; /* want enough room for this one */ -{ - struct subre **p; - size_t n; - - assert((size_t)wanted >= v->nsubs); - n = (size_t)wanted * 3 / 2 + 1; - if (v->subs == v->sub10) { - p = (struct subre **)ckalloc(n * sizeof(struct subre *)); - if (p != NULL) - memcpy((VOID *)p, (VOID *)v->subs, - v->nsubs * sizeof(struct subre *)); - } else - p = (struct subre **) ckrealloc((VOID *)v->subs, - n * sizeof(struct subre *)); - if (p == NULL) { - ERR(REG_ESPACE); - return; - } - v->subs = p; - for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++) - *p = NULL; - assert(v->nsubs == n); - assert((size_t)wanted < v->nsubs); -} - -/* - - freev - free vars struct's substructures where necessary - * Does optional error-number setting, and returns error code, to make - * error code terser. - ^ static int freev(struct vars *, int); - */ -static int -freev(v, err) -struct vars *v; -int err; -{ - if (v->re != NULL) - rfree(v->re); - if (v->subs != v->sub10) - ckfree((char *)v->subs); - if (v->nfa != NULL) - freenfa(v->nfa); - if (v->cm != NULL) - freecm(v->cm); - if (v->tree != NULL) - freert(v->tree); - if (v->cv != NULL) - freecvec(v->cv); - if (v->ces != NULL) - freecvec(v->ces); - if (v->lacons != NULL) - freelacons(v->lacons, v->nlacons); - ERR(err); - - return v->err; -} - -/* - - parse - parse an RE - * Arguably this is too big and too complex and ought to be divided up. - * However, the code is somewhat intertwined... - ^ static struct rtree *parse(struct vars *, int, int, struct state *, - ^ struct state *, int); - */ -static struct rtree * /* NULL if no interesting substructure */ -parse(v, stopper, type, init, final, pprefer) -struct vars *v; -int stopper; /* EOS or ')' */ -int type; /* LACON (lookahead subRE) or PLAIN */ -struct state *init; /* initial state */ -struct state *final; /* final state */ -int pprefer; /* parent's short/long preference */ -{ - struct state *left; /* scaffolding for branch */ - struct state *right; - struct state *lp; /* scaffolding for current construct */ - struct state *rp; - struct state *s; /* temporaries for new states */ - struct state *s2; -# define ARCV(t, val) newarc(v->nfa, t, val, lp, rp) - int m, n; - int emptybranch; /* is there anything in this branch yet? */ - color co; - struct rtree *branches; /* top level */ - struct rtree *branch; /* current branch */ - struct subre *now; /* current subtree's top */ - struct subre sub; /* communication variable */ - struct rtree *rt1; /* temporaries */ - struct rtree *rt2; - struct subre *t; /* work pointer, top of interesting subtree */ - int firstbranch; /* is this the first branch? */ - int capture; /* any capturing parens within this? */ - int constraint; /* is the current atom a constraint? */ - - assert(stopper == ')' || stopper == EOS); - - branch = NULL; /* lint. */ - rt1 = NULL; /* lint. */ - - capture = 0; - branches = newrt(v); - firstbranch = 1; - NOERRN(); - do { - /* a branch */ - emptybranch = 1; /* tentatively */ - left = newstate(v->nfa); - right = newstate(v->nfa); - if (!firstbranch) - rt1 = newrt(v); -#if 1 - if (ISERR()) { - freert(rt1); - freert(branches); /* mem leak (CCS). */ - return NULL; - } -#else - NOERRN(); -#endif - EMPTYARC(init, left); - EMPTYARC(right, final); - lp = left; - rp = right; - if (firstbranch) - branch = branches; - else { - branch->next = rt1; - branch = rt1; - } - branch->op = '|'; - now = &branch->left; - *now = subre(left, right, NONEYET, 0, (struct rtree *)NULL); - firstbranch = 0; - NOERRN(); - - while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) { - /* initial bookkeeping */ - sub.begin = NULL; /* no substructure seen yet */ - sub.subno = 0; - sub.prefer = NONEYET; - constraint = 0; - if (emptybranch) /* first of the branch */ - emptybranch = 0; - else { /* implicit concat operator */ - lp = newstate(v->nfa); - NOERRN(); - moveins(v->nfa, rp, lp); - } - assert(lp->nouts == 0); /* must string new code */ - assert(rp->nins == 0); /* between lp and rp */ - - /* an atom... */ - switch (v->nexttype) { - case '(': /* value flags as capturing or non */ - m = (type == LACON) ? 0 : v->nextvalue; - if (m) { - v->nsubexp++; - sub.subno = v->nsubexp; - if ((size_t)sub.subno >= v->nsubs) - moresubs(v, sub.subno); - assert((size_t) sub.subno < v->nsubs); - } else - sub.subno = 0; - NEXT(); - sub.begin = lp; /* NB, substructure seen */ - sub.end = rp; - /* use now->tree as temporary, so */ - /* things get freed on error returns */ - assert(now->tree == NULL); - now->tree = parse(v, ')', PLAIN, lp, rp, - now->prefer); - assert(SEE(')') || ISERR()); - NEXT(); - NOERRN(); - if (!m && now->tree == NULL) { - /* actually no relevant substructure */ - sub.begin = NULL; - } - if (now->tree != NULL) { - if (now->tree->op == '|') - sub.prefer = LONGER; - else - sub.prefer = - now->tree->left.prefer; - } - /* must postpone other processing until we */ - /* know about any {0,0} quantifier */ - break; - case BACKREF: /* the Feature From The Black Lagoon */ - INSIST(type != LACON, REG_ESUBREG); - INSIST(v->nextvalue < v->nsubs, REG_ESUBREG); - INSIST(v->subs[v->nextvalue] != NULL, - REG_ESUBREG); - NOERRN(); - assert(v->nextvalue > 0); - sub.subno = -v->nextvalue; - sub.begin = lp; /* NB, substructure seen */ - sub.end = rp; - EMPTYARC(lp, rp); /* temporarily */ - assert(now->tree == NULL); - NEXT(); - break; - case LACON: /* lookahead constraint */ - m = v->nextvalue; /* is positive? */ - NEXT(); - s = newstate(v->nfa); - s2 = newstate(v->nfa); - NOERRN(); - rt1 = parse(v, ')', LACON, s, s2, NONEYET); - assert(SEE(')') || ISERR()); - NEXT(); - m = newlacon(v, s, s2, m); - freert(rt1); - NOERRN(); - ARCV(LACON, m); - constraint = 1; - break; - case PREFER: /* length preference */ - sub.prefer = (v->nextvalue) ? LONGER : SHORTER; - NEXT(); - sub.begin = lp; /* NB, substructure seen */ - sub.end = rp; - /* use now->tree as temporary, so */ - /* things get freed on error returns */ - assert(now->tree == NULL); - now->tree = parse(v, ')', PLAIN, lp, rp, - sub.prefer); - assert(SEE(')') || ISERR()); - NEXT(); - NOERRN(); - if (now->prefer == NONEYET) - now->prefer = sub.prefer; - if (sub.prefer == now->prefer && - now->tree == NULL) { - /* actually no relevant substructure */ - sub.begin = NULL; - } - break; - case '[': - if (v->nextvalue == 1) - bracket(v, lp, rp); - else - cbracket(v, lp, rp); - assert(SEE(']') || ISERR()); - NEXT(); - break; - case '.': - co = (color) ((v->cflags®_NLSTOP) - ? nlcolor(v) - : COLORLESS); - rainbow(v->nfa, v->cm, PLAIN, co, lp, rp); - NEXT(); - break; - case '^': - ARCV('^', 1); - if (v->cflags®_NLANCH) - ARCV(BEHIND, nlcolor(v)); - NEXT(); - constraint = 1; - break; - case '$': - ARCV('$', 1); - if (v->cflags®_NLANCH) - ARCV(AHEAD, nlcolor(v)); - NEXT(); - constraint = 1; - break; - case SBEGIN: - ARCV('^', 1); /* BOL */ - ARCV('^', 0); /* or BOS */ - NEXT(); - constraint = 1; - break; - case SEND: - ARCV('$', 1); /* EOL */ - ARCV('$', 0); /* or EOS */ - NEXT(); - constraint = 1; - break; - case '<': - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERRN(); - /* needs BOL, BOS, or nonword to left... */ - newarc(v->nfa, '^', 1, lp, s); - newarc(v->nfa, '^', 0, lp, s); - colorcomplement(v->nfa, v->cm, BEHIND, - v->wordchrs, lp, s); - /* ... and word to right */ - cloneouts(v->nfa, v->wordchrs, s, rp, AHEAD); - /* (no need for special attention to \n) */ - constraint = 1; - break; - case '>': - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERRN(); - /* needs word to left... */ - cloneouts(v->nfa, v->wordchrs, lp, s, BEHIND); - /* ... and EOL, EOS, or nonword to right */ - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, - v->wordchrs, s, rp); - /* (no need for special attention to \n) */ - constraint = 1; - break; - case WBDRY: - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERRN(); - /* needs BOL, BOS, or nonword to left... */ - newarc(v->nfa, '^', 1, lp, s); - newarc(v->nfa, '^', 0, lp, s); - colorcomplement(v->nfa, v->cm, BEHIND, - v->wordchrs, lp, s); - /* ... and word to right... */ - cloneouts(v->nfa, v->wordchrs, s, rp, AHEAD); - /* ...or... */ - s = newstate(v->nfa); - NOERRN(); - /* ...needs word to left... */ - cloneouts(v->nfa, v->wordchrs, lp, s, BEHIND); - /* ... and EOL, EOS, or nonword to right */ - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, - v->wordchrs, s, rp); - /* (no need for special attention to \n) */ - constraint = 1; - break; - case NWBDRY: - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERRN(); - /* needs word to both left and right... */ - cloneouts(v->nfa, v->wordchrs, lp, s, BEHIND); - cloneouts(v->nfa, v->wordchrs, s, rp, AHEAD); - /* ...or... */ - s = newstate(v->nfa); - NOERRN(); - /* ...BOL, BOS, or nonword to left... */ - newarc(v->nfa, '^', 1, lp, s); - newarc(v->nfa, '^', 0, lp, s); - colorcomplement(v->nfa, v->cm, BEHIND, - v->wordchrs, lp, s); - /* ... and EOL, EOS, or nonword to right */ - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, - v->wordchrs, s, rp); - /* (no need for special attention to \n) */ - constraint = 1; - break; - case ')': /* unbalanced paren */ - if (!(v->cflags®_EXTENDED) || - (v->cflags®_ADVF)) { - ERR(REG_EPAREN); - goto end; - } - NOTE(REG_UPBOTCH); - /* fallthrough into case PLAIN */ - case PLAIN: - onechr(v, v->nextvalue, lp, rp); - okcolors(v->nfa, v->cm); - NOERRN(); - NEXT(); - break; - case '*': - case '+': - case '?': - case '{': - ERR(REG_BADRPT); - goto end; - default: - ERR(REG_ASSERT); - goto end; - } - - /* ...possibly followed by a quantifier */ - switch (v->nexttype) { - case '*': - m = 0; - n = INFINITY; - sub.prefer = (v->nextvalue) ? LONGER : SHORTER; - NEXT(); - break; - case '+': - m = 1; - n = INFINITY; - sub.prefer = (v->nextvalue) ? LONGER : SHORTER; - NEXT(); - break; - case '?': - m = 0; - n = 1; - sub.prefer = (v->nextvalue) ? LONGER : SHORTER; - NEXT(); - break; - case '{': - NEXT(); - m = scannum(v); - if (EAT(',')) { - if (SEE(DIGIT)) - n = scannum(v); - else - n = INFINITY; - if (m > n) { - ERR(REG_BADBR); - goto end; - } - } else - n = m; - if (!SEE('}')) { /* gets errors too */ - ERR(REG_BADBR); - goto end; - } - if (m != n) - sub.prefer = (v->nextvalue) ? LONGER : - SHORTER; - NEXT(); - break; - default: /* no quantifier */ - m = n = 1; - constraint = 0; - break; - } - - /* constraints may not be quantified */ - if (constraint) { - ERR(REG_BADRPT); - goto end; - } - - /* annoying special case: {0,0} cancels everything */ - if (m == 0 && n == 0 && sub.begin != NULL) { - freert(now->tree); - now->tree = NULL; - sub.begin = NULL; /* no substructure */ - sub.prefer = NONEYET; - /* the repeat() below will do the rest */ - } - - /* if no substructure, aVOID hard part */ - if (now->prefer == NONEYET) - now->prefer = sub.prefer; - if (sub.begin == NULL && (sub.prefer == NONEYET || - sub.prefer == now->prefer)) { - assert(sub.subno >= 0 || (m == 0 && n == 0)); - if (!(m == 1 && n == 1)) - repeat(v, lp, rp, m, n); - continue; /* NOTE CONTINUE */ - } - - /* hard part: something messy seen */ - /* break subRE into pre, x{...}, post-to-be */ - capture = 1; /* upper levels will care */ - rt1 = newrt(v); - rt2 = newrt(v); - s = newstate(v->nfa); /* between x and post-to-be */ - NOERRN(); - moveins(v->nfa, rp, s); - EMPTYARC(s, rp); - rt1->op = ','; - rt1->left = subre(now->begin, lp, now->prefer, 0, - (struct rtree *)NULL); - assert(now->end == rp); - rt1->right = subre(lp, rp, sub.prefer, 0, rt2); - rt2->op = ','; - rt2->left = subre(lp, s, sub.prefer, 0, now->tree); - rt2->right = subre(s, rp, NONEYET, 0, - (struct rtree *)NULL); - now->tree = rt1; - now = &rt2->right; /* future elaborations here */ - t = &rt2->left; /* current activity here */ - - /* if it's a backref, time to replicate the subNFA */ - if (sub.subno < 0) { - assert(lp->nouts == 1); /* just the EMPTY */ - delsub(v->nfa, lp, s); - assert(v->subs[-sub.subno] != NULL); - dupnfa(v->nfa, v->subs[-sub.subno]->begin, - v->subs[-sub.subno]->end, lp, s); - NOERRN(); - } - - /* if no/vacuous quantifier and not backref, done */ - if (m == 1 && n == 1 && sub.subno >= 0) { - t->subno = sub.subno; - if (sub.subno > 0) - v->subs[sub.subno] = t; - continue; /* NOTE CONTINUE */ - } - - /* really sticky part, quantified capturer/backref */ - /* first, turn x{0,...} into x{1,...}| */ - if (m == 0) { - s = newstate(v->nfa); - s2 = newstate(v->nfa); - rt1 = newrt(v); - rt2 = newrt(v); - NOERRN(); - moveouts(v->nfa, t->begin, s); - EMPTYARC(t->begin, s); - EMPTYARC(t->begin, s2); - EMPTYARC(s2, t->end); - rt1->op = rt2->op = '|'; - rt1->left = subre(s, t->end, sub.prefer, 0, - t->tree); - rt1->next = rt2; - rt2->left = subre(s2, t->end, sub.prefer, 0, - (struct rtree *)NULL); - t->tree = rt1; - t = &rt1->left; - m = 1; - } - - /* second, x{1,1} is just x */ - if (m == 1 && n == 1 && sub.subno >= 0) { - t->subno = sub.subno; - if (sub.subno > 0) - v->subs[sub.subno] = t; - continue; /* NOTE CONTINUE */ - } - - /* backrefs get special treatment */ - if (sub.subno < 0) { - repeat(v, t->begin, t->end, m, n); - rt1 = newrt(v); - NOERRN(); - assert(t->tree == NULL); - t->tree = rt1; - rt1->op = 'b'; - rt1->left.subno = sub.subno; - rt1->left.min = (short) m; - rt1->left.max = (short) n; - rt1->left.prefer = sub.prefer; - continue; /* NOTE CONTINUE */ - } - - /* turn x{m,n} into x{m-1,n-1}x, with capturing */ - /* parens in only second x */ - s = newstate(v->nfa); - NOERRN(); - moveouts(v->nfa, t->begin, s); - dupnfa(v->nfa, s, t->end, t->begin, s); - assert(m >= 1 && m != INFINITY && n >= 1); - repeat(v, t->begin, s, m-1, (n == INFINITY) ? n : n-1); - rt1 = newrt(v); - NOERRN(); - rt1->op = ','; - rt1->left = subre(t->begin, s, sub.prefer, 0, - (struct rtree *)NULL); - /* sub.prefer not really right, but doesn't matter */ - rt1->right = subre(s, t->end, sub.prefer, sub.subno, - t->tree); - if (sub.subno > 0) - v->subs[sub.subno] = &rt1->right; - t->tree = rt1; - } - if (emptybranch) { - NOTE(REG_UUNSPEC); - EMPTYARC(lp, rp); - } - } while (EAT('|')); - assert(SEE(stopper) || SEE(EOS)); - - if (!SEE(stopper)) { - assert(stopper == ')' && SEE(EOS)); - ERR(REG_EPAREN); - } - - /* higher levels care about our preference in certain situations */ - if (branch != branches) { /* >1 branch */ - if (pprefer != LONGER) - capture = 1; - } else if (branches->left.prefer != pprefer) - capture = 1; - - /* optimize out vacuous alternation */ - if (branch == branches) { - assert(branch->next == NULL && branch->right.begin == NULL); - assert(branch->left.subno == 0); - if (capture && branch->left.tree == NULL) - branch->op = ','; - else { - branches = branch->left.tree; /* might be NULL */ - freertnode(branch); - } - } - - if (capture) /* actually a catchall flag */ - return branches; - end: /* mem leak (CCS) */ - freert(branches); - return NULL; -} - -/* - - scannum - scan a number - ^ static int scannum(struct vars *); - */ -static int /* value, <= DUPMAX */ -scannum(v) -struct vars *v; -{ - int n = 0; - - while (SEE(DIGIT) && n < DUPMAX) { - n = n*10 + v->nextvalue; - NEXT(); - } - if (SEE(DIGIT) || n > DUPMAX) { - ERR(REG_BADBR); - return 0; - } - return n; -} - -/* - - repeat - replicate subNFA for quantifiers - * The duplication sequences used here are chosen carefully so that any - * pointers starting out pointing into the subexpression end up pointing into - * the last occurrence. (Note that it may not be strung between the same - * left and right end states, however!) This used to be important for the - * subRE tree, although the important bits are now handled by the in-line - * code in parse(), and when this is called, it doesn't matter any more. - ^ static VOID repeat(struct vars *, struct state *, struct state *, int, int); - */ -static VOID -repeat(v, lp, rp, m, n) -struct vars *v; -struct state *lp; -struct state *rp; -int m; -int n; -{ -# define SOME 2 -# define INF 3 -# define PAIR(x, y) ((x)*4 + (y)) -# define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) ) - CONST int rm = REDUCE(m); - CONST int rn = REDUCE(n); - struct state *s; - struct state *s2; - - switch (PAIR(rm, rn)) { - case PAIR(0, 0): /* empty string */ - delsub(v->nfa, lp, rp); - EMPTYARC(lp, rp); - break; - case PAIR(0, 1): /* do as x| */ - EMPTYARC(lp, rp); - break; - case PAIR(0, SOME): /* do as x{1,n}| */ - repeat(v, lp, rp, 1, n); - NOERR(); - EMPTYARC(lp, rp); - break; - case PAIR(0, INF): /* loop x around */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - moveins(v->nfa, rp, s); - EMPTYARC(lp, s); - EMPTYARC(s, rp); - break; - case PAIR(1, 1): /* no action required */ - break; - case PAIR(1, SOME): /* do as x{0,n-1}x = (x{1,n-1}|)x */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - dupnfa(v->nfa, s, rp, lp, s); - NOERR(); - repeat(v, lp, s, 1, n-1); - NOERR(); - EMPTYARC(lp, s); - break; - case PAIR(1, INF): /* add loopback arc */ - s = newstate(v->nfa); - s2 = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - moveins(v->nfa, rp, s2); - EMPTYARC(lp, s); - EMPTYARC(s2, rp); - EMPTYARC(s2, s); - break; - case PAIR(SOME, SOME): /* do as x{m-1,n-1}x */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - dupnfa(v->nfa, s, rp, lp, s); - NOERR(); - repeat(v, lp, s, m-1, n-1); - break; - case PAIR(SOME, INF): /* do as x{m-1,}x */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - dupnfa(v->nfa, s, rp, lp, s); - NOERR(); - repeat(v, lp, s, m-1, n); - break; - default: - ERR(REG_ASSERT); - break; - } -} - -/* - - bracket - handle non-complemented bracket expression - * Also called from cbracket for complemented bracket expressions. - ^ static VOID bracket(struct vars *, struct state *, struct state *); - */ -static VOID -bracket(v, lp, rp) -struct vars *v; -struct state *lp; -struct state *rp; -{ - assert(SEE('[')); - NEXT(); - while (!SEE(']') && !SEE(EOS)) - brackpart(v, lp, rp); - assert(SEE(']') || ISERR()); - okcolors(v->nfa, v->cm); -} - -/* - - cbracket - handle complemented bracket expression - * We do it by calling bracket() with dummy endpoints, and then complementing - * the result. The alternative would be to invoke rainbow(), and then delete - * arcs as the b.e. is seen... but that gets messy. - ^ static VOID cbracket(struct vars *, struct state *, struct state *); - */ -static VOID -cbracket(v, lp, rp) -struct vars *v; -struct state *lp; -struct state *rp; -{ - struct state *left = newstate(v->nfa); - struct state *right = newstate(v->nfa); - struct state *s; - struct arc *a; /* arc from lp */ - struct arc *ba; /* arc from left, from bracket() */ - struct arc *pa; /* CE-prototype arc */ - color co; - chr *p; - int i; - - NOERR(); - bracket(v, left, right); - if (v->cflags®_NLSTOP) - newarc(v->nfa, PLAIN, nlcolor(v), left, right); - NOERR(); - - assert(lp->nouts == 0); /* all outarcs will be ours */ - - /* easy part of complementing */ - colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp); - NOERR(); - if (v->ces == NULL) { /* no CEs -- we're done */ - dropstate(v->nfa, left); - assert(right->nins == 0); - freestate(v->nfa, right); - return; - } - - /* but complementing gets messy in the presence of CEs... */ - NOTE(REG_ULOCALE); - for (p = v->ces->chrs, i = v->ces->nchrs; i > 0; p++, i--) { - co = getcolor(v->cm, *p); - a = findarc(lp, PLAIN, co); - ba = findarc(left, PLAIN, co); - if (ba == NULL) { - assert(a != NULL); - freearc(v->nfa, a); - } else { - assert(a == NULL); - } - s = newstate(v->nfa); - NOERR(); - newarc(v->nfa, PLAIN, co, lp, s); - NOERR(); - pa = findarc(v->cepbegin, PLAIN, co); - assert(pa != NULL); - if (ba == NULL) { /* easy case, need all of them */ - cloneouts(v->nfa, pa->to, s, rp, PLAIN); - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); - } else { /* must be selective */ - if (findarc(ba->to, '$', 1) == NULL) { - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, pa->to, - s, rp); - } - for (pa = pa->to->outs; pa != NULL; pa = pa->outchain) - if (findarc(ba->to, PLAIN, pa->co) == NULL) - newarc(v->nfa, PLAIN, pa->co, s, rp); - if (s->nouts == 0) /* limit of selectivity: none */ - dropstate(v->nfa, s); /* frees arc too */ - } - NOERR(); - } - - delsub(v->nfa, left, right); - assert(left->nouts == 0); - freestate(v->nfa, left); - assert(right->nins == 0); - freestate(v->nfa, right); -} - -/* - - brackpart - handle one item (or range) within a bracket expression - ^ static VOID brackpart(struct vars *, struct state *, struct state *); - */ -static VOID -brackpart(v, lp, rp) -struct vars *v; -struct state *lp; -struct state *rp; -{ - celt startc; - celt endc; - struct cvec *cv; - chr *startp; - chr *endp; - chr c[1]; - - /* parse something, get rid of special cases, take shortcuts */ - switch (v->nexttype) { - case RANGE: /* a-b-c or other botch */ - ERR(REG_ERANGE); - return; - case PLAIN: - c[0] = v->nextvalue; - NEXT(); - /* shortcut for ordinary chr (not range, not CE leader) */ - if (!SEE(RANGE) && !ISCELEADER(v, c[0])) { - onechr(v, c[0], lp, rp); - return; - } - startc = element(v, c, c+1); - NOERR(); - break; - case COLLEL: - startp = v->now; - endp = scanplain(v); - INSIST(startp < endp, REG_ECOLLATE); - NOERR(); - startc = element(v, startp, endp); - NOERR(); - break; - case ECLASS: - startp = v->now; - endp = scanplain(v); - INSIST(startp < endp, REG_ECOLLATE); - NOERR(); - startc = element(v, startp, endp); - NOERR(); - cv = eclass(v, startc, (v->cflags®_ICASE)); - NOERR(); - dovec(v, cv, lp, rp); - return; - case CCLASS: - startp = v->now; - endp = scanplain(v); - INSIST(startp < endp, REG_ECTYPE); - NOERR(); - cv = cclass(v, startp, endp, (v->cflags®_ICASE)); - NOERR(); - dovec(v, cv, lp, rp); - return; - default: - ERR(REG_ASSERT); - return; - } - - if (SEE(RANGE)) { - NEXT(); - switch (v->nexttype) { - case PLAIN: - case RANGE: - c[0] = v->nextvalue; - NEXT(); - endc = element(v, c, c+1); - NOERR(); - break; - case COLLEL: - startp = v->now; - endp = scanplain(v); - INSIST(startp < endp, REG_ECOLLATE); - NOERR(); - endc = element(v, startp, endp); - NOERR(); - break; - default: - ERR(REG_ERANGE); - return; - } - } else - endc = startc; - - /* - * Ranges are unportable. Actually, standard C does - * guarantee that digits are contiguous, but making - * that an exception is just too complicated. - */ - if (startc != endc) - NOTE(REG_UUNPORT); - cv = range(v, startc, endc, (v->cflags®_ICASE)); - NOERR(); - dovec(v, cv, lp, rp); -} - -/* - - scanplain - scan PLAIN contents of [. etc. - * Certain bits of trickery in lex.c know that this code does not try - * to look past the final bracket of the [. etc. - ^ static chr *scanplain(struct vars *); - */ -static chr * /* just after end of sequence */ -scanplain(v) -struct vars *v; -{ - chr *endp; - - assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS)); - NEXT(); - - endp = v->now; - while (SEE(PLAIN)) { - endp = v->now; - NEXT(); - } - - assert(SEE(END) || ISERR()); - NEXT(); - - return endp; -} - -/* - - leaders - process a cvec of collating elements to also include leaders - * Also gives all characters involved their own colors, which is almost - * certainly necessary, and sets up little disconnected subNFA. - ^ static VOID leaders(struct vars *, struct cvec *); - */ -static VOID -leaders(v, cv) -struct vars *v; -struct cvec *cv; -{ - int ce; - chr *p; - chr leader; - struct state *s; - struct arc *a; - - v->cepbegin = newstate(v->nfa); - v->cepend = newstate(v->nfa); - NOERR(); - - for (ce = 0; ce < cv->nces; ce++) { - p = cv->ces[ce]; - leader = *p; - if (!haschr(cv, leader)) { - addchr(cv, leader); - s = newstate(v->nfa); - newarc(v->nfa, PLAIN, subcolor(v->cm, leader), - v->cepbegin, s); - okcolors(v->nfa, v->cm); - } else { - a = findarc(v->cepbegin, PLAIN, - getcolor(v->cm, leader)); - assert(a != NULL); - s = a->to; - assert(s != v->cepend); - } - p++; - assert(*p != 0 && *(p+1) == 0); /* only 2-char CEs at present */ - newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->cepend); - okcolors(v->nfa, v->cm); - } -} - -/* - - onechr - fill in arcs for a plain character, and possible case complements - * This is mostly a shortcut for efficient handling of the common case. - ^ static VOID onechr(struct vars *, pchr, struct state *, struct state *); - */ -static VOID -onechr(v, c, lp, rp) -struct vars *v; -pchr c; -struct state *lp; -struct state *rp; -{ - if (!(v->cflags®_ICASE)) { - newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp); - return; - } - - /* rats, need general case anyway... */ - dovec(v, allcases(v, c), lp, rp); -} - -/* - - dovec - fill in arcs for each element of a cvec - * This one has to handle the messy cases, like CEs and CE leaders. - ^ static VOID dovec(struct vars *, struct cvec *, struct state *, - ^ struct state *); - */ -static VOID -dovec(v, cv, lp, rp) -struct vars *v; -struct cvec *cv; -struct state *lp; -struct state *rp; -{ - chr *p; - chr *np; - int i; - color co; - struct arc *a; - struct arc *pa; /* arc in prototype */ - struct state *s; - struct state *ps; /* state in prototype */ - - /* first, get the ordinary characters out of the way */ - np = cv->chrs; - for (p = np, i = cv->nchrs; i > 0; p++, i--) - if (!ISCELEADER(v, *p)) { - newarc(v->nfa, PLAIN, subcolor(v->cm, *p), lp, rp); - *p = 0; - } else { - assert(singleton(v->cm, *p)); - *np++ = *p; - } - cv->nchrs = np - cv->chrs; /* only CE leaders remain */ - if (cv->nchrs == 0 && cv->nces == 0) - return; - - /* deal with the CE leaders */ - NOTE(REG_ULOCALE); - for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) { - co = getcolor(v->cm, *p); - a = findarc(lp, PLAIN, co); - if (a != NULL) - s = a->to; - else { - s = newstate(v->nfa); - NOERR(); - newarc(v->nfa, PLAIN, co, lp, s); - NOERR(); - } - pa = findarc(v->cepbegin, PLAIN, co); - assert(pa != NULL); - ps = pa->to; - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp); - NOERR(); - } - - /* and the CEs */ - for (i = 0; i < cv->nces; i++) { - p = cv->ces[i]; - assert(singleton(v->cm, *p)); - co = getcolor(v->cm, *p++); - a = findarc(lp, PLAIN, co); - if (a != NULL) - s = a->to; - else { - s = newstate(v->nfa); - NOERR(); - newarc(v->nfa, PLAIN, co, lp, s); - NOERR(); - } - assert(*p != 0); /* at least two chars */ - assert(singleton(v->cm, *p)); - co = getcolor(v->cm, *p++); - assert(*p == 0); /* and only two, for now */ - newarc(v->nfa, PLAIN, co, s, rp); - NOERR(); - } -} - -/* - - nlcolor - assign newline a unique color, if it doesn't have one already - * Restriction: can't be called when there are subcolors open. (Maybe - * this should be enforced...) - ^ static color nlcolor(struct vars *); - */ -static color -nlcolor(v) -struct vars *v; -{ - if (v->nlcolor == COLORLESS) { - v->nlcolor = subcolor(v->cm, newline()); - okcolors(v->nfa, v->cm); - } - return v->nlcolor; -} - -/* - - wordchrs - set up word-chr list for word-boundary stuff, if needed - * The list is kept as a bunch of arcs between two dummy states; it's - * disposed of by the unreachable-states sweep in NFA optimization. - * Does NEXT(). Must not be called from any unusual lexical context. - * This should be reconciled with the \w etc. handling in lex.c, and - * should be cleaned up to reduce dependencies on input scanning. - ^ static VOID wordchrs(struct vars *); - */ -static VOID -wordchrs(v) -struct vars *v; -{ - struct state *left; - struct state *right; - - if (v->wordchrs != NULL) { - NEXT(); /* for consistency */ - return; - } - - left = newstate(v->nfa); - right = newstate(v->nfa); - NOERR(); - lexword(v); - NEXT(); - assert(v->savenow != NULL && SEE('[')); - bracket(v, left, right); - assert(((v->savenow != NULL) && SEE(']')) || ISERR()); - NEXT(); - NOERR(); - v->wordchrs = left; -} - -/* - - subre - construct a subre struct - ^ static struct subre subre(struct state *, struct state *, int, int, - ^ struct rtree *); - */ -static struct subre -subre(begin, end, prefer, subno, tree) -struct state *begin; -struct state *end; -int prefer; -int subno; -struct rtree *tree; -{ - struct subre ret; - - ret.begin = begin; - ret.end = end; - ret.prefer = prefer; - ret.subno = subno; - ret.min = ret.max = 1; - ret.tree = tree; - ZAPCNFA(ret.cnfa); - return ret; -} - -/* - - newrt - allocate subRE-tree node - ^ static struct rtree *newrt(struct vars *); - */ -static struct rtree * -newrt(v) -struct vars *v; -{ - struct rtree *rt = (struct rtree *)ckalloc(sizeof(struct rtree)); - - if (rt == NULL) { - ERR(REG_ESPACE); - return NULL; - } - - rt->op = '?'; /* invalid */ - rt->no = 0; - rt->left.begin = NULL; - rt->left.end = NULL; - rt->left.prefer = NONEYET; - rt->left.subno = 0; - rt->left.min = rt->left.max = 1; - rt->left.tree = NULL; - ZAPCNFA(rt->left.cnfa); - rt->right.begin = NULL; - rt->right.end = NULL; - rt->right.prefer = NONEYET; - rt->right.subno = 0; - rt->right.min = rt->right.max = 1; - rt->right.tree = NULL; - ZAPCNFA(rt->right.cnfa); - rt->next = NULL; - return rt; -} - -/* - - freert - free a subRE subtree - ^ static VOID freert(struct rtree *); - */ -static VOID -freert(rt) -struct rtree *rt; -{ - if (rt == NULL) - return; - - if (rt->left.tree != NULL) - freert(rt->left.tree); - if (rt->right.tree != NULL) - freert(rt->right.tree); - if (rt->next != NULL) - freert(rt->next); - - freertnode(rt); -} - -/* - - freertnode - free one node in a subRE subtree - ^ static VOID freertnode(struct rtree *); - */ -static VOID -freertnode(rt) -struct rtree *rt; -{ - if (rt == NULL) - return; - - if (!NULLCNFA(rt->left.cnfa)) - freecnfa(&rt->left.cnfa, 0); - if (!NULLCNFA(rt->right.cnfa)) - freecnfa(&rt->right.cnfa, 0); - - ckfree((char *)rt); -} - -/* - - optrt - optimize a subRE subtree - ^ static VOID optrt(struct vars *, struct rtree *); - */ -static VOID -optrt(v, rt) -struct vars *v; -struct rtree *rt; -{ - struct rtree *t; - int subno; - - if (rt == NULL) - return; - assert(rt->op != 'b'); - - /* pull up subtrees if possible */ - if (rt->left.begin != NULL && rt->left.tree != NULL && - rt->left.tree->op != 'b') { - t = rt->left.tree; - optrt(v, t); - if (t->right.begin == NULL && t->next == NULL && - (rt->left.prefer == NONEYET || - t->left.prefer == rt->left.prefer) && - (rt->left.subno == 0 || t->left.subno == 0)) { - subno = rt->left.subno; - rt->left = t->left; - assert(NULLCNFA(t->left.cnfa)); - freertnode(t); - if (subno != 0) { - assert(rt->left.subno == 0 && subno > 0); - rt->left.subno = subno; - } - } - } - if (rt->right.begin != NULL && rt->right.tree != NULL && - rt->right.tree->op != 'b') { - t = rt->right.tree; - optrt(v, t); - if (t->right.begin == NULL && t->next == NULL && - (rt->right.prefer == NONEYET || - t->right.prefer == rt->right.prefer) && - (rt->right.subno == 0 || t->right.subno == 0)) { - subno = rt->right.subno; - rt->right = t->left; - assert(NULLCNFA(t->right.cnfa)); - freertnode(t); - if (subno != 0) { - assert(rt->right.subno == 0 && subno > 0); - rt->right.subno = subno; - } - } - } - - /* simplify empties */ - if (rt->left.begin != NULL && isempty(rt->left.begin, rt->left.end)) - rt->left.end = rt->left.begin; - if (rt->right.begin != NULL && isempty(rt->right.begin, rt->right.end)) - rt->right.end = rt->right.begin; - - /* if left subtree vacuous and right non-empty, move right over */ - if (rt->left.begin != NULL && rt->left.begin == rt->left.end && - rt->left.subno == 0 && rt->left.tree == NULL && - rt->right.begin != NULL) { - rt->left = rt->right; - rt->right.begin = NULL; - rt->right.tree = NULL; - } - - /* if right subtree vacuous, clear it out */ - if (rt->right.begin != NULL && rt->right.begin == rt->right.end && - rt->right.subno == 0 && rt->right.tree == NULL) { - rt->right.begin = NULL; - rt->right.tree = NULL; - } - - /* preference cleanup and analysis */ - if (rt->left.prefer == NONEYET) - rt->left.prefer = LONGER; - if (rt->left.prefer == SHORTER) - v->usedshorter = 1; - if (rt->right.begin != NULL) { - if (rt->right.prefer == NONEYET) - rt->right.prefer = LONGER; - if (rt->right.prefer == SHORTER) - v->usedshorter = 1; - } - - /* recurse through alternatives */ - if (rt->next != NULL) - optrt(v, rt->next); -} - -/* - - numrt - number tree nodes - ^ static int numrt(struct rtree *, int); - */ -static int /* next number */ -numrt(rt, start) -struct rtree *rt; -int start; /* starting point for subtree numbers */ -{ - int i; - - assert(rt != NULL); - - i = start; - rt->no = (short) i++; - if (rt->left.tree != NULL) - i = numrt(rt->left.tree, i); - if (rt->right.tree != NULL) - i = numrt(rt->right.tree, i); - if (rt->next != NULL) - i = numrt(rt->next, i); - return i; -} - -/* - - nfatree - turn a subRE subtree into a tree of compacted NFAs - ^ static VOID nfatree(struct vars *, struct rtree *); - */ -static VOID -nfatree(v, rt) -struct vars *v; -struct rtree *rt; -{ - if (rt == NULL) - return; - - if (rt->left.begin != NULL) - nfanode(v, &rt->left); - if (rt->left.tree != NULL) - nfatree(v, rt->left.tree); - - if (rt->right.begin != NULL) - nfanode(v, &rt->right); - if (rt->right.tree != NULL) - nfatree(v, rt->right.tree); - - if (rt->next != NULL) - nfatree(v, rt->next); -} - -/* - - nfanode - do one NFA for nfatree - ^ static VOID nfanode(struct vars *, struct subre *); - */ -static VOID -nfanode(v, sub) -struct vars *v; -struct subre *sub; -{ - struct nfa *nfa; - - if (sub->begin == NULL) - return; - - nfa = newnfa(v, v->nfa); - NOERR(); - dupnfa(nfa, sub->begin, sub->end, nfa->init, nfa->final); - if (!ISERR()) { - specialcolors(nfa); - optimize(nfa); - } - if (!ISERR()) - compact(v, nfa, &sub->cnfa); - freenfa(nfa); -} - -/* - - newlacon - allocate a lookahead-constraint subRE - ^ static int newlacon(struct vars *, struct state *, struct state *, int); - */ -static int /* lacon number */ -newlacon(v, begin, end, pos) -struct vars *v; -struct state *begin; -struct state *end; -int pos; -{ - int n; - struct subre *sub; - - if (v->nlacons == 0) { - v->lacons = (struct subre *)ckalloc(2 * sizeof(struct subre)); - n = 1; /* skip 0th */ - v->nlacons = 2; - } else { - v->lacons = (struct subre *)ckrealloc((VOID *) v->lacons, - (v->nlacons+1)*sizeof(struct subre)); - n = v->nlacons++; - } - if (v->lacons == NULL) { - ERR(REG_ESPACE); - return 0; - } - sub = &v->lacons[n]; - sub->begin = begin; - sub->end = end; - sub->subno = pos; - ZAPCNFA(sub->cnfa); - return n; -} - -/* - - freelacons - free lookahead-constraint subRE vector - ^ static VOID freelacons(struct subre *, int); - */ -static VOID -freelacons(subs, n) -struct subre *subs; -int n; -{ - struct subre *sub; - int i; - - for (sub = subs + 1, i = n - 1; i > 0; sub++, i--) - if (!NULLCNFA(sub->cnfa)) - freecnfa(&sub->cnfa, 0); - ckfree((char *)subs); -} - -/* - - rfree - free a whole RE (insides of regfree) - ^ static VOID rfree(regex_t *); - */ -static VOID -rfree(re) -regex_t *re; /* regfree has validated it */ -{ - struct guts *g = (struct guts *)re->re_guts; - - re->re_magic = 0; /* invalidate it */ - re->re_guts = NULL; - re->re_fns = NULL; - g->magic = 0; - if (!NULLCNFA(g->cnfa)) - freecnfa(&g->cnfa, 0); - if (g->cm != NULL) - freecm(g->cm); - if (g->tree != NULL) - freert(g->tree); - if (g->lacons != NULL) - freelacons(g->lacons, g->nlacons); - ckfree((char *)g); -} - -/* - - dumprt - dump a subRE tree - ^ static VOID dumprt(struct rtree *, FILE *, int); - */ -static VOID -dumprt(rt, f, nfapresent) -struct rtree *rt; -FILE *f; -int nfapresent; /* is the original NFA still around? */ -{ - if (rt == NULL) - fprintf(f, "null tree\n"); - else - rtdump(rt, f, nfapresent, 0); - fflush(f); -} - -/* - - rtdump - recursive guts of dumprt - ^ static VOID rtdump(struct rtree *, FILE *, int, int); - */ -static VOID -rtdump(rt, f, nfapresent, level) -struct rtree *rt; -FILE *f; -int nfapresent; /* is the original NFA still around? */ -int level; -{ - int i; -# define RTSEP " " - - for (i = 0; i < level; i++) - fprintf(f, RTSEP); - fprintf(f, "%c (n%d) {\n", rt->op, rt->no); - if (rt->left.begin != NULL) { - for (i = 0; i < level+1; i++) - fprintf(f, RTSEP); - fprintf(f, "L"); - fprintf(f, "%s", (rt->left.prefer == NONEYET) ? "-" : - ((rt->left.prefer == LONGER) ? ">" : "<")); - if (nfapresent) - fprintf(f, "%ld-%ld", (long)rt->left.begin->no, - (long)rt->left.end->no); - if (rt->left.subno > 0) - fprintf(f, " (%d)", rt->left.subno); - else if (rt->left.subno < 0) { - fprintf(f, " \\%d", -rt->left.subno); - if (rt->left.min != 1 || rt->left.max != 1) { - fprintf(f, "{%d-", (int)rt->left.min); - if (rt->left.max != INFINITY) - fprintf(f, "%d", (int)rt->left.max); - fprintf(f, "}"); - } - if (rt->left.tree != NULL) - fprintf(f, "(nonNULL tree!!)"); - } - if (rt->left.tree != NULL || !NULLCNFA(rt->left.cnfa)) - fprintf(f, ":"); - fprintf(f, "\n"); - if (!NULLCNFA(rt->left.cnfa)) - dumpcnfa(&rt->left.cnfa, f); - if (rt->left.tree != NULL) - rtdump(rt->left.tree, f, nfapresent, level+1); - } else if (rt->op == 'b') { - for (i = 0; i < level+1; i++) - fprintf(f, RTSEP); - fprintf(f, "L"); - fprintf(f, "%s", (rt->left.prefer == NONEYET) ? "-" : - ((rt->left.prefer == LONGER) ? ">" : "<")); - assert(rt->left.subno < 0); - fprintf(f, " \\%d", -rt->left.subno); - if (rt->left.min != 1 || rt->left.max != 1) { - fprintf(f, "{%d-", (int)rt->left.min); - if (rt->left.max != INFINITY) - fprintf(f, "%d", (int)rt->left.max); - fprintf(f, "}"); - } - if (rt->left.tree != NULL) - fprintf(f, "(nonNULL tree!!)"); - fprintf(f, "\n"); - } - - if (rt->right.begin != NULL) { - if (rt->op != ',') - fprintf(f, "op %c has non-NULL right tree\n", rt->op); - for (i = 0; i < level+1; i++) - fprintf(f, RTSEP); - fprintf(f, "R"); - fprintf(f, "%s", (rt->right.prefer == NONEYET) ? "-" : - ((rt->right.prefer == LONGER) ? ">" : "<")); - if (nfapresent) - fprintf(f, "%ld-%ld", (long)rt->right.begin->no, - (long)rt->right.end->no); - if (rt->right.subno > 0) - fprintf(f, " (%d)", rt->right.subno); - else if (rt->right.subno < 0) { - fprintf(f, " \\%d", -rt->right.subno); - if (rt->right.min != 1 || rt->right.max != 1) { - fprintf(f, "{%d-", (int)rt->right.min); - if (rt->right.max != INFINITY) - fprintf(f, "%d", (int)rt->right.max); - fprintf(f, "}"); - } - if (rt->right.tree != NULL) - fprintf(f, "(nonNULL tree!!)"); - } - if (rt->right.tree != NULL || !NULLCNFA(rt->right.cnfa)) - fprintf(f, ":"); - fprintf(f, "\n"); - if (!NULLCNFA(rt->right.cnfa)) - dumpcnfa(&rt->right.cnfa, f); - if (rt->right.tree != NULL) - rtdump(rt->right.tree, f, nfapresent, level+1); - } - for (i = 0; i < level; i++) - fprintf(f, RTSEP); - fprintf(f, "}\n"); - - if (rt->next != NULL) { - if (rt->op != '|') - fprintf(f, "op %c has non-NULL next\n", rt->op); - if (rt->next->op != rt->op) - fprintf(f, "next op %c, expecting %c\n", rt->next->op, - rt->op); - rtdump(rt->next, f, nfapresent, level); - } -} - -/* - - dump - dump an RE in human-readable form - ^ static VOID dump(regex_t *, FILE *); - */ -static VOID -dump(re, f) -regex_t *re; -FILE *f; -{ -} - -#undef NOERRN -#define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */ - -#define COMPILE 1 -#include "lex.c" -#include "color.c" -#include "locale.c" -#include "nfa.c" diff --git a/generic/exec.c b/generic/exec.c deleted file mode 100644 index 92439aa..0000000 --- a/generic/exec.c +++ /dev/null @@ -1,1754 +0,0 @@ -/* - * exec.c -- - * - * Regexp package file: re_*exec and friends - match REs - * - * Copyright (c) 1998 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., and Sun Microsystems Inc., none of - * whom are responsible for the results. The author thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Copyright (c) 1998 by Sun Microsystems, Inc. - * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: exec.c,v 1.1.2.2 1998/10/05 17:38:26 stanton Exp $ - */ - -#include "tclInt.h" -#include -#include "tclRegexp.h" -#include "chr.h" -#include "guts.h" - - -/* internal variables, bundled for easy passing around */ -struct vars { - regex_t *re; - struct guts *g; - int eflags; /* copies of arguments */ - size_t nmatch; - regmatch_t *pmatch; - chr *start; /* start of string */ - chr *stop; /* just past end of string */ - int err; /* error code if any (0 none) */ - regoff_t *mem; /* memory vector for backtracking */ - regoff_t *mem1; /* localizer vector */ - regoff_t *mem2; /* dissector vector */ -}; -#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ -#define ISERR() VISERR(v) -#define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e))) -#define ERR(e) VERR(v, e) /* record an error */ -#define NOERR() {if (ISERR()) return;} /* if error seen, return */ -#define OFF(p) ((p) - v->start) - - - -/* lazy-DFA representation */ -struct arcp { /* "pointer" to an outarc */ - struct sset *ss; - color co; -}; - -struct sset { /* state set */ - unsigned *states; /* pointer to bitvector */ - unsigned hash; /* xor of bitvector */ - int flags; -# define STARTER 01 /* the initial state set */ -# define POSTSTATE 02 /* includes the goal state */ - struct arcp ins; /* chain of inarcs pointing here */ - chr *lastseen; /* last entered on arrival here */ - struct sset **outs; /* outarc vector indexed by color */ - struct arcp *inchain; /* chain-pointer vector for outarcs */ -}; - -struct dfa { - int nssets; /* size of cache */ - int nssused; /* how many entries occupied yet */ - int nstates; /* number of states */ - int ncolors; /* length of outarc and inchain vectors */ - int wordsper; /* length of state-set bitvectors */ - struct sset *ssets; /* state-set cache */ - unsigned *statesarea; /* bitvector storage */ - unsigned *work; /* pointer to work area within statesarea */ - struct sset **outsarea; /* outarc-vector storage */ - struct arcp *incarea; /* inchain storage */ - struct cnfa *cnfa; - struct colormap *cm; - chr *lastpost; /* location of last cache-flushed success */ -}; - -#define CACHE 200 -#define WORK 1 /* number of work bitvectors needed */ - - - -/* - * forward declarations - */ -/* =====^!^===== begin forwards =====^!^===== */ -/* automatically gathered by fwd; do not hand-edit */ -/* === exec.c === */ -int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, size_t, regmatch_t [], int)); -static int find _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *)); -static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *)); -static VOID zapmatches _ANSI_ARGS_((regmatch_t *, size_t)); -static VOID zapmem _ANSI_ARGS_((struct vars *, struct rtree *)); -static VOID subset _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); -static int dissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); -static int altdissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); -static int cdissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); -static int crevdissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); -static int csindissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); -static int cbrdissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); -static int caltdissect _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); -static chr *dismatch _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); -static chr *dismrev _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); -static chr *dismsin _ANSI_ARGS_((struct vars *, struct rtree *, chr *, chr *)); -static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *)); -static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *)); -static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *)); -static VOID freedfa _ANSI_ARGS_((struct dfa *)); -static unsigned hash _ANSI_ARGS_((unsigned *, int)); -static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *)); -static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *)); -static int lacon _ANSI_ARGS_((struct vars *, struct cnfa *, chr *, pcolor)); -static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *)); -static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *)); -/* === color.c === */ -union tree; -static color getcolor _ANSI_ARGS_((struct colormap *, pchr)); -/* automatically gathered by fwd; do not hand-edit */ -/* =====^!^===== end forwards =====^!^===== */ - - - -/* - - exec - match regular expression - ^ int exec(regex_t *, CONST chr *, size_t, size_t, regmatch_t [], int); - */ -int -exec(re, string, len, nmatch, pmatch, flags) -regex_t *re; -CONST chr *string; -size_t len; -size_t nmatch; -regmatch_t pmatch[]; -int flags; -{ - struct vars var; - register struct vars *v = &var; - int st; - size_t n; - int complications; - - /* sanity checks */ - if (re == NULL || string == NULL || re->re_magic != REMAGIC) - return REG_INVARG; - if (re->re_csize != sizeof(chr)) - return REG_MIXED; - - /* setup */ - v->re = re; - v->g = (struct guts *)re->re_guts; - complications = (v->g->info®_UBACKREF) ? 1 : 0; - if (v->g->usedshorter) - complications = 1; - v->eflags = flags; - if (v->g->cflags®_NOSUB) - nmatch = 0; /* override client */ - v->nmatch = nmatch; - if (complications && v->nmatch < (size_t)(v->g->nsub + 1)) { - /* need work area bigger than what user gave us */ - v->pmatch = (regmatch_t *)ckalloc((v->g->nsub + 1) * - sizeof(regmatch_t)); - if (v->pmatch == NULL) - return REG_ESPACE; - v->nmatch = v->g->nsub + 1; - } else - v->pmatch = pmatch; - v->start = (chr *)string; - v->stop = (chr *)string + len; - v->err = 0; - if (complications) { - v->mem1 = (regoff_t *)ckalloc(2*v->g->ntree*sizeof(regoff_t)); - if (v->mem1 == NULL) { - if (v->pmatch != pmatch) - ckfree((char *)v->pmatch); - return REG_ESPACE; - } - v->mem2 = v->mem1 + v->g->ntree; - } else - v->mem1 = NULL; - - /* do it */ - if (complications) - st = cfind(v, &v->g->cnfa, v->g->cm); - else - st = find(v, &v->g->cnfa, v->g->cm); - if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) { - zapmatches(pmatch, nmatch); - n = (nmatch < v->nmatch) ? nmatch : v->nmatch; - memcpy((VOID *)pmatch, (VOID *)v->pmatch, n*sizeof(regmatch_t)); - } - if (v->pmatch != pmatch) - ckfree((char *)v->pmatch); - if (v->mem1 != NULL) - ckfree((char *)v->mem1); - return st; -} - -/* - - find - find a match for the main NFA (no-complications case) - ^ static int find(struct vars *, struct cnfa *, struct colormap *); - */ -static int -find(v, cnfa, cm) -struct vars *v; -struct cnfa *cnfa; -struct colormap *cm; -{ - struct dfa *d = newdfa(v, cnfa, cm); - chr *begin; - chr *end; - chr *stop = (cnfa->leftanch) ? v->start : v->stop; - - if (d == NULL) - return v->err; - - for (begin = v->start; begin <= stop; begin++) { - if (v->eflags®_MTRACE) - printf("\ntrying at %ld\n", (long)OFF(begin)); - end = longest(v, d, begin, v->stop); - if (end != NULL) { - if (v->nmatch > 0) { - v->pmatch[0].rm_so = OFF(begin); - v->pmatch[0].rm_eo = OFF(end); - } - freedfa(d); - if (v->nmatch > 1) { - zapmatches(v->pmatch, v->nmatch); - return dissect(v, v->g->tree, begin, end); - } - return REG_OKAY; - } - } - - freedfa(d); - return REG_NOMATCH; -} - -/* - - cfind - find a match for the main NFA (with complications) - ^ static int cfind(struct vars *, struct cnfa *, struct colormap *); - */ -static int -cfind(v, cnfa, cm) -struct vars *v; -struct cnfa *cnfa; -struct colormap *cm; -{ - struct dfa *d = newdfa(v, cnfa, cm); - chr *begin; - chr *end; - chr *stop = (cnfa->leftanch) ? v->start : v->stop; - chr *estop; - int er; - int usedis = (v->g->tree == NULL || v->g->tree->op == '|') ? 0 : 1; - - if (d == NULL) - return v->err; - - if (!v->g->usedshorter) - usedis = 0; - for (begin = v->start; begin <= stop; begin++) { - if (v->eflags®_MTRACE) - printf("\ntrying at %ld\n", (long)OFF(begin)); - if (usedis) { - v->mem = v->mem1; - zapmem(v, v->g->tree); - } - estop = v->stop; - for (;;) { - if (usedis) { - v->mem = v->mem1; - end = dismatch(v, v->g->tree, begin, v->stop); - } else - end = longest(v, d, begin, estop); - if (end == NULL) - break; /* NOTE BREAK OUT */ - if (v->eflags®_MTRACE) - printf("tentative end %ld\n", (long)OFF(end)); - zapmatches(v->pmatch, v->nmatch); - v->mem = v->mem2; - zapmem(v, v->g->tree); - er = cdissect(v, v->g->tree, begin, end); - switch (er) { - case REG_OKAY: - if (v->nmatch > 0) { - v->pmatch[0].rm_so = OFF(begin); - v->pmatch[0].rm_eo = OFF(end); - } - freedfa(d); - return REG_OKAY; - case REG_NOMATCH: - /* go around and try again */ - if (!usedis) { - if (end == begin) { - /* no point in trying again */ - freedfa(d); - return REG_NOMATCH; - } - estop = end - 1; - } - break; - default: - freedfa(d); - return er; - } - } - } - - freedfa(d); - return REG_NOMATCH; -} - -/* - - zapmatches - initialize the subexpression matches to "no match" - ^ static VOID zapmatches(regmatch_t *, size_t); - */ -static VOID -zapmatches(p, n) -regmatch_t *p; -size_t n; -{ - size_t i; - - for (i = 1; i < n; i++) { - p[i].rm_so = -1; - p[i].rm_eo = -1; - } -} - -/* - - zapmem - initialize the retry memory of a subtree to zeros - ^ static VOID zapmem(struct vars *, struct rtree *); - */ -static VOID -zapmem(v, rt) -struct vars *v; -struct rtree *rt; -{ - if (rt == NULL) - return; - - assert(v->mem != NULL); - v->mem[rt->no] = 0; - - if (rt->left.tree != NULL) - zapmem(v, rt->left.tree); - if (rt->left.subno > 0) { - v->pmatch[rt->left.subno].rm_so = -1; - v->pmatch[rt->left.subno].rm_eo = -1; - } - if (rt->right.tree != NULL) - zapmem(v, rt->right.tree); - if (rt->right.subno > 0) { - v->pmatch[rt->right.subno].rm_so = -1; - v->pmatch[rt->right.subno].rm_eo = -1; - } - if (rt->next != NULL) - zapmem(v, rt->next); -} - -/* - - subset - set any subexpression relevant to a successful subre - ^ static VOID subset(struct vars *, struct subre *, chr *, chr *); - */ -static VOID -subset(v, sub, begin, end) -struct vars *v; -struct subre *sub; -chr *begin; -chr *end; -{ - int n = sub->subno; - - if (n == 0) - return; - assert(n > 0); - if ((size_t)n >= v->nmatch) - return; - - if (v->eflags®_MTRACE) - printf("setting %d\n", n); - v->pmatch[n].rm_so = OFF(begin); - v->pmatch[n].rm_eo = OFF(end); -} - -/* - - dissect - determine subexpression matches (uncomplicated case) - ^ static int dissect(struct vars *, struct rtree *, chr *, chr *); - */ -static int /* regexec return code */ -dissect(v, rt, begin, end) -struct vars *v; -struct rtree *rt; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ -{ - struct dfa *d; - struct dfa *d2; - chr *mid; - int i; - - if (rt == NULL) - return REG_OKAY; - if (v->eflags®_MTRACE) - printf("substring %ld-%ld\n", (long)OFF(begin), (long)OFF(end)); - - /* alternatives -- punt to auxiliary */ - if (rt->op == '|') - return altdissect(v, rt, begin, end); - - /* concatenation -- need to split the substring between parts */ - assert(rt->op == ','); - assert(rt->left.cnfa.nstates > 0); - d = newdfa(v, &rt->left.cnfa, v->g->cm); - if (ISERR()) - return v->err; - - /* in some cases, there may be no right side... */ - if (rt->right.cnfa.nstates == 0) { - if (v->eflags®_MTRACE) - printf("singleton\n"); - if (longest(v, d, begin, end) != end) { - freedfa(d); - return REG_ASSERT; - } - freedfa(d); - assert(rt->left.subno >= 0); - subset(v, &rt->left, begin, end); - return dissect(v, rt->left.tree, begin, end); - } - - /* general case */ - assert(rt->right.cnfa.nstates > 0); - d2 = newdfa(v, &rt->right.cnfa, v->g->cm); - if (ISERR()) { - freedfa(d); - return v->err; - } - - /* pick a tentative midpoint */ - mid = longest(v, d, begin, end); - if (mid == NULL) { - freedfa(d); - freedfa(d2); - return REG_ASSERT; - } - if (v->eflags®_MTRACE) - printf("tentative midpoint %ld\n", (long)OFF(mid)); - - /* iterate until satisfaction or failure */ - while (longest(v, d2, mid, end) != end) { - /* that midpoint didn't work, find a new one */ - if (mid == begin) { - /* all possibilities exhausted! */ - if (v->eflags®_MTRACE) - printf("no midpoint!\n"); - freedfa(d); - freedfa(d2); - return REG_ASSERT; - } - mid = longest(v, d, begin, mid-1); - if (mid == NULL) { - /* failed to find a new one! */ - if (v->eflags®_MTRACE) - printf("failed midpoint!\n"); - freedfa(d); - freedfa(d2); - return REG_ASSERT; - } - if (v->eflags®_MTRACE) - printf("new midpoint %ld\n", (long)OFF(mid)); - } - - /* satisfaction */ - if (v->eflags®_MTRACE) - printf("successful\n"); - freedfa(d); - freedfa(d2); - assert(rt->left.subno >= 0); - subset(v, &rt->left, begin, mid); - assert(rt->right.subno >= 0); - subset(v, &rt->right, mid, end); - i = dissect(v, rt->left.tree, begin, mid); - if (i != REG_OKAY) - return i; - return dissect(v, rt->right.tree, mid, end); -} - -/* - - altdissect - determine alternative subexpression matches (uncomplicated) - ^ static int altdissect(struct vars *, struct rtree *, chr *, chr *); - */ -static int /* regexec return code */ -altdissect(v, rt, begin, end) -struct vars *v; -struct rtree *rt; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ -{ - struct dfa *d; - int i; - - assert(rt != NULL); - assert(rt->op == '|'); - - for (i = 0; rt != NULL; rt = rt->next, i++) { - if (v->eflags®_MTRACE) - printf("trying %dth\n", i); - assert(rt->left.begin != NULL); - d = newdfa(v, &rt->left.cnfa, v->g->cm); - if (ISERR()) - return v->err; - if (longest(v, d, begin, end) == end) { - if (v->eflags®_MTRACE) - printf("success\n"); - freedfa(d); - assert(rt->left.subno >= 0); - subset(v, &rt->left, begin, end); - return dissect(v, rt->left.tree, begin, end); - } - freedfa(d); - } - return REG_ASSERT; /* none of them matched?!? */ -} - -/* - - cdissect - determine subexpression matches (with complications) - * The retry memory stores the offset of the trial midpoint from begin, - * plus 1 so that 0 uniquely means "clean slate". - ^ static int cdissect(struct vars *, struct rtree *, chr *, chr *); - */ -static int /* regexec return code */ -cdissect(v, rt, begin, end) -struct vars *v; -struct rtree *rt; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ -{ - struct dfa *d; - struct dfa *d2; - chr *mid; - int er; - - if (rt == NULL) - return REG_OKAY; - if (v->eflags®_MTRACE) - printf("csubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end)); - - /* punt various cases to auxiliaries */ - if (rt->op == '|') /* alternatives */ - return caltdissect(v, rt, begin, end); - if (rt->op == 'b') /* backref */ - return cbrdissect(v, rt, begin, end); - if (rt->right.cnfa.nstates == 0) /* no RHS */ - return csindissect(v, rt, begin, end); - if (rt->left.prefer == SHORTER) /* reverse scan */ - return crevdissect(v, rt, begin, end); - - /* concatenation -- need to split the substring between parts */ - assert(rt->op == ','); - assert(rt->left.cnfa.nstates > 0); - assert(rt->right.cnfa.nstates > 0); - d = newdfa(v, &rt->left.cnfa, v->g->cm); - if (ISERR()) - return v->err; - d2 = newdfa(v, &rt->right.cnfa, v->g->cm); - if (ISERR()) { - freedfa(d); - return v->err; - } - if (v->eflags®_MTRACE) - printf("cconcat %d\n", rt->no); - - /* pick a tentative midpoint */ - if (v->mem[rt->no] == 0) { - mid = longest(v, d, begin, end); - if (mid == NULL) { - freedfa(d); - freedfa(d2); - return REG_NOMATCH; - } - if (v->eflags®_MTRACE) - printf("tentative midpoint %ld\n", (long)OFF(mid)); - subset(v, &rt->left, begin, mid); - v->mem[rt->no] = (mid - begin) + 1; - } else { - mid = begin + (v->mem[rt->no] - 1); - if (v->eflags®_MTRACE) - printf("working midpoint %ld\n", (long)OFF(mid)); - } - - /* iterate until satisfaction or failure */ - for (;;) { - /* try this midpoint on for size */ - er = cdissect(v, rt->left.tree, begin, mid); - if (er == REG_OKAY && longest(v, d2, mid, end) == end && - (er = cdissect(v, rt->right.tree, mid, end)) == - REG_OKAY) - break; /* NOTE BREAK OUT */ - if (er != REG_OKAY && er != REG_NOMATCH) { - freedfa(d); - freedfa(d2); - return er; - } - - /* that midpoint didn't work, find a new one */ - if (mid == begin) { - /* all possibilities exhausted */ - if (v->eflags®_MTRACE) - printf("%d no midpoint\n", rt->no); - freedfa(d); - freedfa(d2); - return REG_NOMATCH; - } - mid = longest(v, d, begin, mid-1); - if (mid == NULL) { - /* failed to find a new one */ - if (v->eflags®_MTRACE) - printf("%d failed midpoint\n", rt->no); - freedfa(d); - freedfa(d2); - return REG_NOMATCH; - } - if (v->eflags®_MTRACE) - printf("%d: new midpoint %ld\n", rt->no, - (long)OFF(mid)); - subset(v, &rt->left, begin, mid); - v->mem[rt->no] = (mid - begin) + 1; - zapmem(v, rt->left.tree); - zapmem(v, rt->right.tree); - } - - /* satisfaction */ - if (v->eflags®_MTRACE) - printf("successful\n"); - freedfa(d); - freedfa(d2); - subset(v, &rt->right, mid, end); - return REG_OKAY; -} - -/* - - crevdissect - determine shortest-first subexpression matches - * The retry memory stores the offset of the trial midpoint from begin, - * plus 1 so that 0 uniquely means "clean slate". - ^ static int crevdissect(struct vars *, struct rtree *, chr *, chr *); - */ -static int /* regexec return code */ -crevdissect(v, rt, begin, end) -struct vars *v; -struct rtree *rt; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ -{ - struct dfa *d; - struct dfa *d2; - chr *mid; - int er; - - if (rt == NULL) - return REG_OKAY; - assert(rt->op == ',' && rt->left.prefer == SHORTER); - - /* concatenation -- need to split the substring between parts */ - assert(rt->left.cnfa.nstates > 0); - assert(rt->right.cnfa.nstates > 0); - d = newdfa(v, &rt->left.cnfa, v->g->cm); - if (ISERR()) - return v->err; - d2 = newdfa(v, &rt->right.cnfa, v->g->cm); - if (ISERR()) { - freedfa(d); - return v->err; - } - if (v->eflags®_MTRACE) - printf("crev %d\n", rt->no); - - /* pick a tentative midpoint */ - if (v->mem[rt->no] == 0) { - mid = shortest(v, d, begin, begin, end); - if (mid == NULL) { - freedfa(d); - freedfa(d2); - return REG_NOMATCH; - } - if (v->eflags®_MTRACE) - printf("tentative midpoint %ld\n", (long)OFF(mid)); - subset(v, &rt->left, begin, mid); - v->mem[rt->no] = (mid - begin) + 1; - } else { - mid = begin + (v->mem[rt->no] - 1); - if (v->eflags®_MTRACE) - printf("working midpoint %ld\n", (long)OFF(mid)); - } - - /* iterate until satisfaction or failure */ - for (;;) { - /* try this midpoint on for size */ - er = cdissect(v, rt->left.tree, begin, mid); - if (er == REG_OKAY && longest(v, d2, mid, end) == end && - (er = cdissect(v, rt->right.tree, mid, end)) == - REG_OKAY) - break; /* NOTE BREAK OUT */ - if (er != REG_OKAY && er != REG_NOMATCH) { - freedfa(d); - freedfa(d2); - return er; - } - - /* that midpoint didn't work, find a new one */ - if (mid == end) { - /* all possibilities exhausted */ - if (v->eflags®_MTRACE) - printf("%d no midpoint\n", rt->no); - freedfa(d); - freedfa(d2); - return REG_NOMATCH; - } - mid = shortest(v, d, begin, mid+1, end); - if (mid == NULL) { - /* failed to find a new one */ - if (v->eflags®_MTRACE) - printf("%d failed midpoint\n", rt->no); - freedfa(d); - freedfa(d2); - return REG_NOMATCH; - } - if (v->eflags®_MTRACE) - printf("%d: new midpoint %ld\n", rt->no, - (long)OFF(mid)); - subset(v, &rt->left, begin, mid); - v->mem[rt->no] = (mid - begin) + 1; - zapmem(v, rt->left.tree); - zapmem(v, rt->right.tree); - } - - /* satisfaction */ - if (v->eflags®_MTRACE) - printf("successful\n"); - freedfa(d); - freedfa(d2); - subset(v, &rt->right, mid, end); - return REG_OKAY; -} - -/* - - csindissect - determine singleton subexpression matches (with complications) - ^ static int csindissect(struct vars *, struct rtree *, chr *, chr *); - */ -static int /* regexec return code */ -csindissect(v, rt, begin, end) -struct vars *v; -struct rtree *rt; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ -{ - struct dfa *d; - int er; - - assert(rt != NULL); - assert(rt->op == ','); - assert(rt->right.cnfa.nstates == 0); - if (v->eflags®_MTRACE) - printf("csingleton %d\n", rt->no); - - assert(rt->left.cnfa.nstates > 0); - - /* exploit memory only to suppress repeated work in retries */ - if (!v->mem[rt->no]) { - d = newdfa(v, &rt->left.cnfa, v->g->cm); - if (longest(v, d, begin, end) != end) { - freedfa(d); - return REG_NOMATCH; - } - freedfa(d); - v->mem[rt->no] = 1; - if (v->eflags®_MTRACE) - printf("csingleton matched\n"); - } - - er = cdissect(v, rt->left.tree, begin, end); - if (er != REG_OKAY) - return er; - subset(v, &rt->left, begin, end); - return REG_OKAY; -} - -/* - - cbrdissect - determine backref subexpression matches - ^ static int cbrdissect(struct vars *, struct rtree *, chr *, chr *); - */ -static int /* regexec return code */ -cbrdissect(v, rt, begin, end) -struct vars *v; -struct rtree *rt; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ -{ - int i; - int n = -rt->left.subno; - size_t len; - chr *paren; - chr *p; - chr *stop; - int min = rt->left.min; - int max = rt->left.max; - - assert(rt != NULL); - assert(rt->op == 'b'); - assert(rt->right.cnfa.nstates == 0); - assert((size_t)n < v->nmatch); - - if (v->eflags®_MTRACE) - printf("cbackref n%d %d{%d-%d}\n", rt->no, n, min, max); - - if (v->pmatch[n].rm_so == -1) - return REG_NOMATCH; - paren = v->start + v->pmatch[n].rm_so; - len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so; - - /* no room to maneuver -- retries are pointless */ - if (v->mem[rt->no]) - return REG_NOMATCH; - v->mem[rt->no] = 1; - - /* special-case zero-length string */ - if (len == 0) { - if (begin == end) - return REG_OKAY; - return REG_NOMATCH; - } - - /* and too-short string */ - if ((size_t)(end - begin) < len) - return REG_NOMATCH; - stop = end - len; - - /* count occurrences */ - i = 0; - for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) { - if ((*v->g->compare)(paren, p, len) != 0) - break; - i++; - } - if (v->eflags®_MTRACE) - printf("cbackref found %d\n", i); - - /* and sort it out */ - if (p != end) /* didn't consume all of it */ - return REG_NOMATCH; - if (min <= i && (i <= max || max == INFINITY)) - return REG_OKAY; - return REG_NOMATCH; /* out of range */ -} - -/* - - caltdissect - determine alternative subexpression matches (w. complications) - ^ static int caltdissect(struct vars *, struct rtree *, chr *, chr *); - */ -static int /* regexec return code */ -caltdissect(v, rt, begin, end) -struct vars *v; -struct rtree *rt; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ -{ - struct dfa *d; - int er; -# define UNTRIED 0 /* not yet tried at all */ -# define TRYING 1 /* top matched, trying submatches */ -# define TRIED 2 /* top didn't match or submatches exhausted */ - - if (rt == NULL) - return REG_NOMATCH; - assert(rt->op == '|'); - if (v->mem[rt->no] == TRIED) - return caltdissect(v, rt->next, begin, end); - - if (v->eflags®_MTRACE) - printf("calt n%d\n", rt->no); - assert(rt->left.begin != NULL); - - if (v->mem[rt->no] == UNTRIED) { - d = newdfa(v, &rt->left.cnfa, v->g->cm); - if (ISERR()) - return v->err; - if (longest(v, d, begin, end) != end) { - freedfa(d); - v->mem[rt->no] = TRIED; - return caltdissect(v, rt->next, begin, end); - } - freedfa(d); - if (v->eflags®_MTRACE) - printf("calt matched\n"); - v->mem[rt->no] = TRYING; - } - - er = cdissect(v, rt->left.tree, begin, end); - if (er == REG_OKAY) { - subset(v, &rt->left, begin, end); - return REG_OKAY; - } - if (er != REG_NOMATCH) - return er; - - v->mem[rt->no] = TRIED; - return caltdissect(v, rt->next, begin, end); -} - -/* - - dismatch - determine overall match using top-level dissection - * The retry memory stores the offset of the trial midpoint from begin, - * plus 1 so that 0 uniquely means "clean slate". - ^ static chr *dismatch(struct vars *, struct rtree *, chr *, chr *); - */ -static chr * /* endpoint, or NULL */ -dismatch(v, rt, begin, end) -struct vars *v; -struct rtree *rt; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ -{ - struct dfa *d; - struct dfa *d2; - chr *mid; - chr *ret; - - if (rt == NULL) - return begin; - if (v->eflags®_MTRACE) - printf("dsubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end)); - - /* punt various cases to auxiliaries */ - if (rt->right.cnfa.nstates == 0) /* no RHS */ - return dismsin(v, rt, begin, end); - if (rt->left.prefer == SHORTER) /* reverse scan */ - return dismrev(v, rt, begin, end); - - /* concatenation -- need to split the substring between parts */ - assert(rt->op == ','); - assert(rt->left.cnfa.nstates > 0); - assert(rt->right.cnfa.nstates > 0); - d = newdfa(v, &rt->left.cnfa, v->g->cm); - if (ISERR()) - return NULL; - d2 = newdfa(v, &rt->right.cnfa, v->g->cm); - if (ISERR()) { - freedfa(d); - return NULL; - } - if (v->eflags®_MTRACE) - printf("dconcat %d\n", rt->no); - - /* pick a tentative midpoint */ - if (v->mem[rt->no] == 0) { - mid = longest(v, d, begin, end); - if (mid == NULL) { - freedfa(d); - freedfa(d2); - return NULL; - } - if (v->eflags®_MTRACE) - printf("tentative midpoint %ld\n", (long)OFF(mid)); - v->mem[rt->no] = (mid - begin) + 1; - } else { - mid = begin + (v->mem[rt->no] - 1); - if (v->eflags®_MTRACE) - printf("working midpoint %ld\n", (long)OFF(mid)); - } - - /* iterate until satisfaction or failure */ - for (;;) { - /* try this midpoint on for size */ - if (rt->right.tree == NULL || rt->right.tree->op == 'b') { - if (rt->right.prefer == LONGER) - ret = longest(v, d2, mid, end); - else - ret = shortest(v, d2, mid, mid, end); - } else { - if (longest(v, d2, mid, end) != NULL) - ret = dismatch(v, rt->right.tree, mid, end); - else - ret = NULL; - } - if (ret != NULL) - break; /* NOTE BREAK OUT */ - - /* that midpoint didn't work, find a new one */ - if (mid == begin) { - /* all possibilities exhausted */ - if (v->eflags®_MTRACE) - printf("%d no midpoint\n", rt->no); - freedfa(d); - freedfa(d2); - return NULL; - } - mid = longest(v, d, begin, mid-1); - if (mid == NULL) { - /* failed to find a new one */ - if (v->eflags®_MTRACE) - printf("%d failed midpoint\n", rt->no); - freedfa(d); - freedfa(d2); - return NULL; - } - if (v->eflags®_MTRACE) - printf("%d: new midpoint %ld\n", rt->no, - (long)OFF(mid)); - v->mem[rt->no] = (mid - begin) + 1; - zapmem(v, rt->right.tree); - } - - /* satisfaction */ - if (v->eflags®_MTRACE) - printf("successful\n"); - freedfa(d); - freedfa(d2); - return ret; -} - -/* - - dismrev - determine overall match using top-level dissection - * The retry memory stores the offset of the trial midpoint from begin, - * plus 1 so that 0 uniquely means "clean slate". - ^ static chr *dismrev(struct vars *, struct rtree *, chr *, chr *); - */ -static chr * /* endpoint, or NULL */ -dismrev(v, rt, begin, end) -struct vars *v; -struct rtree *rt; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ -{ - struct dfa *d; - struct dfa *d2; - chr *mid; - chr *ret; - - if (rt == NULL) - return begin; - if (v->eflags®_MTRACE) - printf("rsubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end)); - - /* concatenation -- need to split the substring between parts */ - assert(rt->op == ','); - assert(rt->left.cnfa.nstates > 0); - assert(rt->right.cnfa.nstates > 0); - d = newdfa(v, &rt->left.cnfa, v->g->cm); - if (ISERR()) - return NULL; - d2 = newdfa(v, &rt->right.cnfa, v->g->cm); - if (ISERR()) { - freedfa(d); - return NULL; - } - if (v->eflags®_MTRACE) - printf("dconcat %d\n", rt->no); - - /* pick a tentative midpoint */ - if (v->mem[rt->no] == 0) { - mid = shortest(v, d, begin, begin, end); - if (mid == NULL) { - freedfa(d); - freedfa(d2); - return NULL; - } - if (v->eflags®_MTRACE) - printf("tentative midpoint %ld\n", (long)OFF(mid)); - v->mem[rt->no] = (mid - begin) + 1; - } else { - mid = begin + (v->mem[rt->no] - 1); - if (v->eflags®_MTRACE) - printf("working midpoint %ld\n", (long)OFF(mid)); - } - - /* iterate until satisfaction or failure */ - for (;;) { - /* try this midpoint on for size */ - if (rt->right.tree == NULL || rt->right.tree->op == 'b') { - if (rt->right.prefer == LONGER) - ret = longest(v, d2, mid, end); - else - ret = shortest(v, d2, mid, mid, end); - } else { - if (longest(v, d2, mid, end) != NULL) - ret = dismatch(v, rt->right.tree, mid, end); - else - ret = NULL; - } - if (ret != NULL) - break; /* NOTE BREAK OUT */ - - /* that midpoint didn't work, find a new one */ - if (mid == end) { - /* all possibilities exhausted */ - if (v->eflags®_MTRACE) - printf("%d no midpoint\n", rt->no); - freedfa(d); - freedfa(d2); - return NULL; - } - mid = shortest(v, d, begin, mid+1, end); - if (mid == NULL) { - /* failed to find a new one */ - if (v->eflags®_MTRACE) - printf("%d failed midpoint\n", rt->no); - freedfa(d); - freedfa(d2); - return NULL; - } - if (v->eflags®_MTRACE) - printf("%d: new midpoint %ld\n", rt->no, - (long)OFF(mid)); - v->mem[rt->no] = (mid - begin) + 1; - zapmem(v, rt->right.tree); - } - - /* satisfaction */ - if (v->eflags®_MTRACE) - printf("successful\n"); - freedfa(d); - freedfa(d2); - return ret; -} - -/* - - dismsin - determine singleton subexpression matches (with complications) - ^ static chr *dismsin(struct vars *, struct rtree *, chr *, chr *); - */ -static chr * -dismsin(v, rt, begin, end) -struct vars *v; -struct rtree *rt; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ -{ - struct dfa *d; - chr *ret; - - assert(rt != NULL); - assert(rt->op == ','); - assert(rt->right.cnfa.nstates == 0); - if (v->eflags®_MTRACE) - printf("dsingleton %d\n", rt->no); - - assert(rt->left.cnfa.nstates > 0); - - /* retries are pointless */ - if (v->mem[rt->no]) - return NULL; - v->mem[rt->no] = 1; - - d = newdfa(v, &rt->left.cnfa, v->g->cm); - if (d == NULL) - return NULL; - if (rt->left.prefer == LONGER) - ret = longest(v, d, begin, end); - else - ret = shortest(v, d, begin, begin, end); - freedfa(d); - if (ret != NULL && (v->eflags®_MTRACE)) - printf("dsingleton matched\n"); - return ret; -} - -/* - - longest - longest-preferred matching engine - ^ static chr *longest(struct vars *, struct dfa *, chr *, chr *); - */ -static chr * /* endpoint, or NULL */ -longest(v, d, start, stop) -struct vars *v; /* used only for debug and exec flags */ -struct dfa *d; -chr *start; /* where the match should start */ -chr *stop; /* match must end at or before here */ -{ - chr *cp; - chr *realstop = (stop == v->stop) ? stop : stop + 1; - color co; - struct sset *css; - struct sset *ss; - chr *post; - int i; - struct colormap *cm = d->cm; - - /* initialize */ - css = initialize(v, d, start); - cp = start; - - /* startup */ - if (v->eflags®_FTRACE) - printf("+++ startup +++\n"); - if (cp == v->start) { - co = d->cnfa->bos[(v->eflags®_NOTBOL) ? 0 : 1]; - if (v->eflags®_FTRACE) - printf("color %ld\n", (long)co); - } else { - co = getcolor(cm, *(cp - 1)); - if (v->eflags®_FTRACE) - printf("char %c, color %ld\n", (char)*(cp-1), (long)co); - } - css = miss(v, d, css, co, cp); - if (css == NULL) - return NULL; - css->lastseen = cp; - - /* main loop */ - if (v->eflags®_FTRACE) - while (cp < realstop) { - printf("+++ at c%d +++\n", css - d->ssets); - co = getcolor(cm, *cp); - printf("char %c, color %ld\n", (char)*cp, (long)co); - ss = css->outs[co]; - if (ss == NULL) { - ss = miss(v, d, css, co, cp); - if (ss == NULL) - break; /* NOTE BREAK OUT */ - } - cp++; - ss->lastseen = cp; - css = ss; - } - else - while (cp < realstop) { - co = getcolor(cm, *cp); - ss = css->outs[co]; - if (ss == NULL) { - ss = miss(v, d, css, co, cp+1); - if (ss == NULL) - break; /* NOTE BREAK OUT */ - } - cp++; - ss->lastseen = cp; - css = ss; - } - - /* shutdown */ - if (v->eflags®_FTRACE) - printf("+++ shutdown at c%d +++\n", css - d->ssets); - if (cp == v->stop && stop == v->stop) { - co = d->cnfa->eos[(v->eflags®_NOTEOL) ? 0 : 1]; - if (v->eflags®_FTRACE) - printf("color %ld\n", (long)co); - ss = miss(v, d, css, co, cp); - /* special case: match ended at eol? */ - if (ss != NULL && (ss->flags&POSTSTATE)) - return cp; - else if (ss != NULL) - ss->lastseen = cp; /* to be tidy */ - } - - /* find last match, if any */ - post = d->lastpost; - for (ss = d->ssets, i = 0; i < d->nssused; ss++, i++) - if ((ss->flags&POSTSTATE) && post != ss->lastseen && - (post == NULL || post < ss->lastseen)) - post = ss->lastseen; - if (post != NULL) /* found one */ - return post - 1; - - return NULL; -} - -/* - - shortest - shortest-preferred matching engine - ^ static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *); - */ -static chr * /* endpoint, or NULL */ -shortest(v, d, start, min, max) -struct vars *v; /* used only for debug and exec flags */ -struct dfa *d; -chr *start; /* where the match should start */ -chr *min; /* match must end at or after here */ -chr *max; /* match must end at or before here */ -{ - chr *cp; - chr *realmin = (min == v->stop) ? min : min + 1; - chr *realmax = (max == v->stop) ? max : max + 1; - color co; - struct sset *css; - struct sset *ss = NULL; - struct colormap *cm = d->cm; - - /* initialize */ - css = initialize(v, d, start); - cp = start; - - /* startup */ - if (v->eflags®_FTRACE) - printf("--- startup ---\n"); - if (cp == v->start) { - co = d->cnfa->bos[(v->eflags®_NOTBOL) ? 0 : 1]; - if (v->eflags®_FTRACE) - printf("color %ld\n", (long)co); - } else { - co = getcolor(cm, *(cp - 1)); - if (v->eflags®_FTRACE) - printf("char %c, color %ld\n", (char)*(cp-1), (long)co); - } - css = miss(v, d, css, co, cp); - if (css == NULL) - return NULL; - css->lastseen = cp; - - /* main loop */ - if (v->eflags®_FTRACE) - while (cp < realmax) { - printf("--- at c%d ---\n", css - d->ssets); - co = getcolor(cm, *cp); - printf("char %c, color %ld\n", (char)*cp, (long)co); - ss = css->outs[co]; - if (ss == NULL) { - ss = miss(v, d, css, co, cp); - if (ss == NULL) - break; /* NOTE BREAK OUT */ - } - cp++; - ss->lastseen = cp; - css = ss; - if ((ss->flags&POSTSTATE) && cp >= realmin) - break; /* NOTE BREAK OUT */ - } - else - while (cp < realmax) { - co = getcolor(cm, *cp); - ss = css->outs[co]; - if (ss == NULL) { - ss = miss(v, d, css, co, cp+1); - if (ss == NULL) - break; /* NOTE BREAK OUT */ - } - cp++; - ss->lastseen = cp; - css = ss; - if ((ss->flags&POSTSTATE) && cp >= realmin) - break; /* NOTE BREAK OUT */ - } - - if (ss == NULL) - return NULL; - if (ss->flags&POSTSTATE) { - assert(cp >= realmin); - return cp - 1; - } - - /* shutdown */ - if (v->eflags®_FTRACE) - printf("--- shutdown at c%d ---\n", css - d->ssets); - if (cp == v->stop && max == v->stop) { - co = d->cnfa->eos[(v->eflags®_NOTEOL) ? 0 : 1]; - if (v->eflags®_FTRACE) - printf("color %ld\n", (long)co); - ss = miss(v, d, css, co, cp); - /* special case: match ended at eol? */ - if (ss != NULL && (ss->flags&POSTSTATE)) - return cp; - } - - return NULL; -} - -/* - - newdfa - set up a fresh DFA - ^ static struct dfa *newdfa(struct vars *, struct cnfa *, - ^ struct colormap *); - */ -static struct dfa * -newdfa(v, cnfa, cm) -struct vars *v; -struct cnfa *cnfa; -struct colormap *cm; -{ - struct dfa *d = (struct dfa *)ckalloc(sizeof(struct dfa)); - int wordsper = (cnfa->nstates + UBITS - 1) / UBITS; - struct sset *ss; - int i; - - assert(cnfa != NULL && cnfa->nstates != 0); - if (d == NULL) { - ERR(REG_ESPACE); - return NULL; - } - - d->ssets = (struct sset *)ckalloc(CACHE * sizeof(struct sset)); - d->statesarea = (unsigned *)ckalloc((CACHE+WORK) * wordsper * - sizeof(unsigned)); - d->work = &d->statesarea[CACHE * wordsper]; - d->outsarea = (struct sset **)ckalloc(CACHE * cnfa->ncolors * - sizeof(struct sset *)); - d->incarea = (struct arcp *)ckalloc(CACHE * cnfa->ncolors * - sizeof(struct arcp)); - if (d->ssets == NULL || d->statesarea == NULL || d->outsarea == NULL || - d->incarea == NULL) { - freedfa(d); - ERR(REG_ESPACE); - return NULL; - } - - d->nssets = (v->eflags®_SMALL) ? 5 : CACHE; - d->nssused = 0; - d->nstates = cnfa->nstates; - d->ncolors = cnfa->ncolors; - d->wordsper = wordsper; - d->cnfa = cnfa; - d->cm = cm; - d->lastpost = NULL; - - for (ss = d->ssets, i = 0; i < d->nssets; ss++, i++) { - /* initialization of most fields is done as needed */ - ss->states = &d->statesarea[i * d->wordsper]; - ss->outs = &d->outsarea[i * d->ncolors]; - ss->inchain = &d->incarea[i * d->ncolors]; - } - - return d; -} - -/* - - freedfa - free a DFA - ^ static VOID freedfa(struct dfa *); - */ -static VOID -freedfa(d) -struct dfa *d; -{ - if (d->ssets != NULL) - ckfree((char *)d->ssets); - if (d->statesarea != NULL) - ckfree((char *)d->statesarea); - if (d->outsarea != NULL) - ckfree((char *)d->outsarea); - if (d->incarea != NULL) - ckfree((char *)d->incarea); - ckfree((char *)d); -} - -/* - - hash - construct a hash code for a bitvector - * There are probably better ways, but they're more expensive. - ^ static unsigned hash(unsigned *, int); - */ -static unsigned -hash(uv, n) -unsigned *uv; -int n; -{ - int i; - unsigned h; - - h = 0; - for (i = 0; i < n; i++) - h ^= uv[i]; - return h; -} - -/* - - initialize - hand-craft a cache entry for startup, otherwise get ready - ^ static struct sset *initialize(struct vars *, struct dfa *, chr *); - */ -static struct sset * -initialize(v, d, start) -struct vars *v; /* used only for debug flags */ -struct dfa *d; -chr *start; -{ - struct sset *ss; - int i; - - /* is previous one still there? */ - if (d->nssused > 0 && (d->ssets[0].flags&STARTER)) - ss = &d->ssets[0]; - else { /* no, must (re)build it */ - ss = getvacant(v, d); - for (i = 0; i < d->wordsper; i++) - ss->states[i] = 0; - BSET(ss->states, d->cnfa->pre); - ss->hash = hash(ss->states, d->wordsper); - assert(d->cnfa->pre != d->cnfa->post); - ss->flags = STARTER; - /* lastseen dealt with below */ - } - - for (i = 0; i < d->nssused; i++) - d->ssets[i].lastseen = NULL; - ss->lastseen = start; /* maybe untrue, but harmless */ - d->lastpost = NULL; - return ss; -} - -/* - - miss - handle a cache miss - ^ static struct sset *miss(struct vars *, struct dfa *, struct sset *, - ^ pcolor, chr *); - */ -static struct sset * /* NULL if goes to empty set */ -miss(v, d, css, co, cp) -struct vars *v; /* used only for debug flags */ -struct dfa *d; -struct sset *css; -pcolor co; -chr *cp; /* next chr */ -{ - struct cnfa *cnfa = d->cnfa; - int i; - unsigned h; - struct carc *ca; - struct sset *p; - int ispost; - int gotstate; - int dolacons; - int didlacons; - - /* for convenience, we can be called even if it might not be a miss */ - if (css->outs[co] != NULL) { - if (v->eflags®_FTRACE) - printf("hit\n"); - return css->outs[co]; - } - if (v->eflags®_FTRACE) - printf("miss\n"); - - /* first, what set of states would we end up in? */ - for (i = 0; i < d->wordsper; i++) - d->work[i] = 0; - ispost = 0; - gotstate = 0; - for (i = 0; i < d->nstates; i++) - if (ISBSET(css->states, i)) - for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++) - if (ca->co == co) { - BSET(d->work, ca->to); - gotstate = 1; - if (ca->to == cnfa->post) - ispost = 1; - if (v->eflags®_FTRACE) - printf("%d -> %d\n", i, ca->to); - } - dolacons = (gotstate) ? cnfa->haslacons : 0; - didlacons = 0; - while (dolacons) { /* transitive closure */ - dolacons = 0; - for (i = 0; i < d->nstates; i++) - if (ISBSET(d->work, i)) - for (ca = cnfa->states[i]; ca->co != COLORLESS; - ca++) - if (ca->co > cnfa->ncolors && - !ISBSET(d->work, ca->to) && - lacon(v, cnfa, cp, - ca->co)) { - BSET(d->work, ca->to); - dolacons = 1; - didlacons = 1; - if (ca->to == cnfa->post) - ispost = 1; - if (v->eflags®_FTRACE) - printf("%d :-> %d\n", - i, ca->to); - } - } - if (!gotstate) - return NULL; - h = hash(d->work, d->wordsper); - - /* next, is that in the cache? */ - for (p = d->ssets, i = d->nssused; i > 0; p++, i--) - if (p->hash == h && memcmp((VOID *)d->work, (VOID *)p->states, - d->wordsper*sizeof(unsigned)) == 0) { - if (v->eflags®_FTRACE) - printf("cached c%d\n", p - d->ssets); - break; /* NOTE BREAK OUT */ - } - if (i == 0) { /* nope, need a new cache entry */ - p = getvacant(v, d); - assert(p != css); - for (i = 0; i < d->wordsper; i++) - p->states[i] = d->work[i]; - p->hash = h; - p->flags = (ispost) ? POSTSTATE : 0; - /* lastseen to be dealt with by caller */ - } - - if (!didlacons) { /* lookahead conds. always cache miss */ - css->outs[co] = p; - css->inchain[co] = p->ins; - p->ins.ss = css; - p->ins.co = (color) co; - } - return p; -} - -/* - - lacon - lookahead-constraint checker for miss() - ^ static int lacon(struct vars *, struct cnfa *, chr *, pcolor); - */ -static int /* predicate: constraint satisfied? */ -lacon(v, pcnfa, precp, co) -struct vars *v; -struct cnfa *pcnfa; /* parent cnfa */ -chr *precp; /* points to previous chr */ -pcolor co; /* "color" of the lookahead constraint */ -{ - int n; - struct subre *sub; - struct dfa *d; - chr *end; - - n = co - pcnfa->ncolors; - assert(n < v->g->nlacons && v->g->lacons != NULL); - if (v->eflags®_FTRACE) - printf("=== testing lacon %d\n", n); - sub = &v->g->lacons[n]; - d = newdfa(v, &sub->cnfa, v->g->cm); - if (d == NULL) { - ERR(REG_ESPACE); - return 0; - } - end = longest(v, d, precp, v->stop); - freedfa(d); - if (v->eflags®_FTRACE) - printf("=== lacon %d match %d\n", n, (end != NULL)); - return (sub->subno) ? (end != NULL) : (end == NULL); -} - -/* - - getvacant - get a vacant state set - * This routine clears out the inarcs and outarcs, but does not otherwise - * clear the innards of the state set -- that's up to the caller. - ^ static struct sset *getvacant(struct vars *, struct dfa *); - */ -static struct sset * -getvacant(v, d) -struct vars *v; /* used only for debug flags */ -struct dfa *d; -{ - int i; - struct sset *ss; - struct sset *p; - struct arcp ap; - struct arcp lastap; - color co; - - ss = pickss(v, d); - - /* clear out its inarcs, including self-referential ones */ - ap = ss->ins; - while ((p = ap.ss) != NULL) { - co = ap.co; - if (v->eflags®_FTRACE) - printf("zapping c%d's %ld outarc\n", p - d->ssets, - (long)co); - p->outs[co] = NULL; - ap = p->inchain[co]; - p->inchain[co].ss = NULL; /* paranoia */ - } - ss->ins.ss = NULL; - - /* take it off the inarc chains of the ssets reached by its outarcs */ - for (i = 0; i < d->ncolors; i++) { - p = ss->outs[i]; - assert(p != ss); /* not self-referential */ - if (p == NULL) - continue; /* NOTE CONTINUE */ - if (v->eflags®_FTRACE) - printf("deleting outarc %d from c%d's inarc chain\n", - i, p - d->ssets); - if (p->ins.ss == ss && p->ins.co == i) - p->ins = ss->inchain[i]; - else { - assert(p->ins.ss != NULL); - for (ap = p->ins; ap.ss != NULL && - !(ap.ss == ss && ap.co == i); - ap = ap.ss->inchain[ap.co]) - lastap = ap; - assert(ap.ss != NULL); - lastap.ss->inchain[lastap.co] = ss->inchain[i]; - } - ss->outs[i] = NULL; - ss->inchain[i].ss = NULL; - } - - /* if ss was a success state, may need to remember location */ - if ((ss->flags&POSTSTATE) && ss->lastseen != d->lastpost && - (d->lastpost == NULL || d->lastpost < ss->lastseen)) - d->lastpost = ss->lastseen; - - return ss; -} - -/* - - pickss - pick the next stateset to be used - ^ static struct sset *pickss(struct vars *, struct dfa *); - */ -static struct sset * -pickss(v, d) -struct vars *v; /* used only for debug flags */ -struct dfa *d; -{ - int i; - struct sset *ss; - struct sset *oldest; - - /* shortcut for cases where cache isn't full */ - if (d->nssused < d->nssets) { - ss = &d->ssets[d->nssused]; - d->nssused++; - if (v->eflags®_FTRACE) - printf("new c%d\n", ss - d->ssets); - /* must make innards consistent */ - ss->ins.ss = NULL; - for (i = 0; i < d->ncolors; i++) { - ss->outs[i] = NULL; - ss->inchain[i].ss = NULL; - } - ss->flags = 0; - ss->ins.co = 0; - return ss; - } - - /* look for oldest */ - oldest = d->ssets; - for (ss = d->ssets, i = d->nssets; i > 0; ss++, i--) { - if (ss->lastseen != oldest->lastseen && (ss->lastseen == NULL || - ss->lastseen < oldest->lastseen)) - oldest = ss; - } - if (v->eflags®_FTRACE) - printf("replacing c%d\n", oldest - d->ssets); - return oldest; -} - -#define EXEC 1 -#include "color.c" diff --git a/generic/guts.h b/generic/guts.h deleted file mode 100644 index 7b847ac..0000000 --- a/generic/guts.h +++ /dev/null @@ -1,233 +0,0 @@ -/* - * guts.h -- - * - * Regexp package file: Misc. utilities. - * - * Copyright (c) 1998 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., and Sun Microsystems Inc., none of - * whom are responsible for the results. The author thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Copyright (c) 1998 by Sun Microsystems, Inc. - * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: guts.h,v 1.1.2.2 1998/10/03 01:56:40 stanton Exp $ - */ - -#include "tclInt.h" - -#define NOTREACHED 0 -#define xxx 1 - -#ifndef _POSIX2_RE_DUP_MAX -#define _POSIX2_RE_DUP_MAX 255 -#endif -#define DUPMAX _POSIX2_RE_DUP_MAX -#define INFINITY (DUPMAX+1) - -/* bitmap manipulation */ -#define UBITS (CHAR_BIT * sizeof(unsigned)) -#define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS)) -#define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS))) - -/* - * Map a truth value into -1 for false, 1 for true. This is so it is - * possible to write compile-time assertions by declaring a dummy array - * of this size. (Why not #if? Because sizeof is not available there.) - */ -#define NEGIFNOT(x) (2*!!(x) - 1) /* !! ensures 0 or 1 */ - -/* - * We dissect a chr into byts for colormap table indexing. Here we define - * a byt, which will be the same as a byte on most machines... The exact - * size of a byt is not critical, but about 8 bits is good, and extraction - * of 8-bit chunks is sometimes especially fast. - * - * Changes in several places are needed to handle an increase in MAXBYTS. - * Those places check whether MAXBYTS is larger than they expect. - */ -#ifndef BYTBITS -#define BYTBITS 8 /* bits in a byt */ -#endif -#define BYTTAB (1<