summary refs log tree commit diff stats
path: root/generic
diff options
context:
space:
mode:
author stanton <stanton> 1998-10-21 20:39:57 (GMT)
committer stanton <stanton> 1998-10-21 20:39:57 (GMT)
commit 7e7056e21d0a0d9fa39bdfd742e82b101a6c4b7c (patch)
tree 99e08a09e1567ade05e7bc7edac3758b3695d424 /generic
parent 966ff877247e93fbe6e641cfa77df19d03cfe932 (diff)
download tcl-7e7056e21d0a0d9fa39bdfd742e82b101a6c4b7c.zip
tcl-7e7056e21d0a0d9fa39bdfd742e82b101a6c4b7c.tar.gz
tcl-7e7056e21d0a0d9fa39bdfd742e82b101a6c4b7c.tar.bz2
Integrated latest regexp changes from Henry Spencer.
Moved regexp related declarations out of tclInt.h and into tclRegexp.h. Added "encoding" command.
Diffstat (limited to 'generic')
-rw-r--r-- generic/chr.h 48
-rw-r--r-- generic/locale.c 675
-rw-r--r-- generic/regc_color.c (renamed from generic/color.c) 298
-rw-r--r-- generic/regc_cvec.c 143
-rw-r--r-- generic/regc_lex.c (renamed from generic/lex.c) 336
-rw-r--r-- generic/regc_locale.c 426
-rw-r--r-- generic/regc_nfa.c (renamed from generic/nfa.c) 410
-rw-r--r-- generic/regcomp.c (renamed from generic/compile.c) 610
-rw-r--r-- generic/regcustom.h 90
-rw-r--r-- generic/regerror.c 82
-rw-r--r-- generic/regerrs.h 19
-rw-r--r-- generic/regex.h 299
-rw-r--r-- generic/regexec.c (renamed from generic/exec.c) 459
-rw-r--r-- generic/regfree.c 25
-rw-r--r-- generic/regfronts.c 56
-rw-r--r-- generic/regguts.h (renamed from generic/guts.h) 260
-rw-r--r-- generic/tclBasic.c 4
-rw-r--r-- generic/tclCmdAH.c 121
-rw-r--r-- generic/tclCmdIL.c 3
-rw-r--r-- generic/tclCmdMZ.c 51
-rw-r--r-- generic/tclEncoding.c 60
-rw-r--r-- generic/tclFileName.c 3
-rw-r--r-- generic/tclInt.h 46
-rw-r--r-- generic/tclRegexp.c 283
-rw-r--r-- generic/tclRegexp.h 219
-rw-r--r-- generic/tclTest.c 334
26 files changed, 2972 insertions, 2388 deletions
diff --git a/generic/chr.h b/generic/chr.h
deleted file mode 100644
index 6a21159..0000000
--- a/generic/chr.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * chr.h --
- *
- * Regexp package file: Unichar version of stuff related to the
- * nature of a character.
- *
- * Copyright (c) 1998 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
- * whom are responsible for the results. The author thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Copyright (c) 1998 by Sun Microsystems, Inc.
- *
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: chr.h,v 1.1.2.2 1998/10/03 01:56:40 stanton Exp $
- */
-
-typedef Tcl_UniChar chr; /* internal character type */
-typedef int pchr; /* what it promotes to */
-typedef unsigned uchr; /* unsigned type big enough to hold a chr */
-#define CHRBITS (sizeof(Tcl_UniChar) * CHAR_BIT) /* bits in a chr */
-#define CHR(c) (UCHAR(c)) /* turn a char literal into a chr literal */
-#define DIGITVAL(c) ((c)-'0') /* turn a chr digit into its value */
-
-/*
- * char names for the externally-visible functions
- */
-#define compile re_ucomp
-#define exec re_uexec
diff --git a/generic/locale.c b/generic/locale.c
deleted file mode 100644
index ca56fc4..0000000
--- a/generic/locale.c
+++ /dev/null
@@ -1,675 +0,0 @@
-/*
- * locale.c --
- *
- * Regexp package file:
- * collating-element handling and other locale-specific stuff
- *
- * Copyright (c) 1998 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
- * whom are responsible for the results. The author thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Copyright (c) 1998 by Sun Microsystems, Inc.
- *
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: locale.c,v 1.1.2.2 1998/10/03 01:56:41 stanton Exp $
- */
-
-/*
- * This is largely dummy code, since it needs locale interfaces. The
- * dummy code implements more or less the C locale. Parts of the code
- * are marked "dummy" and "generic" in hopes of making the situation
- * clearer.
- *
- * As a hack for testing, if REG_FAKE is turned on, we add a single
- * collating element ch between c and d, and a single equivalence class
- * containing x and y.
- *
- * The type "celt" is an entirely opaque non-array type -- it need not be
- * an integer type, it could be (say) a pointer -- which has distinct values
- * for all chrs and all collating elements. The only things the outside
- * world does to celts are copying them around and comparing them for
- * equality; everything else is done in this file. There need be no "null"
- * value for celt. The dummy code uses wint_t as celt, with WEOF as the
- * celt code for ch (ugh!).
- */
-
-/*
- * dummy:
- ^ #def MAXCE 2 // longest CE code is prepared to handle
- ^ typedef wint_t celt; // type holding distinct codes for all chrs, all CEs
- */
-
-/* dummy: character-name table */
-static struct cname {
- char *name;
- char code;
-} cnames[] = {
- {"NUL", '\0'},
- {"SOH", '\001'},
- {"STX", '\002'},
- {"ETX", '\003'},
- {"EOT", '\004'},
- {"ENQ", '\005'},
- {"ACK", '\006'},
- {"BEL", '\007'},
- {"alert", '\007'},
- {"BS", '\010'},
- {"backspace", '\b'},
- {"HT", '\011'},
- {"tab", '\t'},
- {"LF", '\012'},
- {"newline", '\n'},
- {"VT", '\013'},
- {"vertical-tab", '\v'},
- {"FF", '\014'},
- {"form-feed", '\f'},
- {"CR", '\015'},
- {"carriage-return", '\r'},
- {"SO", '\016'},
- {"SI", '\017'},
- {"DLE", '\020'},
- {"DC1", '\021'},
- {"DC2", '\022'},
- {"DC3", '\023'},
- {"DC4", '\024'},
- {"NAK", '\025'},
- {"SYN", '\026'},
- {"ETB", '\027'},
- {"CAN", '\030'},
- {"EM", '\031'},
- {"SUB", '\032'},
- {"ESC", '\033'},
- {"IS4", '\034'},
- {"FS", '\034'},
- {"IS3", '\035'},
- {"GS", '\035'},
- {"IS2", '\036'},
- {"RS", '\036'},
- {"IS1", '\037'},
- {"US", '\037'},
- {"space", ' '},
- {"exclamation-mark", '!'},
- {"quotation-mark", '"'},
- {"number-sign", '#'},
- {"dollar-sign", '$'},
- {"percent-sign", '%'},
- {"ampersand", '&'},
- {"apostrophe", '\''},
- {"left-parenthesis", '('},
- {"right-parenthesis", ')'},
- {"asterisk", '*'},
- {"plus-sign", '+'},
- {"comma", ','},
- {"hyphen", '-'},
- {"hyphen-minus", '-'},
- {"period", '.'},
- {"full-stop", '.'},
- {"slash", '/'},
- {"solidus", '/'},
- {"zero", '0'},
- {"one", '1'},
- {"two", '2'},
- {"three", '3'},
- {"four", '4'},
- {"five", '5'},
- {"six", '6'},
- {"seven", '7'},
- {"eight", '8'},
- {"nine", '9'},
- {"colon", ':'},
- {"semicolon", ';'},
- {"less-than-sign", '<'},
- {"equals-sign", '='},
- {"greater-than-sign", '>'},
- {"question-mark", '?'},
- {"commercial-at", '@'},
- {"left-square-bracket", '['},
- {"backslash", '\\'},
- {"reverse-solidus", '\\'},
- {"right-square-bracket", ']'},
- {"circumflex", '^'},
- {"circumflex-accent", '^'},
- {"underscore", '_'},
- {"low-line", '_'},
- {"grave-accent", '`'},
- {"left-brace", '{'},
- {"left-curly-bracket", '{'},
- {"vertical-line", '|'},
- {"right-brace", '}'},
- {"right-curly-bracket", '}'},
- {"tilde", '~'},
- {"DEL", '\177'},
- {NULL, 0}
-};
-
-/* dummy: character-class table */
-static struct cclass {
- char *name;
- char *chars;
- int hasch;
-} cclasses[] = {
- {"alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789", 1},
- {"alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
- 1},
- {"blank", " \t", 0},
- {"cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
-\25\26\27\30\31\32\33\34\35\36\37\177", 0},
- {"digit", "0123456789", 0},
- {"graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
- 1},
- {"lower", "abcdefghijklmnopqrstuvwxyz",
- 1},
- {"print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
- 1},
- {"punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
- 0},
- {"space", "\t\n\v\f\r ", 0},
- {"upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
- 0},
- {"xdigit", "0123456789ABCDEFabcdef",
- 0},
- {NULL, 0, 0}
-};
-
-#define CH WEOF /* dummy */
-
-/*
- - nces - how many distinct collating elements are there?
- * This is pure dummy code, although a straight "return 0" is definitely
- * what's wanted for all locales lucky enough not to have these stupid
- * things. Case counterparts should be included.
- ^ static int nces(struct vars *);
- */
-static int
-nces(v)
-struct vars *v;
-{
- return (v->cflags&REG_FAKE) ? 1 : 0;
-}
-
-/*
- - nleaders - how many chrs can be first chrs of collating elements?
- * This is pure dummy code, although a straight "return 0" is definitely
- * what's wanted for all locales lucky enough not to have these stupid
- * things. Case counterparts should be included.
- ^ static int nleaders(struct vars *);
- */
-static int
-nleaders(v)
-struct vars *v;
-{
- return (v->cflags&REG_FAKE) ? 1 : 0;
-}
-
-/*
- - allces - return a cvec with all the collating elements of the locale
- * This would be kind of costly if there were large numbers of them; with
- * any luck, that case does not occur in reality. Note that case variants
- * should be included; "all" means *all*.
- * This is pure dummy code.
- ^ static struct cvec *allces(struct vars *, struct cvec *);
- */
-static struct cvec *
-allces(v, cv)
-struct vars *v;
-struct cvec *cv; /* this is supposed to have enough room */
-{
- assert(cv->cespace > 0);
- (VOID) clearcvec(cv);
- if (v->cflags&REG_FAKE)
- addce(cv, ch());
- return cv;
-}
-
-/*
- - element - map collating-element name to celt
- ^ static celt element(struct vars *, chr *, chr *);
- */
-static celt
-element(v, startp, endp)
-struct vars *v;
-chr *startp; /* points to start of name */
-chr *endp; /* points just past end of name */
-{
- register struct cname *cn;
- register size_t len;
- Tcl_DString ds;
- char *name;
-
- /* generic: one-chr names stand for themselves */
- assert(startp < endp);
- len = endp - startp;
- if (len == 1)
- return *startp;
-
- NOTE(REG_ULOCALE);
-
- /*
- * INTL: ISO only, search table
- */
-
- Tcl_DStringInit(&ds);
- name = TclUniCharToUtfDString(startp, (int) len, &ds);
-
- for (cn = cnames; cn->name != NULL; cn++) {
- if (strlen(cn->name) == len && strncmp(cn->name, name, len) == 0) {
- return UCHAR(cn->code);
- }
- }
- Tcl_DStringFree(&ds);
-
- /*
- * Special case for testing.
- */
-
- if ((v->cflags&REG_FAKE) && len == 2) {
- if (*startp == 'c' && *(startp+1) == 'h')
- return (celt) CH;
- }
-
- /* generic: couldn't find it */
- ERR(REG_ECOLLATE);
- return 0;
-}
-
-/*
- - range - supply cvec for a range, including legality check
- * Must include case counterparts on request.
- ^ static struct cvec *range(struct vars *, celt, celt, int);
- */
-static struct cvec *
-range(v, a, b, cases)
-struct vars *v;
-celt a;
-celt b; /* might equal a */
-int cases; /* case-independent? */
-{
- int nchrs;
- int appendch;
- struct cvec *cv;
- celt c;
-
- /* generic: legality check */
- if (a != b && !before(a, b)) {
- ERR(REG_ERANGE);
- return NULL;
- }
-
- /* mostly dummy: compute vector length, note presence of ch */
- appendch = 0;
- if (a == (celt) CH) {
- if (b == (celt) CH) {
- a = 'c';
- b = a - 1; /* kludge to get no chrs */
- appendch = 1;
- } else {
- a = 'd';
- appendch = 1;
- }
- } else {
- if (b == CH) {
- appendch = 1;
- b = 'c';
- } else {
- if ((v->cflags&REG_FAKE) && a <= 'c' && b >= 'd')
- appendch = 1;
- }
- }
- nchrs = b - a + 1;
- if (cases)
- nchrs *= 2;
- cv = getcvec(v, nchrs, appendch);
- NOERRN();
-
- /* mostly dummy: fill in vector */
- for (c = a; c <= b; c++) {
- addchr(cv, c);
- if (cases) {
- if (TclUniCharIsUpper((Tcl_UniChar)c))
- addchr(cv, (chr)Tcl_UniCharToLower(
- (Tcl_UniChar)c));
- else if (TclUniCharIsLower((Tcl_UniChar)c))
- addchr(cv, (chr)Tcl_UniCharToUpper(
- (Tcl_UniChar)c));
- }
- }
- if (appendch)
- addce(cv, ch());
-
- return cv;
-}
-
-/*
- - before - is celt x before celt y, for purposes of range legality?
- * This is all dummy code.
- ^ static int before(celt, celt);
- */
-static int /* predicate */
-before(x, y)
-celt x;
-celt y;
-{
- int isxch = (x == CH);
- int isych = (y == CH);
-
- if (!isxch && !isych && x < y)
- return 1;
- if (isxch && !isych && y >= 'd')
- return 1;
- if (!isxch && isych && x <= 'c')
- return 1;
- return 0;
-}
-
-/*
- - eclass - supply cvec for an equivalence class
- * Must include case counterparts on request.
- * This is all dummy code.
- ^ static struct cvec *eclass(struct vars *, celt, int);
- */
-static struct cvec *
-eclass(v, c, cases)
-struct vars *v;
-celt c;
-int cases; /* all cases? */
-{
- struct cvec *cv;
-
- if (c == CH) {
- cv = getcvec(v, 0, 1);
- assert(cv != NULL);
- addce(cv, ch());
- return cv;
- }
-
- if ((v->cflags&REG_FAKE) && (c == 'x' || c == 'y')) {
- cv = getcvec(v, 4, 0);
- assert(cv != NULL);
- addchr(cv, (chr)'x');
- addchr(cv, (chr)'y');
- if (cases) {
- addchr(cv, (chr)'X');
- addchr(cv, (chr)'Y');
- }
- return cv;
- }
-
- /* no equivalence class by that name */
- if (cases)
- return allcases(v, c);
- cv = getcvec(v, 1, 0);
- assert(cv != NULL);
- addchr(cv, (chr)c);
- return cv;
-}
-
-/*
- - cclass - supply cvec for a character class
- * Must include case counterparts on request.
- * This is all dummy code.
- ^ static struct cvec *cclass(struct vars *, chr *, chr *, int);
- */
-static struct cvec *
-cclass(v, startp, endp, cases)
-struct vars *v;
-chr *startp; /* where the name starts */
-chr *endp; /* just past the end of the name */
-int cases; /* case-independent? */
-{
- size_t len;
- register char *p;
- register struct cclass *cc;
- int hasch;
- struct cvec *cv;
- Tcl_DString ds;
- char *name;
-
- /* check out the name */
- len = endp - startp;
-
- Tcl_DStringInit(&ds);
- name = TclUniCharToUtfDString(startp, (int) len, &ds);
-
- if (cases && len == 5 && (strncmp("lower", name, 5) == 0 ||
- strncmp("upper", name, 5) == 0))
- name = "alpha";
- for (cc = cclasses; cc->name != NULL; cc++) {
- if (strlen(cc->name) == len && strncmp(cc->name, name, len) == 0) {
- break;
- }
- }
- Tcl_DStringFree(&ds);
-
- if (cc->name == NULL) {
- ERR(REG_ECTYPE);
- return NULL;
- }
-
- /* set up vector */
- hasch = (v->cflags&REG_FAKE) ? cc->hasch : 0;
- cv = getcvec(v, (int) strlen(cc->chars), hasch);
- if (cv == NULL) {
- ERR(REG_ESPACE);
- return NULL;
- }
-
- /* fill it in */
- for (p = cc->chars; *p != '\0'; p++)
- addchr(cv, (chr)*p);
- if (hasch)
- addce(cv, ch());
-
- return cv;
-}
-
-/*
- - allcases - supply cvec for all case counterparts of a chr (including itself)
- * This is a shortcut, preferably an efficient one, for simple characters;
- * messy cases are done via range().
- * This is all dummy code.
- ^ static struct cvec *allcases(struct vars *, pchr);
- */
-static struct cvec *
-allcases(v, c)
-struct vars *v;
-pchr c;
-{
- struct cvec *cv = getcvec(v, 2, 0);
-
- assert(cv != NULL);
- addchr(cv, c);
- if (TclUniCharIsUpper((Tcl_UniChar)c))
- addchr(cv, (chr)Tcl_UniCharToLower((Tcl_UniChar)c));
- else if (TclUniCharIsLower((Tcl_UniChar)c))
- addchr(cv, (chr)Tcl_UniCharToUpper((Tcl_UniChar)c));
-
- return cv;
-}
-
-/*
- - sncmp - case-independent chr-string compare
- * REG_ICASE backrefs need this. It should preferably be efficient.
- * This is all dummy code.
- ^ static int sncmp(CONST chr *, CONST chr *, size_t);
- */
-static int /* -1, 0, 1 for <, =, > */
-sncmp(x, y, len)
-CONST chr *x;
-CONST chr *y;
-size_t len; /* maximum length of comparison */
-{
- int diff;
- size_t i;
-
- for (i = 0; i < len; i++) {
- diff = Tcl_UniCharToLower(x[i]) - Tcl_UniCharToLower(y[i]);
- if (diff) {
- return diff;
- }
- }
- return 0;
-}
-
-/*
- * Utility functions for handling cvecs
- */
-
-/*
- - newcvec - allocate a new cvec
- ^ static struct cvec *newcvec(int, int);
- */
-static struct cvec *
-newcvec(nchrs, nces)
-int nchrs; /* to hold this many chrs... */
-int nces; /* ... and this many CEs */
-{
- size_t n;
- size_t nc;
- struct cvec *cv;
-
- nc = (size_t)nchrs + (size_t)nces*(MAXCE+1);
- n = sizeof(struct cvec) + (size_t)(nces-1)*sizeof(chr *) +
- nc*sizeof(chr);
- cv = (struct cvec *)ckalloc(n);
- if (cv == NULL)
- return NULL;
- cv->chrspace = nc;
- cv->chrs = (chr *)&cv->ces[nces]; /* chrs just after CE ptrs */
- cv->cespace = nces;
- return clearcvec(cv);
-}
-
-/*
- - clearcvec - clear a possibly-new cvec
- * Returns pointer as convenience.
- ^ static struct cvec *clearcvec(struct cvec *);
- */
-static struct cvec *
-clearcvec(cv)
-struct cvec *cv;
-{
- int i;
-
- assert(cv != NULL);
- cv->nchrs = 0;
- assert(cv->chrs == (chr *)&cv->ces[cv->cespace]);
- cv->nces = 0;
- cv->ncechrs = 0;
- for (i = 0; i < cv->cespace; i++)
- cv->ces[i] = NULL;
-
- return cv;
-}
-
-/*
- - addchr - add a chr to a cvec
- ^ static VOID addchr(struct cvec *, pchr);
- */
-static VOID
-addchr(cv, c)
-struct cvec *cv;
-pchr c;
-{
- assert(cv->nchrs < cv->chrspace - cv->ncechrs);
- cv->chrs[cv->nchrs++] = (chr) c;
-}
-
-/*
- - addce - add a CE to a cvec
- ^ static VOID addce(struct cvec *, chr *);
- */
-static VOID
-addce(cv, startp)
-struct cvec *cv;
-chr *startp; /* 0-terminated text */
-{
- int n = wcslen(startp);
- int i;
- chr *s;
- chr *d;
-
- assert(n > 0);
- assert(cv->nchrs + n < cv->chrspace - cv->ncechrs);
- assert(cv->nces < cv->cespace);
- d = &cv->chrs[cv->chrspace - cv->ncechrs - n - 1];
- cv->ces[cv->nces++] = d;
- for (s = startp, i = n; i > 0; s++, i--)
- *d++ = *s;
- *d = 0; /* endmarker */
- assert(d == &cv->chrs[cv->chrspace - cv->ncechrs]);
- cv->ncechrs += n + 1;
-}
-
-/*
- - haschr - does a cvec contain this chr?
- ^ static int haschr(struct cvec *, pchr);
- */
-static int /* predicate */
-haschr(cv, c)
-struct cvec *cv;
-pchr c;
-{
- int i;
- chr *p;
-
- for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
- if (*p == c)
- return 1;
- return 0;
-}
-
-/*
- - getcvec - get a cvec, remembering it as v->cv
- ^ static struct cvec *getcvec(struct vars *, int, int);
- */
-static struct cvec *
-getcvec(v, nchrs, nces)
-struct vars *v;
-int nchrs; /* to hold this many chrs... */
-int nces; /* ... and this many CEs */
-{
- if (v->cv != NULL && nchrs <= v->cv->chrspace && nces <= v->cv->cespace)
- return clearcvec(v->cv);
-
- if (v->cv != NULL)
- freecvec(v->cv);
- v->cv = newcvec(nchrs, nces);
- if (v->cv == NULL)
- ERR(REG_ESPACE);
-
- return v->cv;
-}
-
-/*
- - freecvec - free a cvec
- ^ static VOID freecvec(struct cvec *);
- */
-static VOID
-freecvec(cv)
-struct cvec *cv;
-{
- ckfree((char *)cv);
-}
diff --git a/generic/color.c b/generic/regc_color.c
index fa640f9..4a8a87c 100644
--- a/generic/color.c
+++ b/generic/regc_color.c
@@ -1,85 +1,25 @@
/*
- * color.c --
+ * colorings of characters
+ * This file is #included by regcomp.c.
*
- * Regexp package file: colorings of characters.
- * Note that there are some incestuous relationships between this code and
- * NFA arc maintenance, which perhaps ought to be cleaned up sometime.
- *
- * Copyright (c) 1998 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
- * whom are responsible for the results. The author thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Copyright (c) 1998 by Sun Microsystems, Inc.
- *
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: color.c,v 1.1.2.2 1998/10/03 01:56:40 stanton Exp $
+ * Note that there are some incestuous relationships between this code and
+ * NFA arc maintenance, which perhaps ought to be cleaned up sometime.
*/
+
+
/*
- * The innards.
- */
-struct colors {
- color ccolor[BYTTAB];
-};
-struct ptrs {
- union tree *pptr[BYTTAB];
-};
-union tree {
- struct colors colors;
- struct ptrs ptrs;
-};
-#define tcolor colors.ccolor
-#define tptr ptrs.pptr
-/*
- * Some of the function prototypes need this.
- ^ union tree;
+ * If this declaration draws a complaint about a negative array size,
+ * then CHRBITS is defined incorrectly for the chr type.
*/
+static char isCHRBITSright[NEGIFNOT(sizeof(chr)*CHAR_BIT == CHRBITS)];
+
+
+
+#define CISERR() VISERR(cm->v)
+#define CERR(e) VERR(cm->v, (e))
-struct colordesc {
- uchr nchrs; /* number of chars of this color */
- color sub; /* open subcolor of this one, or NOSUB */
-# define NOSUB COLORLESS
- struct arc *arcs; /* color chain */
-# define UNUSEDCOLOR(cd) ((cd)->nchrs == 0 && (cd)->sub == NOSUB)
- int flags;
-# define PSEUDO 1 /* pseudocolor, no real chars */
-};
-
-struct colormap {
- int magic;
-# define CMMAGIC 0x876
- struct vars *v; /* for error reporting */
- color rest;
- int filled; /* has it been filled? */
- int ncds; /* number of colordescs */
- struct colordesc *cd;
-# define CDEND(cm) (&(cm)->cd[(cm)->ncds])
-# define NINLINECDS 10
- struct colordesc cds[NINLINECDS];
- union tree tree[NBYTS]; /* tree top, plus fill blocks */
-};
-#ifdef COMPILE
/*
- newcm - get new colormap
@@ -96,7 +36,7 @@ struct vars *v;
union tree *nextt;
struct colordesc *cd;
- cm = (struct colormap *)ckalloc(sizeof(struct colormap));
+ cm = (struct colormap *)MALLOC(sizeof(struct colormap));
if (cm == NULL) {
ERR(REG_ESPACE);
return NULL;
@@ -114,15 +54,13 @@ struct vars *v;
cd->arcs = NULL;
cd->flags = 0;
}
- cm->cd[WHITE].nchrs = WCHAR_MAX - WCHAR_MIN;
+ cm->cd[WHITE].nchrs = CHR_MAX - CHR_MIN + 1;
/* treetop starts as NULLs if there are lower levels */
t = cm->tree;
- if (NBYTS > 1) {
- for (i = BYTTAB-1; i >= 0; i--)
- t->tptr[i] = NULL;
- }
-
+ if (NBYTS > 1)
+ for (i = BYTTAB-1; i >= 0; i--)
+ t->tptr[i] = NULL;
/* if no lower levels, treetop and last fill block are the same */
/* fill blocks point to next fill block... */
@@ -149,13 +87,11 @@ freecm(cm)
struct colormap *cm;
{
cm->magic = 0;
- if (NBYTS > 1) {
- cmtreefree(cm, cm->tree, 0);
- }
- if (cm->cd != cm->cds) {
- ckfree((char *)cm->cd);
- }
- ckfree((char *) cm); /* mem leak (CCS). */
+ if (NBYTS > 1)
+ cmtreefree(cm, cm->tree, 0);
+ if (cm->cd != cm->cds)
+ FREE(cm->cd);
+ FREE(cm);
}
/*
@@ -176,10 +112,9 @@ int level; /* level number (top == 0) of this block */
for (i = BYTTAB-1; i >= 0; i--) {
t = tree->tptr[i];
if (t != NULL && t != fillt) {
- if ((int) level < (int) NBYTS-2) { /* more pointer blocks below */
+ if (level < NBYTS-2) /* more pointer blocks below */
cmtreefree(cm, t, level+1);
- }
- ckfree((char *) t);
+ FREE(t);
}
}
}
@@ -221,17 +156,13 @@ int level; /* level number (top == 0) of this block */
t = tree->tptr[i];
if (t == fillt) /* oops */
{}
- else if (t == NULL) {
+ else if (t == NULL)
tree->tptr[i] = fillt;
- }
- else if ((int) level < (int) NBYTS-2) {/* more pointer blocks below */
+ else if (level < NBYTS-2) /* more pointer blocks below */
cmtreefill(cm, t, level+1);
- }
}
}
-#endif /* ifdef COMPILE */
-
/*
- getcolor - get the color of a character from a colormap
^ static color getcolor(struct colormap *, pchr);
@@ -261,8 +192,6 @@ pchr c;
return cm->rest;
}
-#ifdef COMPILE
-
/*
- setcolor - set the color of a character in a colormap
^ static color setcolor(struct colormap *, pchr, pcolor);
@@ -283,7 +212,7 @@ pcolor co;
color prev;
assert(cm->magic == CMMAGIC);
- if (VISERR(cm->v) || co == COLORLESS)
+ if (CISERR() || co == COLORLESS)
return COLORLESS;
t = cm->tree;
@@ -293,10 +222,10 @@ pcolor co;
t = t->tptr[b];
if (t == NULL) { /* fell off an incomplete part */
bottom = (shift <= BYTBITS) ? 1 : 0;
- t = (union tree *)ckalloc((bottom) ?
+ t = (union tree *)MALLOC((bottom) ?
sizeof(struct colors) : sizeof(struct ptrs));
if (t == NULL) {
- VERR(cm->v, REG_ESPACE);
+ CERR(REG_ESPACE);
return COLORLESS;
}
if (bottom)
@@ -312,7 +241,7 @@ pcolor co;
b = uc & BYTMASK;
prev = t->tcolor[b];
- t->tcolor[b] = (color) co;
+ t->tcolor[b] = (color)co;
return prev;
}
@@ -328,7 +257,7 @@ struct colormap *cm;
struct colordesc *end;
struct colordesc *lastused;
- if (VISERR(cm->v))
+ if (CISERR())
return COLORLESS;
lastused = NULL;
@@ -337,7 +266,7 @@ struct colormap *cm;
if (!UNUSEDCOLOR(cd))
lastused = cd;
assert(lastused != NULL);
- return (color) (lastused - cm->cd);
+ return (color)(lastused - cm->cd);
}
/*
@@ -352,31 +281,31 @@ struct colormap *cm;
struct colordesc *cd;
struct colordesc *end;
struct colordesc *firstnew;
- int n;
+ size_t n;
- if (VISERR(cm->v))
+ if (CISERR())
return COLORLESS;
end = CDEND(cm);
for (cd = cm->cd; cd < end; cd++)
if (UNUSEDCOLOR(cd)) {
assert(cd->arcs == NULL);
- return (color) (cd - cm->cd);
+ return (color)(cd - cm->cd);
}
/* oops, must allocate more */
n = cm->ncds * 2;
if (cm->cd == cm->cds) {
- cd = (struct colordesc *)ckalloc(sizeof(struct colordesc) * n);
+ cd = (struct colordesc *)MALLOC(sizeof(struct colordesc) * n);
if (cd != NULL)
- memcpy((VOID *)cd, (VOID *)cm->cds, cm->ncds *
+ memcpy(VS(cd), VS(cm->cds), cm->ncds *
sizeof(struct colordesc));
} else {
- cd = (struct colordesc *)ckrealloc((VOID *)cm->cd,
- sizeof(struct colordesc) * n);
+ cd = (struct colordesc *)REALLOC(cm->cd,
+ n * sizeof(struct colordesc));
}
if (cd == NULL) {
- VERR(cm->v, REG_ESPACE);
+ CERR(REG_ESPACE);
return COLORLESS;
}
cm->cd = cd;
@@ -390,7 +319,7 @@ struct colormap *cm;
cd->flags = 0;
}
assert(firstnew < CDEND(cm) && UNUSEDCOLOR(firstnew));
- return (color) (firstnew - cm->cd);
+ return (color)(firstnew - cm->cd);
}
/*
@@ -404,7 +333,7 @@ struct colormap *cm;
color co;
co = newcolor(cm);
- if (VISERR(cm->v))
+ if (CISERR())
return COLORLESS;
cm->cd[co].nchrs = 1;
cm->cd[co].flags = PSEUDO;
@@ -459,22 +388,22 @@ struct colormap *cm;
color co;
color sco;
- for (cd = cm->cd, co = 0; cd < end; cd++, co++) {
- sco = cd->sub;
- if (sco == NOSUB) {
- /* has no subcolor, no further action */
- } else if (sco == co) {
- /* is subcolor, let parent deal with it */
- } else if (cd->nchrs == 0) {
- /* parent empty, its arcs change color to subcolor */
- cd->sub = NOSUB;
- scd = &cm->cd[sco];
- assert(scd->nchrs > 0);
- assert(scd->sub == sco);
- scd->sub = NOSUB;
- while ((a = cd->arcs) != NULL) {
- assert(a->co == co);
- /* uncolorchain(cm, a); */
+ for (cd = cm->cd, co = 0; cd < end; cd++, co++) {
+ sco = cd->sub;
+ if (sco == NOSUB) {
+ /* has no subcolor, no further action */
+ } else if (sco == co) {
+ /* is subcolor, let parent deal with it */
+ } else if (cd->nchrs == 0) {
+ /* parent empty, its arcs change color to subcolor */
+ cd->sub = NOSUB;
+ scd = &cm->cd[sco];
+ assert(scd->nchrs > 0);
+ assert(scd->sub == sco);
+ scd->sub = NOSUB;
+ while ((a = cd->arcs) != NULL) {
+ assert(a->co == co);
+ /* uncolorchain(cm, a); */
cd->arcs = a->colorchain;
a->co = sco;
/* colorchain(cm, a); */
@@ -483,11 +412,11 @@ struct colormap *cm;
}
} else {
/* parent's arcs must gain parallel subcolor arcs */
- cd->sub = NOSUB;
- scd = &cm->cd[sco];
- assert(scd->nchrs > 0);
- assert(scd->sub == sco);
- scd->sub = NOSUB;
+ cd->sub = NOSUB;
+ scd = &cm->cd[sco];
+ assert(scd->nchrs > 0);
+ assert(scd->sub == sco);
+ scd->sub = NOSUB;
for (a = cd->arcs; a != NULL; a = a->colorchain) {
assert(a->co == co);
newarc(nfa, a->type, sco, a->from, a->to);
@@ -558,11 +487,11 @@ pchr c;
^ struct state *, struct state *);
*/
static VOID
-rainbow(nfa, cm, type, exc, from, to)
+rainbow(nfa, cm, type, but, from, to)
struct nfa *nfa;
struct colormap *cm;
int type;
-pcolor exc; /* COLORLESS if no exceptions */
+pcolor but; /* COLORLESS if no exceptions */
struct state *from;
struct state *to;
{
@@ -570,8 +499,8 @@ struct state *to;
struct colordesc *end = CDEND(cm);
color co;
- for (cd = cm->cd, co = 0; cd < end && !VISERR(nfa->v); cd++, co++)
- if (!UNUSEDCOLOR(cd) && cd->sub != co && co != exc &&
+ for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++)
+ if (!UNUSEDCOLOR(cd) && cd->sub != co && co != but &&
!(cd->flags&PSEUDO))
newarc(nfa, type, co, from, to);
}
@@ -596,10 +525,95 @@ struct state *to;
color co;
assert(of != from);
- for (cd = cm->cd, co = 0; cd < end && !VISERR(nfa->v); cd++, co++)
+ for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++)
if (!UNUSEDCOLOR(cd) && !(cd->flags&PSEUDO))
if (findarc(of, PLAIN, co) == NULL)
newarc(nfa, type, co, from, to);
}
-#endif /* ifdef COMPILE */
+
+
+#ifdef REG_DEBUG
+
+/*
+ - dumpcolors - debugging output
+ ^ static VOID dumpcolors(struct colormap *, FILE *);
+ */
+static VOID
+dumpcolors(cm, f)
+struct colormap *cm;
+FILE *f;
+{
+ struct colordesc *cd;
+ struct colordesc *end;
+ color co;
+ chr c;
+
+ if (cm->filled) {
+ fprintf(f, "filled\n");
+ if (NBYTS > 1)
+ fillcheck(cm, cm->tree, 0, f);
+ }
+ end = CDEND(cm);
+ for (cd = cm->cd + 1, co = 1; cd < end; cd++, co++) /* skip 0 */
+ if (cd->nchrs > 0) {
+ if (cd->flags&PSEUDO)
+ fprintf(f, "#%2ld(ps): ", (long)co);
+ else
+ fprintf(f, "#%2ld(%2d): ", (long)co, cd->nchrs);
+ for (c = CHR_MIN; c < CHR_MAX; c++)
+ if (getcolor(cm, c) == co)
+ dumpchr(c, f);
+ assert(c == CHR_MAX);
+ if (getcolor(cm, c) == co)
+ dumpchr(c, f);
+ fprintf(f, "\n");
+ }
+}
+
+/*
+ - fillcheck - check proper filling of a tree
+ ^ static VOID fillcheck(struct colormap *, union tree *, int, FILE *);
+ */
+static VOID
+fillcheck(cm, tree, level, f)
+struct colormap *cm;
+union tree *tree;
+int level; /* level number (top == 0) of this block */
+FILE *f;
+{
+ int i;
+ union tree *t;
+ union tree *fillt = &cm->tree[level+1];
+
+ assert(level < NBYTS-1); /* this level has pointers */
+ for (i = BYTTAB-1; i >= 0; i--) {
+ t = tree->tptr[i];
+ if (t == NULL)
+ fprintf(f, "NULL found in filled tree!\n");
+ else if (t == fillt)
+ {}
+ else if (level < NBYTS-2) /* more pointer blocks below */
+ fillcheck(cm, t, level+1, f);
+ }
+}
+
+/*
+ - dumpchr - print a chr
+ * Kind of char-centric but works well enough for debug use.
+ ^ static VOID dumpchr(pchr, FILE *);
+ */
+static VOID
+dumpchr(c, f)
+pchr c;
+FILE *f;
+{
+ if (c == '\\')
+ fprintf(f, "\\\\");
+ else if (c > ' ' && c <= '~')
+ putc((char)c, f);
+ else
+ fprintf(f, "\\0%lo", (long)c);
+}
+
+#endif /* ifdef REG_DEBUG */
diff --git a/generic/regc_cvec.c b/generic/regc_cvec.c
new file mode 100644
index 0000000..0650883
--- /dev/null
+++ b/generic/regc_cvec.c
@@ -0,0 +1,143 @@
+/*
+ * Utility functions for handling cvecs
+ * This file is #included by regcomp.c.
+ */
+
+/*
+ - newcvec - allocate a new cvec
+ * One MALLOC block holds the struct, the MCCE pointer slots and the
+ * chr storage. The chr storage has room for nchrs plain chrs plus
+ * MAXMCCE+1 chrs for each of the nmcces MCCEs.
+ * Returns the cleared cvec, or NULL if MALLOC fails.
+ ^ static struct cvec *newcvec(int, int);
+ */
+static struct cvec *
+newcvec(nchrs, nmcces)
+int nchrs; /* to hold this many chrs... */
+int nmcces; /* ... and this many MCCEs */
+{
+ size_t n;
+ size_t nc;
+ struct cvec *cv;
+
+ nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1);
+ n = sizeof(struct cvec) + (size_t)(nmcces-1)*sizeof(chr *) +
+ nc*sizeof(chr); /* nmcces-1: presumably struct cvec already declares one mcces slot -- confirm in regguts.h */
+ cv = (struct cvec *)MALLOC(n);
+ if (cv == NULL)
+ return NULL;
+ cv->chrspace = nc;
+ cv->chrs = (chr *)&cv->mcces[nmcces]; /* chrs just after MCCE ptrs */
+ cv->mccespace = nmcces;
+ return clearcvec(cv);
+}
+
+/*
+ - clearcvec - clear a possibly-new cvec
+ * Resets the chr, MCCE and MCCE-chr counts to zero and sets every
+ * MCCE pointer slot to NULL. The capacity fields (chrspace,
+ * mccespace) and the chrs pointer are not modified.
+ * Returns pointer as convenience.
+ ^ static struct cvec *clearcvec(struct cvec *);
+ */
+static struct cvec *
+clearcvec(cv)
+struct cvec *cv;
+{
+ int i;
+
+ assert(cv != NULL);
+ cv->nchrs = 0;
+ assert(cv->chrs == (chr *)&cv->mcces[cv->mccespace]); /* layout set up by newcvec */
+ cv->nmcces = 0;
+ cv->nmccechrs = 0;
+ for (i = 0; i < cv->mccespace; i++)
+ cv->mcces[i] = NULL;
+
+ return cv;
+}
+
+/*
+ - addchr - add a chr to a cvec
+ * Appends c after the chrs already stored. Capacity is only checked
+ * by assert: the caller must have obtained a cvec with enough room.
+ ^ static VOID addchr(struct cvec *, pchr);
+ */
+static VOID
+addchr(cv, c)
+struct cvec *cv;
+pchr c;
+{
+ assert(cv->nchrs < cv->chrspace - cv->nmccechrs); /* a slot remains below the nmccechrs chrs used at the end */
+ cv->chrs[cv->nchrs++] = (chr)c;
+}
+
+/*
+ - addmcce - add an MCCE to a cvec
+ * Copies the chrs from startp up to (not including) endp into the
+ * high end of cv->chrs, just below any MCCE text already stored
+ * there, appends a 0 endmarker, and records the copy's start in the
+ * next free cv->mcces slot. Capacity is only checked by assert.
+ ^ static VOID addmcce(struct cvec *, chr *, chr *);
+ */
+static VOID
+addmcce(cv, startp, endp)
+struct cvec *cv;
+chr *startp; /* beginning of text */
+chr *endp; /* just past end of text */
+{
+ int n = endp - startp;
+ int i;
+ chr *s;
+ chr *d;
+
+ assert(n > 0);
+ assert(cv->nchrs + n < cv->chrspace - cv->nmccechrs);
+ assert(cv->nmcces < cv->mccespace);
+ d = &cv->chrs[cv->chrspace - cv->nmccechrs - n - 1]; /* n chrs + endmarker, ending where earlier MCCE text begins */
+ cv->mcces[cv->nmcces++] = d;
+ for (s = startp, i = n; i > 0; s++, i--)
+ *d++ = *s;
+ *d++ = 0; /* endmarker */
+ assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]);
+ cv->nmccechrs += n + 1;
+}
+
+/*
+ - haschr - does a cvec contain this chr?
+ * Linear search of the first cv->nchrs entries of cv->chrs; MCCE text
+ * is not examined. Returns 1 if c is found, 0 otherwise.
+ ^ static int haschr(struct cvec *, pchr);
+ */
+static int /* predicate */
+haschr(cv, c)
+struct cvec *cv;
+pchr c;
+{
+ int i;
+ chr *p;
+
+ for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
+ if (*p == c)
+ return 1;
+ return 0;
+}
+
+/*
+ - getcvec - get a cvec, remembering it as v->cv
+ * If v->cv exists and is large enough for the request it is cleared
+ * and reused, so a cvec returned earlier is overwritten. Otherwise
+ * any existing v->cv is freed and a new one is allocated.
+ * If allocation fails, REG_ESPACE is recorded via ERR, v->cv is left
+ * NULL, and NULL is returned.
+ ^ static struct cvec *getcvec(struct vars *, int, int);
+ */
+static struct cvec *
+getcvec(v, nchrs, nmcces)
+struct vars *v;
+int nchrs; /* to hold this many chrs... */
+int nmcces; /* ... and this many MCCEs */
+{
+ if (v->cv != NULL && nchrs <= v->cv->chrspace &&
+ nmcces <= v->cv->mccespace)
+ return clearcvec(v->cv); /* existing vector is big enough: reuse it */
+
+ if (v->cv != NULL)
+ freecvec(v->cv);
+ v->cv = newcvec(nchrs, nmcces);
+ if (v->cv == NULL)
+ ERR(REG_ESPACE);
+
+ return v->cv;
+}
+
+/*
+ - freecvec - free a cvec
+ * Hands cv to FREE. newcvec() makes a single allocation per cvec, so
+ * this one call covers the struct, MCCE pointers and chr storage.
+ ^ static VOID freecvec(struct cvec *);
+ */
+static VOID
+freecvec(cv)
+struct cvec *cv;
+{
+ FREE(cv);
+}
diff --git a/generic/lex.c b/generic/regc_lex.c
index 7ae3ccc..820b404 100644
--- a/generic/lex.c
+++ b/generic/regc_lex.c
@@ -1,36 +1,6 @@
/*
- * lex --
- *
- * Regexp package file: lexical analyzer - #included in other source
- *
- * Copyright (c) 1998 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
- * whom are responsible for the results. The author thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Copyright (c) 1998 by Sun Microsystems, Inc.
- *
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: lex.c,v 1.1.2.2 1998/10/03 01:56:40 stanton Exp $
+ * lexical analyzer
+ * This file is #included by regcomp.c.
*/
/* scanning macros (know about v) */
@@ -58,8 +28,11 @@
#define L_CEL 7 /* collating element */
#define L_ECL 8 /* equivalence class */
#define L_CCL 9 /* character class */
-#define INTO(c) (v->lexcon = (c))
-#define _IN(con) (v->lexcon == (con))
+#define INTOCON(c) (v->lexcon = (c))
+#define INCON(con) (v->lexcon == (con))
+
+/* construct pointer past end of chr array */
+#define ENDOF(array) ((array) + sizeof(array)/sizeof(chr))
/*
- lexstart - set up lexical stuff, scan leading options
@@ -67,19 +40,20 @@
*/
static VOID
lexstart(v)
-register struct vars *v;
+struct vars *v;
{
prefixes(v); /* may turn on new type bits etc. */
NOERR();
if (v->cflags&REG_QUOTE) {
- v->cflags &= ~(REG_EXTENDED|REG_ADVF|REG_EXPANDED);
- INTO(L_Q);
- } else if (v->cflags&REG_EXTENDED)
- INTO(L_ERE);
- else {
- v->cflags &= ~REG_ADVF;
- INTO(L_BRE);
+ assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)));
+ INTOCON(L_Q);
+ } else if (v->cflags&REG_EXTENDED) {
+ assert(!(v->cflags&REG_QUOTE));
+ INTOCON(L_ERE);
+ } else {
+ assert(!(v->cflags&(REG_QUOTE|REG_ADVF)));
+ INTOCON(L_BRE);
}
v->nexttype = EMPTY; /* remember we were at the start */
@@ -104,11 +78,14 @@ struct vars *v;
case CHR('?'): /* "***?" error, msg shows version */
ERR(REG_BADPAT);
return; /* proceed no further */
+ break;
case CHR('='): /* "***=" shifts to literal string */
NOTE(REG_UNONPOSIX);
v->cflags |= REG_QUOTE;
+ v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE);
v->now += 4;
return; /* and there can be no more prefixes */
+ break;
case CHR(':'): /* "***:" shifts to AREs */
NOTE(REG_UNONPOSIX);
v->cflags |= REG_ADVANCED;
@@ -117,26 +94,28 @@ struct vars *v;
default: /* otherwise *** is just an error */
ERR(REG_BADRPT);
return;
+ break;
}
- /* BREs and plain EREs don't get any other favors */
+ /* BREs and EREs don't get embedded options */
if ((v->cflags&REG_ADVANCED) != REG_ADVANCED)
return;
- /* embedded options */
- if (HAVE(3) && NEXT2('(', '?') && iswalpha(*(v->now + 2))) {
+ /* embedded options (AREs only) */
+ if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) {
NOTE(REG_UNONPOSIX);
v->now += 2;
- for (; !ATEOS() && iswalpha(*v->now); v->now++)
+ for (; !ATEOS() && iscalpha(*v->now); v->now++)
switch (*v->now) {
case CHR('b'): /* BREs (but why???) */
- v->cflags &= ~REG_EXTENDED;
+ v->cflags &= ~(REG_ADVANCED|REG_QUOTE);
break;
case CHR('c'): /* case sensitive */
v->cflags &= ~REG_ICASE;
break;
case CHR('e'): /* plain EREs */
- v->cflags &= ~REG_ADVF;
+ v->cflags |= REG_EXTENDED;
+ v->cflags &= ~(REG_ADVF|REG_QUOTE);
break;
case CHR('i'): /* case insensitive */
v->cflags |= REG_ICASE;
@@ -151,6 +130,7 @@ struct vars *v;
break;
case CHR('q'): /* literal string */
v->cflags |= REG_QUOTE;
+ v->cflags &= ~REG_ADVANCED;
break;
case CHR('s'): /* single line, \n ordinary */
v->cflags &= ~REG_NEWLINE;
@@ -174,6 +154,8 @@ struct vars *v;
return;
}
v->now++;
+ if (v->cflags&REG_QUOTE)
+ v->cflags &= ~(REG_EXPANDED|REG_NEWLINE);
}
}
@@ -181,67 +163,68 @@ struct vars *v;
- lexnest - "call a subroutine", interpolating string at the lexical level
* Note, this is not a very general facility. There are a number of
* implicit assumptions about what sorts of strings can be subroutines.
- ^ static VOID lexnest(struct vars *, chr *);
+ ^ static VOID lexnest(struct vars *, chr *, chr *);
*/
static VOID
-lexnest(v, s)
+lexnest(v, beginp, endp)
struct vars *v;
-chr *s;
+chr *beginp; /* start of interpolation */
+chr *endp; /* one past end of interpolation */
{
assert(v->savenow == NULL); /* only one level of nesting */
v->savenow = v->now;
v->savestop = v->stop;
- v->now = s;
- v->stop = s + wcslen(s);
+ v->now = beginp;
+ v->stop = endp;
}
/*
- * string CONSTants to interpolate as expansions of things like \d
+ * string constants to interpolate as expansions of things like \d
*/
static chr backd[] = { /* \d */
CHR('['), CHR('['), CHR(':'),
CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
- CHR(':'), CHR(']'), CHR(']'), CHR('\0')
+ CHR(':'), CHR(']'), CHR(']')
};
static chr backD[] = { /* \D */
CHR('['), CHR('^'), CHR('['), CHR(':'),
CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
- CHR(':'), CHR(']'), CHR(']'), CHR('\0')
+ CHR(':'), CHR(']'), CHR(']')
};
static chr brbackd[] = { /* \d within brackets */
CHR('['), CHR(':'),
CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
- CHR(':'), CHR(']'), CHR('\0')
+ CHR(':'), CHR(']')
};
static chr backs[] = { /* \s */
CHR('['), CHR('['), CHR(':'),
CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
- CHR(':'), CHR(']'), CHR(']'), CHR('\0')
+ CHR(':'), CHR(']'), CHR(']')
};
static chr backS[] = { /* \S */
CHR('['), CHR('^'), CHR('['), CHR(':'),
CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
- CHR(':'), CHR(']'), CHR(']'), CHR('\0')
+ CHR(':'), CHR(']'), CHR(']')
};
static chr brbacks[] = { /* \s within brackets */
CHR('['), CHR(':'),
CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
- CHR(':'), CHR(']'), CHR('\0')
+ CHR(':'), CHR(']')
};
static chr backw[] = { /* \w */
CHR('['), CHR('['), CHR(':'),
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
- CHR(':'), CHR(']'), CHR('_'), CHR(']'), CHR('\0')
+ CHR(':'), CHR(']'), CHR('_'), CHR(']')
};
static chr backW[] = { /* \W */
CHR('['), CHR('^'), CHR('['), CHR(':'),
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
- CHR(':'), CHR(']'), CHR('_'), CHR(']'), CHR('\0')
+ CHR(':'), CHR(']'), CHR('_'), CHR(']')
};
static chr brbackw[] = { /* \w within brackets */
CHR('['), CHR(':'),
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
- CHR(':'), CHR(']'), CHR('_'), CHR('\0')
+ CHR(':'), CHR(']'), CHR('_')
};
/*
@@ -253,7 +236,7 @@ static VOID
lexword(v)
struct vars *v;
{
- lexnest(v, backw);
+ lexnest(v, backw, ENDOF(backw));
}
/*
@@ -262,9 +245,9 @@ struct vars *v;
*/
static int /* 1 normal, 0 failure */
next(v)
-register struct vars *v;
+struct vars *v;
{
- register chr c;
+ chr c;
/* errors yield an infinite sequence of failures */
if (ISERR())
@@ -298,14 +281,17 @@ register struct vars *v;
case L_BRE:
case L_Q:
RET(EOS);
+ break;
case L_EBND:
case L_BBND:
FAILW(REG_EBRACE);
+ break;
case L_BRACK:
case L_CEL:
case L_ECL:
case L_CCL:
FAILW(REG_EBRACK);
+ break;
}
assert(NOTREACHED);
}
@@ -317,22 +303,26 @@ register struct vars *v;
switch (v->lexcon) {
case L_BRE: /* punt BREs to separate function */
return brenext(v, c);
+ break;
case L_ERE: /* see below */
break;
case L_Q: /* literal strings are easy */
RETV(PLAIN, c);
+ break;
case L_BBND: /* bounds are fairly simple */
case L_EBND:
switch (c) {
case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
case CHR('8'): case CHR('9'):
- RETV(DIGIT, (chr) DIGITVAL(c));
+ RETV(DIGIT, (chr)DIGITVAL(c));
+ break;
case CHR(','):
RET(',');
+ break;
case CHR('}'): /* ERE bound ends with } */
- if (_IN(L_EBND)) {
- INTO(L_ERE);
+ if (INCON(L_EBND)) {
+ INTOCON(L_ERE);
if ((v->cflags&REG_ADVF) && NEXT1('?')) {
v->now++;
NOTE(REG_UNONPOSIX);
@@ -341,25 +331,32 @@ register struct vars *v;
RETV('}', 1);
} else
FAILW(REG_BADBR);
+ break;
case CHR('\\'): /* BRE bound ends with \} */
- if (_IN(L_BBND) && NEXT1('}')) {
+ if (INCON(L_BBND) && NEXT1('}')) {
v->now++;
- INTO(L_BRE);
+ INTOCON(L_BRE);
RET('}');
} else
FAILW(REG_BADBR);
+ break;
default:
FAILW(REG_BADBR);
+ break;
}
+ assert(NOTREACHED);
+ break;
case L_BRACK: /* brackets are not too hard */
switch (c) {
case CHR(']'):
if (LASTTYPE('['))
RETV(PLAIN, c);
else {
- INTO((v->cflags&REG_EXTENDED) ? L_ERE : L_BRE);
+ INTOCON((v->cflags&REG_EXTENDED) ?
+ L_ERE : L_BRE);
RET(']');
}
+ break;
case CHR('\\'):
NOTE(REG_UBBS);
if (!(v->cflags&REG_ADVF))
@@ -367,85 +364,109 @@ register struct vars *v;
NOTE(REG_UNONPOSIX);
if (ATEOS())
FAILW(REG_EESCAPE);
- (VOID) lexescape(v);
+ (DISCARD) lexescape(v);
switch (v->nexttype) { /* not all escapes okay here */
case PLAIN:
return 1;
+ break;
case CCLASS:
switch (v->nextvalue) {
- case 'd': lexnest(v, brbackd); break;
- case 's': lexnest(v, brbacks); break;
- case 'w': lexnest(v, brbackw); break;
+ case 'd':
+ lexnest(v, brbackd, ENDOF(brbackd));
+ break;
+ case 's':
+ lexnest(v, brbacks, ENDOF(brbacks));
+ break;
+ case 'w':
+ lexnest(v, brbackw, ENDOF(brbackw));
+ break;
default:
FAILW(REG_EESCAPE);
+ break;
}
/* lexnest done, back up and try again */
v->nexttype = v->lasttype;
return next(v);
+ break;
}
/* not one of the acceptable escapes */
FAILW(REG_EESCAPE);
+ break;
case CHR('-'):
if (LASTTYPE('[') || NEXT1(']'))
RETV(PLAIN, c);
else
RETV(RANGE, c);
+ break;
case CHR('['):
if (ATEOS())
FAILW(REG_EBRACK);
switch (*v->now++) {
case CHR('.'):
- INTO(L_CEL);
+ INTOCON(L_CEL);
/* might or might not be locale-specific */
RET(COLLEL);
+ break;
case CHR('='):
- INTO(L_ECL);
+ INTOCON(L_ECL);
NOTE(REG_ULOCALE);
RET(ECLASS);
+ break;
case CHR(':'):
- INTO(L_CCL);
+ INTOCON(L_CCL);
NOTE(REG_ULOCALE);
RET(CCLASS);
+ break;
default: /* oops */
v->now--;
RETV(PLAIN, c);
+ break;
}
+ assert(NOTREACHED);
+ break;
default:
RETV(PLAIN, c);
+ break;
}
+ assert(NOTREACHED);
+ break;
case L_CEL: /* collating elements are easy */
if (c == CHR('.') && NEXT1(']')) {
v->now++;
- INTO(L_BRACK);
+ INTOCON(L_BRACK);
RETV(END, '.');
} else
RETV(PLAIN, c);
+ break;
case L_ECL: /* ditto equivalence classes */
if (c == CHR('=') && NEXT1(']')) {
v->now++;
- INTO(L_BRACK);
+ INTOCON(L_BRACK);
RETV(END, '=');
} else
RETV(PLAIN, c);
+ break;
case L_CCL: /* ditto character classes */
if (c == CHR(':') && NEXT1(']')) {
v->now++;
- INTO(L_BRACK);
+ INTOCON(L_BRACK);
RETV(END, ':');
} else
RETV(PLAIN, c);
+ break;
default:
assert(NOTREACHED);
break;
}
/* that got rid of everything except EREs */
- assert(_IN(L_ERE));
+ assert(INCON(L_ERE));
/* deal with EREs, except for backslashes */
switch (c) {
case CHR('|'):
RET('|');
+ break;
case CHR('*'):
if ((v->cflags&REG_ADVF) && NEXT1('?')) {
v->now++;
@@ -453,6 +474,7 @@ register struct vars *v;
RETV('*', 0);
}
RETV('*', 1);
+ break;
case CHR('+'):
if ((v->cflags&REG_ADVF) && NEXT1('?')) {
v->now++;
@@ -460,6 +482,7 @@ register struct vars *v;
RETV('+', 0);
}
RETV('+', 1);
+ break;
case CHR('?'):
if ((v->cflags&REG_ADVF) && NEXT1('?')) {
v->now++;
@@ -467,18 +490,21 @@ register struct vars *v;
RETV('?', 0);
}
RETV('?', 1);
+ break;
case CHR('{'): /* bounds start or plain character */
if (v->cflags&REG_EXPANDED)
skip(v);
- if (ATEOS() || !iswdigit(*v->now)) {
+ if (ATEOS() || !iscdigit(*v->now)) {
NOTE(REG_UBRACES);
NOTE(REG_UUNSPEC);
RETV(PLAIN, c);
} else {
NOTE(REG_UBOUNDS);
- INTO(L_EBND);
+ INTOCON(L_EBND);
RET('{');
}
+ assert(NOTREACHED);
+ break;
case CHR('('): /* parenthesis, or advanced extension */
if ((v->cflags&REG_ADVF) && NEXT1('?')) {
NOTE(REG_UNONPOSIX);
@@ -486,6 +512,7 @@ register struct vars *v;
switch (*v->now++) {
case CHR(':'): /* non-capturing paren */
RETV('(', 0);
+ break;
case CHR('#'): /* comment */
while (!ATEOS() && *v->now != CHR(')'))
v->now++;
@@ -493,28 +520,37 @@ register struct vars *v;
v->now++;
assert(v->nexttype == v->lasttype);
return next(v);
+ break;
case CHR('='): /* positive lookahead */
NOTE(REG_ULOOKAHEAD);
RETV(LACON, 1);
+ break;
case CHR('!'): /* negative lookahead */
NOTE(REG_ULOOKAHEAD);
RETV(LACON, 0);
+ break;
case CHR('<'): /* prefer short */
RETV(PREFER, 0);
+ break;
case CHR('>'): /* prefer long */
RETV(PREFER, 1);
+ break;
default:
FAILW(REG_BADRPT);
+ break;
}
+ assert(NOTREACHED);
}
- if (v->cflags&REG_NOSUB) {
- RETV('(', 0); /* all parens non-capturing */
- }
- RETV('(', 1);
+ if (v->cflags&REG_NOSUB)
+ RETV('(', 0); /* all parens non-capturing */
+ else
+ RETV('(', 1);
+ break;
case CHR(')'):
if (LASTTYPE('('))
NOTE(REG_UUNSPEC);
RETV(')', c);
+ break;
case CHR('['): /* easy except for [[:<:]] and [[:>:]] */
if (HAVE(6) && *(v->now+0) == CHR('[') &&
*(v->now+1) == CHR(':') &&
@@ -528,49 +564,55 @@ register struct vars *v;
NOTE(REG_UNONPOSIX);
RET((c == CHR('<')) ? '<' : '>');
}
- INTO(L_BRACK);
+ INTOCON(L_BRACK);
if (NEXT1('^')) {
v->now++;
RETV('[', 0);
}
RETV('[', 1);
+ break;
case CHR('.'):
RET('.');
+ break;
case CHR('^'):
RET('^');
+ break;
case CHR('$'):
RET('$');
+ break;
case CHR('\\'): /* mostly punt backslashes to code below */
if (ATEOS())
FAILW(REG_EESCAPE);
break;
default: /* ordinary character */
RETV(PLAIN, c);
+ break;
}
/* ERE backslash handling; backslash already eaten */
assert(!ATEOS());
if (!(v->cflags&REG_ADVF)) { /* only AREs have non-trivial escapes */
- if (iswalnum(*v->now)) {
+ if (iscalnum(*v->now)) {
NOTE(REG_UBSALNUM);
NOTE(REG_UUNSPEC);
}
RETV(PLAIN, *v->now++);
}
- (VOID) lexescape(v);
+ (DISCARD) lexescape(v);
if (ISERR())
FAILW(REG_EESCAPE);
if (v->nexttype == CCLASS) { /* fudge at lexical level */
switch (v->nextvalue) {
- case 'd': lexnest(v, backd); break;
- case 'D': lexnest(v, backD); break;
- case 's': lexnest(v, backs); break;
- case 'S': lexnest(v, backS); break;
- case 'w': lexnest(v, backw); break;
- case 'W': lexnest(v, backW); break;
+ case 'd': lexnest(v, backd, ENDOF(backd)); break;
+ case 'D': lexnest(v, backD, ENDOF(backD)); break;
+ case 's': lexnest(v, backs, ENDOF(backs)); break;
+ case 'S': lexnest(v, backS, ENDOF(backS)); break;
+ case 'w': lexnest(v, backw, ENDOF(backw)); break;
+ case 'W': lexnest(v, backW, ENDOF(backW)); break;
default:
assert(NOTREACHED);
FAILW(REG_ASSERT);
+ break;
}
/* lexnest done, back up and try again */
v->nexttype = v->lasttype;
@@ -591,10 +633,10 @@ struct vars *v;
{
chr c;
static chr alert[] = {
- CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t'), CHR('\0')
+ CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
};
static chr esc[] = {
- CHR('E'), CHR('S'), CHR('C'), CHR('\0')
+ CHR('E'), CHR('S'), CHR('C')
};
chr *save;
@@ -602,79 +644,102 @@ struct vars *v;
assert(!ATEOS());
c = *v->now++;
- if (!iswalnum(c))
+ if (!iscalnum(c))
RETV(PLAIN, c);
NOTE(REG_UNONPOSIX);
switch (c) {
case CHR('a'):
- RETV(PLAIN, chrnamed(v, alert, CHR('\007')));
+ RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
+ break;
case CHR('A'):
RETV(SBEGIN, 0);
+ break;
case CHR('b'):
RETV(PLAIN, CHR('\b'));
+ break;
+ case CHR('B'):
+ RETV(PLAIN, CHR('\\'));
+ break;
case CHR('c'):
NOTE(REG_UUNPORT);
if (ATEOS())
FAILW(REG_EESCAPE);
- RETV(PLAIN, (chr) (*v->now++ & 037));
+ RETV(PLAIN, (chr)(*v->now++ & 037));
+ break;
case CHR('d'):
NOTE(REG_ULOCALE);
RETV(CCLASS, 'd');
+ break;
case CHR('D'):
NOTE(REG_ULOCALE);
RETV(CCLASS, 'D');
+ break;
case CHR('e'):
NOTE(REG_UUNPORT);
- RETV(PLAIN, chrnamed(v, esc, CHR('\033')));
- case CHR('E'):
- RETV(PLAIN, CHR('\\'));
+ RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
+ break;
case CHR('f'):
RETV(PLAIN, CHR('\f'));
+ break;
case CHR('n'):
RETV(PLAIN, CHR('\n'));
+ break;
case CHR('r'):
RETV(PLAIN, CHR('\r'));
+ break;
case CHR('s'):
NOTE(REG_ULOCALE);
RETV(CCLASS, 's');
+ break;
case CHR('S'):
NOTE(REG_ULOCALE);
RETV(CCLASS, 'S');
+ break;
case CHR('t'):
RETV(PLAIN, CHR('\t'));
+ break;
case CHR('u'):
c = lexdigits(v, 16, 4, 4);
if (ISERR())
FAILW(REG_EESCAPE);
RETV(PLAIN, c);
+ break;
case CHR('U'):
c = lexdigits(v, 16, 8, 8);
if (ISERR())
FAILW(REG_EESCAPE);
RETV(PLAIN, c);
+ break;
case CHR('v'):
RETV(PLAIN, CHR('\v'));
+ break;
case CHR('w'):
NOTE(REG_ULOCALE);
RETV(CCLASS, 'w');
+ break;
case CHR('W'):
NOTE(REG_ULOCALE);
RETV(CCLASS, 'W');
+ break;
case CHR('x'):
NOTE(REG_UUNPORT);
c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */
if (ISERR())
FAILW(REG_EESCAPE);
RETV(PLAIN, c);
+ break;
case CHR('y'):
NOTE(REG_ULOCALE);
RETV(WBDRY, 0);
+ break;
case CHR('Y'):
NOTE(REG_ULOCALE);
RETV(NWBDRY, 0);
+ break;
case CHR('Z'):
RETV(SEND, 0);
+ break;
case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
case CHR('9'):
@@ -686,7 +751,7 @@ struct vars *v;
/* ugly heuristic (first test is "exactly 1 digit?") */
if (v->now - save == 0 || (int)c <= v->nsubexp) {
NOTE(REG_UBACKREF);
- RETV(BACKREF, (chr) c);
+ RETV(BACKREF, (chr)c);
}
/* oops, doesn't look like it's a backref after all... */
v->now = save;
@@ -698,10 +763,13 @@ struct vars *v;
if (ISERR())
FAILW(REG_EESCAPE);
RETV(PLAIN, c);
+ break;
default:
- assert(iswalpha(c));
+ assert(iscalpha(c));
FAILW(REG_EESCAPE); /* unknown alphabetic escape */
+ break;
}
+ assert(NOTREACHED);
}
/*
@@ -715,7 +783,7 @@ int base;
int minlen;
int maxlen;
{
- uchr n; /* unsigned to aVOID overflow misbehavior */
+ uchr n; /* unsigned to avoid overflow misbehavior */
int len;
chr c;
int d;
@@ -764,16 +832,17 @@ int maxlen;
*/
static int /* 1 normal, 0 failure */
brenext(v, pc)
-register struct vars *v;
-register pchr pc;
+struct vars *v;
+pchr pc;
{
- register chr c = (chr) pc;
+ chr c = (chr)pc;
switch (c) {
case CHR('*'):
if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
RETV(PLAIN, c);
RET('*');
+ break;
case CHR('['):
if (HAVE(6) && *(v->now+0) == CHR('[') &&
*(v->now+1) == CHR(':') &&
@@ -787,14 +856,16 @@ register pchr pc;
NOTE(REG_UNONPOSIX);
RET((c == CHR('<')) ? '<' : '>');
}
- INTO(L_BRACK);
+ INTOCON(L_BRACK);
if (NEXT1('^')) {
v->now++;
RETV('[', 0);
}
RETV('[', 1);
+ break;
case CHR('.'):
RET('.');
+ break;
case CHR('^'):
if (LASTTYPE(EMPTY))
RET('^');
@@ -803,6 +874,7 @@ register pchr pc;
RET('^');
}
RETV(PLAIN, c);
+ break;
case CHR('$'):
if (v->cflags&REG_EXPANDED)
skip(v);
@@ -813,10 +885,12 @@ register pchr pc;
RET('$');
}
RETV(PLAIN, c);
+ break;
case CHR('\\'):
break; /* see below */
default:
RETV(PLAIN, c);
+ break;
}
assert(c == CHR('\\'));
@@ -827,31 +901,40 @@ register pchr pc;
c = *v->now++;
switch (c) {
case CHR('{'):
- INTO(L_BBND);
+ INTOCON(L_BBND);
NOTE(REG_UBOUNDS);
RET('{');
+ break;
case CHR('('):
RETV('(', 1);
+ break;
case CHR(')'):
RETV(')', c);
+ break;
case CHR('<'):
NOTE(REG_UNONPOSIX);
RET('<');
+ break;
case CHR('>'):
NOTE(REG_UNONPOSIX);
RET('>');
+ break;
case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
case CHR('9'):
NOTE(REG_UBACKREF);
- RETV(BACKREF, (chr) DIGITVAL(c));
+ RETV(BACKREF, (chr)DIGITVAL(c));
+ break;
default:
- if (iswalnum(c)) {
+ if (iscalnum(c)) {
NOTE(REG_UBSALNUM);
NOTE(REG_UUNSPEC);
}
RETV(PLAIN, c);
+ break;
}
+
+ assert(NOTREACHED);
}
/*
@@ -867,14 +950,14 @@ struct vars *v;
assert(v->cflags&REG_EXPANDED);
for (;;) {
- while (!ATEOS() && iswspace(*v->now))
+ while (!ATEOS() && iscspace(*v->now))
v->now++;
if (ATEOS() || *v->now != CHR('#'))
break; /* NOTE BREAK OUT */
assert(NEXT1('#'));
while (!ATEOS() && *v->now != CHR('\n'))
v->now++;
- /* leave the newline to be picked up by the iswspace loop */
+ /* leave the newline to be picked up by the iscspace loop */
}
if (v->now != start)
@@ -884,7 +967,7 @@ struct vars *v;
/*
- newline - return the chr for a newline
* This helps confine use of CHR to this source file.
- ^ static chr newline(VOID);
+ ^ static chr newline(NOPARMS);
*/
static chr
newline()
@@ -895,7 +978,7 @@ newline()
/*
- ch - return the chr sequence for locale.c's fake collating element ch
* This helps confine use of CHR to this source file.
- ^ static chr *ch(VOID);
+ ^ static chr *ch(NOPARMS);
*/
static chr *
ch()
@@ -909,12 +992,13 @@ ch()
- chrnamed - return the chr known by a given (chr string) name
* The code is a bit clumsy, but this routine gets only such specialized
* use that it hardly matters.
- ^ static chr chrnamed(struct vars *, chr *, pchr);
+ ^ static chr chrnamed(struct vars *, chr *, chr *, pchr);
*/
static chr
-chrnamed(v, name, lastresort)
+chrnamed(v, startp, endp, lastresort)
struct vars *v;
-chr *name;
+chr *startp; /* start of name */
+chr *endp; /* just past end of name */
pchr lastresort; /* what to return if name lookup fails */
{
celt c;
@@ -924,15 +1008,15 @@ pchr lastresort; /* what to return if name lookup fails */
errsave = v->err;
v->err = 0;
- c = element(v, name, name+wcslen(name));
+ c = element(v, startp, endp);
e = v->err;
v->err = errsave;
if (e != 0)
- return (chr) lastresort;
+ return (chr)lastresort;
cv = range(v, c, c, 0);
if (cv->nchrs == 0)
- return (chr) lastresort;
+ return (chr)lastresort;
return cv->chrs[0];
}
diff --git a/generic/regc_locale.c b/generic/regc_locale.c
new file mode 100644
index 0000000..769241f
--- /dev/null
+++ b/generic/regc_locale.c
@@ -0,0 +1,426 @@
+/*
+ * locale-specific stuff, including MCCE handling
+ * This file is #included by regcomp.c.
+ *
+ * No MCCEs for Tcl. The handling of character names and classes is
+ * still ASCII-centric, and needs to be extended to handle full Unicode.
+ */
+
+/*
+ * ASCII character-name table
+ * Searched linearly by element() for an exact name match. Several
+ * names may map to the same code. The entry with a NULL name marks
+ * the end of the table.
+ */
+static struct cname {
+ char *name; /* collating-element name, matched exactly */
+ char code; /* the character that name stands for */
+} cnames[] = {
+ {"NUL", '\0'},
+ {"SOH", '\001'},
+ {"STX", '\002'},
+ {"ETX", '\003'},
+ {"EOT", '\004'},
+ {"ENQ", '\005'},
+ {"ACK", '\006'},
+ {"BEL", '\007'},
+ {"alert", '\007'},
+ {"BS", '\010'},
+ {"backspace", '\b'},
+ {"HT", '\011'},
+ {"tab", '\t'},
+ {"LF", '\012'},
+ {"newline", '\n'},
+ {"VT", '\013'},
+ {"vertical-tab", '\v'},
+ {"FF", '\014'},
+ {"form-feed", '\f'},
+ {"CR", '\015'},
+ {"carriage-return", '\r'},
+ {"SO", '\016'},
+ {"SI", '\017'},
+ {"DLE", '\020'},
+ {"DC1", '\021'},
+ {"DC2", '\022'},
+ {"DC3", '\023'},
+ {"DC4", '\024'},
+ {"NAK", '\025'},
+ {"SYN", '\026'},
+ {"ETB", '\027'},
+ {"CAN", '\030'},
+ {"EM", '\031'},
+ {"SUB", '\032'},
+ {"ESC", '\033'},
+ {"IS4", '\034'},
+ {"FS", '\034'},
+ {"IS3", '\035'},
+ {"GS", '\035'},
+ {"IS2", '\036'},
+ {"RS", '\036'},
+ {"IS1", '\037'},
+ {"US", '\037'},
+ {"space", ' '},
+ {"exclamation-mark", '!'},
+ {"quotation-mark", '"'},
+ {"number-sign", '#'},
+ {"dollar-sign", '$'},
+ {"percent-sign", '%'},
+ {"ampersand", '&'},
+ {"apostrophe", '\''},
+ {"left-parenthesis", '('},
+ {"right-parenthesis", ')'},
+ {"asterisk", '*'},
+ {"plus-sign", '+'},
+ {"comma", ','},
+ {"hyphen", '-'},
+ {"hyphen-minus", '-'},
+ {"period", '.'},
+ {"full-stop", '.'},
+ {"slash", '/'},
+ {"solidus", '/'},
+ {"zero", '0'},
+ {"one", '1'},
+ {"two", '2'},
+ {"three", '3'},
+ {"four", '4'},
+ {"five", '5'},
+ {"six", '6'},
+ {"seven", '7'},
+ {"eight", '8'},
+ {"nine", '9'},
+ {"colon", ':'},
+ {"semicolon", ';'},
+ {"less-than-sign", '<'},
+ {"equals-sign", '='},
+ {"greater-than-sign", '>'},
+ {"question-mark", '?'},
+ {"commercial-at", '@'},
+ {"left-square-bracket", '['},
+ {"backslash", '\\'},
+ {"reverse-solidus", '\\'},
+ {"right-square-bracket", ']'},
+ {"circumflex", '^'},
+ {"circumflex-accent", '^'},
+ {"underscore", '_'},
+ {"low-line", '_'},
+ {"grave-accent", '`'},
+ {"left-brace", '{'},
+ {"left-curly-bracket", '{'},
+ {"vertical-line", '|'},
+ {"right-brace", '}'},
+ {"right-curly-bracket", '}'},
+ {"tilde", '~'},
+ {"DEL", '\177'},
+ {NULL, 0} /* end marker */
+};
+
+/*
+ * ASCII character-class table
+ * Searched linearly by cclass() for an exact name match; each char of
+ * the matching entry's chars string becomes one member of the class.
+ * The entry with a NULL name marks the end of the table.
+ */
+static struct cclass {
+ char *name; /* class name as written between [: and :] */
+ char *chars; /* NUL-terminated list of the member characters */
+ int hasch; /* not read by the code in this file; presumably "class includes the fake collating element ch" -- TODO confirm */
+} cclasses[] = {
+ {"alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789", 1},
+ {"alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
+ 1},
+ {"blank", " \t", 0},
+ {"cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
+\25\26\27\30\31\32\33\34\35\36\37\177", 0},
+ {"digit", "0123456789", 0},
+ {"graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+ 1},
+ {"lower", "abcdefghijklmnopqrstuvwxyz",
+ 1},
+ {"print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
+ 1},
+ {"punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+ 0},
+ {"space", "\t\n\v\f\r ", 0},
+ {"upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
+ 0},
+ {"xdigit", "0123456789ABCDEFabcdef",
+ 0},
+ {NULL, 0, 0} /* end marker */
+};
+
+#define CH NOCELT
+
+/*
+ - nmcces - how many distinct MCCEs are there?
+ * Always 0 here: this locale file defines no MCCEs. v is unused.
+ ^ static int nmcces(struct vars *);
+ */
+static int
+nmcces(v)
+struct vars *v;
+{
+ return 0;
+}
+
+/*
+ - nleaders - how many chrs can be first chrs of MCCEs?
+ * Always 0 here: this locale file defines no MCCEs. v is unused.
+ ^ static int nleaders(struct vars *);
+ */
+static int
+nleaders(v)
+struct vars *v;
+{
+ return 0;
+}
+
+/*
+ - allmcces - return a cvec with all the MCCEs of the locale
+ * There are none in this locale file, so the result is simply cv
+ * emptied by clearcvec(). v is unused.
+ ^ static struct cvec *allmcces(struct vars *, struct cvec *);
+ */
+static struct cvec *
+allmcces(v, cv)
+struct vars *v;
+struct cvec *cv; /* this is supposed to have enough room */
+{
+ return clearcvec(cv);
+}
+
+/*
+ - element - map collating-element name to celt
+ * A one-chr name stands for that chr. A longer name is converted to
+ * UTF-8 and looked up in the cnames table; a match returns the
+ * table's code for it. An unknown name records REG_ECOLLATE via ERR
+ * and returns 0. The name must not be empty (asserted).
+ ^ static celt element(struct vars *, chr *, chr *);
+ */
+static celt
+element(v, startp, endp)
+struct vars *v;
+chr *startp; /* points to start of name */
+chr *endp; /* points just past end of name */
+{
+ struct cname *cn;
+ size_t len;
+ Tcl_DString ds;
+ char *np;
+
+ /* generic: one-chr names stand for themselves */
+ assert(startp < endp);
+ len = endp - startp;
+ if (len == 1)
+ return *startp;
+
+ NOTE(REG_ULOCALE);
+
+ /* search table */
+ Tcl_DStringInit(&ds);
+ np = TclUniCharToUtfDString(startp, (int)len, &ds);
+ for (cn = cnames; cn->name != NULL; cn++)
+ if (strlen(cn->name) == len && strncmp(cn->name, np, len) == 0)
+ break; /* NOTE BREAK OUT */
+ Tcl_DStringFree(&ds); /* np is not used after this point */
+ if (cn->name != NULL)
+ return CHR(cn->code);
+
+ /* couldn't find it */
+ ERR(REG_ECOLLATE);
+ return 0;
+}
+
+/*
+ - range - supply cvec for a range, including legality check
+ * Records REG_ERANGE and returns NULL when a != b and a is not before
+ * b. Otherwise fills the cvec obtained from getcvec() with every chr
+ * from a through b and, if cases is set, with each chr's lower-,
+ * upper- and title-case forms that differ from it (duplicates across
+ * different chrs are not removed).
+ * With cases set, one chr can contribute more than two entries (for
+ * example itself, its upper-case form and a distinct title-case
+ * form), and addchr() only asserts on capacity, so four slots per chr
+ * are requested: the chr plus one for each case mapping.
+ ^ static struct cvec *range(struct vars *, celt, celt, int);
+ */
+static struct cvec *
+range(v, a, b, cases)
+struct vars *v;
+celt a;
+celt b; /* might equal a */
+int cases; /* case-independent? */
+{
+ int nchrs;
+ struct cvec *cv;
+ celt c, lc, uc, tc;
+
+ if (a != b && !before(a, b)) {
+ ERR(REG_ERANGE);
+ return NULL;
+ }
+
+ nchrs = b - a + 1;
+ if (cases)
+ nchrs *= 4; /* c itself + lower + upper + title */
+ cv = getcvec(v, nchrs, 0);
+ NOERRN(); /* give up if an error (e.g. REG_ESPACE from getcvec) is pending */
+
+ for (c = a; c <= b; c++) {
+ addchr(cv, c);
+ if (cases) {
+ lc = Tcl_UniCharToLower((chr)c);
+ uc = Tcl_UniCharToUpper((chr)c);
+ tc = Tcl_UniCharToTitle((chr)c);
+ if (c != lc) {
+ addchr(cv, lc);
+ }
+ if (c != uc) {
+ addchr(cv, uc);
+ }
+ if (c != tc && tc != uc) { /* title form only when not already added as uc */
+ addchr(cv, tc);
+ }
+ }
+ }
+
+ return cv;
+}
+
+/*
+ - before - is celt x before celt y, for purposes of range legality?
+ * Plain numeric comparison: returns 1 if x < y, else 0 (so equal
+ * values are not "before" each other).
+ ^ static int before(celt, celt);
+ */
+static int /* predicate */
+before(x, y)
+celt x;
+celt y;
+{
+ /* trivial because no MCCEs */
+ if (x < y)
+ return 1;
+ return 0;
+}
+
+/*
+ - eclass - supply cvec for an equivalence class
+ * Must include case counterparts on request.
+ * When REG_FAKEEC is set and c is 'x', the class is {x, y} (plus
+ * {X, Y} if cases is set). Otherwise the class is just c: with cases
+ * set the work is delegated to allcases(), else a one-chr cvec is
+ * returned. The result is the shared cvec from getcvec().
+ * NOTE(review): the REG_FAKEEC branch uses getcvec()'s result without
+ * a NULL check, and the final branch checks it only by assert.
+ ^ static struct cvec *eclass(struct vars *, celt, int);
+ */
+static struct cvec *
+eclass(v, c, cases)
+struct vars *v;
+celt c;
+int cases; /* all cases? */
+{
+ struct cvec *cv;
+
+ /* crude fake equivalence class for testing */
+ if ((v->cflags&REG_FAKEEC) && c == 'x') {
+ cv = getcvec(v, 4, 0);
+ addchr(cv, (chr)'x');
+ addchr(cv, (chr)'y');
+ if (cases) {
+ addchr(cv, (chr)'X');
+ addchr(cv, (chr)'Y');
+ }
+ return cv;
+ }
+
+ /* otherwise, none */
+ if (cases)
+ return allcases(v, c);
+ cv = getcvec(v, 1, 0);
+ assert(cv != NULL);
+ addchr(cv, (chr)c);
+ return cv;
+}
+
+/*
+ - cclass - supply cvec for a character class
+ * Must include case counterparts on request.
+ * The name is converted to UTF-8 and looked up in the cclasses table.
+ * With cases set, "lower" and "upper" are both looked up as "alpha",
+ * which is how the case counterparts get included.
+ * An unknown name records REG_ECTYPE and returns NULL; failure to get
+ * a cvec records REG_ESPACE and returns NULL. Otherwise returns the
+ * shared cvec from getcvec() holding the table entry's characters.
+ ^ static struct cvec *cclass(struct vars *, chr *, chr *, int);
+ */
+static struct cvec *
+cclass(v, startp, endp, cases)
+struct vars *v;
+chr *startp; /* where the name starts */
+chr *endp; /* just past the end of the name */
+int cases; /* case-independent? */
+{
+ size_t len;
+ char *p;
+ struct cclass *cc;
+ struct cvec *cv;
+ Tcl_DString ds;
+ char *np;
+
+ /* find the name */
+ len = endp - startp;
+ Tcl_DStringInit(&ds);
+ np = TclUniCharToUtfDString(startp, (int)len, &ds);
+ if (cases && len == 5 && (strncmp("lower", np, 5) == 0 ||
+ strncmp("upper", np, 5) == 0))
+ np = "alpha"; /* same length (5), so len still fits the lookup below */
+ for (cc = cclasses; cc->name != NULL; cc++)
+ if (strlen(cc->name) == len && strncmp(cc->name, np, len) == 0)
+ break; /* NOTE BREAK OUT */
+ Tcl_DStringFree(&ds); /* np is not used after this point */
+ if (cc->name == NULL) {
+ ERR(REG_ECTYPE);
+ return NULL;
+ }
+
+ /* set up vector */
+ cv = getcvec(v, (int)strlen(cc->chars), 0);
+ if (cv == NULL) {
+ ERR(REG_ESPACE);
+ return NULL;
+ }
+
+ /* fill it in */
+ for (p = cc->chars; *p != '\0'; p++)
+ addchr(cv, (chr)*p);
+
+ return cv;
+}
+
+/*
+ - allcases - supply cvec for all case counterparts of a chr (including itself)
+ * This is a shortcut, preferably an efficient one, for simple characters;
+ * messy cases are done via range().
+ * Requests a two-chr cvec from getcvec() and stores the chr itself,
+ * followed by its lower-case form if it is upper case, or its
+ * upper-case form if it is lower case. Title-case forms are not
+ * considered. The getcvec() result is checked only by assert.
+ ^ static struct cvec *allcases(struct vars *, pchr);
+ */
+static struct cvec *
+allcases(v, pc)
+struct vars *v;
+pchr pc;
+{
+ struct cvec *cv = getcvec(v, 2, 0);
+ chr c = (chr)pc;
+
+ assert(cv != NULL);
+ addchr(cv, c);
+ if (TclUniCharIsUpper(c))
+ addchr(cv, Tcl_UniCharToLower(c));
+ else if (TclUniCharIsLower(c))
+ addchr(cv, Tcl_UniCharToUpper(c));
+
+ return cv;
+}
+
+/*
+ - cmp - chr-substring compare
+ * Backrefs need this. It should preferably be efficient.
+ * Note that it does not need to report anything except equal/unequal.
+ * Note also that the length is exact, and the comparison should not
+ * stop at embedded NULs!
+ * Implemented as a memcmp() over len*sizeof(chr) bytes, so the result
+ * is memcmp's: 0 when the two substrings are identical.
+ ^ static int cmp(CONST chr *, CONST chr *, size_t);
+ */
+static int /* 0 for equal, nonzero for unequal */
+cmp(x, y, len)
+CONST chr *x;
+CONST chr *y;
+size_t len; /* exact length of comparison */
+{
+ return memcmp(VS(x), VS(y), len*sizeof(chr)); /* len counts chrs, memcmp wants bytes */
+}
+
+/*
+ - casecmp - case-independent chr-substring compare
+ * REG_ICASE backrefs need this. It should preferably be efficient.
+ * Note that it does not need to report anything except equal/unequal.
+ * Note also that the length is exact, and the comparison should not
+ * stop at embedded NULs!
+ * Two chrs are treated as equal when Tcl_UniCharToLower() gives the
+ * same result for both. Returns 1 at the first differing pair, and 0
+ * if all len pairs match (including when len is 0).
+ ^ static int casecmp(CONST chr *, CONST chr *, size_t);
+ */
+static int /* 0 for equal, nonzero for unequal */
+casecmp(x, y, len)
+CONST chr *x;
+CONST chr *y;
+size_t len; /* exact length of comparison */
+{
+ size_t i;
+ CONST chr *xp;
+ CONST chr *yp;
+
+ for (xp = x, yp = y, i = len; i > 0; i--)
+ if (Tcl_UniCharToLower(*xp++) != Tcl_UniCharToLower(*yp++))
+ return 1;
+ return 0;
+}
diff --git a/generic/nfa.c b/generic/regc_nfa.c
index f6b8967..14ee077 100644
--- a/generic/nfa.c
+++ b/generic/regc_nfa.c
@@ -1,57 +1,29 @@
/*
- * nfa.c --
+ * NFA utilities.
+ * This file is #included by regcomp.c.
*
- * Regexp package file:
- * NFA utilities. One or two things that technically ought to be
- * in here are actually in color.c, thanks to some incestuous
- * relationships in the color chains.
- *
- * Copyright (c) 1998 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
- * whom are responsible for the results. The author thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Copyright (c) 1998 by Sun Microsystems, Inc.
- *
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: nfa.c,v 1.1.2.2 1998/10/03 01:56:41 stanton Exp $
+ * One or two things that technically ought to be in here
+ * are actually in regc_color.c, thanks to some incestuous relationships in
+ * the color chains.
*/
#define NISERR() VISERR(nfa->v)
+#define NERR(e) VERR(nfa->v, (e))
/*
- newnfa - set up an NFA
- * Caution: colormap must be set up already.
- ^ static struct nfa *newnfa(struct vars *, struct nfa *);
+ ^ static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *);
*/
static struct nfa * /* the NFA, or NULL */
-newnfa(v, parent)
+newnfa(v, cm, parent)
struct vars *v;
+struct colormap *cm;
struct nfa *parent; /* NULL if primary NFA */
{
struct nfa *nfa;
- nfa = (struct nfa *)ckalloc(sizeof(struct nfa));
+ nfa = (struct nfa *)MALLOC(sizeof(struct nfa));
if (nfa == NULL)
return NULL;
@@ -59,6 +31,7 @@ struct nfa *parent; /* NULL if primary NFA */
nfa->slast = NULL;
nfa->free = NULL;
nfa->nstates = 0;
+ nfa->cm = cm;
nfa->v = v;
nfa->bos[0] = nfa->bos[1] = COLORLESS;
nfa->eos[0] = nfa->eos[1] = COLORLESS;
@@ -72,10 +45,10 @@ struct nfa *parent; /* NULL if primary NFA */
freenfa(nfa);
return NULL;
}
- rainbow(nfa, nfa->v->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
+ rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
newarc(nfa, '^', 1, nfa->pre, nfa->init);
newarc(nfa, '^', 0, nfa->pre, nfa->init);
- rainbow(nfa, nfa->v->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
+ rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
newarc(nfa, '$', 1, nfa->final, nfa->post);
newarc(nfa, '$', 0, nfa->final, nfa->post);
@@ -109,7 +82,7 @@ struct nfa *nfa;
nfa->nstates = -1;
nfa->pre = NULL;
nfa->post = NULL;
- ckfree((char *)nfa);
+ FREE(nfa);
}
/*
@@ -128,14 +101,11 @@ int flag;
s = nfa->free;
nfa->free = s->next;
} else {
- s = (struct state *)ckalloc(sizeof(struct state));
+ s = (struct state *)MALLOC(sizeof(struct state));
if (s == NULL) {
- VERR(nfa->v, REG_ESPACE);
+ NERR(REG_ESPACE);
return NULL;
}
-
- /* memleak (CCS). */
-
s->oas.next = NULL;
s->free = &s->oas.a[0];
for (i = 0; i < ABSIZE; i++) {
@@ -240,12 +210,12 @@ struct state *s;
assert(s->no == FREESTATE);
for (ab = s->oas.next; ab != NULL; ab = abnext) {
abnext = ab->next;
- ckfree((char *)ab);
+ FREE(ab);
}
s->ins = NULL;
s->outs = NULL;
s->next = NULL;
- ckfree((char *)s);
+ FREE(s);
}
/*
@@ -276,7 +246,7 @@ struct state *to;
assert(a != NULL);
a->type = t;
- a->co = (color) co;
+ a->co = (color)co;
a->to = to;
a->from = from;
@@ -295,7 +265,7 @@ struct state *to;
to->nins++;
if (COLORED(a) && nfa->parent == NULL)
- colorchain(nfa->v->cm, a);
+ colorchain(nfa->cm, a);
return;
}
@@ -315,9 +285,9 @@ struct state *s;
/* if none at hand, get more */
if (s->free == NULL) {
- new = (struct arcbatch *)ckalloc(sizeof(struct arcbatch));
+ new = (struct arcbatch *)MALLOC(sizeof(struct arcbatch));
if (new == NULL) {
- VERR(nfa->v, REG_ESPACE);
+ NERR(REG_ESPACE);
return NULL;
}
new->next = s->oas.next;
@@ -354,7 +324,7 @@ struct arc *victim;
/* take it off color chain if necessary */
if (COLORED(victim) && nfa->parent == NULL)
- uncolorchain(nfa->v->cm, victim);
+ uncolorchain(nfa->cm, victim);
/* take it off source's out-chain */
assert(from != NULL);
@@ -680,10 +650,10 @@ struct nfa *nfa;
{
/* false colors for BOS, BOL, EOS, EOL */
if (nfa->parent == NULL) {
- nfa->bos[0] = pseudocolor(nfa->v->cm);
- nfa->bos[1] = pseudocolor(nfa->v->cm);
- nfa->eos[0] = pseudocolor(nfa->v->cm);
- nfa->eos[1] = pseudocolor(nfa->v->cm);
+ nfa->bos[0] = pseudocolor(nfa->cm);
+ nfa->bos[1] = pseudocolor(nfa->cm);
+ nfa->eos[0] = pseudocolor(nfa->cm);
+ nfa->eos[1] = pseudocolor(nfa->cm);
} else {
assert(nfa->parent->bos[0] != COLORLESS);
nfa->bos[0] = nfa->parent->bos[0];
@@ -698,42 +668,41 @@ struct nfa *nfa;
/*
- optimize - optimize an NFA
- ^ static VOID optimize(struct nfa *);
+ ^ static int optimize(struct nfa *, FILE *);
*/
-static VOID
-optimize(nfa)
+static int /* re_info bits */
+optimize(nfa, f)
struct nfa *nfa;
+FILE *f; /* for debug output; NULL none */
{
- int verbose = (nfa->v->cflags&REG_PROGRESS) ? 1 : 0;
- int info;
+ int verbose = (f != NULL) ? 1 : 0;
if (verbose)
- printf("\ninitial cleanup:\n");
+ fprintf(f, "\ninitial cleanup:\n");
cleanup(nfa); /* may simplify situation */
- if (nfa->v->cflags&REG_PROGRESS)
- dumpnfa(nfa, stdout);
if (verbose)
- printf("\nempties:\n");
- fixempties(nfa); /* get rid of EMPTY arcs */
+ dumpnfa(nfa, f);
+ if (verbose)
+ fprintf(f, "\nempties:\n");
+ fixempties(nfa, f); /* get rid of EMPTY arcs */
if (verbose)
- printf("\nconstraints:\n");
- pullback(nfa); /* pull back constraints backward */
- pushfwd(nfa); /* push fwd constraints forward */
+ fprintf(f, "\nconstraints:\n");
+ pullback(nfa, f); /* pull back constraints backward */
+ pushfwd(nfa, f); /* push fwd constraints forward */
if (verbose)
- printf("\nfinal cleanup:\n");
+ fprintf(f, "\nfinal cleanup:\n");
cleanup(nfa); /* final tidying */
- info = analyze(nfa->v, nfa); /* and analysis */
- if (nfa->parent == NULL)
- nfa->v->re->re_info |= info;
+ return analyze(nfa); /* and analysis */
}
/*
- pullback - pull back constraints backward to (with luck) eliminate them
- ^ static VOID pullback(struct nfa *);
+ ^ static VOID pullback(struct nfa *, FILE *);
*/
static VOID
-pullback(nfa)
+pullback(nfa, f)
struct nfa *nfa;
+FILE *f; /* for debug output; NULL none */
{
struct state *s;
struct state *nexts;
@@ -754,8 +723,8 @@ struct nfa *nfa;
assert(nexta == NULL || s->no != FREESTATE);
}
}
- if (progress && (nfa->v->cflags&REG_PROGRESS))
- dumpnfa(nfa, stdout);
+ if (progress && f != NULL)
+ dumpnfa(nfa, f);
} while (progress && !NISERR());
if (NISERR())
return;
@@ -799,7 +768,7 @@ struct arc *con;
return 1;
}
- /* first, clone from state if necessary to aVOID other outarcs */
+ /* first, clone from state if necessary to avoid other outarcs */
if (from->nouts > 1) {
s = newstate(nfa);
if (NISERR())
@@ -846,11 +815,12 @@ struct arc *con;
/*
- pushfwd - push forward constraints forward to (with luck) eliminate them
- ^ static VOID pushfwd(struct nfa *);
+ ^ static VOID pushfwd(struct nfa *, FILE *);
*/
static VOID
-pushfwd(nfa)
+pushfwd(nfa, f)
struct nfa *nfa;
+FILE *f; /* for debug output; NULL none */
{
struct state *s;
struct state *nexts;
@@ -871,8 +841,8 @@ struct nfa *nfa;
assert(nexta == NULL || s->no != FREESTATE);
}
}
- if (progress && (nfa->v->cflags&REG_PROGRESS))
- dumpnfa(nfa, stdout);
+ if (progress && f != NULL)
+ dumpnfa(nfa, f);
} while (progress && !NISERR());
if (NISERR())
return;
@@ -916,7 +886,7 @@ struct arc *con;
return 1;
}
- /* first, clone to state if necessary to aVOID other inarcs */
+ /* first, clone to state if necessary to avoid other inarcs */
if (to->nins > 1) {
s = newstate(nfa);
if (NISERR())
@@ -978,11 +948,13 @@ struct arc *a;
case CA('^', PLAIN): /* newlines are handled separately */
case CA('$', PLAIN):
return INCOMPATIBLE;
+ break;
case CA(AHEAD, PLAIN): /* color constraints meet colors */
case CA(BEHIND, PLAIN):
if (con->co == a->co)
return SATISFIED;
return INCOMPATIBLE;
+ break;
case CA('^', '^'): /* collision, similar constraints */
case CA('$', '$'):
case CA(AHEAD, AHEAD):
@@ -990,11 +962,13 @@ struct arc *a;
if (con->co == a->co) /* true duplication */
return SATISFIED;
return INCOMPATIBLE;
+ break;
case CA('^', BEHIND): /* collision, dissimilar constraints */
case CA(BEHIND, '^'):
case CA('$', AHEAD):
case CA(AHEAD, '$'):
return INCOMPATIBLE;
+ break;
case CA('^', '$'): /* constraints passing each other */
case CA('^', AHEAD):
case CA(BEHIND, '$'):
@@ -1008,18 +982,20 @@ struct arc *a;
case CA('$', LACON):
case CA(AHEAD, LACON):
return COMPATIBLE;
+ break;
}
assert(NOTREACHED);
- return INCOMPATIBLE; /* keep compiler from complaining */
+ return INCOMPATIBLE; /* for benefit of blind compilers */
}
/*
- fixempties - get rid of EMPTY arcs
- ^ static VOID fixempties(struct nfa *);
+ ^ static VOID fixempties(struct nfa *, FILE *);
*/
static VOID
-fixempties(nfa)
+fixempties(nfa, f)
struct nfa *nfa;
+FILE *f; /* for debug output; NULL none */
{
struct state *s;
struct state *nexts;
@@ -1039,8 +1015,8 @@ struct nfa *nfa;
assert(nexta == NULL || s->no != FREESTATE);
}
}
- if (progress && (nfa->v->cflags&REG_PROGRESS))
- dumpnfa(nfa, stdout);
+ if (progress && f != NULL)
+ dumpnfa(nfa, f);
} while (progress && !NISERR());
}
@@ -1176,11 +1152,10 @@ struct state *mark; /* the value to mark with */
/*
- analyze - ascertain potentially-useful facts about an optimized NFA
- ^ static int analyze(struct vars *, struct nfa *);
+ ^ static int analyze(struct nfa *);
*/
static int /* re_info bits to be ORed in */
-analyze(v, nfa)
-struct vars *v;
+analyze(nfa)
struct nfa *nfa;
{
struct arc *a;
@@ -1219,11 +1194,10 @@ struct state *end;
/*
- compact - compact an NFA
- ^ static VOID compact(struct vars *, struct nfa *, struct cnfa *);
+ ^ static VOID compact(struct nfa *, struct cnfa *);
*/
static VOID
-compact(v, nfa, cnfa)
-struct vars *v;
+compact(nfa, cnfa)
struct nfa *nfa;
struct cnfa *cnfa;
{
@@ -1234,7 +1208,7 @@ struct cnfa *cnfa;
struct carc *ca;
struct carc *first;
- assert (!ISERR());
+ assert (!NISERR());
nstates = 0;
narcs = 0;
@@ -1243,14 +1217,14 @@ struct cnfa *cnfa;
narcs += s->nouts + 1;
}
- cnfa->states = (struct carc **)ckalloc(nstates * sizeof(struct carc *));
- cnfa->arcs = (struct carc *)ckalloc(narcs * sizeof(struct carc));
+ cnfa->states = (struct carc **)MALLOC(nstates * sizeof(struct carc *));
+ cnfa->arcs = (struct carc *)MALLOC(narcs * sizeof(struct carc));
if (cnfa->states == NULL || cnfa->arcs == NULL) {
if (cnfa->states != NULL)
- ckfree((char *)cnfa->states);
+ FREE(cnfa->states);
if (cnfa->arcs != NULL)
- ckfree((char *)cnfa->arcs);
- ERR(REG_ESPACE);
+ FREE(cnfa->arcs);
+ NERR(REG_ESPACE);
return;
}
cnfa->nstates = nstates;
@@ -1260,13 +1234,12 @@ struct cnfa *cnfa;
cnfa->bos[1] = nfa->bos[1];
cnfa->eos[0] = nfa->eos[0];
cnfa->eos[1] = nfa->eos[1];
- cnfa->ncolors = maxcolor(v->cm) + 1;
- cnfa->haslacons = 0;
- cnfa->leftanch = 1; /* tentatively */
+ cnfa->ncolors = maxcolor(nfa->cm) + 1;
+ cnfa->flags = LEFTANCH; /* tentatively */
ca = cnfa->arcs;
for (s = nfa->states; s != NULL; s = s->next) {
- assert((size_t) s->no < nstates);
+ assert((size_t)s->no < nstates);
cnfa->states[s->no] = ca;
first = ca;
for (a = s->outs; a != NULL; a = a->outchain)
@@ -1278,10 +1251,10 @@ struct cnfa *cnfa;
break;
case LACON:
assert(s->no != cnfa->pre);
- ca->co = (color) (a->co + cnfa->ncolors);
+ ca->co = (color)(cnfa->ncolors + a->co);
ca->to = a->to->no;
ca++;
- cnfa->haslacons = 1;
+ cnfa->flags |= HASLACONS;
break;
default:
assert(NOTREACHED);
@@ -1297,9 +1270,9 @@ struct cnfa *cnfa;
for (a = nfa->pre->outs; a != NULL; a = a->outchain)
if (a->type == PLAIN && a->co != nfa->bos[0] &&
- a->co != nfa->bos[1])
- cnfa->leftanch = 0;
- }
+ a->co != nfa->bos[1])
+ cnfa->flags &= ~LEFTANCH;
+}
/*
- carcsort - sort compacted-NFA arcs by color
@@ -1341,11 +1314,12 @@ int dynalloc; /* is the cnfa struct itself dynamic? */
{
assert(cnfa->nstates != 0); /* not empty already */
cnfa->nstates = 0;
- ckfree((char *)cnfa->states);
- ckfree((char *)cnfa->arcs);
+ FREE(cnfa->states);
+ FREE(cnfa->arcs);
if (dynalloc)
- ckfree((char *)cnfa);
+ FREE(cnfa);
}
+
/*
- dumpnfa - dump an NFA in human-readable form
^ static VOID dumpnfa(struct nfa *, FILE *);
@@ -1355,7 +1329,159 @@ dumpnfa(nfa, f)
struct nfa *nfa;
FILE *f;
{
+#ifdef REG_DEBUG
+ struct state *s;
+
+ fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
+ if (nfa->bos[0] != COLORLESS)
+ fprintf(f, ", bos [%ld]", (long)nfa->bos[0]);
+ if (nfa->bos[1] != COLORLESS)
+ fprintf(f, ", bol [%ld]", (long)nfa->bos[1]);
+ if (nfa->eos[0] != COLORLESS)
+ fprintf(f, ", eos [%ld]", (long)nfa->eos[0]);
+ if (nfa->eos[1] != COLORLESS)
+ fprintf(f, ", eol [%ld]", (long)nfa->eos[1]);
+ fprintf(f, "\n");
+ for (s = nfa->states; s != NULL; s = s->next)
+ dumpstate(s, f);
+ if (nfa->parent == NULL)
+ dumpcolors(nfa->cm, f);
+ fflush(f);
+#endif
}
+
+#ifdef REG_DEBUG /* subordinates of dumpnfa */
+
+/*
+ - dumpstate - dump an NFA state in human-readable form
+ ^ static VOID dumpstate(struct state *, FILE *);
+ */
+static VOID
+dumpstate(s, f)
+struct state *s;
+FILE *f;
+{
+	struct arc *in;
+	int flagch = (s->flag) ? s->flag : '.';	/* '.' when no flag is set */
+
+	fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "", flagch);
+	if (s->prev != NULL && s->prev->next != s)
+		fprintf(f, "\tstate chain bad\n");	/* back-link disagrees */
+	if (s->nouts != 0)
+		dumparcs(s, f);
+	else
+		fprintf(f, "\tno out arcs\n");
+	fflush(f);
+	/* every arc on the in-chain should point at s; report any that do not */
+	for (in = s->ins; in != NULL; in = in->inchain)
+		if (in->to != s)
+			fprintf(f, "\tlink from %d to %d on %d's in-chain\n",
+					in->from->no, in->to->no, s->no);
+}
+
+/*
+ - dumparcs - dump out-arcs in human-readable form
+ ^ static VOID dumparcs(struct state *, FILE *);
+ */
+static VOID
+dumparcs(s, f)
+struct state *s;
+FILE *f;
+{
+	assert(s->nouts > 0);
+
+	/* arcs usually read better in reverse order; dumprarcs() does that */
+	if (dumprarcs(s->outs, s, f, 1) == 1)
+		return;		/* the last row already ended with a newline */
+	/* partial row left open: finish it */
+	fprintf(f, "\n");
+}
+
+/*
+ - dumprarcs - dump remaining outarcs, recursively, in reverse order
+ ^ static int dumprarcs(struct arc *, struct state *, FILE *, int);
+ */
+static int /* resulting print position */
+dumprarcs(a, s, f, pos)
+struct arc *a;
+struct state *s;
+FILE *f;
+int pos; /* initial print position */
+{
+	int col = pos;
+
+	if (a->outchain != NULL)	/* arcs further down the chain print first */
+		col = dumprarcs(a->outchain, s, f, col);
+	dumparc(a, s, f);
+	if (col != 5)
+		return col + 1;
+	fprintf(f, "\n");		/* five arcs per row */
+	return 1;
+}
+
+/*
+ - dumparc - dump one outarc in readable form, including prefixing tab
+ ^ static VOID dumparc(struct arc *, struct state *, FILE *);
+ */
+static VOID
+dumparc(a, s, f)
+struct arc *a;
+struct state *s;
+FILE *f;
+{
+	struct arcbatch *batch;
+	struct arc *scan;
+	long co = (long)a->co;
+	int found = 0;
+
+	fprintf(f, "\t");
+	switch (a->type) {
+	case EMPTY:			/* no label at all */
+		break;
+	case PLAIN:
+		fprintf(f, "[%ld]", co);
+		break;
+	case AHEAD:
+		fprintf(f, ">%ld>", co);
+		break;
+	case BEHIND:
+		fprintf(f, "<%ld<", co);
+		break;
+	case LACON:
+		fprintf(f, ":%ld:", co);
+		break;
+	case '^':
+	case '$':
+		fprintf(f, "%c%d", a->type, (int)a->co);
+		break;
+	default:			/* unrecognized type: show it raw */
+		fprintf(f, "0x%x/0%lo", a->type, co);
+		break;
+	}
+	if (a->from != s)		/* arc names a different source state */
+		fprintf(f, "?%d?", a->from->no);
+	/* the arc should sit in one of its source state's arc batches... */
+	for (batch = &a->from->oas; batch != NULL && !found; batch = batch->next)
+		for (scan = &batch->a[0]; scan < &batch->a[ABSIZE] && !found; scan++)
+			found = (scan == a);
+	if (!found)
+		fprintf(f, "?!?"); /* not in allocated space */
+	fprintf(f, "->");
+	if (a->to == NULL) {
+		fprintf(f, "NULL");
+		return;
+	}
+	fprintf(f, "%d", a->to->no);
+	/* ...and be linked onto its target state's in-chain */
+	scan = a->to->ins;
+	while (scan != NULL && scan != a)
+		scan = scan->inchain;
+	if (scan == NULL)
+		fprintf(f, "?!?"); /* missing from in-chain */
+}
+
+#endif /* ifdef REG_DEBUG */
+
/*
- dumpcnfa - dump a compacted NFA in human-readable form
^ static VOID dumpcnfa(struct cnfa *, FILE *);
@@ -1365,4 +1491,62 @@ dumpcnfa(cnfa, f)
struct cnfa *cnfa;
FILE *f;
{
+#ifdef REG_DEBUG
+ int st;
+
+ fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post);
+ if (cnfa->bos[0] != COLORLESS)
+ fprintf(f, ", bos [%ld]", (long)cnfa->bos[0]);
+ if (cnfa->bos[1] != COLORLESS)
+ fprintf(f, ", bol [%ld]", (long)cnfa->bos[1]);
+ if (cnfa->eos[0] != COLORLESS)
+ fprintf(f, ", eos [%ld]", (long)cnfa->eos[0]);
+ if (cnfa->eos[1] != COLORLESS)
+ fprintf(f, ", eol [%ld]", (long)cnfa->eos[1]);
+ if (cnfa->flags&HASLACONS)
+ fprintf(f, ", haslacons");
+ if (cnfa->flags&LEFTANCH)
+ fprintf(f, ", leftanch");
+ fprintf(f, "\n");
+ for (st = 0; st < cnfa->nstates; st++)
+ dumpcstate(st, cnfa->states[st], cnfa, f);
+ fflush(f);
+#endif
}
+
+#ifdef REG_DEBUG /* subordinates of dumpcnfa */
+
+/*
+ - dumpcstate - dump a compacted-NFA state in human-readable form
+ ^ static VOID dumpcstate(int, struct carc *, struct cnfa *, FILE *);
+ */
+static VOID
+dumpcstate(st, ca, cnfa, f)
+int st;
+struct carc *ca;
+struct cnfa *cnfa;
+FILE *f;
+{
+	struct carc *arc;
+	int col = 1;
+
+	fprintf(f, "%d.", st);
+	/* the state's arc list runs up to a COLORLESS entry */
+	for (arc = ca; arc->co != COLORLESS; arc++) {
+		if (arc->co >= cnfa->ncolors)	/* past the real colors: printed as :n: */
+			fprintf(f, "\t:%ld:->%d", (long)arc->co-cnfa->ncolors,
+								arc->to);
+		else
+			fprintf(f, "\t[%ld]->%d", (long)arc->co, arc->to);
+		if (col == 5) {
+			fprintf(f, "\n");
+			col = 1;
+		} else
+			col++;
+	}
+	if (arc == ca || col != 1)
+		fprintf(f, "\n");
+	fflush(f);
+}
+
+#endif /* ifdef REG_DEBUG */
diff --git a/generic/compile.c b/generic/regcomp.c
index ee12d04..2a13172 100644
--- a/generic/compile.c
+++ b/generic/regcomp.c
@@ -1,51 +1,16 @@
/*
- * compile.c --
- *
- * Regexp package file: re_*comp and friends - compile REs
- *
- * Copyright (c) 1998 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
- * whom are responsible for the results. The author thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Copyright (c) 1998 by Sun Microsystems, Inc.
- *
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: compile.c,v 1.1.2.2 1998/10/03 01:56:40 stanton Exp $
+ * re_*comp and friends - compile REs
+ * This file #includes several others (see the bottom).
*/
-#include "tclInt.h"
-#include <assert.h>
-#include "tclPort.h"
-#include "tclRegexp.h"
-#include "chr.h"
-#include "guts.h"
+#include "regguts.h"
/*
* forward declarations, up here so forward datatypes etc. are defined early
*/
/* =====^!^===== begin forwards =====^!^===== */
/* automatically gathered by fwd; do not hand-edit */
-/* === compile.c === */
+/* === regcomp.c === */
int compile _ANSI_ARGS_((regex_t *, CONST chr *, size_t, int));
static VOID moresubs _ANSI_ARGS_((struct vars *, int));
static int freev _ANSI_ARGS_((struct vars *, int));
@@ -63,53 +28,34 @@ static color nlcolor _ANSI_ARGS_((struct vars *));
static VOID wordchrs _ANSI_ARGS_((struct vars *));
static struct subre subre _ANSI_ARGS_((struct state *, struct state *, int, int, struct rtree *));
static struct rtree *newrt _ANSI_ARGS_((struct vars *));
-static VOID freert _ANSI_ARGS_((struct rtree *));
-static VOID freertnode _ANSI_ARGS_((struct rtree *));
+static VOID freert _ANSI_ARGS_((struct vars *, struct rtree *));
+static VOID freertnode _ANSI_ARGS_((struct vars *, struct rtree *));
static VOID optrt _ANSI_ARGS_((struct vars *, struct rtree *));
static int numrt _ANSI_ARGS_((struct rtree *, int));
-static VOID nfatree _ANSI_ARGS_((struct vars *, struct rtree *));
-static VOID nfanode _ANSI_ARGS_((struct vars *, struct subre *));
+static VOID markrt _ANSI_ARGS_((struct rtree *));
+static VOID cleanrt _ANSI_ARGS_((struct vars *));
+static VOID nfatree _ANSI_ARGS_((struct vars *, struct rtree *, FILE *));
+static VOID nfanode _ANSI_ARGS_((struct vars *, struct subre *, FILE *));
static int newlacon _ANSI_ARGS_((struct vars *, struct state *, struct state *, int));
static VOID freelacons _ANSI_ARGS_((struct subre *, int));
static VOID rfree _ANSI_ARGS_((regex_t *));
static VOID dump _ANSI_ARGS_((regex_t *, FILE *));
static VOID dumprt _ANSI_ARGS_((struct rtree *, FILE *, int));
static VOID rtdump _ANSI_ARGS_((struct rtree *, FILE *, int, int));
-/* === lex.c === */
+/* === regc_lex.c === */
static VOID lexstart _ANSI_ARGS_((struct vars *));
static VOID prefixes _ANSI_ARGS_((struct vars *));
-static VOID lexnest _ANSI_ARGS_((struct vars *, chr *));
+static VOID lexnest _ANSI_ARGS_((struct vars *, chr *, chr *));
static VOID lexword _ANSI_ARGS_((struct vars *));
static int next _ANSI_ARGS_((struct vars *));
static int lexescape _ANSI_ARGS_((struct vars *));
static chr lexdigits _ANSI_ARGS_((struct vars *, int, int, int));
static int brenext _ANSI_ARGS_((struct vars *, pchr));
static VOID skip _ANSI_ARGS_((struct vars *));
-static chr newline _ANSI_ARGS_((VOID));
-static chr *ch _ANSI_ARGS_((VOID));
-static chr chrnamed _ANSI_ARGS_((struct vars *, chr *, pchr));
-/* === locale.c === */
-#define MAXCE 2 /* longest CE code is prepared to handle */
-typedef wint_t celt; /* type holding distinct codes for all chrs, all CEs */
-static int nces _ANSI_ARGS_((struct vars *));
-static int nleaders _ANSI_ARGS_((struct vars *));
-static struct cvec *allces _ANSI_ARGS_((struct vars *, struct cvec *));
-static celt element _ANSI_ARGS_((struct vars *, chr *, chr *));
-static struct cvec *range _ANSI_ARGS_((struct vars *, celt, celt, int));
-static int before _ANSI_ARGS_((celt, celt));
-static struct cvec *eclass _ANSI_ARGS_((struct vars *, celt, int));
-static struct cvec *cclass _ANSI_ARGS_((struct vars *, chr *, chr *, int));
-static struct cvec *allcases _ANSI_ARGS_((struct vars *, pchr));
-static int sncmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t));
-static struct cvec *newcvec _ANSI_ARGS_((int, int));
-static struct cvec *clearcvec _ANSI_ARGS_((struct cvec *));
-static VOID addchr _ANSI_ARGS_((struct cvec *, pchr));
-static VOID addce _ANSI_ARGS_((struct cvec *, chr *));
-static int haschr _ANSI_ARGS_((struct cvec *, pchr));
-static struct cvec *getcvec _ANSI_ARGS_((struct vars *, int, int));
-static VOID freecvec _ANSI_ARGS_((struct cvec *));
-/* === color.c === */
-union tree;
+static chr newline _ANSI_ARGS_((NOPARMS));
+static chr *ch _ANSI_ARGS_((NOPARMS));
+static chr chrnamed _ANSI_ARGS_((struct vars *, chr *, chr *, pchr));
+/* === regc_color.c === */
static struct colormap *newcm _ANSI_ARGS_((struct vars *));
static VOID freecm _ANSI_ARGS_((struct colormap *));
static VOID cmtreefree _ANSI_ARGS_((struct colormap *, union tree *, int));
@@ -127,8 +73,11 @@ static VOID uncolorchain _ANSI_ARGS_((struct colormap *, struct arc *));
static int singleton _ANSI_ARGS_((struct colormap *, pchr c));
static VOID rainbow _ANSI_ARGS_((struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *));
static VOID colorcomplement _ANSI_ARGS_((struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *));
-/* === nfa.c === */
-static struct nfa *newnfa _ANSI_ARGS_((struct vars *, struct nfa *));
+static VOID dumpcolors _ANSI_ARGS_((struct colormap *, FILE *));
+static VOID fillcheck _ANSI_ARGS_((struct colormap *, union tree *, int, FILE *));
+static VOID dumpchr _ANSI_ARGS_((pchr, FILE *));
+/* === regc_nfa.c === */
+static struct nfa *newnfa _ANSI_ARGS_((struct vars *, struct colormap *, struct nfa *));
static VOID freenfa _ANSI_ARGS_((struct nfa *));
static struct state *newfstate _ANSI_ARGS_((struct nfa *, int flag));
static struct state *newstate _ANSI_ARGS_((struct nfa *));
@@ -151,27 +100,52 @@ static VOID dupnfa _ANSI_ARGS_((struct nfa *, struct state *, struct state *, st
static VOID duptraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
static VOID cleartraverse _ANSI_ARGS_((struct nfa *, struct state *));
static VOID specialcolors _ANSI_ARGS_((struct nfa *));
-static VOID optimize _ANSI_ARGS_((struct nfa *));
-static VOID pullback _ANSI_ARGS_((struct nfa *));
+static int optimize _ANSI_ARGS_((struct nfa *, FILE *));
+static VOID pullback _ANSI_ARGS_((struct nfa *, FILE *));
static int pull _ANSI_ARGS_((struct nfa *, struct arc *));
-static VOID pushfwd _ANSI_ARGS_((struct nfa *));
+static VOID pushfwd _ANSI_ARGS_((struct nfa *, FILE *));
static int push _ANSI_ARGS_((struct nfa *, struct arc *));
#define INCOMPATIBLE 1 /* destroys arc */
#define SATISFIED 2 /* constraint satisfied */
#define COMPATIBLE 3 /* compatible but not satisfied yet */
static int combine _ANSI_ARGS_((struct arc *, struct arc *));
-static VOID fixempties _ANSI_ARGS_((struct nfa *));
+static VOID fixempties _ANSI_ARGS_((struct nfa *, FILE *));
static int unempty _ANSI_ARGS_((struct nfa *, struct arc *));
static VOID cleanup _ANSI_ARGS_((struct nfa *));
static VOID markreachable _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *));
static VOID markcanreach _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *));
-static int analyze _ANSI_ARGS_((struct vars *, struct nfa *));
+static int analyze _ANSI_ARGS_((struct nfa *));
static int isempty _ANSI_ARGS_((struct state *, struct state *));
-static VOID compact _ANSI_ARGS_((struct vars *, struct nfa *, struct cnfa *));
+static VOID compact _ANSI_ARGS_((struct nfa *, struct cnfa *));
static VOID carcsort _ANSI_ARGS_((struct carc *, struct carc *));
static VOID freecnfa _ANSI_ARGS_((struct cnfa *, int));
static VOID dumpnfa _ANSI_ARGS_((struct nfa *, FILE *));
+static VOID dumpstate _ANSI_ARGS_((struct state *, FILE *));
+static VOID dumparcs _ANSI_ARGS_((struct state *, FILE *));
+static int dumprarcs _ANSI_ARGS_((struct arc *, struct state *, FILE *, int));
+static VOID dumparc _ANSI_ARGS_((struct arc *, struct state *, FILE *));
static VOID dumpcnfa _ANSI_ARGS_((struct cnfa *, FILE *));
+static VOID dumpcstate _ANSI_ARGS_((int, struct carc *, struct cnfa *, FILE *));
+/* === regc_cvec.c === */
+static struct cvec *newcvec _ANSI_ARGS_((int, int));
+static struct cvec *clearcvec _ANSI_ARGS_((struct cvec *));
+static VOID addchr _ANSI_ARGS_((struct cvec *, pchr));
+static VOID addmcce _ANSI_ARGS_((struct cvec *, chr *, chr *));
+static int haschr _ANSI_ARGS_((struct cvec *, pchr));
+static struct cvec *getcvec _ANSI_ARGS_((struct vars *, int, int));
+static VOID freecvec _ANSI_ARGS_((struct cvec *));
+/* === regc_locale.c === */
+static int nmcces _ANSI_ARGS_((struct vars *));
+static int nleaders _ANSI_ARGS_((struct vars *));
+static struct cvec *allmcces _ANSI_ARGS_((struct vars *, struct cvec *));
+static celt element _ANSI_ARGS_((struct vars *, chr *, chr *));
+static struct cvec *range _ANSI_ARGS_((struct vars *, celt, celt, int));
+static int before _ANSI_ARGS_((celt, celt));
+static struct cvec *eclass _ANSI_ARGS_((struct vars *, celt, int));
+static struct cvec *cclass _ANSI_ARGS_((struct vars *, chr *, chr *, int));
+static struct cvec *allcases _ANSI_ARGS_((struct vars *, pchr));
+static int cmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t));
+static int casecmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t));
/* automatically gathered by fwd; do not hand-edit */
/* =====^!^===== end forwards =====^!^===== */
@@ -199,12 +173,14 @@ struct vars {
color nlcolor; /* color of newline */
struct state *wordchrs; /* state in nfa holding word-char outarcs */
struct rtree *tree; /* subexpression tree */
+ struct rtree *treechain; /* all tree nodes allocated */
+ struct rtree *treefree; /* any free tree nodes */
int ntree; /* number of tree nodes */
struct cvec *cv; /* utility cvec */
- struct cvec *ces; /* collating-element information */
-# define ISCELEADER(v,c) (v->ces != NULL && haschr(v->ces, (c)))
- struct state *cepbegin; /* state in nfa, start of CE prototypes */
- struct state *cepend; /* state in nfa, end of CE prototypes */
+ struct cvec *mcces; /* collating-element information */
+# define ISCELEADER(v,c) (v->mcces != NULL && haschr(v->mcces, (c)))
+ struct state *mccepbegin; /* in nfa, start of MCCE prototypes */
+ struct state *mccepend; /* in nfa, end of MCCE prototypes */
struct subre *lacons; /* lookahead-constraint vector */
int nlacons; /* size of lacons */
int usedshorter; /* used short-preferring quantifiers */
@@ -220,7 +196,7 @@ struct vars {
((vv)->err = (e)))
#define ERR(e) VERR(v, e) /* record an error */
#define NOERR() {if (ISERR()) return;} /* if error seen, return */
-#define NOERRN() {if (ISERR()) goto end;} /* NOERR with retval */
+#define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */
#define INSIST(c, e) ((c) ? 0 : ERR(e)) /* if condition false, error */
#define NOTE(b) (v->re->re_info |= (b)) /* note visible condition */
#define EMPTYARC(x, y) newarc(v->nfa, EMPTY, 0, x, y)
@@ -259,22 +235,6 @@ static struct fns functions = {
/*
- - regfree - free an RE (actually, just overall coordination)
- */
-VOID
-regfree(re)
-regex_t *re;
-{
- if (re == NULL || re->re_magic != REMAGIC)
- return; /* no way we can report it, really */
-
- /* free it, calling internal routine that knows details */
- (*((struct fns *)re->re_fns)->free)(re);
-
- re->re_magic = 0;
-}
-
-/*
- compile - compile regular expression
^ int compile(regex_t *, CONST chr *, size_t, int);
*/
@@ -289,25 +249,20 @@ int flags;
struct vars *v = &var;
struct guts *g;
int i;
+ size_t j;
+ FILE *debug = (flags&REG_PROGRESS) ? stdout : (FILE *)NULL;
# define CNOERR() { if (ISERR()) return freev(v, v->err); }
- if (re == NULL) {
- return REG_INVARG;
- }
-
- /*
- * Init re to known state, because we will try to free it if
- * compilation fails.
- */
-
- re->re_magic = 0;
-
/* sanity checks */
- if (string == NULL ||
- ((flags&REG_EXTENDED) && (flags&REG_QUOTE)) ||
- (!(flags&REG_EXTENDED) && (flags&REG_ADVF))) {
- return REG_INVARG;
- }
+
+ if (re == NULL || string == NULL)
+ return REG_INVARG;
+ assert(REG_ADVANCED == (REG_EXTENDED|REG_ADVF));
+ if ((flags&REG_QUOTE) &&
+ (flags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)))
+ return REG_INVARG;
+ if (!(flags&REG_EXTENDED) && (flags&REG_ADVF))
+ return REG_INVARG;
/* initial setup (after which freev() is callable) */
v->re = re;
@@ -319,27 +274,31 @@ int flags;
v->nsubexp = 0;
v->subs = v->sub10;
v->nsubs = 10;
- for (i = 0; (size_t) i < v->nsubs; i++)
- v->subs[i] = NULL;
+ for (j = 0; j < v->nsubs; j++)
+ v->subs[j] = NULL;
v->nfa = NULL;
v->cm = NULL;
v->nlcolor = COLORLESS;
v->wordchrs = NULL;
v->tree = NULL;
+ v->treechain = NULL;
+ v->treefree = NULL;
v->cv = NULL;
- v->ces = NULL;
+ v->mcces = NULL;
v->lacons = NULL;
v->nlacons = 0;
+ re->re_magic = REMAGIC;
re->re_info = 0; /* bits get set during parse */
+ re->re_csize = sizeof(chr);
re->re_guts = NULL;
- re->re_fns = NULL;
+ re->re_fns = VS(&functions);
/* more complex setup, malloced things */
- v->cm = newcm(v); /* colormap must precede nfa... */
+ v->cm = newcm(v);
CNOERR();
- v->nfa = newnfa(v, (struct nfa *)NULL); /* ...newnfa() uses it */
+ v->nfa = newnfa(v, v->cm, (struct nfa *)NULL);
CNOERR();
- re->re_guts = ckalloc(sizeof(struct guts));
+ re->re_guts = VS(MALLOC(sizeof(struct guts)));
if (re->re_guts == NULL)
return freev(v, REG_ESPACE);
g = (struct guts *)re->re_guts;
@@ -351,19 +310,17 @@ int flags;
v->cv = newcvec(100, 10);
if (v->cv == NULL)
return freev(v, REG_ESPACE);
- i = nces(v);
+ i = nmcces(v);
if (i > 0) {
- v->ces = newcvec(nleaders(v), i);
+ v->mcces = newcvec(nleaders(v), i);
CNOERR();
- v->ces = allces(v, v->ces);
- leaders(v, v->ces);
+ v->mcces = allmcces(v, v->mcces);
+ leaders(v, v->mcces);
}
CNOERR();
/* parsing */
lexstart(v); /* also handles prefixes */
- if (SEE(EOS)) /* empty RE is illegal */
- return freev(v, REG_EMPTY);
v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final, NONEYET);
assert(SEE(EOS)); /* even if error; ISERR() => SEE(EOS) */
CNOERR();
@@ -371,38 +328,40 @@ int flags;
/* finish setup of nfa and its subre tree */
specialcolors(v->nfa);
CNOERR();
- if (flags&REG_PROGRESS) {
- dumpnfa(v->nfa, stdout);
- dumprt(v->tree, stdout, 1);
+ if (debug != NULL) {
+ dumpnfa(v->nfa, debug);
+ dumprt(v->tree, debug, 1);
}
v->usedshorter = 0;
optrt(v, v->tree);
- if (v->tree != NULL)
+ if (v->tree != NULL) {
v->ntree = numrt(v->tree, 1);
- else
+ markrt(v->tree);
+ } else
v->ntree = 0;
- if (flags&REG_PROGRESS) {
- printf("-->\n");
- dumprt(v->tree, stdout, 1);
+ cleanrt(v);
+ if (debug != NULL) {
+ fprintf(debug, "-->\n");
+ dumprt(v->tree, debug, 1);
}
/* build compacted NFAs for tree, lacons, main nfa */
- nfatree(v, v->tree);
- if (flags&REG_PROGRESS) {
- printf("---->\n");
- dumprt(v->tree, stdout, 1);
+ nfatree(v, v->tree, debug);
+ if (debug != NULL) {
+ fprintf(debug, "---->\n");
+ dumprt(v->tree, debug, 1);
}
CNOERR();
assert(v->nlacons == 0 || v->lacons != NULL);
for (i = 1; i < v->nlacons; i++)
- nfanode(v, &v->lacons[i]);
+ nfanode(v, &v->lacons[i], debug);
CNOERR();
- optimize(v->nfa); /* removes unreachable states */
+ re->re_info |= optimize(v->nfa, debug);
CNOERR();
if (v->nfa->post->nins <= 0)
return freev(v, REG_IMPOSS); /* end unreachable! */
assert(v->nfa->pre->nouts > 0);
- compact(v, v->nfa, &g->cnfa);
+ compact(v->nfa, &g->cnfa);
CNOERR();
freenfa(v->nfa);
v->nfa = NULL;
@@ -412,13 +371,8 @@ int flags;
CNOERR();
/* looks okay, package it up */
- re->re_magic = REMAGIC;
re->re_nsub = v->nsubexp;
- /* re_info is already set */
- re->re_csize = sizeof(chr);
- re->re_guts = (VOID *)g;
- re->re_fns = (VOID *)&functions;
- v->re = NULL;
+ v->re = NULL; /* freev no longer frees re */
g->magic = GUTSMAGIC;
g->cflags = v->cflags;
g->info = re->re_info;
@@ -428,7 +382,7 @@ int flags;
g->tree = v->tree;
v->tree = NULL;
g->ntree = v->ntree;
- g->compare = (v->cflags&REG_ICASE) ? sncmp : wcsncmp;
+ g->compare = (v->cflags&REG_ICASE) ? casecmp : cmp;
g->lacons = v->lacons;
v->lacons = NULL;
g->nlacons = v->nlacons;
@@ -453,16 +407,15 @@ int wanted; /* want enough room for this one */
struct subre **p;
size_t n;
- assert((size_t)wanted >= v->nsubs);
+ assert(wanted > 0 && (size_t)wanted >= v->nsubs);
n = (size_t)wanted * 3 / 2 + 1;
if (v->subs == v->sub10) {
- p = (struct subre **)ckalloc(n * sizeof(struct subre *));
+ p = (struct subre **)MALLOC(n * sizeof(struct subre *));
if (p != NULL)
- memcpy((VOID *)p, (VOID *)v->subs,
+ memcpy(VS(p), VS(v->subs),
v->nsubs * sizeof(struct subre *));
} else
- p = (struct subre **) ckrealloc((VOID *)v->subs,
- n * sizeof(struct subre *));
+ p = REALLOC(v->subs, n * sizeof(struct subre *));
if (p == NULL) {
ERR(REG_ESPACE);
return;
@@ -476,8 +429,8 @@ int wanted; /* want enough room for this one */
/*
- freev - free vars struct's substructures where necessary
- * Does optional error-number setting, and returns error code, to make
- * error code terser.
+ * Optionally does error-number setting, and always returns error code
+ * (if any), to make error-handling code terser.
^ static int freev(struct vars *, int);
*/
static int
@@ -488,20 +441,22 @@ int err;
if (v->re != NULL)
rfree(v->re);
if (v->subs != v->sub10)
- ckfree((char *)v->subs);
+ FREE(v->subs);
if (v->nfa != NULL)
freenfa(v->nfa);
if (v->cm != NULL)
freecm(v->cm);
if (v->tree != NULL)
- freert(v->tree);
+ freert(v, v->tree);
+ if (v->treechain != NULL)
+ cleanrt(v);
if (v->cv != NULL)
freecvec(v->cv);
- if (v->ces != NULL)
- freecvec(v->ces);
+ if (v->mcces != NULL)
+ freecvec(v->mcces);
if (v->lacons != NULL)
freelacons(v->lacons, v->nlacons);
- ERR(err);
+ ERR(err); /* nop if err==0 */
return v->err;
}
@@ -510,6 +465,9 @@ int err;
- parse - parse an RE
* Arguably this is too big and too complex and ought to be divided up.
* However, the code is somewhat intertwined...
+ *
+ * Note that it is no longer necessary to be rigorous about freeing tree
+ * nodes on error exits, as the tree machinery keeps track of them.
^ static struct rtree *parse(struct vars *, int, int, struct state *,
^ struct state *, int);
*/
@@ -531,7 +489,6 @@ int pprefer; /* parent's short/long preference */
# define ARCV(t, val) newarc(v->nfa, t, val, lp, rp)
int m, n;
int emptybranch; /* is there anything in this branch yet? */
- color co;
struct rtree *branches; /* top level */
struct rtree *branch; /* current branch */
struct subre *now; /* current subtree's top */
@@ -545,11 +502,10 @@ int pprefer; /* parent's short/long preference */
assert(stopper == ')' || stopper == EOS);
- branch = NULL; /* lint. */
- rt1 = NULL; /* lint. */
-
capture = 0;
branches = newrt(v);
+ branch = branches;
+ rt1 = NULL; /* shut up lint */
firstbranch = 1;
NOERRN();
do {
@@ -557,27 +513,17 @@ int pprefer; /* parent's short/long preference */
emptybranch = 1; /* tentatively */
left = newstate(v->nfa);
right = newstate(v->nfa);
- if (!firstbranch)
+ NOERRN();
+ if (!firstbranch) {
rt1 = newrt(v);
-#if 1
- if (ISERR()) {
- freert(rt1);
- freert(branches); /* mem leak (CCS). */
- return NULL;
+ NOERRN();
+ branch->next = rt1;
+ branch = rt1;
}
-#else
- NOERRN();
-#endif
EMPTYARC(init, left);
EMPTYARC(right, final);
lp = left;
rp = right;
- if (firstbranch)
- branch = branches;
- else {
- branch->next = rt1;
- branch = rt1;
- }
branch->op = '|';
now = &branch->left;
*now = subre(left, right, NONEYET, 0, (struct rtree *)NULL);
@@ -609,7 +555,7 @@ int pprefer; /* parent's short/long preference */
sub.subno = v->nsubexp;
if ((size_t)sub.subno >= v->nsubs)
moresubs(v, sub.subno);
- assert((size_t) sub.subno < v->nsubs);
+ assert((size_t)sub.subno < v->nsubs);
} else
sub.subno = 0;
NEXT();
@@ -661,7 +607,7 @@ int pprefer; /* parent's short/long preference */
assert(SEE(')') || ISERR());
NEXT();
m = newlacon(v, s, s2, m);
- freert(rt1);
+ freert(v, rt1);
NOERRN();
ARCV(LACON, m);
constraint = 1;
@@ -696,10 +642,10 @@ int pprefer; /* parent's short/long preference */
NEXT();
break;
case '.':
- co = (color) ((v->cflags&REG_NLSTOP)
- ? nlcolor(v)
- : COLORLESS);
- rainbow(v->nfa, v->cm, PLAIN, co, lp, rp);
+ rainbow(v->nfa, v->cm, PLAIN,
+ (v->cflags&REG_NLSTOP) ?
+ nlcolor(v) : COLORLESS,
+ lp, rp);
NEXT();
break;
case '^':
@@ -804,13 +750,19 @@ int pprefer; /* parent's short/long preference */
constraint = 1;
break;
case ')': /* unbalanced paren */
+#ifdef POSIX_MISTAKE
if (!(v->cflags&REG_EXTENDED) ||
(v->cflags&REG_ADVF)) {
- ERR(REG_EPAREN);
- goto end;
+ ERR(REG_EPAREN);
+ return NULL;
}
NOTE(REG_UPBOTCH);
/* fallthrough into case PLAIN */
+#else
+ ERR(REG_EPAREN);
+ return NULL;
+ break;
+#endif
case PLAIN:
onechr(v, v->nextvalue, lp, rp);
okcolors(v->nfa, v->cm);
@@ -822,10 +774,12 @@ int pprefer; /* parent's short/long preference */
case '?':
case '{':
ERR(REG_BADRPT);
- goto end;
+ return NULL;
+ break;
default:
ERR(REG_ASSERT);
- goto end;
+ return NULL;
+ break;
}
/* ...possibly followed by a quantifier */
@@ -858,13 +812,13 @@ int pprefer; /* parent's short/long preference */
n = INFINITY;
if (m > n) {
ERR(REG_BADBR);
- goto end;
+ return NULL;
}
} else
n = m;
if (!SEE('}')) { /* gets errors too */
ERR(REG_BADBR);
- goto end;
+ return NULL;
}
if (m != n)
sub.prefer = (v->nextvalue) ? LONGER :
@@ -880,19 +834,19 @@ int pprefer; /* parent's short/long preference */
/* constraints may not be quantified */
if (constraint) {
ERR(REG_BADRPT);
- goto end;
+ return NULL;
}
/* annoying special case: {0,0} cancels everything */
if (m == 0 && n == 0 && sub.begin != NULL) {
- freert(now->tree);
+ freert(v, now->tree);
now->tree = NULL;
sub.begin = NULL; /* no substructure */
sub.prefer = NONEYET;
/* the repeat() below will do the rest */
}
- /* if no substructure, aVOID hard part */
+ /* if no substructure, avoid hard part */
if (now->prefer == NONEYET)
now->prefer = sub.prefer;
if (sub.begin == NULL && (sub.prefer == NONEYET ||
@@ -983,8 +937,8 @@ int pprefer; /* parent's short/long preference */
t->tree = rt1;
rt1->op = 'b';
rt1->left.subno = sub.subno;
- rt1->left.min = (short) m;
- rt1->left.max = (short) n;
+ rt1->left.min = (short)m;
+ rt1->left.max = (short)n;
rt1->left.prefer = sub.prefer;
continue; /* NOTE CONTINUE */
}
@@ -1036,14 +990,13 @@ int pprefer; /* parent's short/long preference */
branch->op = ',';
else {
branches = branch->left.tree; /* might be NULL */
- freertnode(branch);
+ freertnode(v, branch);
}
}
if (capture) /* actually a catchall flag */
return branches;
- end: /* mem leak (CCS) */
- freert(branches);
+ freert(v, branches);
return NULL;
}
@@ -1197,7 +1150,7 @@ struct state *rp;
struct state *s;
struct arc *a; /* arc from lp */
struct arc *ba; /* arc from left, from bracket() */
- struct arc *pa; /* CE-prototype arc */
+ struct arc *pa; /* MCCE-prototype arc */
color co;
chr *p;
int i;
@@ -1213,16 +1166,16 @@ struct state *rp;
/* easy part of complementing */
colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);
NOERR();
- if (v->ces == NULL) { /* no CEs -- we're done */
+ if (v->mcces == NULL) { /* no MCCEs -- we're done */
dropstate(v->nfa, left);
assert(right->nins == 0);
freestate(v->nfa, right);
return;
}
- /* but complementing gets messy in the presence of CEs... */
+ /* but complementing gets messy in the presence of MCCEs... */
NOTE(REG_ULOCALE);
- for (p = v->ces->chrs, i = v->ces->nchrs; i > 0; p++, i--) {
+ for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) {
co = getcolor(v->cm, *p);
a = findarc(lp, PLAIN, co);
ba = findarc(left, PLAIN, co);
@@ -1236,7 +1189,7 @@ struct state *rp;
NOERR();
newarc(v->nfa, PLAIN, co, lp, s);
NOERR();
- pa = findarc(v->cepbegin, PLAIN, co);
+ pa = findarc(v->mccepbegin, PLAIN, co);
assert(pa != NULL);
if (ba == NULL) { /* easy case, need all of them */
cloneouts(v->nfa, pa->to, s, rp, PLAIN);
@@ -1288,10 +1241,11 @@ struct state *rp;
case RANGE: /* a-b-c or other botch */
ERR(REG_ERANGE);
return;
+ break;
case PLAIN:
c[0] = v->nextvalue;
NEXT();
- /* shortcut for ordinary chr (not range, not CE leader) */
+ /* shortcut for ordinary chr (not range, not MCCE leader) */
if (!SEE(RANGE) && !ISCELEADER(v, c[0])) {
onechr(v, c[0], lp, rp);
return;
@@ -1318,6 +1272,7 @@ struct state *rp;
NOERR();
dovec(v, cv, lp, rp);
return;
+ break;
case CCLASS:
startp = v->now;
endp = scanplain(v);
@@ -1327,9 +1282,11 @@ struct state *rp;
NOERR();
dovec(v, cv, lp, rp);
return;
+ break;
default:
ERR(REG_ASSERT);
return;
+ break;
}
if (SEE(RANGE)) {
@@ -1353,6 +1310,7 @@ struct state *rp;
default:
ERR(REG_ERANGE);
return;
+ break;
}
} else
endc = startc;
@@ -1407,35 +1365,35 @@ leaders(v, cv)
struct vars *v;
struct cvec *cv;
{
- int ce;
+ int mcce;
chr *p;
chr leader;
struct state *s;
struct arc *a;
- v->cepbegin = newstate(v->nfa);
- v->cepend = newstate(v->nfa);
+ v->mccepbegin = newstate(v->nfa);
+ v->mccepend = newstate(v->nfa);
NOERR();
- for (ce = 0; ce < cv->nces; ce++) {
- p = cv->ces[ce];
+ for (mcce = 0; mcce < cv->nmcces; mcce++) {
+ p = cv->mcces[mcce];
leader = *p;
if (!haschr(cv, leader)) {
addchr(cv, leader);
s = newstate(v->nfa);
newarc(v->nfa, PLAIN, subcolor(v->cm, leader),
- v->cepbegin, s);
+ v->mccepbegin, s);
okcolors(v->nfa, v->cm);
} else {
- a = findarc(v->cepbegin, PLAIN,
+ a = findarc(v->mccepbegin, PLAIN,
getcolor(v->cm, leader));
assert(a != NULL);
s = a->to;
- assert(s != v->cepend);
+ assert(s != v->mccepend);
}
p++;
- assert(*p != 0 && *(p+1) == 0); /* only 2-char CEs at present */
- newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->cepend);
+ assert(*p != 0 && *(p+1) == 0); /* only 2-char MCCEs for now */
+ newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend);
okcolors(v->nfa, v->cm);
}
}
@@ -1463,7 +1421,7 @@ struct state *rp;
/*
- dovec - fill in arcs for each element of a cvec
- * This one has to handle the messy cases, like CEs and CE leaders.
+ * This one has to handle the messy cases, like MCCEs and MCCE leaders.
^ static VOID dovec(struct vars *, struct cvec *, struct state *,
^ struct state *);
*/
@@ -1493,11 +1451,11 @@ struct state *rp;
assert(singleton(v->cm, *p));
*np++ = *p;
}
- cv->nchrs = np - cv->chrs; /* only CE leaders remain */
- if (cv->nchrs == 0 && cv->nces == 0)
+ cv->nchrs = np - cv->chrs; /* only MCCE leaders remain */
+ if (cv->nchrs == 0 && cv->nmcces == 0)
return;
- /* deal with the CE leaders */
+ /* deal with the MCCE leaders */
NOTE(REG_ULOCALE);
for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) {
co = getcolor(v->cm, *p);
@@ -1510,7 +1468,7 @@ struct state *rp;
newarc(v->nfa, PLAIN, co, lp, s);
NOERR();
}
- pa = findarc(v->cepbegin, PLAIN, co);
+ pa = findarc(v->mccepbegin, PLAIN, co);
assert(pa != NULL);
ps = pa->to;
newarc(v->nfa, '$', 1, s, rp);
@@ -1519,9 +1477,9 @@ struct state *rp;
NOERR();
}
- /* and the CEs */
- for (i = 0; i < cv->nces; i++) {
- p = cv->ces[i];
+ /* and the MCCEs */
+ for (i = 0; i < cv->nmcces; i++) {
+ p = cv->mcces[i];
assert(singleton(v->cm, *p));
co = getcolor(v->cm, *p++);
a = findarc(lp, PLAIN, co);
@@ -1587,7 +1545,7 @@ struct vars *v;
NEXT();
assert(v->savenow != NULL && SEE('['));
bracket(v, left, right);
- assert(((v->savenow != NULL) && SEE(']')) || ISERR());
+ assert((v->savenow != NULL && SEE(']')) || ISERR());
NEXT();
NOERR();
v->wordchrs = left;
@@ -1626,14 +1584,23 @@ static struct rtree *
newrt(v)
struct vars *v;
{
- struct rtree *rt = (struct rtree *)ckalloc(sizeof(struct rtree));
-
- if (rt == NULL) {
- ERR(REG_ESPACE);
- return NULL;
+ struct rtree *rt;
+
+ rt = v->treefree;
+ if (rt != NULL)
+ v->treefree = rt->next;
+ else {
+ rt = (struct rtree *)MALLOC(sizeof(struct rtree));
+ if (rt == NULL) {
+ ERR(REG_ESPACE);
+ return NULL;
+ }
+ rt->chain = v->treechain;
+ v->treechain = rt;
}
rt->op = '?'; /* invalid */
+ rt->flags = 0;
rt->no = 0;
rt->left.begin = NULL;
rt->left.end = NULL;
@@ -1650,36 +1617,39 @@ struct vars *v;
rt->right.tree = NULL;
ZAPCNFA(rt->right.cnfa);
rt->next = NULL;
+
return rt;
}
/*
- freert - free a subRE subtree
- ^ static VOID freert(struct rtree *);
+ ^ static VOID freert(struct vars *, struct rtree *);
*/
static VOID
-freert(rt)
+freert(v, rt)
+struct vars *v; /* might be NULL */
struct rtree *rt;
{
if (rt == NULL)
return;
if (rt->left.tree != NULL)
- freert(rt->left.tree);
+ freert(v, rt->left.tree);
if (rt->right.tree != NULL)
- freert(rt->right.tree);
+ freert(v, rt->right.tree);
if (rt->next != NULL)
- freert(rt->next);
+ freert(v, rt->next);
- freertnode(rt);
+ freertnode(v, rt);
}
/*
- freertnode - free one node in a subRE subtree
- ^ static VOID freertnode(struct rtree *);
+ ^ static VOID freertnode(struct vars *, struct rtree *);
*/
static VOID
-freertnode(rt)
+freertnode(v, rt)
+struct vars *v; /* might be NULL */
struct rtree *rt;
{
if (rt == NULL)
@@ -1689,8 +1659,13 @@ struct rtree *rt;
freecnfa(&rt->left.cnfa, 0);
if (!NULLCNFA(rt->right.cnfa))
freecnfa(&rt->right.cnfa, 0);
+ rt->flags = 0;
- ckfree((char *)rt);
+ if (v != NULL) {
+ rt->next = v->treefree;
+ v->treefree = rt;
+ } else
+ FREE(rt);
}
/*
@@ -1721,7 +1696,7 @@ struct rtree *rt;
subno = rt->left.subno;
rt->left = t->left;
assert(NULLCNFA(t->left.cnfa));
- freertnode(t);
+ freertnode(v, t);
if (subno != 0) {
assert(rt->left.subno == 0 && subno > 0);
rt->left.subno = subno;
@@ -1739,7 +1714,7 @@ struct rtree *rt;
subno = rt->right.subno;
rt->right = t->left;
assert(NULLCNFA(t->right.cnfa));
- freertnode(t);
+ freertnode(v, t);
if (subno != 0) {
assert(rt->right.subno == 0 && subno > 0);
rt->right.subno = subno;
@@ -1800,7 +1775,7 @@ int start; /* starting point for subtree numbers */
assert(rt != NULL);
i = start;
- rt->no = (short) i++;
+ rt->no = (short)i++;
if (rt->left.tree != NULL)
i = numrt(rt->left.tree, i);
if (rt->right.tree != NULL)
@@ -1811,54 +1786,95 @@ int start; /* starting point for subtree numbers */
}
/*
+ - markrt - mark tree nodes as INUSE
+ ^ static VOID markrt(struct rtree *);
+ */
+static VOID
+markrt(rt)
+struct rtree *rt;		/* root of the subtree to mark; must not be NULL */
+{
+	struct rtree *node;
+
+	assert(rt != NULL);
+
+	/*
+	 * Siblings on the "next" list are handled by the loop; only the
+	 * left and right subtrees need a recursive call.  The visiting
+	 * order is node, left subtree, right subtree, then next sibling.
+	 */
+	for (node = rt; node != NULL; node = node->next) {
+		node->flags |= INUSE;
+		if (node->left.tree != NULL)
+			markrt(node->left.tree);
+		if (node->right.tree != NULL)
+			markrt(node->right.tree);
+	}
+}
+
+/*
+ - cleanrt - free any tree nodes not marked INUSE
+ * Walks the list of every node obtained from MALLOC by newrt(), frees the
+ * ones whose INUSE flag is clear, and then empties both the allocation
+ * list and the free list.  Nodes marked INUSE are left allocated.
+ ^ static VOID cleanrt(struct vars *);
+ */
+static VOID
+cleanrt(v)
+struct vars *v;
+{
+	struct rtree *rt;
+	struct rtree *next;	/* fetched before rt can be freed */
+
+	for (rt = v->treechain; rt != NULL; rt = next) {
+		/*
+		 * treechain is threaded through the "chain" member (that is
+		 * what newrt() sets); "next" is the sibling / free-list link
+		 * and following it would skip most allocated nodes.
+		 */
+		next = rt->chain;
+		if (!(rt->flags&INUSE))
+			FREE(rt);
+	}
+	v->treechain = NULL;
+	v->treefree = NULL;	/* just on general principles */
+}
+
+/*
- nfatree - turn a subRE subtree into a tree of compacted NFAs
- ^ static VOID nfatree(struct vars *, struct rtree *);
+ ^ static VOID nfatree(struct vars *, struct rtree *, FILE *);
*/
static VOID
-nfatree(v, rt)
+nfatree(v, rt, f)
struct vars *v;
struct rtree *rt;
+FILE *f; /* for debug output */
{
if (rt == NULL)
return;
if (rt->left.begin != NULL)
- nfanode(v, &rt->left);
+ nfanode(v, &rt->left, f);
if (rt->left.tree != NULL)
- nfatree(v, rt->left.tree);
+ nfatree(v, rt->left.tree, f);
if (rt->right.begin != NULL)
- nfanode(v, &rt->right);
+ nfanode(v, &rt->right, f);
if (rt->right.tree != NULL)
- nfatree(v, rt->right.tree);
+ nfatree(v, rt->right.tree, f);
if (rt->next != NULL)
- nfatree(v, rt->next);
+ nfatree(v, rt->next, f);
}
/*
- nfanode - do one NFA for nfatree
- ^ static VOID nfanode(struct vars *, struct subre *);
+ ^ static VOID nfanode(struct vars *, struct subre *, FILE *);
*/
static VOID
-nfanode(v, sub)
+nfanode(v, sub, f)
struct vars *v;
struct subre *sub;
+FILE *f; /* for debug output */
{
struct nfa *nfa;
if (sub->begin == NULL)
return;
- nfa = newnfa(v, v->nfa);
+ nfa = newnfa(v, v->cm, v->nfa);
NOERR();
dupnfa(nfa, sub->begin, sub->end, nfa->init, nfa->final);
if (!ISERR()) {
specialcolors(nfa);
- optimize(nfa);
+ (DISCARD) optimize(nfa, f);
}
if (!ISERR())
- compact(v, nfa, &sub->cnfa);
+ compact(nfa, &sub->cnfa);
freenfa(nfa);
}
@@ -1877,11 +1893,11 @@ int pos;
struct subre *sub;
if (v->nlacons == 0) {
- v->lacons = (struct subre *)ckalloc(2 * sizeof(struct subre));
+ v->lacons = (struct subre *)MALLOC(2 * sizeof(struct subre));
n = 1; /* skip 0th */
v->nlacons = 2;
} else {
- v->lacons = (struct subre *)ckrealloc((VOID *) v->lacons,
+ v->lacons = (struct subre *)REALLOC(v->lacons,
(v->nlacons+1)*sizeof(struct subre));
n = v->nlacons++;
}
@@ -1909,10 +1925,11 @@ int n;
struct subre *sub;
int i;
+ assert(n > 0);
for (sub = subs + 1, i = n - 1; i > 0; sub++, i--)
if (!NULLCNFA(sub->cnfa))
freecnfa(&sub->cnfa, 0);
- ckfree((char *)subs);
+ FREE(subs);
}
/*
@@ -1921,11 +1938,15 @@ int n;
*/
static VOID
rfree(re)
-regex_t *re; /* regfree has validated it */
+regex_t *re;
{
- struct guts *g = (struct guts *)re->re_guts;
+ struct guts *g;
- re->re_magic = 0; /* invalidate it */
+ if (re == NULL || re->re_magic != REMAGIC)
+ return;
+
+ re->re_magic = 0; /* invalidate RE */
+ g = (struct guts *)re->re_guts;
re->re_guts = NULL;
re->re_fns = NULL;
g->magic = 0;
@@ -1934,10 +1955,50 @@ regex_t *re; /* regfree has validated it */
if (g->cm != NULL)
freecm(g->cm);
if (g->tree != NULL)
- freert(g->tree);
+ freert((struct vars *)NULL, g->tree);
if (g->lacons != NULL)
freelacons(g->lacons, g->nlacons);
- ckfree((char *)g);
+ FREE(g);
+}
+
+/*
+ - dump - dump an RE in human-readable form
+ * The whole body is conditional on REG_DEBUG; without it this function
+ * does nothing.  Output goes to f: header counters, the colormap, the
+ * main compacted NFA, each lookahead constraint, and the subRE tree.
+ ^ static VOID dump(regex_t *, FILE *);
+ */
+static VOID
+dump(re, f)
+regex_t *re;
+FILE *f;
+{
+#ifdef REG_DEBUG
+ struct guts *g;
+ int i;
+
+ /* a bad magic number is reported, but the dump carries on regardless */
+ if (re->re_magic != REMAGIC)
+ fprintf(f, "bad magic number (0x%x not 0x%x)\n", re->re_magic,
+ REMAGIC);
+ /* without guts there is nothing further to print */
+ if (re->re_guts == NULL) {
+ fprintf(f, "NULL guts!!!\n");
+ return;
+ }
+ g = (struct guts *)re->re_guts;
+ if (g->magic != GUTSMAGIC)
+ fprintf(f, "bad guts magic number (0x%x not 0x%x)\n", g->magic,
+ GUTSMAGIC);
+
+ /* NOTE(review): the %d/%o conversions assume int-sized fields -- confirm against regex.h */
+ fprintf(f, "nsub %d, info 0%o, csize %d, ntree %d, usedshort %d\n",
+ re->re_nsub, re->re_info, re->re_csize, g->ntree,
+ g->usedshorter);
+
+ dumpcolors(g->cm, f);
+ dumpcnfa(&g->cnfa, f);
+ /* starts at 1: element 0 of lacons is not dumped */
+ for (i = 1; i < g->nlacons; i++) {
+ fprintf(f, "la%d (%s):\n", i,
+ (g->lacons[i].subno) ? "positive" : "negative");
+ dumpcnfa(&g->lacons[i].cnfa, f);
+ }
+ dumprt(g->tree, f, 0);
+#endif
}
/*
@@ -2068,22 +2129,9 @@ int level;
}
}
-/*
- - dump - dump an RE in human-readable form
- ^ static VOID dump(regex_t *, FILE *);
- */
-static VOID
-dump(re, f)
-regex_t *re;
-FILE *f;
-{
-}
-
-#undef NOERRN
-#define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */
-
#define COMPILE 1
-#include "lex.c"
-#include "color.c"
-#include "locale.c"
-#include "nfa.c"
+#include "regc_lex.c"
+#include "regc_color.c"
+#include "regc_nfa.c"
+#include "regc_cvec.c"
+#include "regc_locale.c"
diff --git a/generic/regcustom.h b/generic/regcustom.h
new file mode 100644
index 0000000..0fda25f
--- /dev/null
+++ b/generic/regcustom.h
@@ -0,0 +1,90 @@
+/* headers (which also pick up the standard ones, or equivalents) */
+#include "tclInt.h"
+#include "tclPort.h"
+
+/* overrides for regguts.h definitions */
+/* function-pointer declarations */
+#define FUNCPTR(name, args) (*name) _ANSI_ARGS_(args)
+/*
+ * memory management: the regexp code allocates only through these three
+ * macros, which here expand to the ck* allocator calls.  VS() is not
+ * defined in this header -- presumably it comes from regguts.h (TODO confirm).
+ */
+#define MALLOC(n) ckalloc(n)
+#define FREE(p) ckfree(VS(p))
+#define REALLOC(p,n) ckrealloc(VS(p),n)
+
+
+
+/*
+ * Do not insert extras between the "begin" and "end" lines -- this
+ * chunk is automatically extracted to be fitted into regex.h.
+ */
+/* --- begin --- */
+/* ensure certain things don't sneak in from system headers */
+#ifdef __REG_WIDE_T
+#undef __REG_WIDE_T
+#endif
+#ifdef __REG_WIDE_COMPILE
+#undef __REG_WIDE_COMPILE
+#endif
+#ifdef __REG_WIDE_EXEC
+#undef __REG_WIDE_EXEC
+#endif
+#ifdef __REG_REGOFF_T
+#undef __REG_REGOFF_T
+#endif
+#ifdef __REG_VOID_T
+#undef __REG_VOID_T
+#endif
+#ifdef __REG_CONST
+#undef __REG_CONST
+#endif
+/* interface types */
+#define __REG_WIDE_T Tcl_UniChar
+#define __REG_WIDE_COMPILE re_ucomp
+#define __REG_WIDE_EXEC re_uexec
+#define __REG_REGOFF_T long /* not really right, but good enough... */
+#define __REG_VOID_T VOID
+#define __REG_CONST CONST
+#ifndef __REG_NOFRONT
+#define __REG_NOFRONT /* don't want regcomp() and regexec() */
+#endif
+#ifndef __REG_NOCHAR
+#define __REG_NOCHAR /* or the char versions */
+#endif
+/* --- end --- */
+
+
+
+/* internal character type and related */
+typedef Tcl_UniChar chr; /* the type itself */
+typedef int pchr; /* what it promotes to */
+typedef unsigned uchr; /* unsigned type that will hold a chr */
+typedef int celt; /* type to hold chr, MCCE number, or NOCELT */
+#define NOCELT (-1) /* celt value which is not valid chr or MCCE */
+#define CHR(c) (UCHAR(c)) /* turn char literal into chr literal */
+#define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */
+#define CHRBITS 16 /* bits in a chr; must not use sizeof */
+#define CHR_MIN 0x0000 /* smallest and largest chr; the value */
+#define CHR_MAX 0xffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */
+
+/* functions operating on chr */
+#define iscalnum(x) TclUniCharIsAlnum(x)
+#define iscalpha(x) TclUniCharIsAlpha(x)
+#define iscdigit(x) TclUniCharIsDigit(x)
+#define iscspace(x) TclUniCharIsSpace(x)
+
+/* name the external functions */
+#define compile re_ucomp
+#define exec re_uexec
+#ifdef notdef
+#define regfree re_ufree
+#define regerror re_uerror
+#endif
+
+/*
+ * Implement a mistake in the original POSIX.2: in EREs, and only in EREs
+ * (AREs do not support this botch), an unbalanced right parenthesis is an
+ * ordinary character rather than an error. This was unintentional, and
+ * will be fixed someday.
+ */
+#define POSIX_MISTAKE /* sigh */
+
+/* and pick up the standard header */
+#include "regex.h"
diff --git a/generic/regerror.c b/generic/regerror.c
new file mode 100644
index 0000000..5eb67a7
--- /dev/null
+++ b/generic/regerror.c
@@ -0,0 +1,82 @@
+/*
+ * regerror - error-code expansion
+ */
+
+#include "regguts.h"
+
+/* unknown-error explanation */
+/* (used as a sprintf format by regerror(); the %x receives the error code) */
+static char unk[] = "*** unknown regex error code 0x%x ***";
+
+/* struct to map among codes, code names, and explanations */
+static struct rerr {
+ int code; /* numeric error code; negative only in the sentinel */
+ char *name; /* symbolic name, e.g. "REG_BADPAT" */
+ char *explain; /* human-readable explanation */
+} rerrs[] = {
+ /* the actual table is built from regex.h */
+# include "regerrs.h"
+ /* sentinel: regerror()'s table scans stop at the first negative code */
+ -1, "", "oops", /* explanation special-cased in code */
+};
+
+/*
+ - regerror - the interface to error numbers
+ *
+ * Normally turns errcode into its explanation.  Two pseudo-codes request
+ * conversions instead, with errbuf doubling as the input:
+ *	REG_ATOI  errbuf holds an error name; result is its number in
+ *		  decimal (-1 if the name is not in the table)
+ *	REG_ITOA  errbuf holds a decimal number; result is the matching
+ *		  name, or "REG_<number>" if the number is not in the table
+ * The result is copied into errbuf, truncated to fit, unless errbuf_size
+ * is 0.  The return value is always the space the full result needs.
+ */
+/* ARGSUSED */
+size_t				/* actual space needed (including NUL) */
+regerror(errcode, preg, errbuf, errbuf_size)
+int errcode;			/* error code, or REG_ATOI or REG_ITOA */
+const regex_t *preg;		/* associated regex_t (unused at present) */
+char *errbuf;			/* result buffer (unless errbuf_size==0) */
+size_t errbuf_size;		/* available space in errbuf, can be 0 */
+{
+	struct rerr *entry = rerrs;	/* table cursor; stops on the sentinel */
+	char scratch[sizeof(unk)+50];	/* 50 = plenty for int */
+	char *text = scratch;		/* result; the scratch buffer by default */
+	size_t needed;
+	int wanted;
+
+	if (errcode == REG_ATOI) {		/* convert name to number */
+		while (entry->code >= 0 && strcmp(entry->name, errbuf) != 0)
+			entry++;
+		sprintf(scratch, "%d", entry->code);	/* -1 for unknown */
+	} else if (errcode == REG_ITOA) {	/* convert number to name */
+		wanted = atoi(errbuf);	/* not our problem if this fails */
+		while (entry->code >= 0 && entry->code != wanted)
+			entry++;
+		if (entry->code >= 0)
+			text = entry->name;
+		else			/* unknown; tell him the number */
+			sprintf(scratch, "REG_%u", (unsigned)wanted);
+	} else {				/* a real, normal error code */
+		while (entry->code >= 0 && entry->code != errcode)
+			entry++;
+		if (entry->code >= 0)
+			text = entry->explain;
+		else			/* unknown; say so */
+			sprintf(scratch, unk, errcode);
+	}
+
+	needed = strlen(text) + 1;	/* space needed, including NUL */
+	if (errbuf_size == 0)
+		return needed;		/* caller only wants the size */
+
+	if (errbuf_size > needed)
+		strcpy(errbuf, text);
+	else {				/* truncate to fit */
+		strncpy(errbuf, text, errbuf_size-1);
+		errbuf[errbuf_size-1] = '\0';
+	}
+	return needed;
+}
diff --git a/generic/regerrs.h b/generic/regerrs.h
new file mode 100644
index 0000000..8298597
--- /dev/null
+++ b/generic/regerrs.h
@@ -0,0 +1,19 @@
+REG_OKAY, "REG_OKAY", "no errors detected",
+REG_NOMATCH, "REG_NOMATCH", "failed to match",
+REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.1)",
+REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element",
+REG_ECTYPE, "REG_ECTYPE", "invalid character class",
+REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence",
+REG_ESUBREG, "REG_ESUBREG", "invalid backreference number",
+REG_EBRACK, "REG_EBRACK", "brackets [] not balanced",
+REG_EPAREN, "REG_EPAREN", "parentheses () not balanced",
+REG_EBRACE, "REG_EBRACE", "braces {} not balanced",
+REG_BADBR, "REG_BADBR", "invalid repetition count(s)",
+REG_ERANGE, "REG_ERANGE", "invalid character range",
+REG_ESPACE, "REG_ESPACE", "out of memory",
+REG_BADRPT, "REG_BADRPT", "quantifier operand invalid",
+REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug",
+REG_INVARG, "REG_INVARG", "invalid argument to regex function",
+REG_MIXED, "REG_MIXED", "character widths of regex and string differ",
+REG_BADOPT, "REG_BADOPT", "invalid embedded option",
+REG_IMPOSS, "REG_IMPOSS", "can never match",
diff --git a/generic/regex.h b/generic/regex.h
new file mode 100644
index 0000000..6f61dd3
--- /dev/null
+++ b/generic/regex.h
@@ -0,0 +1,299 @@
+#ifndef _REGEX_H_
+#define _REGEX_H_ /* never again */
+/*
+ * regular expressions
+ *
+ * Prototypes etc. marked with "^" within comments get gathered up (and
+ * possibly edited) by the regfwd program and inserted near the bottom of
+ * this file.
+ *
+ * We offer the option of declaring one wide-character version of the
+ * RE functions as well as the char versions. To do that, define
+ * __REG_WIDE_T to the type of wide characters (unfortunately, there
+ * is no consensus that wchar_t is suitable) and __REG_WIDE_COMPILE and
+ * __REG_WIDE_EXEC to the names to be used for the compile and execute
+ * functions (suggestion: re_Xcomp and re_Xexec, where X is a letter
+ * suggestive of the wide type, e.g. re_ucomp and re_uexec for Unicode).
+ * For cranky old compilers, it may be necessary to do something like:
+ * #define __REG_WIDE_COMPILE(a,b,c,d) re_Xcomp(a,b,c,d)
+ * #define __REG_WIDE_EXEC(a,b,c,d,e,f,g) re_Xexec(a,b,c,d,e,f,g)
+ * rather than just #defining the names as parameterless macros.
+ *
+ * For some specialized purposes, it may be desirable to suppress the
+ * declarations of the "front end" functions, regcomp() and regexec(),
+ * or of the char versions of the compile and execute functions. To
+ * suppress the front-end functions, define __REG_NOFRONT. To suppress
+ * the char versions, define __REG_NOCHAR.
+ *
+ * The right place to do those defines (and some others you may want, see
+ * below) would be <sys/types.h>. If you don't have control of that file,
+ * the right place to add your own defines to this file is marked below.
+ * This is normally done automatically, by the makefile and regmkhdr, based
+ * on the contents of regcustom.h.
+ */
+
+
+
+/*
+ * voodoo for C++
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+
+/*
+ * Add your own defines, if needed, here. The --- stuff is for automatic
+ * generation of this file from regproto.h and regcustom.h.
+ */
+/* --- begin --- */
+/* ensure certain things don't sneak in from system headers */
+#ifdef __REG_WIDE_T
+#undef __REG_WIDE_T
+#endif
+#ifdef __REG_WIDE_COMPILE
+#undef __REG_WIDE_COMPILE
+#endif
+#ifdef __REG_WIDE_EXEC
+#undef __REG_WIDE_EXEC
+#endif
+#ifdef __REG_REGOFF_T
+#undef __REG_REGOFF_T
+#endif
+#ifdef __REG_VOID_T
+#undef __REG_VOID_T
+#endif
+#ifdef __REG_CONST
+#undef __REG_CONST
+#endif
+/* interface types */
+#define __REG_WIDE_T Tcl_UniChar
+#define __REG_WIDE_COMPILE re_ucomp
+#define __REG_WIDE_EXEC re_uexec
+#define __REG_REGOFF_T long /* not really right, but good enough... */
+#define __REG_VOID_T VOID
+#define __REG_CONST CONST
+#ifndef __REG_NOFRONT
+#define __REG_NOFRONT /* don't want regcomp() and regexec() */
+#endif
+#ifndef __REG_NOCHAR
+#define __REG_NOCHAR /* or the char versions */
+#endif
+/* --- end --- */
+
+
+/*
+ * interface types etc.
+ */
+
+/*
+ * regoff_t has to be large enough to hold either off_t or ssize_t,
+ * and must be signed; it's only a guess that long is suitable, so we
+ * offer <sys/types.h> an override.
+ */
+#ifdef __REG_REGOFF_T
+typedef __REG_REGOFF_T regoff_t;
+#else
+typedef long regoff_t;
+#endif
+
+/*
+ * For benefit of old compilers, we offer <sys/types.h> the option of
+ * overriding the `void' type used to declare nonexistent return types.
+ */
+#ifdef __REG_VOID_T
+typedef __REG_VOID_T re_void;
+#else
+typedef void re_void;
+#endif
+
+/*
+ * Also for benefit of old compilers, <sys/types.h> can supply a macro
+ * which expands to a substitute for `const'.
+ */
+#ifndef __REG_CONST
+#define __REG_CONST const
+#endif
+
+
+
+/*
+ * other interface types
+ */
+
+/* the biggie, a compiled RE (or rather, a front end to same); every compile, execute, regfree() and regerror() prototype below takes a pointer to one */
+typedef struct {
+ int re_magic; /* magic number */
+ size_t re_nsub; /* number of subexpressions */
+ int re_info; /* information about RE; presumably an OR of the REG_U* bits below -- confirm in regcomp.c */
+# define REG_UBACKREF 000001
+# define REG_ULOOKAHEAD 000002
+# define REG_UBOUNDS 000004
+# define REG_UBRACES 000010
+# define REG_UBSALNUM 000020
+# define REG_UPBOTCH 000040
+# define REG_UBBS 000100
+# define REG_UNONPOSIX 000200
+# define REG_UUNSPEC 000400
+# define REG_UUNPORT 001000
+# define REG_ULOCALE 002000
+# define REG_UEMPTYMATCH 004000
+ int re_csize; /* sizeof(character) */
+ char *re_endp; /* backward compatibility kludge */
+ /* the rest is opaque pointers to hidden innards */
+ char *re_guts; /* `char *' is more portable than `void *' */
+ char *re_fns; /* opaque as well, like re_guts */
+} regex_t;
+
+/* result reporting (may acquire more fields later); element type of the regmatch_t [] array handed to the execute functions */
+typedef struct {
+ regoff_t rm_so; /* start of substring; -1 when the matcher has cleared the entry */
+ regoff_t rm_eo; /* end of substring; -1 when the matcher has cleared the entry */
+} regmatch_t;
+
+/* supplementary control and reporting (placeholder for later work); passed as the rm_detail_t * argument of the execute functions */
+typedef struct {
+ int rm_dummy; /* placeholder member; nothing in this header gives it a meaning */
+} rm_detail_t;
+
+
+
+/*
+ * compilation
+ ^ #ifndef __REG_NOCHAR
+ ^ int re_comp(regex_t *, __REG_CONST char *, size_t, int);
+ ^ #endif
+ ^ #ifndef __REG_NOFRONT
+ ^ int regcomp(regex_t *, __REG_CONST char *, int);
+ ^ #endif
+ ^ #ifdef __REG_WIDE_T
+ ^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int);
+ ^ #endif
+ */
+#define REG_BASIC 000000 /* BREs (convenience) */
+#define REG_EXTENDED 000001 /* EREs */
+#define REG_ADVF 000002 /* advanced features in EREs */
+#define REG_ADVANCED 000003 /* AREs (which are also EREs) */
+#define REG_QUOTE 000004 /* no special characters, none */
+#define REG_NOSPEC REG_QUOTE /* historical synonym */
+#define REG_ICASE 000010 /* ignore case */
+#define REG_NOSUB 000020 /* don't care about subexpressions */
+#define REG_EXPANDED 000040 /* expanded format, white space & comments */
+#define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */
+#define REG_NLANCH 000200 /* ^ matches after \n, $ before */
+#define REG_NEWLINE 000300 /* newlines are line terminators */
+#define REG_PEND 000400 /* ugh -- backward-compatibility hack */
+#define REG_DUMP 004000 /* none of your business :-) */
+#define REG_FAKEEC 010000 /* none of your business :-) */
+#define REG_PROGRESS 020000 /* none of your business :-) */
+
+
+
+/*
+ * execution
+ ^ #ifndef __REG_NOCHAR
+ ^ int re_exec(regex_t *, __REG_CONST char *, size_t,
+ ^ rm_detail_t *, size_t, regmatch_t [], int);
+ ^ #endif
+ ^ #ifndef __REG_NOFRONT
+ ^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int);
+ ^ #endif
+ ^ #ifdef __REG_WIDE_T
+ ^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t,
+ ^ rm_detail_t *, size_t, regmatch_t [], int);
+ ^ #endif
+ */
+#define REG_NOTBOL 0001 /* BOS is not BOL */
+#define REG_NOTEOL 0002 /* EOS is not EOL */
+#define REG_STARTEND 0004 /* backward compatibility kludge */
+#define REG_FTRACE 0010 /* none of your business */
+#define REG_MTRACE 0020 /* none of your business */
+#define REG_SMALL 0040 /* none of your business */
+
+
+
+/*
+ * misc generics (may be more functions here eventually)
+ ^ re_void regfree(regex_t *);
+ */
+
+
+
+/*
+ * error reporting
+ * Be careful if modifying the list of error codes -- the table used by
+ * regerror() is generated automatically from this file!
+ *
+ * Note that there is no wide-char variant of regerror at this time; what
+ * kind of character is used for error reports is independent of what kind
+ * is used in matching.
+ *
+ ^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t);
+ */
+#define REG_OKAY 0 /* no errors detected */
+#define REG_NOMATCH 1 /* failed to match */
+#define REG_BADPAT 2 /* invalid regexp */
+#define REG_ECOLLATE 3 /* invalid collating element */
+#define REG_ECTYPE 4 /* invalid character class */
+#define REG_EESCAPE 5 /* invalid escape \ sequence */
+#define REG_ESUBREG 6 /* invalid backreference number */
+#define REG_EBRACK 7 /* brackets [] not balanced */
+#define REG_EPAREN 8 /* parentheses () not balanced */
+#define REG_EBRACE 9 /* braces {} not balanced */
+#define REG_BADBR 10 /* invalid repetition count(s) */
+#define REG_ERANGE 11 /* invalid character range */
+#define REG_ESPACE 12 /* out of memory */
+#define REG_BADRPT 13 /* quantifier operand invalid */
+#define REG_ASSERT 15 /* "can't happen" -- you found a bug */
+#define REG_INVARG 16 /* invalid argument to regex function */
+#define REG_MIXED 17 /* character widths of regex and string differ */
+#define REG_BADOPT 18 /* invalid embedded option */
+#define REG_IMPOSS 19 /* can never match */
+/* two specials for debugging and testing */
+#define REG_ATOI 101 /* convert error-code name to number */
+#define REG_ITOA 102 /* convert error-code number to name */
+
+
+
+/*
+ * the prototypes, as possibly munched by regfwd
+ */
+/* =====^!^===== begin forwards =====^!^===== */
+/* automatically gathered by fwd; do not hand-edit */
+/* === regproto.h === */
+#ifndef __REG_NOCHAR
+int re_comp _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, int));
+#endif
+#ifndef __REG_NOFRONT
+int regcomp _ANSI_ARGS_((regex_t *, __REG_CONST char *, int));
+#endif
+#ifdef __REG_WIDE_T
+int __REG_WIDE_COMPILE _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int));
+#endif
+#ifndef __REG_NOCHAR
+int re_exec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
+#endif
+#ifndef __REG_NOFRONT
+int regexec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, regmatch_t [], int));
+#endif
+#ifdef __REG_WIDE_T
+int __REG_WIDE_EXEC _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
+#endif
+re_void regfree _ANSI_ARGS_((regex_t *));
+extern size_t regerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t));
+/* automatically gathered by fwd; do not hand-edit */
+/* =====^!^===== end forwards =====^!^===== */
+
+
+
+/*
+ * more C++ voodoo
+ */
+#ifdef __cplusplus
+}
+#endif
+
+
+
+#endif
diff --git a/generic/exec.c b/generic/regexec.c
index 92439aa..4220062 100644
--- a/generic/exec.c
+++ b/generic/regexec.c
@@ -1,43 +1,9 @@
/*
- * exec.c --
- *
- * Regexp package file: re_*exec and friends - match REs
- *
- * Copyright (c) 1998 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
- * whom are responsible for the results. The author thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Copyright (c) 1998 by Sun Microsystems, Inc.
- *
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: exec.c,v 1.1.2.2 1998/10/05 17:38:26 stanton Exp $
+ * re_*exec and friends - match REs
*/
-#include "tclInt.h"
-#include <assert.h>
-#include "tclRegexp.h"
-#include "chr.h"
-#include "guts.h"
+#include "regguts.h"
+
/* internal variables, bundled for easy passing around */
@@ -75,6 +41,7 @@ struct sset { /* state set */
int flags;
# define STARTER 01 /* the initial state set */
# define POSTSTATE 02 /* includes the goal state */
+# define LOCKED 04 /* locked in cache */
struct arcp ins; /* chain of inarcs pointing here */
chr *lastseen; /* last entered on arrival here */
struct sset **outs; /* outarc vector indexed by color */
@@ -95,6 +62,7 @@ struct dfa {
struct cnfa *cnfa;
struct colormap *cm;
chr *lastpost; /* location of last cache-flushed success */
+ struct sset *search; /* replacement-search-pointer memory */
};
#define CACHE 200
@@ -107,8 +75,8 @@ struct dfa {
*/
/* =====^!^===== begin forwards =====^!^===== */
/* automatically gathered by fwd; do not hand-edit */
-/* === exec.c === */
-int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, size_t, regmatch_t [], int));
+/* === regexec.c === */
+int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
static int find _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
static VOID zapmatches _ANSI_ARGS_((regmatch_t *, size_t));
@@ -130,13 +98,10 @@ static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colo
static VOID freedfa _ANSI_ARGS_((struct dfa *));
static unsigned hash _ANSI_ARGS_((unsigned *, int));
static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *));
-static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *));
+static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *));
static int lacon _ANSI_ARGS_((struct vars *, struct cnfa *, chr *, pcolor));
-static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *));
-static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *));
-/* === color.c === */
-union tree;
-static color getcolor _ANSI_ARGS_((struct colormap *, pchr));
+static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
+static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
/* automatically gathered by fwd; do not hand-edit */
/* =====^!^===== end forwards =====^!^===== */
@@ -144,13 +109,15 @@ static color getcolor _ANSI_ARGS_((struct colormap *, pchr));
/*
- exec - match regular expression
- ^ int exec(regex_t *, CONST chr *, size_t, size_t, regmatch_t [], int);
+ ^ int exec(regex_t *, CONST chr *, size_t, rm_detail_t *,
+ ^ size_t, regmatch_t [], int);
*/
int
-exec(re, string, len, nmatch, pmatch, flags)
+exec(re, string, len, details, nmatch, pmatch, flags)
regex_t *re;
CONST chr *string;
size_t len;
+rm_detail_t *details; /* hook for future elaboration */
size_t nmatch;
regmatch_t pmatch[];
int flags;
@@ -177,9 +144,9 @@ int flags;
if (v->g->cflags&REG_NOSUB)
nmatch = 0; /* override client */
v->nmatch = nmatch;
- if (complications && v->nmatch < (size_t)(v->g->nsub + 1)) {
+ if (complications && v->nmatch < v->g->nsub + 1) {
/* need work area bigger than what user gave us */
- v->pmatch = (regmatch_t *)ckalloc((v->g->nsub + 1) *
+ v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) *
sizeof(regmatch_t));
if (v->pmatch == NULL)
return REG_ESPACE;
@@ -190,10 +157,10 @@ int flags;
v->stop = (chr *)string + len;
v->err = 0;
if (complications) {
- v->mem1 = (regoff_t *)ckalloc(2*v->g->ntree*sizeof(regoff_t));
+ v->mem1 = (regoff_t *)MALLOC(2*v->g->ntree*sizeof(regoff_t));
if (v->mem1 == NULL) {
if (v->pmatch != pmatch)
- ckfree((char *)v->pmatch);
+ FREE(v->pmatch);
return REG_ESPACE;
}
v->mem2 = v->mem1 + v->g->ntree;
@@ -208,12 +175,12 @@ int flags;
if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
zapmatches(pmatch, nmatch);
n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
- memcpy((VOID *)pmatch, (VOID *)v->pmatch, n*sizeof(regmatch_t));
+ memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
}
if (v->pmatch != pmatch)
- ckfree((char *)v->pmatch);
+ FREE(v->pmatch);
if (v->mem1 != NULL)
- ckfree((char *)v->mem1);
+ FREE(v->mem1);
return st;
}
@@ -230,15 +197,14 @@ struct colormap *cm;
struct dfa *d = newdfa(v, cnfa, cm);
chr *begin;
chr *end;
- chr *stop = (cnfa->leftanch) ? v->start : v->stop;
+ chr *stop = (cnfa->flags&LEFTANCH) ? v->start : v->stop;
if (d == NULL)
return v->err;
for (begin = v->start; begin <= stop; begin++) {
- if (v->eflags&REG_MTRACE)
- printf("\ntrying at %ld\n", (long)OFF(begin));
- end = longest(v, d, begin, v->stop);
+ MDEBUG(("\ntrying at %ld\n", (long)OFF(begin)));
+ end = longest(v, d, begin, v->stop);
if (end != NULL) {
if (v->nmatch > 0) {
v->pmatch[0].rm_so = OFF(begin);
@@ -249,11 +215,15 @@ struct colormap *cm;
zapmatches(v->pmatch, v->nmatch);
return dissect(v, v->g->tree, begin, end);
}
+ if (ISERR())
+ return v->err;
return REG_OKAY;
}
}
freedfa(d);
+ if (ISERR())
+ return v->err;
return REG_NOMATCH;
}
@@ -270,7 +240,7 @@ struct colormap *cm;
struct dfa *d = newdfa(v, cnfa, cm);
chr *begin;
chr *end;
- chr *stop = (cnfa->leftanch) ? v->start : v->stop;
+ chr *stop = (cnfa->flags&LEFTANCH) ? v->start : v->stop;
chr *estop;
int er;
int usedis = (v->g->tree == NULL || v->g->tree->op == '|') ? 0 : 1;
@@ -281,12 +251,11 @@ struct colormap *cm;
if (!v->g->usedshorter)
usedis = 0;
for (begin = v->start; begin <= stop; begin++) {
- if (v->eflags&REG_MTRACE)
- printf("\ntrying at %ld\n", (long)OFF(begin));
+ MDEBUG(("\ntrying at %ld\n", (long)OFF(begin)));
if (usedis) {
v->mem = v->mem1;
zapmem(v, v->g->tree);
- }
+ }
estop = v->stop;
for (;;) {
if (usedis) {
@@ -296,8 +265,7 @@ struct colormap *cm;
end = longest(v, d, begin, estop);
if (end == NULL)
break; /* NOTE BREAK OUT */
- if (v->eflags&REG_MTRACE)
- printf("tentative end %ld\n", (long)OFF(end));
+ MDEBUG(("tentative end %ld\n", (long)OFF(end)));
zapmatches(v->pmatch, v->nmatch);
v->mem = v->mem2;
zapmem(v, v->g->tree);
@@ -309,7 +277,10 @@ struct colormap *cm;
v->pmatch[0].rm_eo = OFF(end);
}
freedfa(d);
+ if (ISERR())
+ return v->err;
return REG_OKAY;
+ break;
case REG_NOMATCH:
/* go around and try again */
if (!usedis) {
@@ -324,11 +295,14 @@ struct colormap *cm;
default:
freedfa(d);
return er;
+ break;
}
}
}
freedfa(d);
+ if (ISERR())
+ return v->err;
return REG_NOMATCH;
}
@@ -343,7 +317,7 @@ size_t n;
{
size_t i;
- for (i = 1; i < n; i++) {
+ for (i = n; i-- > 1; ) { /* visits n-1 down to 1; i is size_t, so starting at n-1 would wrap when n == 0 */
p[i].rm_so = -1;
p[i].rm_eo = -1;
}
@@ -399,8 +373,7 @@ chr *end;
if ((size_t)n >= v->nmatch)
return;
- if (v->eflags&REG_MTRACE)
- printf("setting %d\n", n);
+ MDEBUG(("setting %d\n", n));
v->pmatch[n].rm_so = OFF(begin);
v->pmatch[n].rm_eo = OFF(end);
}
@@ -423,8 +396,7 @@ chr *end; /* end of same */
if (rt == NULL)
return REG_OKAY;
- if (v->eflags&REG_MTRACE)
- printf("substring %ld-%ld\n", (long)OFF(begin), (long)OFF(end));
+ MDEBUG(("substring %ld-%ld\n", (long)OFF(begin), (long)OFF(end)));
/* alternatives -- punt to auxiliary */
if (rt->op == '|')
@@ -439,8 +411,7 @@ chr *end; /* end of same */
/* in some cases, there may be no right side... */
if (rt->right.cnfa.nstates == 0) {
- if (v->eflags&REG_MTRACE)
- printf("singleton\n");
+ MDEBUG(("singleton\n"));
if (longest(v, d, begin, end) != end) {
freedfa(d);
return REG_ASSERT;
@@ -466,16 +437,14 @@ chr *end; /* end of same */
freedfa(d2);
return REG_ASSERT;
}
- if (v->eflags&REG_MTRACE)
- printf("tentative midpoint %ld\n", (long)OFF(mid));
+ MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
/* iterate until satisfaction or failure */
while (longest(v, d2, mid, end) != end) {
/* that midpoint didn't work, find a new one */
if (mid == begin) {
/* all possibilities exhausted! */
- if (v->eflags&REG_MTRACE)
- printf("no midpoint!\n");
+ MDEBUG(("no midpoint!\n"));
freedfa(d);
freedfa(d2);
return REG_ASSERT;
@@ -483,19 +452,16 @@ chr *end; /* end of same */
mid = longest(v, d, begin, mid-1);
if (mid == NULL) {
/* failed to find a new one! */
- if (v->eflags&REG_MTRACE)
- printf("failed midpoint!\n");
+ MDEBUG(("failed midpoint!\n"));
freedfa(d);
freedfa(d2);
return REG_ASSERT;
}
- if (v->eflags&REG_MTRACE)
- printf("new midpoint %ld\n", (long)OFF(mid));
+ MDEBUG(("new midpoint %ld\n", (long)OFF(mid)));
}
/* satisfaction */
- if (v->eflags&REG_MTRACE)
- printf("successful\n");
+ MDEBUG(("successful\n"));
freedfa(d);
freedfa(d2);
assert(rt->left.subno >= 0);
@@ -526,15 +492,13 @@ chr *end; /* end of same */
assert(rt->op == '|');
for (i = 0; rt != NULL; rt = rt->next, i++) {
- if (v->eflags&REG_MTRACE)
- printf("trying %dth\n", i);
+ MDEBUG(("trying %dth\n", i));
assert(rt->left.begin != NULL);
d = newdfa(v, &rt->left.cnfa, v->g->cm);
if (ISERR())
return v->err;
if (longest(v, d, begin, end) == end) {
- if (v->eflags&REG_MTRACE)
- printf("success\n");
+ MDEBUG(("success\n"));
freedfa(d);
assert(rt->left.subno >= 0);
subset(v, &rt->left, begin, end);
@@ -565,8 +529,7 @@ chr *end; /* end of same */
if (rt == NULL)
return REG_OKAY;
- if (v->eflags&REG_MTRACE)
- printf("csubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end));
+ MDEBUG(("csubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end)));
/* punt various cases to auxiliaries */
if (rt->op == '|') /* alternatives */
@@ -590,8 +553,7 @@ chr *end; /* end of same */
freedfa(d);
return v->err;
}
- if (v->eflags&REG_MTRACE)
- printf("cconcat %d\n", rt->no);
+ MDEBUG(("cconcat %d\n", rt->no));
/* pick a tentative midpoint */
if (v->mem[rt->no] == 0) {
@@ -601,14 +563,12 @@ chr *end; /* end of same */
freedfa(d2);
return REG_NOMATCH;
}
- if (v->eflags&REG_MTRACE)
- printf("tentative midpoint %ld\n", (long)OFF(mid));
+ MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
subset(v, &rt->left, begin, mid);
v->mem[rt->no] = (mid - begin) + 1;
} else {
mid = begin + (v->mem[rt->no] - 1);
- if (v->eflags&REG_MTRACE)
- printf("working midpoint %ld\n", (long)OFF(mid));
+ MDEBUG(("working midpoint %ld\n", (long)OFF(mid)));
}
/* iterate until satisfaction or failure */
@@ -628,8 +588,7 @@ chr *end; /* end of same */
/* that midpoint didn't work, find a new one */
if (mid == begin) {
/* all possibilities exhausted */
- if (v->eflags&REG_MTRACE)
- printf("%d no midpoint\n", rt->no);
+ MDEBUG(("%d no midpoint\n", rt->no));
freedfa(d);
freedfa(d2);
return REG_NOMATCH;
@@ -637,15 +596,12 @@ chr *end; /* end of same */
mid = longest(v, d, begin, mid-1);
if (mid == NULL) {
/* failed to find a new one */
- if (v->eflags&REG_MTRACE)
- printf("%d failed midpoint\n", rt->no);
+ MDEBUG(("%d failed midpoint\n", rt->no));
freedfa(d);
freedfa(d2);
return REG_NOMATCH;
}
- if (v->eflags&REG_MTRACE)
- printf("%d: new midpoint %ld\n", rt->no,
- (long)OFF(mid));
+ MDEBUG(("%d: new midpoint %ld\n", rt->no, (long)OFF(mid)));
subset(v, &rt->left, begin, mid);
v->mem[rt->no] = (mid - begin) + 1;
zapmem(v, rt->left.tree);
@@ -653,8 +609,7 @@ chr *end; /* end of same */
}
/* satisfaction */
- if (v->eflags&REG_MTRACE)
- printf("successful\n");
+ MDEBUG(("successful\n"));
freedfa(d);
freedfa(d2);
subset(v, &rt->right, mid, end);
@@ -694,8 +649,7 @@ chr *end; /* end of same */
freedfa(d);
return v->err;
}
- if (v->eflags&REG_MTRACE)
- printf("crev %d\n", rt->no);
+ MDEBUG(("crev %d\n", rt->no));
/* pick a tentative midpoint */
if (v->mem[rt->no] == 0) {
@@ -705,14 +659,12 @@ chr *end; /* end of same */
freedfa(d2);
return REG_NOMATCH;
}
- if (v->eflags&REG_MTRACE)
- printf("tentative midpoint %ld\n", (long)OFF(mid));
+ MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
subset(v, &rt->left, begin, mid);
v->mem[rt->no] = (mid - begin) + 1;
} else {
mid = begin + (v->mem[rt->no] - 1);
- if (v->eflags&REG_MTRACE)
- printf("working midpoint %ld\n", (long)OFF(mid));
+ MDEBUG(("working midpoint %ld\n", (long)OFF(mid)));
}
/* iterate until satisfaction or failure */
@@ -732,8 +684,7 @@ chr *end; /* end of same */
/* that midpoint didn't work, find a new one */
if (mid == end) {
/* all possibilities exhausted */
- if (v->eflags&REG_MTRACE)
- printf("%d no midpoint\n", rt->no);
+ MDEBUG(("%d no midpoint\n", rt->no));
freedfa(d);
freedfa(d2);
return REG_NOMATCH;
@@ -741,15 +692,12 @@ chr *end; /* end of same */
mid = shortest(v, d, begin, mid+1, end);
if (mid == NULL) {
/* failed to find a new one */
- if (v->eflags&REG_MTRACE)
- printf("%d failed midpoint\n", rt->no);
+ MDEBUG(("%d failed midpoint\n", rt->no));
freedfa(d);
freedfa(d2);
return REG_NOMATCH;
}
- if (v->eflags&REG_MTRACE)
- printf("%d: new midpoint %ld\n", rt->no,
- (long)OFF(mid));
+ MDEBUG(("%d: new midpoint %ld\n", rt->no, (long)OFF(mid)));
subset(v, &rt->left, begin, mid);
v->mem[rt->no] = (mid - begin) + 1;
zapmem(v, rt->left.tree);
@@ -757,8 +705,7 @@ chr *end; /* end of same */
}
/* satisfaction */
- if (v->eflags&REG_MTRACE)
- printf("successful\n");
+ MDEBUG(("successful\n"));
freedfa(d);
freedfa(d2);
subset(v, &rt->right, mid, end);
@@ -782,8 +729,7 @@ chr *end; /* end of same */
assert(rt != NULL);
assert(rt->op == ',');
assert(rt->right.cnfa.nstates == 0);
- if (v->eflags&REG_MTRACE)
- printf("csingleton %d\n", rt->no);
+ MDEBUG(("csingleton %d\n", rt->no));
assert(rt->left.cnfa.nstates > 0);
@@ -796,8 +742,7 @@ chr *end; /* end of same */
}
freedfa(d);
v->mem[rt->no] = 1;
- if (v->eflags&REG_MTRACE)
- printf("csingleton matched\n");
+ MDEBUG(("csingleton matched\n"));
}
er = cdissect(v, rt->left.tree, begin, end);
@@ -830,10 +775,10 @@ chr *end; /* end of same */
assert(rt != NULL);
assert(rt->op == 'b');
assert(rt->right.cnfa.nstates == 0);
+ assert(n >= 0);
assert((size_t)n < v->nmatch);
- if (v->eflags&REG_MTRACE)
- printf("cbackref n%d %d{%d-%d}\n", rt->no, n, min, max);
+ MDEBUG(("cbackref n%d %d{%d-%d}\n", rt->no, n, min, max));
if (v->pmatch[n].rm_so == -1)
return REG_NOMATCH;
@@ -853,6 +798,7 @@ chr *end; /* end of same */
}
/* and too-short string */
+ assert(end >= begin);
if ((size_t)(end - begin) < len)
return REG_NOMATCH;
stop = end - len;
@@ -864,8 +810,7 @@ chr *end; /* end of same */
break;
i++;
}
- if (v->eflags&REG_MTRACE)
- printf("cbackref found %d\n", i);
+ MDEBUG(("cbackref found %d\n", i));
/* and sort it out */
if (p != end) /* didn't consume all of it */
@@ -898,8 +843,7 @@ chr *end; /* end of same */
if (v->mem[rt->no] == TRIED)
return caltdissect(v, rt->next, begin, end);
- if (v->eflags&REG_MTRACE)
- printf("calt n%d\n", rt->no);
+ MDEBUG(("calt n%d\n", rt->no));
assert(rt->left.begin != NULL);
if (v->mem[rt->no] == UNTRIED) {
@@ -912,8 +856,7 @@ chr *end; /* end of same */
return caltdissect(v, rt->next, begin, end);
}
freedfa(d);
- if (v->eflags&REG_MTRACE)
- printf("calt matched\n");
+ MDEBUG(("calt matched\n"));
v->mem[rt->no] = TRYING;
}
@@ -949,8 +892,7 @@ chr *end; /* end of same */
if (rt == NULL)
return begin;
- if (v->eflags&REG_MTRACE)
- printf("dsubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end));
+ MDEBUG(("dsubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end)));
/* punt various cases to auxiliaries */
if (rt->right.cnfa.nstates == 0) /* no RHS */
@@ -970,8 +912,7 @@ chr *end; /* end of same */
freedfa(d);
return NULL;
}
- if (v->eflags&REG_MTRACE)
- printf("dconcat %d\n", rt->no);
+ MDEBUG(("dconcat %d\n", rt->no));
/* pick a tentative midpoint */
if (v->mem[rt->no] == 0) {
@@ -981,13 +922,11 @@ chr *end; /* end of same */
freedfa(d2);
return NULL;
}
- if (v->eflags&REG_MTRACE)
- printf("tentative midpoint %ld\n", (long)OFF(mid));
+ MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
v->mem[rt->no] = (mid - begin) + 1;
} else {
mid = begin + (v->mem[rt->no] - 1);
- if (v->eflags&REG_MTRACE)
- printf("working midpoint %ld\n", (long)OFF(mid));
+ MDEBUG(("working midpoint %ld\n", (long)OFF(mid)));
}
/* iterate until satisfaction or failure */
@@ -1010,8 +949,7 @@ chr *end; /* end of same */
/* that midpoint didn't work, find a new one */
if (mid == begin) {
/* all possibilities exhausted */
- if (v->eflags&REG_MTRACE)
- printf("%d no midpoint\n", rt->no);
+ MDEBUG(("%d no midpoint\n", rt->no));
freedfa(d);
freedfa(d2);
return NULL;
@@ -1019,22 +957,18 @@ chr *end; /* end of same */
mid = longest(v, d, begin, mid-1);
if (mid == NULL) {
/* failed to find a new one */
- if (v->eflags&REG_MTRACE)
- printf("%d failed midpoint\n", rt->no);
+ MDEBUG(("%d failed midpoint\n", rt->no));
freedfa(d);
freedfa(d2);
return NULL;
}
- if (v->eflags&REG_MTRACE)
- printf("%d: new midpoint %ld\n", rt->no,
- (long)OFF(mid));
+ MDEBUG(("%d: new midpoint %ld\n", rt->no, (long)OFF(mid)));
v->mem[rt->no] = (mid - begin) + 1;
zapmem(v, rt->right.tree);
}
/* satisfaction */
- if (v->eflags&REG_MTRACE)
- printf("successful\n");
+ MDEBUG(("successful\n"));
freedfa(d);
freedfa(d2);
return ret;
@@ -1060,8 +994,7 @@ chr *end; /* end of same */
if (rt == NULL)
return begin;
- if (v->eflags&REG_MTRACE)
- printf("rsubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end));
+ MDEBUG(("rsubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end)));
/* concatenation -- need to split the substring between parts */
assert(rt->op == ',');
@@ -1075,8 +1008,7 @@ chr *end; /* end of same */
freedfa(d);
return NULL;
}
- if (v->eflags&REG_MTRACE)
- printf("dconcat %d\n", rt->no);
+ MDEBUG(("dconcat %d\n", rt->no));
/* pick a tentative midpoint */
if (v->mem[rt->no] == 0) {
@@ -1086,13 +1018,11 @@ chr *end; /* end of same */
freedfa(d2);
return NULL;
}
- if (v->eflags&REG_MTRACE)
- printf("tentative midpoint %ld\n", (long)OFF(mid));
+ MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid)));
v->mem[rt->no] = (mid - begin) + 1;
} else {
mid = begin + (v->mem[rt->no] - 1);
- if (v->eflags&REG_MTRACE)
- printf("working midpoint %ld\n", (long)OFF(mid));
+ MDEBUG(("working midpoint %ld\n", (long)OFF(mid)));
}
/* iterate until satisfaction or failure */
@@ -1115,8 +1045,7 @@ chr *end; /* end of same */
/* that midpoint didn't work, find a new one */
if (mid == end) {
/* all possibilities exhausted */
- if (v->eflags&REG_MTRACE)
- printf("%d no midpoint\n", rt->no);
+ MDEBUG(("%d no midpoint\n", rt->no));
freedfa(d);
freedfa(d2);
return NULL;
@@ -1124,22 +1053,18 @@ chr *end; /* end of same */
mid = shortest(v, d, begin, mid+1, end);
if (mid == NULL) {
/* failed to find a new one */
- if (v->eflags&REG_MTRACE)
- printf("%d failed midpoint\n", rt->no);
+ MDEBUG(("%d failed midpoint\n", rt->no));
freedfa(d);
freedfa(d2);
return NULL;
}
- if (v->eflags&REG_MTRACE)
- printf("%d: new midpoint %ld\n", rt->no,
- (long)OFF(mid));
+ MDEBUG(("%d: new midpoint %ld\n", rt->no, (long)OFF(mid)));
v->mem[rt->no] = (mid - begin) + 1;
zapmem(v, rt->right.tree);
}
/* satisfaction */
- if (v->eflags&REG_MTRACE)
- printf("successful\n");
+ MDEBUG(("successful\n"));
freedfa(d);
freedfa(d2);
return ret;
@@ -1162,8 +1087,7 @@ chr *end; /* end of same */
assert(rt != NULL);
assert(rt->op == ',');
assert(rt->right.cnfa.nstates == 0);
- if (v->eflags&REG_MTRACE)
- printf("dsingleton %d\n", rt->no);
+ MDEBUG(("dsingleton %d\n", rt->no));
assert(rt->left.cnfa.nstates > 0);
@@ -1180,8 +1104,8 @@ chr *end; /* end of same */
else
ret = shortest(v, d, begin, begin, end);
freedfa(d);
- if (ret != NULL && (v->eflags&REG_MTRACE))
- printf("dsingleton matched\n");
+ if (ret != NULL)
+ MDEBUG(("dsingleton matched\n"));
return ret;
}
@@ -1210,18 +1134,15 @@ chr *stop; /* match must end at or before here */
cp = start;
/* startup */
- if (v->eflags&REG_FTRACE)
- printf("+++ startup +++\n");
+ FDEBUG(("+++ startup +++\n"));
if (cp == v->start) {
co = d->cnfa->bos[(v->eflags&REG_NOTBOL) ? 0 : 1];
- if (v->eflags&REG_FTRACE)
- printf("color %ld\n", (long)co);
+ FDEBUG(("color %ld\n", (long)co));
} else {
- co = getcolor(cm, *(cp - 1));
- if (v->eflags&REG_FTRACE)
- printf("char %c, color %ld\n", (char)*(cp-1), (long)co);
+ co = GETCOLOR(cm, *(cp - 1));
+ FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co));
}
- css = miss(v, d, css, co, cp);
+ css = miss(v, d, css, co, cp, start);
if (css == NULL)
return NULL;
css->lastseen = cp;
@@ -1229,12 +1150,12 @@ chr *stop; /* match must end at or before here */
/* main loop */
if (v->eflags&REG_FTRACE)
while (cp < realstop) {
- printf("+++ at c%d +++\n", css - d->ssets);
- co = getcolor(cm, *cp);
- printf("char %c, color %ld\n", (char)*cp, (long)co);
+ FDEBUG(("+++ at c%d +++\n", css - d->ssets));
+ co = GETCOLOR(cm, *cp);
+ FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co));
ss = css->outs[co];
if (ss == NULL) {
- ss = miss(v, d, css, co, cp);
+ ss = miss(v, d, css, co, cp+1, start);
if (ss == NULL)
break; /* NOTE BREAK OUT */
}
@@ -1244,10 +1165,10 @@ chr *stop; /* match must end at or before here */
}
else
while (cp < realstop) {
- co = getcolor(cm, *cp);
+ co = GETCOLOR(cm, *cp);
ss = css->outs[co];
if (ss == NULL) {
- ss = miss(v, d, css, co, cp+1);
+ ss = miss(v, d, css, co, cp+1, start);
if (ss == NULL)
break; /* NOTE BREAK OUT */
}
@@ -1257,13 +1178,11 @@ chr *stop; /* match must end at or before here */
}
/* shutdown */
- if (v->eflags&REG_FTRACE)
- printf("+++ shutdown at c%d +++\n", css - d->ssets);
+ FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets));
if (cp == v->stop && stop == v->stop) {
co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1];
- if (v->eflags&REG_FTRACE)
- printf("color %ld\n", (long)co);
- ss = miss(v, d, css, co, cp);
+ FDEBUG(("color %ld\n", (long)co));
+ ss = miss(v, d, css, co, cp, start);
/* special case: match ended at eol? */
if (ss != NULL && (ss->flags&POSTSTATE))
return cp;
@@ -1300,7 +1219,7 @@ chr *max; /* match must end at or before here */
chr *realmax = (max == v->stop) ? max : max + 1;
color co;
struct sset *css;
- struct sset *ss = NULL;
+ struct sset *ss;
struct colormap *cm = d->cm;
/* initialize */
@@ -1308,31 +1227,29 @@ chr *max; /* match must end at or before here */
cp = start;
/* startup */
- if (v->eflags&REG_FTRACE)
- printf("--- startup ---\n");
+ FDEBUG(("--- startup ---\n"));
if (cp == v->start) {
co = d->cnfa->bos[(v->eflags&REG_NOTBOL) ? 0 : 1];
- if (v->eflags&REG_FTRACE)
- printf("color %ld\n", (long)co);
+ FDEBUG(("color %ld\n", (long)co));
} else {
- co = getcolor(cm, *(cp - 1));
- if (v->eflags&REG_FTRACE)
- printf("char %c, color %ld\n", (char)*(cp-1), (long)co);
+ co = GETCOLOR(cm, *(cp - 1));
+ FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co));
}
- css = miss(v, d, css, co, cp);
+ css = miss(v, d, css, co, cp, start);
if (css == NULL)
return NULL;
css->lastseen = cp;
+ ss = css;
/* main loop */
if (v->eflags&REG_FTRACE)
while (cp < realmax) {
- printf("--- at c%d ---\n", css - d->ssets);
- co = getcolor(cm, *cp);
- printf("char %c, color %ld\n", (char)*cp, (long)co);
+ FDEBUG(("--- at c%d ---\n", css - d->ssets));
+ co = GETCOLOR(cm, *cp);
+ FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co));
ss = css->outs[co];
if (ss == NULL) {
- ss = miss(v, d, css, co, cp);
+ ss = miss(v, d, css, co, cp+1, start);
if (ss == NULL)
break; /* NOTE BREAK OUT */
}
@@ -1344,10 +1261,10 @@ chr *max; /* match must end at or before here */
}
else
while (cp < realmax) {
- co = getcolor(cm, *cp);
+ co = GETCOLOR(cm, *cp);
ss = css->outs[co];
if (ss == NULL) {
- ss = miss(v, d, css, co, cp+1);
+ ss = miss(v, d, css, co, cp+1, start);
if (ss == NULL)
break; /* NOTE BREAK OUT */
}
@@ -1366,13 +1283,11 @@ chr *max; /* match must end at or before here */
}
/* shutdown */
- if (v->eflags&REG_FTRACE)
- printf("--- shutdown at c%d ---\n", css - d->ssets);
+ FDEBUG(("--- shutdown at c%d ---\n", css - d->ssets));
if (cp == v->stop && max == v->stop) {
co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1];
- if (v->eflags&REG_FTRACE)
- printf("color %ld\n", (long)co);
- ss = miss(v, d, css, co, cp);
+ FDEBUG(("color %ld\n", (long)co));
+ ss = miss(v, d, css, co, cp, start);
/* special case: match ended at eol? */
if (ss != NULL && (ss->flags&POSTSTATE))
return cp;
@@ -1392,7 +1307,7 @@ struct vars *v;
struct cnfa *cnfa;
struct colormap *cm;
{
- struct dfa *d = (struct dfa *)ckalloc(sizeof(struct dfa));
+ struct dfa *d = (struct dfa *)MALLOC(sizeof(struct dfa));
int wordsper = (cnfa->nstates + UBITS - 1) / UBITS;
struct sset *ss;
int i;
@@ -1403,13 +1318,13 @@ struct colormap *cm;
return NULL;
}
- d->ssets = (struct sset *)ckalloc(CACHE * sizeof(struct sset));
- d->statesarea = (unsigned *)ckalloc((CACHE+WORK) * wordsper *
+ d->ssets = (struct sset *)MALLOC(CACHE * sizeof(struct sset));
+ d->statesarea = (unsigned *)MALLOC((CACHE+WORK) * wordsper *
sizeof(unsigned));
d->work = &d->statesarea[CACHE * wordsper];
- d->outsarea = (struct sset **)ckalloc(CACHE * cnfa->ncolors *
+ d->outsarea = (struct sset **)MALLOC(CACHE * cnfa->ncolors *
sizeof(struct sset *));
- d->incarea = (struct arcp *)ckalloc(CACHE * cnfa->ncolors *
+ d->incarea = (struct arcp *)MALLOC(CACHE * cnfa->ncolors *
sizeof(struct arcp));
if (d->ssets == NULL || d->statesarea == NULL || d->outsarea == NULL ||
d->incarea == NULL) {
@@ -1426,6 +1341,7 @@ struct colormap *cm;
d->cnfa = cnfa;
d->cm = cm;
d->lastpost = NULL;
+ d->search = d->ssets;
for (ss = d->ssets, i = 0; i < d->nssets; ss++, i++) {
/* initialization of most fields is done as needed */
@@ -1446,14 +1362,14 @@ freedfa(d)
struct dfa *d;
{
if (d->ssets != NULL)
- ckfree((char *)d->ssets);
+ FREE(d->ssets);
if (d->statesarea != NULL)
- ckfree((char *)d->statesarea);
+ FREE(d->statesarea);
if (d->outsarea != NULL)
- ckfree((char *)d->outsarea);
+ FREE(d->outsarea);
if (d->incarea != NULL)
- ckfree((char *)d->incarea);
- ckfree((char *)d);
+ FREE(d->incarea);
+ FREE(d);
}
/*
@@ -1492,7 +1408,7 @@ chr *start;
if (d->nssused > 0 && (d->ssets[0].flags&STARTER))
ss = &d->ssets[0];
else { /* no, must (re)build it */
- ss = getvacant(v, d);
+ ss = getvacant(v, d, start, start);
for (i = 0; i < d->wordsper; i++)
ss->states[i] = 0;
BSET(ss->states, d->cnfa->pre);
@@ -1512,15 +1428,16 @@ chr *start;
/*
- miss - handle a cache miss
^ static struct sset *miss(struct vars *, struct dfa *, struct sset *,
- ^ pcolor, chr *);
+ ^ pcolor, chr *, chr *);
*/
static struct sset * /* NULL if goes to empty set */
-miss(v, d, css, co, cp)
+miss(v, d, css, co, cp, start)
struct vars *v; /* used only for debug flags */
struct dfa *d;
struct sset *css;
pcolor co;
chr *cp; /* next chr */
+chr *start; /* where the attempt got started */
{
struct cnfa *cnfa = d->cnfa;
int i;
@@ -1534,12 +1451,10 @@ chr *cp; /* next chr */
/* for convenience, we can be called even if it might not be a miss */
if (css->outs[co] != NULL) {
- if (v->eflags&REG_FTRACE)
- printf("hit\n");
+ FDEBUG(("hit\n"));
return css->outs[co];
}
- if (v->eflags&REG_FTRACE)
- printf("miss\n");
+ FDEBUG(("miss\n"));
/* first, what set of states would we end up in? */
for (i = 0; i < d->wordsper; i++)
@@ -1554,10 +1469,9 @@ chr *cp; /* next chr */
gotstate = 1;
if (ca->to == cnfa->post)
ispost = 1;
- if (v->eflags&REG_FTRACE)
- printf("%d -> %d\n", i, ca->to);
+ FDEBUG(("%d -> %d\n", i, ca->to));
}
- dolacons = (gotstate) ? cnfa->haslacons : 0;
+ dolacons = (gotstate) ? (cnfa->flags&HASLACONS) : 0;
didlacons = 0;
while (dolacons) { /* transitive closure */
dolacons = 0;
@@ -1574,9 +1488,7 @@ chr *cp; /* next chr */
didlacons = 1;
if (ca->to == cnfa->post)
ispost = 1;
- if (v->eflags&REG_FTRACE)
- printf("%d :-> %d\n",
- i, ca->to);
+ FDEBUG(("%d :> %d\n",i,ca->to));
}
}
if (!gotstate)
@@ -1585,14 +1497,13 @@ chr *cp; /* next chr */
/* next, is that in the cache? */
for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
- if (p->hash == h && memcmp((VOID *)d->work, (VOID *)p->states,
+ if (p->hash == h && memcmp(VS(d->work), VS(p->states),
d->wordsper*sizeof(unsigned)) == 0) {
- if (v->eflags&REG_FTRACE)
- printf("cached c%d\n", p - d->ssets);
+ FDEBUG(("cached c%d\n", p - d->ssets));
break; /* NOTE BREAK OUT */
}
if (i == 0) { /* nope, need a new cache entry */
- p = getvacant(v, d);
+ p = getvacant(v, d, cp, start);
assert(p != css);
for (i = 0; i < d->wordsper; i++)
p->states[i] = d->work[i];
@@ -1605,7 +1516,7 @@ chr *cp; /* next chr */
css->outs[co] = p;
css->inchain[co] = p->ins;
p->ins.ss = css;
- p->ins.co = (color) co;
+ p->ins.co = (color)co;
}
return p;
}
@@ -1615,10 +1526,10 @@ chr *cp; /* next chr */
^ static int lacon(struct vars *, struct cnfa *, chr *, pcolor);
*/
static int /* predicate: constraint satisfied? */
-lacon(v, pcnfa, precp, co)
+lacon(v, pcnfa, cp, co)
struct vars *v;
struct cnfa *pcnfa; /* parent cnfa */
-chr *precp; /* points to previous chr */
+chr *cp;
pcolor co; /* "color" of the lookahead constraint */
{
int n;
@@ -1628,18 +1539,16 @@ pcolor co; /* "color" of the lookahead constraint */
n = co - pcnfa->ncolors;
assert(n < v->g->nlacons && v->g->lacons != NULL);
- if (v->eflags&REG_FTRACE)
- printf("=== testing lacon %d\n", n);
+ FDEBUG(("=== testing lacon %d\n", n));
sub = &v->g->lacons[n];
d = newdfa(v, &sub->cnfa, v->g->cm);
if (d == NULL) {
ERR(REG_ESPACE);
return 0;
}
- end = longest(v, d, precp, v->stop);
+ end = longest(v, d, cp, v->stop);
freedfa(d);
- if (v->eflags&REG_FTRACE)
- printf("=== lacon %d match %d\n", n, (end != NULL));
+ FDEBUG(("=== lacon %d match %d\n", n, (end != NULL)));
return (sub->subno) ? (end != NULL) : (end == NULL);
}
@@ -1647,12 +1556,14 @@ pcolor co; /* "color" of the lookahead constraint */
- getvacant - get a vacant state set
* This routine clears out the inarcs and outarcs, but does not otherwise
* clear the innards of the state set -- that's up to the caller.
- ^ static struct sset *getvacant(struct vars *, struct dfa *);
+ ^ static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
*/
static struct sset *
-getvacant(v, d)
+getvacant(v, d, cp, start)
struct vars *v; /* used only for debug flags */
struct dfa *d;
+chr *cp;
+chr *start;
{
int i;
struct sset *ss;
@@ -1661,15 +1572,14 @@ struct dfa *d;
struct arcp lastap;
color co;
- ss = pickss(v, d);
+ ss = pickss(v, d, cp, start);
+ assert(!(ss->flags&LOCKED));
/* clear out its inarcs, including self-referential ones */
ap = ss->ins;
while ((p = ap.ss) != NULL) {
co = ap.co;
- if (v->eflags&REG_FTRACE)
- printf("zapping c%d's %ld outarc\n", p - d->ssets,
- (long)co);
+ FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long)co));
p->outs[co] = NULL;
ap = p->inchain[co];
p->inchain[co].ss = NULL; /* paranoia */
@@ -1682,9 +1592,7 @@ struct dfa *d;
assert(p != ss); /* not self-referential */
if (p == NULL)
continue; /* NOTE CONTINUE */
- if (v->eflags&REG_FTRACE)
- printf("deleting outarc %d from c%d's inarc chain\n",
- i, p - d->ssets);
+ FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets));
if (p->ins.ss == ss && p->ins.co == i)
p->ins = ss->inchain[i];
else {
@@ -1710,23 +1618,25 @@ struct dfa *d;
/*
- pickss - pick the next stateset to be used
- ^ static struct sset *pickss(struct vars *, struct dfa *);
+ ^ static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
*/
static struct sset *
-pickss(v, d)
+pickss(v, d, cp, start)
struct vars *v; /* used only for debug flags */
struct dfa *d;
+chr *cp;
+chr *start;
{
int i;
struct sset *ss;
- struct sset *oldest;
+ struct sset *end;
+ chr *ancient;
/* shortcut for cases where cache isn't full */
if (d->nssused < d->nssets) {
ss = &d->ssets[d->nssused];
d->nssused++;
- if (v->eflags&REG_FTRACE)
- printf("new c%d\n", ss - d->ssets);
+ FDEBUG(("new c%d\n", ss - d->ssets));
/* must make innards consistent */
ss->ins.ss = NULL;
for (i = 0; i < d->ncolors; i++) {
@@ -1734,21 +1644,32 @@ struct dfa *d;
ss->inchain[i].ss = NULL;
}
ss->flags = 0;
- ss->ins.co = 0;
return ss;
}
- /* look for oldest */
- oldest = d->ssets;
- for (ss = d->ssets, i = d->nssets; i > 0; ss++, i--) {
- if (ss->lastseen != oldest->lastseen && (ss->lastseen == NULL ||
- ss->lastseen < oldest->lastseen))
- oldest = ss;
- }
- if (v->eflags&REG_FTRACE)
- printf("replacing c%d\n", oldest - d->ssets);
- return oldest;
-}
+ /* look for oldest, or old enough anyway */
+ if (cp - start > d->nssets*3/4) /* oldest 25% are expendable */
+ ancient = cp - d->nssets*3/4;
+ else
+ ancient = start;
+ for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++)
+ if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
+ !(ss->flags&LOCKED)) {
+ d->search = ss + 1;
+ FDEBUG(("replacing c%d\n", ss - d->ssets));
+ return ss;
+ }
+ for (ss = d->ssets, end = d->search; ss < end; ss++)
+ if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
+ !(ss->flags&LOCKED)) {
+ d->search = ss + 1;
+ FDEBUG(("replacing c%d\n", ss - d->ssets));
+ return ss;
+ }
-#define EXEC 1
-#include "color.c"
+ /* nobody's old enough?!? -- something's really wrong */
+ FDEBUG(("can't find victim to replace!\n"));
+ assert(NOTREACHED);
+ ERR(REG_ASSERT);
+ return d->ssets;
+}
diff --git a/generic/regfree.c b/generic/regfree.c
new file mode 100644
index 0000000..a5c3f0b
--- /dev/null
+++ b/generic/regfree.c
@@ -0,0 +1,25 @@
+/*
+ * regfree - free an RE
+ *
+ * You might think that this could be incorporated into regcomp.c, and
+ * that would be a reasonable idea... except that this is a generic
+ * function (with a generic name), applicable to all compiled REs
+ * regardless of the size of their characters, whereas the stuff in
+ * regcomp.c gets compiled once per character size.
+ */
+
+#include "regguts.h"
+
+/*
+ - regfree - free an RE (generic function, punts to RE-specific function)
+ *
+ * Ignoring invocation with NULL is a convenience.
+ */
+VOID
+regfree(re)
+regex_t *re;
+{
+ if (re == NULL)
+ return;
+ (*((struct fns *)re->re_fns)->free)(re);
+}
diff --git a/generic/regfronts.c b/generic/regfronts.c
new file mode 100644
index 0000000..a9bd556
--- /dev/null
+++ b/generic/regfronts.c
@@ -0,0 +1,56 @@
+/*
+ * regcomp and regexec - front ends to re_ routines
+ *
+ * Mostly for implementation of backward-compatibility kludges. Note
+ * that these routines exist ONLY in char versions.
+ */
+
+#include "regguts.h"
+
+/*
+ - regcomp - compile regular expression
+ */
+int
+regcomp(re, str, flags)
+regex_t *re;
+CONST char *str;
+int flags;
+{
+ size_t len;
+ int f = flags;
+
+ if (f&REG_PEND) {
+ len = re->re_endp - str;
+ f &= ~REG_PEND;
+ } else
+ len = strlen(str);
+
+ return re_comp(re, str, len, f);
+}
+
+/*
+ - regexec - execute regular expression
+ */
+int
+regexec(re, str, nmatch, pmatch, flags)
+regex_t *re;
+CONST char *str;
+size_t nmatch;
+regmatch_t pmatch[];
+int flags;
+{
+ CONST char *start;
+ size_t len;
+ int f = flags;
+
+ if (f&REG_STARTEND) {
+ start = str + pmatch[0].rm_so;
+ len = pmatch[0].rm_eo - pmatch[0].rm_so;
+ f &= ~REG_STARTEND;
+ } else {
+ start = str;
+ len = strlen(str);
+ }
+
+ return re_exec(re, start, len, nmatch, pmatch, f);
+}
diff --git a/generic/guts.h b/generic/regguts.h
index 7b847ac..1490d44 100644
--- a/generic/guts.h
+++ b/generic/regguts.h
@@ -1,54 +1,119 @@
/*
- * guts.h --
- *
- * Regexp package file: Misc. utilities.
- *
- * Copyright (c) 1998 Henry Spencer. All rights reserved.
- *
- * Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
- * whom are responsible for the results. The author thanks all of them.
- *
- * Redistribution and use in source and binary forms -- with or without
- * modification -- are permitted for any purpose, provided that
- * redistributions in source form retain this entire copyright notice and
- * indicate the origin and nature of any modifications.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Copyright (c) 1998 by Sun Microsystems, Inc.
- *
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: guts.h,v 1.1.2.2 1998/10/03 01:56:40 stanton Exp $
+ * Internal interface definitions, etc., for the regex package
*/
-#include "tclInt.h"
-#define NOTREACHED 0
-#define xxx 1
+/*
+ * Environmental customization. It should not (I hope) be necessary to
+ * alter the file you are now reading -- regcustom.h should handle it all,
+ * given care here and elsewhere.
+ */
+#include "regcustom.h"
+
+
+
+/*
+ * Things that regcustom.h might override.
+ */
+
+/* standard header files (NULL is a reasonable indicator for them) */
+#ifndef NULL
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <limits.h>
+#include <string.h>
+#endif
+
+/* assertions */
+#ifndef assert
+#include <assert.h>
+#endif
+
+/* voids */
+#ifndef VOID
+#define VOID void /* for function return values */
+#endif
+#ifndef DISCARD
+#define DISCARD VOID /* for throwing values away */
+#endif
+#ifndef PVOID
+#define PVOID VOID * /* generic pointer */
+#endif
+#ifndef VS
+#define VS(x) ((PVOID)(x)) /* cast something to generic ptr */
+#endif
+#ifndef NOPARMS
+#define NOPARMS VOID /* for empty parm lists */
+#endif
+
+/* function-pointer declarator */
+#ifndef FUNCPTR
+#if __STDC__ >= 1
+#define FUNCPTR(name, args) (*name)args
+#else
+#define FUNCPTR(name, args) (*name)()
+#endif
+#endif
+
+/* memory allocation */
+#ifndef MALLOC
+#define MALLOC(n) malloc(n)
+#endif
+#ifndef REALLOC
+#define REALLOC(p, n) realloc(VS(p), n)
+#endif
+#ifndef FREE
+#define FREE(p) free(VS(p))
+#endif
+
+/* want size of a char in bits, and max value in bounded quantifiers */
+#ifndef CHAR_BIT
+#include <limits.h>
+#endif
#ifndef _POSIX2_RE_DUP_MAX
-#define _POSIX2_RE_DUP_MAX 255
+#define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */
#endif
+
+
+
+/*
+ * misc
+ */
+
+#define NOTREACHED 0
+#define xxx 1
+
#define DUPMAX _POSIX2_RE_DUP_MAX
#define INFINITY (DUPMAX+1)
-/* bitmap manipulation */
+#define REMAGIC 0xfed7 /* magic number for main struct */
+
+
+
+/*
+ * debugging facilities
+ */
+#ifdef REG_DEBUG
+#define FDEBUG(arglist) { if (v->eflags&REG_FTRACE) printf arglist; }
+#define MDEBUG(arglist) { if (v->eflags&REG_MTRACE) printf arglist; }
+#else
+#define FDEBUG(arglist) {}
+#define MDEBUG(arglist) {}
+#endif
+
+
+
+/*
+ * bitmap manipulation
+ */
#define UBITS (CHAR_BIT * sizeof(unsigned))
#define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS))
#define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS)))
+
+
/*
* Map a truth value into -1 for false, 1 for true. This is so it is
* possible to write compile-time assertions by declaring a dummy array
@@ -56,14 +121,13 @@
*/
#define NEGIFNOT(x) (2*!!(x) - 1) /* !! ensures 0 or 1 */
+
+
/*
* We dissect a chr into byts for colormap table indexing. Here we define
* a byt, which will be the same as a byte on most machines... The exact
* size of a byt is not critical, but about 8 bits is good, and extraction
* of 8-bit chunks is sometimes especially fast.
- *
- * Changes in several places are needed to handle an increase in MAXBYTS.
- * Those places check whether MAXBYTS is larger than they expect.
*/
#ifndef BYTBITS
#define BYTBITS 8 /* bits in a byt */
@@ -71,7 +135,9 @@
#define BYTTAB (1<<BYTBITS) /* size of table with one entry per byt value */
#define BYTMASK (BYTTAB-1) /* bit mask for byt */
#define NBYTS ((CHRBITS+BYTBITS-1)/BYTBITS)
-#define MAXBYTS 8 /* maximum NBYTS the code can handle */
+/* GETCOLOR(), below, is defined only for NBYTS of 1, 2, or 4 */
+
+
/*
* As soon as possible, we map chrs into equivalence classes -- "colors" --
@@ -81,22 +147,96 @@ typedef short color; /* colors of characters */
typedef int pcolor; /* what color promotes to */
#define COLORLESS (-1) /* impossible color */
#define WHITE 0 /* default color, parent of all others */
-struct colormap; /* forward def for master type */
+
+
/*
- * Interface definitions for locale-interface functions in locale.c
+ * A colormap is a tree -- more precisely, a DAG -- indexed at each level
+ * by a byt of the chr, to map the chr to a color efficiently. Because
+ * lower sections of the tree can be shared, it can exploit the usual
+ * sparseness of such a mapping table. The final tree is always NBYTS
+ * levels deep (at present it may be shallower during construction, but
+ * it is always "filled" to full depth at the end of that, using pointers
+ * to "fill blocks" which are entirely WHITE in color).
+ */
+
+/* the tree itself */
+struct colors {
+ color ccolor[BYTTAB];
+};
+struct ptrs {
+ union tree *pptr[BYTTAB];
+};
+union tree {
+ struct colors colors;
+ struct ptrs ptrs;
+};
+#define tcolor colors.ccolor
+#define tptr ptrs.pptr
+
+/* internal per-color structure for the color machinery */
+struct colordesc {
+ uchr nchrs; /* number of chars of this color */
+ color sub; /* open subcolor of this one, or NOSUB */
+# define NOSUB COLORLESS
+ struct arc *arcs; /* color chain */
+# define UNUSEDCOLOR(cd) ((cd)->nchrs == 0 && (cd)->sub == NOSUB)
+ int flags;
+# define PSEUDO 1 /* pseudocolor, no real chars */
+};
+
+/* the color map itself */
+struct colormap {
+ int magic;
+# define CMMAGIC 0x876
+ struct vars *v; /* for compile error reporting */
+ color rest;
+ int filled; /* has it been filled? */
+ size_t ncds; /* number of colordescs */
+ struct colordesc *cd;
+# define CDEND(cm) (&(cm)->cd[(cm)->ncds])
+# define NINLINECDS ((size_t)10)
+ struct colordesc cds[NINLINECDS];
+ union tree tree[NBYTS]; /* tree top, plus fill blocks */
+};
+
+/* optimization magic to do fast chr->color mapping */
+#define B0(c) ((c) & BYTMASK)
+#define B1(c) (((c)>>BYTBITS) & BYTMASK)
+#define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK)
+#define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK)
+#if NBYTS == 1
+#define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)])
+#endif
+#if NBYTS == 2
+#define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)])
+#endif
+#if NBYTS == 4
+#define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)])
+#endif
+
+
+
+/*
+ * Interface definitions for locale-interface functions in regc_locale.c.
+ * Multi-character collating elements (MCCEs) cause most of the trouble.
*/
struct cvec {
int nchrs; /* number of chrs */
int chrspace; /* number of chrs possible */
chr *chrs; /* pointer to vector of chrs */
- int nces; /* number of multichr collating elements */
- int cespace; /* number of CEs possible */
- int ncechrs; /* number of chrs used for CEs */
- chr *ces[1]; /* pointers to 0-terminated CEs */
+ int nmcces; /* number of MCCEs */
+ int mccespace; /* number of MCCEs possible */
+ int nmccechrs; /* number of chrs used for MCCEs */
+ chr *mcces[1]; /* pointers to 0-terminated MCCEs */
/* and both batches of chrs are on the end */
};
+/* caution: this value cannot be changed easily */
+#define MAXMCCE 2 /* length of longest MCCE */
+
+
+
/*
* definitions for NFA internal representation
*
@@ -147,12 +287,15 @@ struct nfa {
struct state *states; /* state-chain header */
struct state *slast; /* tail of the chain */
struct state *free; /* free list */
+ struct colormap *cm; /* the color map */
color bos[2]; /* colors, if any, assigned to BOS and BOL */
color eos[2]; /* colors, if any, assigned to EOS and EOL */
struct vars *v; /* simplifies compile error reporting */
struct nfa *parent; /* parent NFA, if any */
};
+
+
/*
* definitions for compacted NFA
*/
@@ -164,8 +307,9 @@ struct carc {
struct cnfa {
int nstates; /* number of states */
int ncolors; /* number of colors */
- int haslacons; /* does it use lookahead constraints? */
- int leftanch; /* is it anchored on the left? */
+ int flags;
+# define HASLACONS 01 /* uses lookahead constraints */
+# define LEFTANCH 02 /* anchored on left */
int pre; /* setup state number */
int post; /* teardown state number */
color bos[2]; /* colors, if any, assigned to BOS and BOL */
@@ -176,6 +320,8 @@ struct cnfa {
#define ZAPCNFA(cnfa) ((cnfa).nstates = 0)
#define NULLCNFA(cnfa) ((cnfa).nstates == 0)
+
+
/*
* definitions for subexpression tree
* The intrepid code-reader is hereby warned that the subexpression tree
@@ -198,20 +344,27 @@ struct subre {
struct rtree {
char op; /* operator: '|', ',' */
- short no; /* node numbering */
+ char flags;
+# define INUSE 01 /* in use in the tree */
+ short no; /* index into retry memory */
struct subre left;
struct rtree *next; /* for '|' */
struct subre right; /* for ',' */
+ struct rtree *chain; /* for bookkeeping and error cleanup */
};
+
+
/*
* table of function pointers for generic manipulation functions
* A regex_t's re_fns points to one of these.
*/
struct fns {
- VOID (*free) _ANSI_ARGS_((regex_t *));
+ VOID FUNCPTR(free, (regex_t *));
};
+
+
/*
* the insides of a regex_t, hidden behind a void *
*/
@@ -220,13 +373,12 @@ struct guts {
# define GUTSMAGIC 0xfed9
int cflags; /* copy of compile flags */
int info; /* copy of re_info */
- int nsub; /* copy of re_nsub */
+ size_t nsub; /* copy of re_nsub */
struct cnfa cnfa;
struct rtree *tree;
int ntree;
struct colormap *cm;
- int (*compare) _ANSI_ARGS_((CONST chr *, CONST chr *, size_t));
- /* string-compare function */
+ int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t));
struct subre *lacons; /* lookahead-constraint vector */
int nlacons; /* size of lacons */
int usedshorter; /* used non-greedy quantifiers? */
diff --git a/generic/tclBasic.c b/generic/tclBasic.c
index e8fa7ad..fcc1f93 100644
--- a/generic/tclBasic.c
+++ b/generic/tclBasic.c
@@ -12,7 +12,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclBasic.c,v 1.1.2.3 1998/09/30 20:46:21 stanton Exp $
+ * RCS: @(#) $Id: tclBasic.c,v 1.1.2.4 1998/10/21 20:40:02 stanton Exp $
*/
#include "tclInt.h"
@@ -77,6 +77,8 @@ static CmdInfo builtInCmds[] = {
(CompileProc *) NULL, 1},
{"continue", (Tcl_CmdProc *) NULL, Tcl_ContinueObjCmd,
TclCompileContinueCmd, 1},
+ {"encoding", (Tcl_CmdProc *) NULL, Tcl_EncodingObjCmd,
+ (CompileProc *) NULL, 1},
{"error", (Tcl_CmdProc *) NULL, Tcl_ErrorObjCmd,
(CompileProc *) NULL, 1},
{"eval", (Tcl_CmdProc *) NULL, Tcl_EvalObjCmd,
diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c
index f17b8fc..54a3046 100644
--- a/generic/tclCmdAH.c
+++ b/generic/tclCmdAH.c
@@ -11,7 +11,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdAH.c,v 1.1.2.3 1998/09/28 20:24:18 stanton Exp $
+ * RCS: @(#) $Id: tclCmdAH.c,v 1.1.2.4 1998/10/21 20:40:03 stanton Exp $
*/
#include "tclInt.h"
@@ -343,7 +343,7 @@ Tcl_CdObjCmd(dummy, interp, objc, objv)
* Tcl_ConcatObjCmd --
*
* This object-based procedure is invoked to process the "concat" Tcl
- * command. See the user documentation for details on what it does/
+ * command. See the user documentation for details on what it does.
*
* Results:
* A standard Tcl object result.
@@ -407,6 +407,123 @@ Tcl_ContinueObjCmd(dummy, interp, objc, objv)
/*
*----------------------------------------------------------------------
*
+ * Tcl_EncodingObjCmd --
+ *
+ * This command manipulates encodings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_EncodingObjCmd(dummy, interp, objc, objv)
+ ClientData dummy; /* Not used. */
+ Tcl_Interp *interp; /* Current interpreter. */
+ int objc; /* Number of arguments. */
+ Tcl_Obj *CONST objv[]; /* Argument objects. */
+{
+ int index, length;
+ Tcl_Encoding encoding;
+ char *string;
+ Tcl_DString ds;
+ Tcl_Obj *resultPtr;
+
+ static char *optionStrings[] = {
+ "convertfrom", "convertto", "names", "system",
+ NULL
+ };
+ enum options {
+ ENC_CONVERTFROM, ENC_CONVERTTO, ENC_NAMES, ENC_SYSTEM
+ };
+
+ if (objc < 2) {
+ Tcl_WrongNumArgs(interp, 1, objv, "option ?arg ...?");
+ return TCL_ERROR;
+ }
+ if (Tcl_GetIndexFromObj(interp, objv[1], optionStrings, "option", 0,
+ &index) != TCL_OK) {
+ return TCL_ERROR;
+ }
+
+ switch ((enum options) index) {
+ case ENC_CONVERTTO:
+ case ENC_CONVERTFROM: {
+ char *name;
+ Tcl_Obj *data;
+ if (objc == 3) {
+ name = NULL;
+ data = objv[2];
+ } else if (objc == 4) {
+ name = Tcl_GetString(objv[2]);
+ data = objv[3];
+ } else {
+ Tcl_WrongNumArgs(interp, 2, objv, "?encoding? data");
+ return TCL_ERROR;
+ }
+
+ encoding = Tcl_GetEncoding(interp, name);
+ if (!encoding) {
+ return TCL_ERROR;
+ }
+
+ if ((enum options) index == ENC_CONVERTFROM) {
+ /*
+ * Treat the string as binary data.
+ */
+
+ string = (char *) Tcl_GetByteArrayFromObj(data, &length);
+ Tcl_ExternalToUtfDString(encoding, string, length, &ds);
+ Tcl_DStringResult(interp, &ds);
+ } else {
+ /*
+ * Store the result as binary data.
+ */
+
+ string = Tcl_GetStringFromObj(data, &length);
+ Tcl_UtfToExternalDString(encoding, string, length, &ds);
+ resultPtr = Tcl_GetObjResult(interp);
+ Tcl_SetByteArrayObj(resultPtr,
+ (unsigned char *) Tcl_DStringValue(&ds),
+ Tcl_DStringLength(&ds));
+ Tcl_DStringFree(&ds);
+ }
+
+ Tcl_FreeEncoding(encoding);
+ break;
+ }
+ case ENC_NAMES: {
+ if (objc > 2) {
+ Tcl_WrongNumArgs(interp, 2, objv, NULL);
+ return TCL_ERROR;
+ }
+ Tcl_GetEncodingNames(interp);
+ break;
+ }
+ case ENC_SYSTEM: {
+ if (objc > 3) {
+ Tcl_WrongNumArgs(interp, 2, objv, "?encoding?");
+ return TCL_ERROR;
+ }
+ if (objc == 2) {
+ Tcl_SetResult(interp, Tcl_GetEncodingName(NULL), TCL_STATIC);
+ } else {
+ return Tcl_SetSystemEncoding(interp,
+ Tcl_GetStringFromObj(objv[2], NULL));
+ }
+ break;
+ }
+ }
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_ErrorObjCmd --
*
* This procedure is invoked to process the "error" Tcl command.
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c
index f47fb1e..6b4cc39 100644
--- a/generic/tclCmdIL.c
+++ b/generic/tclCmdIL.c
@@ -13,12 +13,13 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdIL.c,v 1.1.2.2 1998/09/24 23:58:42 stanton Exp $
+ * RCS: @(#) $Id: tclCmdIL.c,v 1.1.2.3 1998/10/21 20:40:04 stanton Exp $
*/
#include "tclInt.h"
#include "tclPort.h"
#include "tclCompile.h"
+#include "tclRegexp.h"
/*
* During execution of the "lsort" command, structures of the following
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 8a3b6d5..9f46efc 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -12,12 +12,13 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.1.2.3 1998/10/16 01:16:57 stanton Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.1.2.4 1998/10/21 20:40:05 stanton Exp $
*/
#include "tclInt.h"
#include "tclPort.h"
#include "tclCompile.h"
+#include "tclRegexp.h"
/*
* Structure used to hold information about variable traces:
@@ -108,20 +109,26 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
int objc; /* Number of arguments. */
Tcl_Obj *CONST objv[]; /* Argument objects. */
{
- int i, result, indices, flags, stringLength, wLen, match;
+ int i, result, indices, stringLength, wLen, match, about;
+ int hasxflags, cflags, eflags;
Tcl_RegExp regExpr;
char *string;
Tcl_DString stringBuffer, valueBuffer;
Tcl_UniChar *wStart;
static char *options[] = {
- "-indices", "-nocase", "--", (char *) NULL
+ "-indices", "-nocase", "-about", "-expanded",
+ "-unsupported0", "--", (char *) NULL
};
enum options {
- REGEXP_INDICES, REGEXP_NOCASE, REGEXP_LAST
+ REGEXP_INDICES, REGEXP_NOCASE, REGEXP_ABOUT, REGEXP_EXPANDED,
+ REGEXP_XFLAGS, REGEXP_LAST
};
indices = 0;
- flags = 0;
+ about = 0;
+ cflags = REG_ADVANCED;
+ eflags = 0;
+ hasxflags = 0;
for (i = 1; i < objc; i++) {
char *name;
@@ -141,7 +148,19 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
break;
}
case REGEXP_NOCASE: {
- flags |= REG_ICASE;
+ cflags |= REG_ICASE;
+ break;
+ }
+ case REGEXP_ABOUT: {
+ about = 1;
+ break;
+ }
+ case REGEXP_EXPANDED: {
+ cflags |= REG_EXPANDED;
+ break;
+ }
+ case REGEXP_XFLAGS: {
+ hasxflags = 1;
break;
}
case REGEXP_LAST: {
@@ -152,7 +171,7 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
}
endOfForLoop:
- if (objc - i < 2) {
+ if (objc - i < hasxflags + 2 - about) {
Tcl_WrongNumArgs(interp, 1, objv,
"?switches? exp string ?matchVar? ?subMatchVar subMatchVar ...?");
return TCL_ERROR;
@@ -160,11 +179,25 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
objc -= i;
objv += i;
- regExpr = TclRegCompObj(interp, objv[0], flags | REG_ADVANCED);
+ if (hasxflags) {
+ string = Tcl_GetStringFromObj(objv[0], &stringLength);
+ TclRegXflags(string, stringLength, &cflags, &eflags);
+ objc--;
+ objv++;
+ }
+
+ regExpr = TclRegCompObj(interp, objv[0], cflags);
if (regExpr == NULL) {
return TCL_ERROR;
}
+ if (about) {
+ if (TclRegAbout(interp, regExpr) < 0) {
+ return TCL_ERROR;
+ }
+ return TCL_OK;
+ }
+
result = TCL_OK;
string = Tcl_GetStringFromObj(objv[1], &stringLength);
@@ -174,7 +207,7 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
wStart = TclUtfToUniCharDString(string, stringLength, &stringBuffer);
wLen = Tcl_DStringLength(&stringBuffer) / sizeof(Tcl_UniChar);
- match = TclRegExpExecUniChar(interp, regExpr, wStart, wLen, 0);
+ match = TclRegExpExecUniChar(interp, regExpr, wStart, wLen, eflags);
if (match < 0) {
result = TCL_ERROR;
goto done;
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 9b3f18d..06da42e 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclEncoding.c,v 1.1.2.2 1998/10/03 01:56:41 stanton Exp $
+ * RCS: @(#) $Id: tclEncoding.c,v 1.1.2.3 1998/10/21 20:40:05 stanton Exp $
*/
#include "tclInt.h"
@@ -136,8 +136,8 @@ typedef struct EscapeEncodingData {
#define ENCODING_ESCAPE 3
/*
- * Hash table that keeps track of all loaded TextEncodings. Keys are
- * the string names that represent the encoding, values are (TextEncoding *).
+ * Hash table that keeps track of all loaded Encodings. Keys are
+ * the string names that represent the encoding, values are (Encoding *).
*/
static Tcl_HashTable encodingTable;
@@ -277,6 +277,23 @@ TclInitEncodingSubsystem()
Tcl_CreateEncoding(&type);
}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclFinalizeEncodingSubsystem --
+ *
+ * Release the state associated with the encoding subsystem.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Frees all of the encodings.
+ *
+ *----------------------------------------------------------------------
+ */
+
void
TclFinalizeEncodingSubsystem()
{
@@ -515,6 +532,11 @@ Tcl_GetEncodingNames(interp)
Tcl_DStringFree(&pwdString);
}
+ /*
+ * Clear any values placed in the result by globbing.
+ */
+
+ Tcl_ResetResult(interp);
resultPtr = Tcl_GetObjResult(interp);
hPtr = Tcl_FirstHashEntry(&table, &search);
@@ -573,9 +595,9 @@ Tcl_SetSystemEncoding(interp, name)
return TCL_ERROR;
}
}
- Tcl_FreeEncoding(systemEncoding);
Tcl_MutexLock(&encodingMutex);
+ Tcl_FreeEncoding(systemEncoding);
systemEncoding = encoding;
Tcl_MutexUnlock(&encodingMutex);
@@ -1009,7 +1031,7 @@ LoadEncodingFile(interp, name)
pathPtr = TclGetLibraryPath();
if (pathPtr == NULL) {
- return NULL;
+ goto unknown;
}
objc = 0;
Tcl_ListObjGetElements(NULL, pathPtr, &objc, &objv);
@@ -1023,10 +1045,7 @@ LoadEncodingFile(interp, name)
}
if (chan == NULL) {
- if (interp != NULL) {
- Tcl_AppendResult(interp, "unknown encoding \"", name, "\"", NULL);
- }
- return NULL;
+ goto unknown;
}
Tcl_SetChannelOption(NULL, chan, "-encoding", "utf-8");
@@ -1070,7 +1089,30 @@ LoadEncodingFile(interp, name)
}
Tcl_Close(NULL, chan);
return encoding;
+
+ unknown:
+ if (interp != NULL) {
+ Tcl_AppendResult(interp, "unknown encoding \"", name, "\"", NULL);
+ }
+ return NULL;
}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * OpenEncodingFile --
+ *
+ * Look for the file encoding/<name>.enc in the specified
+ * directory.
+ *
+ * Results:
+ * Returns an open file channel if the file exists.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
static Tcl_Channel
OpenEncodingFile(dir, name)
diff --git a/generic/tclFileName.c b/generic/tclFileName.c
index 01fefa7..55832ab 100644
--- a/generic/tclFileName.c
+++ b/generic/tclFileName.c
@@ -9,11 +9,12 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclFileName.c,v 1.1.2.3 1998/10/06 00:36:56 stanton Exp $
+ * RCS: @(#) $Id: tclFileName.c,v 1.1.2.4 1998/10/21 20:40:05 stanton Exp $
*/
#include "tclInt.h"
#include "tclPort.h"
+#include "tclRegexp.h"
/*
* This variable indicates whether the cleanup procedure has been
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 140a2eb..0babdfd 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclInt.h,v 1.1.2.6 1998/10/16 01:16:57 stanton Exp $
+ * RCS: @(#) $Id: tclInt.h,v 1.1.2.7 1998/10/21 20:40:06 stanton Exp $
*/
#ifndef _TCLINT
@@ -774,48 +774,6 @@ typedef struct MathFunc {
} MathFunc;
/*
- *---------------------------------------------------------------------------
- * Definitions of flags used in regexp compilation and execution that need
- * to be visible to the rest of the Tcl core. Definitions that are
- * entirely private to the regexp package live in tclRegexp.h.
- *---------------------------------------------------------------------------
- */
-
-/*
- *Compilation flags.
- */
-
-#define REG_BASIC 000000 /* BREs (convenience) */
-#define REG_EXTENDED 000001 /* EREs */
-#define REG_ADVF 000002 /* advanced features in EREs */
-#define REG_ADVANCED 000003 /* AREs (which are also EREs) */
-#define REG_QUOTE 000004 /* no special characters, none */
-#define REG_NOSPEC REG_QUOTE /* historical synonym */
-#define REG_ICASE 000010 /* ignore case */
-#define REG_NOSUB 000020 /* don't care about subexpressions */
-#define REG_EXPANDED 000040 /* expanded format, white space & comments */
-#define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */
-#define REG_NLANCH 000200 /* ^ matches after \n, $ before */
-#define REG_NEWLINE 000300 /* newlines are line terminators */
-
-/*
- * Execution flags.
- */
-
-#define REG_NOTBOL 0001 /* BOS is not BOL */
-#define REG_NOTEOL 0002 /* EOS is not EOL */
-
-EXTERN Tcl_RegExp TclRegCompObj _ANSI_ARGS_((Tcl_Interp *interp,
- Tcl_Obj *patObj, int flags));
-EXTERN int TclRegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
- Tcl_RegExp re, CONST Tcl_UniChar *uniString,
- int numChars, int flags));
-EXTERN int TclRegExpMatchObj _ANSI_ARGS_((Tcl_Interp *interp,
- char *string, Tcl_Obj *patObj));
-EXTERN void TclRegExpRangeUniChar _ANSI_ARGS_((Tcl_RegExp re,
- int index, int *startPtr, int *endPtr));
-
-/*
* Threads support.
* These routines are used to implement Tcl_GetThreadData.
*/
@@ -2161,6 +2119,8 @@ EXTERN int Tcl_ConcatObjCmd _ANSI_ARGS_((ClientData clientData,
Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[]));
EXTERN int Tcl_ContinueObjCmd _ANSI_ARGS_((ClientData clientData,
Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[]));
+EXTERN int Tcl_EncodingObjCmd _ANSI_ARGS_((ClientData clientData,
+ Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[]));
EXTERN int Tcl_EofObjCmd _ANSI_ARGS_((ClientData clientData,
Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[]));
EXTERN int Tcl_ErrorObjCmd _ANSI_ARGS_((ClientData clientData,
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index 44b575c..d65b19a 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -4,12 +4,13 @@
* This file contains the public interfaces to the Tcl regular
* expression mechanism.
*
+ * Copyright (c) 1998 by Scriptics Corporation.
* Copyright (c) 1998 by Sun Microsystems, Inc.
*
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclRegexp.c,v 1.1.2.2 1998/10/03 01:56:41 stanton Exp $
+ * RCS: @(#) $Id: tclRegexp.c,v 1.1.2.3 1998/10/21 20:40:06 stanton Exp $
*/
#include "tclInt.h"
@@ -337,6 +338,7 @@ TclRegExpExecUniChar(interp, re, wString, numChars, flags)
TclRegexp *regexpPtr = (TclRegexp *) re;
status = re_uexec(&regexpPtr->re, wString, (size_t) numChars,
+ (rm_detail_t *)NULL,
regexpPtr->re.re_nsub + 1, regexpPtr->matches, flags);
/*
@@ -528,6 +530,83 @@ TclRegCompObj(interp, objPtr, flags)
/*
*----------------------------------------------------------------------
*
+ * TclRegAbout --
+ *
+ * Return information about a compiled regular expression.
+ *
+ * Results:
+ * The return value is -1 for failure, 0 for success, although at
+ * the moment there's nothing that could fail. On success, a list
+ * is left in the interp's result: first element is the subexpression
+ * count, second is a list of re_info bit names.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclRegAbout(interp, re)
+ Tcl_Interp *interp; /* Interpreter whose result receives the list. */
+ Tcl_RegExp re; /* The compiled regular expression. */
+{
+ TclRegexp *regexpPtr = (TclRegexp *)re;
+ char buf[TCL_INTEGER_SPACE]; /* Decimal text of re_nsub. */
+ static struct infoname {
+ int bit; /* re_info bit to test. */
+ char *text; /* Name reported when that bit is set. */
+ } infonames[] = {
+ REG_UBACKREF, "REG_UBACKREF",
+ REG_ULOOKAHEAD, "REG_ULOOKAHEAD",
+ REG_UBOUNDS, "REG_UBOUNDS",
+ REG_UBRACES, "REG_UBRACES",
+ REG_UBSALNUM, "REG_UBSALNUM",
+ REG_UPBOTCH, "REG_UPBOTCH",
+ REG_UBBS, "REG_UBBS",
+ REG_UNONPOSIX, "REG_UNONPOSIX",
+ REG_UUNSPEC, "REG_UUNSPEC",
+ REG_UUNPORT, "REG_UUNPORT",
+ REG_ULOCALE, "REG_ULOCALE",
+ REG_UEMPTYMATCH, "REG_UEMPTYMATCH",
+ 0, "", /* Sentinel: a zero bit ends the table. */
+ };
+ struct infoname *inf; /* Cursor over infonames. */
+ int n; /* How many table bits are set in re_info. */
+
+ Tcl_ResetResult(interp);
+
+ sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub));
+ Tcl_AppendElement(interp, buf);
+
+ /*
+ * Must count bits before generating list, because we must know
+ * whether {} are needed before we start appending names.
+ */
+ n = 0;
+ for (inf = infonames; inf->bit != 0; inf++) {
+ if (regexpPtr->re.re_info&inf->bit) {
+ n++;
+ }
+ }
+ if (n != 1) { /* Zero or several names: wrap them in braces by hand. */
+ Tcl_AppendResult(interp, " {", NULL);
+ }
+ for (inf = infonames; inf->bit != 0; inf++) {
+ if (regexpPtr->re.re_info&inf->bit) {
+ Tcl_AppendElement(interp, inf->text);
+ }
+ }
+ if (n != 1) { /* Close the brace opened before the names. */
+ Tcl_AppendResult(interp, "}", NULL);
+ }
+
+ return 0; /* Always succeeds; no failure path exists here. */
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* TclRegError --
*
* Generate an error message based on the regexp status code.
@@ -536,7 +615,7 @@ TclRegCompObj(interp, objPtr, flags)
* Places an error in the interpreter.
*
* Side effects:
- * None.
+ * Sets errorCode as well.
*
*----------------------------------------------------------------------
*/
@@ -547,66 +626,19 @@ TclRegError(interp, msg, status)
char *msg; /* Message to prepend to error. */
int status; /* Status code to report. */
{
- char *errMsg;
-
- switch(status) {
- case REG_BADPAT:
- errMsg = "invalid regular expression";
- break;
- case REG_ECOLLATE:
- errMsg = "invalid collating element";
- break;
- case REG_ECTYPE:
- errMsg = "invalid character class";
- break;
- case REG_EESCAPE:
- errMsg = "invalid escape sequence";
- break;
- case REG_ESUBREG:
- errMsg = "invalid backreference number";
- break;
- case REG_EBRACK:
- errMsg = "unmatched []";
- break;
- case REG_EPAREN:
- errMsg = "unmatched ()";
- break;
- case REG_EBRACE:
- errMsg = "unmatched {}";
- break;
- case REG_BADBR:
- errMsg = "invalid repetition count(s)";
- break;
- case REG_ERANGE:
- errMsg = "invalid character range";
- break;
- case REG_ESPACE:
- errMsg = "out of memory";
- break;
- case REG_BADRPT:
- errMsg = "?+* follows nothing";
- break;
- case REG_ASSERT:
- errMsg = "\"can't happen\" -- you found a bug";
- break;
- case REG_INVARG:
- errMsg = "invalid argument to regex routine";
- break;
- case REG_MIXED:
- errMsg = "char RE applied to wchar_t string (etc.)";
- break;
- case REG_BADOPT:
- errMsg = "invalid embedded option";
- break;
- case REG_IMPOSS:
- errMsg = "can never match";
- break;
- default:
- errMsg = "\"can't happen\" -- you found an undefined error code";
- break;
- }
+ char buf[100]; /* ample in practice */
+ char cbuf[100]; /* lots in practice */
+ size_t n;
+ char *p;
+
Tcl_ResetResult(interp);
- Tcl_AppendResult(interp, msg, errMsg, NULL);
+ n = regerror(status, (regex_t *)NULL, buf, sizeof(buf));
+ p = (n > sizeof(buf)) ? "..." : "";
+ Tcl_AppendResult(interp, msg, buf, p, NULL);
+
+ sprintf(cbuf, "%d", status);
+ (VOID) regerror(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
+ Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);
}
@@ -749,36 +781,15 @@ CompileRegexp(interp, string, length, flags)
if (status != REG_OKAY) {
/*
- * Warning, the following is a hack to allow empty regexp.
- * The goal is to compile a non-empty regexp that will always
- * find one empty match. If you use "(?:)" (an empty pair of
- * non-capturing parentheses) instead, that will avoid both the
- * overhead and the subexpression report.
- */
-
- if (status == REG_EMPTY) {
- static Tcl_UniChar uniEmpty[] = {'(', '?', ':', ')', '\0'};
-
- uniString = uniEmpty;
- numChars = 4;
- status = re_ucomp(&regexpPtr->re, uniString, (size_t) numChars,
- REG_ADVANCED);
- }
-
- /*
* Clean up and report errors in the interpreter, if possible.
*/
-
- if (status != REG_OKAY) {
- regfree(&regexpPtr->re);
- ckfree((char *)regexpPtr);
- if (interp) {
- TclRegError(interp,
- "couldn't compile regular expression pattern: ",
- status);
- }
- return NULL;
+ ckfree((char *)regexpPtr);
+ if (interp) {
+ TclRegError(interp,
+ "couldn't compile regular expression pattern: ",
+ status);
}
+ return NULL;
}
/*
@@ -791,4 +802,100 @@ CompileRegexp(interp, string, length, flags)
return regexpPtr;
}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TclRegXflags --
+ *
+ * Parse a string of extended regexp flag letters, for testing.
+ *
+ * Results:
+ * No return value (you're on your own for errors here).
+ *
+ * Side effects:
+ * Modifies *cflagsPtr, a regcomp flags word, and *eflagsPtr, a
+ * regexec flags word, as appropriate.
+ *
+ *----------------------------------------------------------------------
+ */
+VOID
+TclRegXflags(string, length, cflagsPtr, eflagsPtr)
+ char *string; /* The string of flag letters. */
+ int length; /* Number of bytes of string to examine. */
+ int *cflagsPtr; /* In/out: compile flags word, updated in place. */
+ int *eflagsPtr; /* In/out: exec flags word, updated in place. */
+{
+ int i;
+ int cflags; /* Working copy of *cflagsPtr. */
+ int eflags; /* Working copy of *eflagsPtr. */
+
+ cflags = *cflagsPtr;
+ eflags = *eflagsPtr;
+ for (i = 0; i < length; i++) {
+ switch (string[i]) { /* Letters without a case are silently ignored. */
+ case 'a': { /* a: add the REG_ADVF bit */
+ cflags |= REG_ADVF;
+ break;
+ }
+ case 'b': { /* b: clear the REG_ADVANCED bits, set nothing */
+ cflags &= ~REG_ADVANCED;
+ break;
+ }
+ case 'e': { /* e: replace REG_ADVANCED with REG_EXTENDED */
+ cflags &= ~REG_ADVANCED;
+ cflags |= REG_EXTENDED;
+ break;
+ }
+ case 'q': { /* q: replace REG_ADVANCED with REG_QUOTE */
+ cflags &= ~REG_ADVANCED;
+ cflags |= REG_QUOTE;
+ break;
+ }
+ case 'i': { /* i: REG_ICASE */
+ cflags |= REG_ICASE;
+ break;
+ }
+ case 'o': { /* o for opaque */
+ cflags |= REG_NOSUB;
+ break;
+ }
+ case 'x': { /* x: REG_EXPANDED */
+ cflags |= REG_EXPANDED;
+ break;
+ }
+ case 'p': { /* p: REG_NLSTOP */
+ cflags |= REG_NLSTOP;
+ break;
+ }
+ case 'w': { /* w: REG_NLANCH */
+ cflags |= REG_NLANCH;
+ break;
+ }
+ case 'n': { /* n: REG_NEWLINE */
+ cflags |= REG_NEWLINE;
+ break;
+ }
+ case '+': { /* +: REG_FAKEEC (meaning not visible here; see regex.h) */
+ cflags |= REG_FAKEEC;
+ break;
+ }
+ case '^': { /* ^: exec-time REG_NOTBOL */
+ eflags |= REG_NOTBOL;
+ break;
+ }
+ case '$': { /* $: exec-time REG_NOTEOL */
+ eflags |= REG_NOTEOL;
+ break;
+ }
+ case '%': { /* %: exec-time REG_SMALL */
+ eflags |= REG_SMALL;
+ break;
+ }
+ }
+ }
+
+ *cflagsPtr = cflags; /* Hand both updated words back to the caller. */
+ *eflagsPtr = eflags;
+}
diff --git a/generic/tclRegexp.h b/generic/tclRegexp.h
index be5cb77..9e56730 100644
--- a/generic/tclRegexp.h
+++ b/generic/tclRegexp.h
@@ -7,8 +7,9 @@
* Copyright (c) 1998 Henry Spencer. All rights reserved.
*
* Development of this software was funded, in part, by Cray Research Inc.,
- * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
- * whom are responsible for the results. The author thanks all of them.
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and
+ * Scriptics Corporation, none of whom are responsible for the results.
+ * The author thanks all of them.
*
* Redistribution and use in source and binary forms -- with or without
* modification -- are permitted for any purpose, provided that
@@ -26,20 +27,19 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
+ * Copyright (c) 1998 by Scriptics Corporation.
* Copyright (c) 1998 by Sun Microsystems, Inc.
*
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclRegexp.h,v 1.1.2.2 1998/09/24 23:59:02 stanton Exp $
+ * RCS: @(#) $Id: tclRegexp.h,v 1.1.2.3 1998/10/21 20:40:06 stanton Exp $
*/
#ifndef _TCLREGEXP
#define _TCLREGEXP
-#ifndef _TCLINT
-#include "tclInt.h"
-#endif
+#include "regcustom.h"
#ifdef BUILD_tcl
# undef TCL_STORAGE_CLASS
@@ -47,190 +47,6 @@
#endif
/*
- * The following definitions were culled from wctype.h and wchar.h.
- * Those two header files are now gone. Eventually we should replace all
- * instances of, e.g., iswalnum() with TclUniCharIsAlnum() in the regexp
- * code.
- */
-
-#undef wint_t
-#define wint_t int
-
-#undef WEOF
-#undef WCHAR_MIN
-#undef WCHAR_MAX
-
-#define WEOF -1
-#define WCHAR_MIN 0x0000
-#define WCHAR_MAX 0xffff
-
-#undef iswalnum
-#undef iswalpha
-#undef iswdigit
-#undef iswspace
-
-#define iswalnum(x) TclUniCharIsAlnum(x)
-#define iswalpha(x) TclUniCharIsAlpha(x)
-#define iswdigit(x) TclUniCharIsDigit(x)
-#define iswspace(x) TclUniCharIsSpace(x)
-
-#undef wcslen
-#undef wcsncmp
-
-#define wcslen TclUniCharLen
-#define wcsncmp TclUniCharNcmp
-
-/*
- * The following definitions were added by JO to make Tcl compile
- * under SunOS, where off_t and wchar_t aren't defined; perhaps all of
- * the code below can be collapsed into a few simple definitions?
- */
-
-#ifndef __RE_REGOFF_T
-# define __RE_REGOFF_T int
-#endif
-#ifndef __RE_WCHAR_T
-# define __RE_WCHAR_T Tcl_UniChar
-#endif
-
-/*
- * regoff_t has to be large enough to hold either off_t or ssize_t,
- * and must be signed; it's only a guess that off_t is big enough, so we
- * offer an override.
- */
-#ifdef __RE_REGOFF_T
-typedef __RE_REGOFF_T regoff_t; /* offset type for result reporting */
-#else
-typedef off_t regoff_t;
-#endif
-
-/*
- * We offer the option of using a non-wchar_t type in the w prototypes so
- * that <regex.h> can be included without first including (e.g.) <wchar.h>.
- * Note that __RE_WCHAR_T must in fact be the same type as wchar_t!
- */
-#ifdef __RE_WCHAR_T
-typedef __RE_WCHAR_T re_wchar; /* internal name for the type */
-#else
-typedef wchar_t re_wchar;
-#endif
-
-#define REMAGIC 0xfed7
-
-/*
- * other interface types
- */
-
-/* the biggie, a compiled RE (or rather, a front end to same) */
-typedef struct {
- int re_magic; /* magic number */
- size_t re_nsub; /* number of subexpressions */
- int re_info; /* information about RE */
-# define REG_UBACKREF 000001
-# define REG_ULOOKAHEAD 000002
-# define REG_UBOUNDS 000004
-# define REG_UBRACES 000010
-# define REG_UBSALNUM 000020
-# define REG_UPBOTCH 000040
-# define REG_UBBS 000100
-# define REG_UNONPOSIX 000200
-# define REG_UUNSPEC 000400
-# define REG_UUNPORT 001000
-# define REG_ULOCALE 002000
-# define REG_UEMPTYMATCH 004000
- int re_csize; /* sizeof(character) */
- VOID *re_guts; /* none of your business :-) */
- VOID *re_fns; /* none of your business :-) */
-} regex_t;
-
-/* result reporting (may acquire more fields later) */
-typedef struct {
- regoff_t rm_so; /* start of substring */
- regoff_t rm_eo; /* end of substring */
-} regmatch_t;
-
-
-
-/*
- * compilation
- ^ int regcomp(regex_t *, const char *, int);
- ^ int re_comp(regex_t *, const char *, size_t, int);
- ^ #ifndef __RE_NOWIDE
- ^ int re_wcomp(regex_t *, const re_wchar *, size_t, int);
- ^ #endif
- */
-
-#define REG_DUMP 004000 /* none of your business :-) */
-#define REG_FAKE 010000 /* none of your business :-) */
-#define REG_PROGRESS 020000 /* none of your business :-) */
-
-
-
-/*
- * execution
- ^ int regexec(regex_t *, const char *, size_t, regmatch_t [], int);
- ^ int re_exec(regex_t *, const char *, size_t, size_t, regmatch_t [], int);
- ^ #ifndef __RE_NOWIDE
- ^ int re_wexec(regex_t *, const re_wchar *, size_t, size_t, regmatch_t [], int);
- ^ #endif
- */
-#define REG_FTRACE 0010 /* none of your business */
-#define REG_MTRACE 0020 /* none of your business */
-#define REG_SMALL 0040 /* none of your business */
-
-/*
- * error reporting
- * Be careful if modifying the list of error codes -- the table used by
- * regerror() is generated automatically from this file!
- *
- * Note that there is no wchar_t variant of regerror at this time; what
- * kind of character is used for error reports is independent of what kind
- * is used in matching.
- *
- ^ extern size_t regerror(int, const regex_t *, char *, size_t);
- */
-#define REG_OKAY 0 /* no errors detected */
-#define REG_NOMATCH 1 /* regexec() failed to match */
-#define REG_BADPAT 2 /* invalid regular expression */
-#define REG_ECOLLATE 3 /* invalid collating element */
-#define REG_ECTYPE 4 /* invalid character class */
-#define REG_EESCAPE 5 /* invalid escape \ sequence */
-#define REG_ESUBREG 6 /* invalid backreference number */
-#define REG_EBRACK 7 /* brackets [] not balanced */
-#define REG_EPAREN 8 /* parentheses () not balanced */
-#define REG_EBRACE 9 /* braces {} not balanced */
-#define REG_BADBR 10 /* invalid repetition count(s) */
-#define REG_ERANGE 11 /* invalid character range */
-#define REG_ESPACE 12 /* out of memory */
-#define REG_BADRPT 13 /* quantifier operand invalid */
-#define REG_EMPTY 14 /* empty regular expression */
-#define REG_ASSERT 15 /* "can't happen" -- you found a bug */
-#define REG_INVARG 16 /* invalid argument to regex routine */
-#define REG_MIXED 17 /* char RE applied to wchar_t string (etc.) */
-#define REG_BADOPT 18 /* invalid embedded option */
-#define REG_IMPOSS 19 /* can never match */
-/* two specials for debugging and testing */
-#define REG_ATOI 101 /* convert error-code name to number */
-#define REG_ITOA 102 /* convert error-code number to name */
-
-
-
-/*
- * the prototypes, as possibly munched by fwd
- */
-/* =====^!^===== begin forwards =====^!^===== */
-/* automatically gathered by fwd; do not hand-edit */
-/* === regex.h === */
-EXTERN int re_ucomp _ANSI_ARGS_((regex_t *, const Tcl_UniChar *,
- size_t, int));
-EXTERN int re_uexec _ANSI_ARGS_((regex_t *, const Tcl_UniChar *,
- size_t, size_t, regmatch_t [], int));
-EXTERN VOID regfree _ANSI_ARGS_((regex_t *));
-EXTERN size_t regerror _ANSI_ARGS_((int, const regex_t *, char *, size_t));
-/* automatically gathered by fwd; do not hand-edit */
-/* =====^!^===== end forwards =====^!^===== */
-
-/*
* The TclRegexp structure encapsulates a compiled regex_t,
* the flags that were used to compile it, and an array of pointers
* that are used to indicate subexpressions after a call to Tcl_RegExpExec.
@@ -251,6 +67,24 @@ typedef struct TclRegexp {
} TclRegexp;
/*
+ * Functions exported for use within the rest of Tcl.
+ */
+
+EXTERN Tcl_RegExp TclRegCompObj _ANSI_ARGS_((Tcl_Interp *interp,
+ Tcl_Obj *patObj, int flags));
+EXTERN int TclRegAbout _ANSI_ARGS_((Tcl_Interp *interp,
+ Tcl_RegExp re));
+EXTERN VOID TclRegXflags _ANSI_ARGS_((char *string, int length,
+ int *cflagsPtr, int *eflagsPtr));
+EXTERN int TclRegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
+ Tcl_RegExp re, CONST Tcl_UniChar *uniString,
+ int numChars, int flags));
+EXTERN int TclRegExpMatchObj _ANSI_ARGS_((Tcl_Interp *interp,
+ char *string, Tcl_Obj *patObj));
+EXTERN void TclRegExpRangeUniChar _ANSI_ARGS_((Tcl_RegExp re,
+ int index, int *startPtr, int *endPtr));
+
+/*
* Functions exported from the regexp package for the test package to use.
*/
@@ -258,8 +92,3 @@ EXTERN void TclRegError _ANSI_ARGS_((Tcl_Interp *interp, char *msg,
int status));
#endif /* _TCLREGEXP */
-
-
-
-
-
diff --git a/generic/tclTest.c b/generic/tclTest.c
index 8da6785..2136b7c 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -12,14 +12,13 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclTest.c,v 1.1.2.2 1998/09/24 23:59:02 stanton Exp $
+ * RCS: @(#) $Id: tclTest.c,v 1.1.2.3 1998/10/21 20:40:07 stanton Exp $
*/
#define TCL_TEST
#include "tclInt.h"
#include "tclPort.h"
-#include "tclRegexp.h" /* To test internals of regexp package. */
#include <locale.h>
/*
@@ -245,9 +244,6 @@ static int TestparsevarObjCmd _ANSI_ARGS_((ClientData dummy,
static int TestparsevarnameObjCmd _ANSI_ARGS_((ClientData dummy,
Tcl_Interp *interp, int objc,
Tcl_Obj *CONST objv[]));
-static int TestregexpObjCmd _ANSI_ARGS_((ClientData dummy,
- Tcl_Interp *interp, int objc,
- Tcl_Obj *CONST objv[]));
static int TestsaveresultCmd _ANSI_ARGS_((ClientData dummy,
Tcl_Interp *interp, int objc,
Tcl_Obj *CONST objv[]));
@@ -398,8 +394,6 @@ Tcltest_Init(interp)
(ClientData) 0, (Tcl_CmdDeleteProc *) NULL);
Tcl_CreateObjCommand(interp, "testparsevarname", TestparsevarnameObjCmd,
(ClientData) 0, (Tcl_CmdDeleteProc *) NULL);
- Tcl_CreateObjCommand(interp, "testregexp", TestregexpObjCmd,
- (ClientData) 0, (Tcl_CmdDeleteProc *) NULL);
Tcl_CreateObjCommand(interp, "testsaveresult", TestsaveresultCmd,
(ClientData) 0, (Tcl_CmdDeleteProc *) NULL);
Tcl_CreateCommand(interp, "testsetassocdata", TestsetassocdataCmd,
@@ -1317,19 +1311,15 @@ TestencodingObjCmd(dummy, interp, objc, objv)
Tcl_Obj *CONST objv[]; /* Argument objects. */
{
Tcl_Encoding encoding;
- Tcl_DString ds;
int index, length;
char *string;
- Tcl_Obj *resultPtr;
TclEncoding *encodingPtr;
static char *optionStrings[] = {
- "create", "delete", "toutf", "fromutf",
- "names", "system", "path",
+ "create", "delete", "path",
NULL
};
enum options {
- ENC_CREATE, ENC_DELETE, ENC_TOUTF, ENC_FROMUTF,
- ENC_NAMES, ENC_SYSTEM, ENC_PATH
+ ENC_CREATE, ENC_DELETE, ENC_PATH
};
if (Tcl_GetIndexFromObj(interp, objv[1], optionStrings, "option", 0,
@@ -1376,79 +1366,6 @@ TestencodingObjCmd(dummy, interp, objc, objv)
Tcl_FreeEncoding(encoding);
break;
}
- case ENC_TOUTF: {
- if (objc < 3) {
- return TCL_ERROR;
- }
- if (objc == 3) {
- string = "iso8859-1";
- } else {
- string = Tcl_GetString(objv[3]);
- }
- encoding = Tcl_GetEncoding(NULL, string);
-
- string = (char *) Tcl_GetByteArrayFromObj(objv[2], &length);
- Tcl_ExternalToUtfDString(encoding, string, length, &ds);
-
- /*
- * If the encoding performs a Tcl_Eval() (which is the case for
- * encodings created by the "encoding create" command, the
- * resultPtr from the interp will be invalidated and we need to
- * get it again.
- */
-
- resultPtr = Tcl_GetObjResult(interp);
- Tcl_SetStringObj(resultPtr, Tcl_DStringValue(&ds),
- Tcl_DStringLength(&ds));
- Tcl_DStringFree(&ds);
- Tcl_FreeEncoding(encoding);
- break;
- }
- case ENC_FROMUTF: {
- if (objc < 3) {
- return TCL_ERROR;
- }
- if (objc == 3) {
- string = "iso8859-1";
- } else {
- string = Tcl_GetString(objv[3]);
- }
- encoding = Tcl_GetEncoding(NULL, string);
-
- string = Tcl_GetStringFromObj(objv[2], &length);
- Tcl_UtfToExternalDString(encoding, string, length, &ds);
-
- /*
- * If the encoding performs a Tcl_Eval() (which is the case for
- * encodings created by the "encoding create" command, the
- * resultPtr from the interp will be invalidated and we need to
- * get it again.
- */
-
- resultPtr = Tcl_GetObjResult(interp);
- Tcl_SetByteArrayObj(resultPtr,
- (unsigned char *) Tcl_DStringValue(&ds),
- Tcl_DStringLength(&ds));
- Tcl_DStringFree(&ds);
- Tcl_FreeEncoding(encoding);
- break;
- }
-
- case ENC_NAMES: {
- Tcl_GetEncodingNames(interp);
- break;
- }
- case ENC_SYSTEM: {
- if (objc == 2) {
- Tcl_SetResult(interp, Tcl_GetEncodingName(NULL), TCL_STATIC);
- } else {
- char *str;
-
- str = Tcl_GetStringFromObj(objv[2], NULL);
- return Tcl_SetSystemEncoding(interp, str);
- }
- break;
- }
case ENC_PATH: {
if (objc == 2) {
Tcl_SetObjResult(interp, TclGetLibraryPath());
@@ -2584,251 +2501,6 @@ TestparsevarnameObjCmd(clientData, interp, objc, objv)
/*
*----------------------------------------------------------------------
*
- * TestregexpObjCmd --
- *
- * This procedure implements the "testregexp" command. It is
- * used to give a direct interface for regexp flags.
- *
- * Results:
- * A standard Tcl result.
- *
- * Side effects:
- * None.
- *
- *----------------------------------------------------------------------
- */
-
-static int
-TestregexpObjCmd(dummy, interp, objc, objv)
- ClientData dummy; /* Not used. */
- Tcl_Interp *interp; /* Current interpreter. */
- int objc; /* Number of arguments. */
- Tcl_Obj *CONST objv[]; /* Argument objects. */
-{
- TclRegexp *regExpr;
- char *string, *flagString, *start, *end;
- int flags, match, i, j;
-
- if (objc < 4) {
- Tcl_WrongNumArgs(interp, 1, objv,
- "flags exp string ?subMatchVar subMatchVar ...?");
- return TCL_ERROR;
- }
- flagString = Tcl_GetString(objv[1]);
- string = Tcl_GetString(objv[3]);
-
- flags = RegGetCompFlags(flagString);
- regExpr = (TclRegexp *) TclRegCompObj(interp, objv[2], flags);
- if (regExpr == NULL) {
- return TCL_ERROR;
- }
-
- flags = RegGetExecFlags(flagString);
- if (flags == -1) {
- /*
- * Do not try to match the string.
- */
-
- match = 0;
- } else {
- Tcl_DString stringBuffer;
- Tcl_UniChar *uniString;
- int numChars;
-
- /*
- * Remember the UTF-8 string so Tcl_RegExpRange() can convert the
- * matches from character to byte offsets.
- */
-
- regExpr->string = string;
-
- Tcl_DStringInit(&stringBuffer);
- uniString = TclUtfToUniCharDString(string, -1, &stringBuffer);
- numChars = Tcl_DStringLength(&stringBuffer) / sizeof(Tcl_UniChar);
-
- match = TclRegExpExecUniChar(interp, (Tcl_RegExp) regExpr, uniString,
- numChars, flags);
- Tcl_DStringFree(&stringBuffer);
-
- if (match < 0) {
- return TCL_ERROR;
- }
- if (flags & REG_NOSUB) {
- for (i = 0; i <= (int) regExpr->re.re_nsub; i++) {
- regExpr->matches[i].rm_so = -1;
- regExpr->matches[i].rm_eo = -1;
- }
- }
- }
- if (!match) {
- /*
- * Set the interpreter's object result to an integer object w/ value 0.
- */
-
- Tcl_SetIntObj(Tcl_GetObjResult(interp), 0);
- return TCL_OK;
- }
-
- /*
- * If additional variable names have been specified, return
- * index information in those variables.
- */
-
- for (i = 0, j = 4; j < objc; i++, j++) {
- char *result;
- char *currentString = Tcl_GetString(objv[j]);
-
- Tcl_RegExpRange((Tcl_RegExp) regExpr, i, &start, &end);
- if (start == NULL) {
- result = Tcl_SetVar(interp, currentString, "", 0);
- } else {
- char savedChar, *first, *last;
- char *tempString = Tcl_GetString(objv[3]);
- first = tempString + (start - string);
- last = tempString + (end - string);
- if (first == last) { /* don't modify argument */
- result = Tcl_SetVar(interp, currentString, "", 0);
- } else {
- savedChar = *last;
- *last = 0;
- result = Tcl_SetVar(interp, currentString, first, 0);
- *last = savedChar;
- }
- }
- if (result == NULL) {
- Tcl_AppendResult(interp, "couldn't set variable \"",
- currentString, "\"", (char *) NULL);
- return TCL_ERROR;
- }
- }
-
- /*
- * Set the interpreter's object result to an integer object w/ value 1.
- */
-
- Tcl_SetIntObj(Tcl_GetObjResult(interp), 1);
- return TCL_OK;
-}
-
-/*
- *----------------------------------------------------------------------
- *
- * RegGetCompFlags --
- *
- * Internal interface to regular expression compile flags.
- * Converts a string of chars to a single flag.
- *
- * Results:
- * Returns a flags for regular expression compilation.
- *
- * Side effects:
- * None.
- *
- *----------------------------------------------------------------------
- */
-static int
-RegGetCompFlags(s)
- char *s;
-{
- char c;
- register char *p;
- int result = REG_ADVANCED;
-
- for (p = s; (c = *p) != '\0'; p++)
- switch (c) {
- case 'a':
- result |= REG_ADVF;
- break;
- case 'b':
- result &= ~REG_ADVANCED;
- break;
- case 'e':
- result &= ~REG_ADVF;
- result |= REG_EXTENDED;
- break;
- case 'i':
- result |= REG_ICASE;
- break;
- case 'm':
- case 'n':
- result |= REG_NEWLINE;
- break;
- case 'p':
- result |= REG_NLSTOP;
- break;
- case 'q':
- result &= ~REG_ADVANCED;
- result |= REG_QUOTE;
- break;
- case 's':
- result |= REG_NOSUB;
- break;
- case 'w':
- result |= REG_NLANCH;
- break;
- case 'x':
- result |= REG_EXPANDED;
- break;
- case '+':
- result |= REG_FAKE;
- break;
- case ',':
- result |= REG_PROGRESS;
- break;
- }
- return result;
-}
-
-/*
- *----------------------------------------------------------------------
- *
- * RegGetExecFlags --
- *
- * Internal interface to regular expression exec flags.
- * Converts a string of chars to a single flag.
- *
- * Results:
- * Returns a flags for regular expression matching.
- *
- * Side effects:
- * None.
- *
- *----------------------------------------------------------------------
- */
-static int
-RegGetExecFlags(s)
- char *s;
-{
- char c;
- register char *p;
- int result = 0;
-
- for (p = s; (c = *p) != '\0'; p++)
- switch (c) {
- case '^':
- result |= REG_NOTBOL;
- break;
- case '$':
- result |= REG_NOTEOL;
- break;
- case ';':
- result |= REG_FTRACE;
- break;
- case ':':
- result |= REG_MTRACE;
- break;
- case '.':
- result |= REG_SMALL;
- break;
- case '/':
- return -1;
- }
- return result;
-}
-
-/*
- *----------------------------------------------------------------------
- *
* TestsetassocdataCmd --
*
* This procedure implements the "testsetassocdata" command. It is used