diff options
author | stanton <stanton> | 1998-10-21 20:39:57 (GMT) |
---|---|---|
committer | stanton <stanton> | 1998-10-21 20:39:57 (GMT) |
commit | 7e7056e21d0a0d9fa39bdfd742e82b101a6c4b7c (patch) | |
tree | 99e08a09e1567ade05e7bc7edac3758b3695d424 /generic | |
parent | 966ff877247e93fbe6e641cfa77df19d03cfe932 (diff) | |
download | tcl-7e7056e21d0a0d9fa39bdfd742e82b101a6c4b7c.zip tcl-7e7056e21d0a0d9fa39bdfd742e82b101a6c4b7c.tar.gz tcl-7e7056e21d0a0d9fa39bdfd742e82b101a6c4b7c.tar.bz2 |
Integrated latest regexp changes from Henry Spencer.
Moved regexp related declarations out of tclInt.h and into tclRegexp.h.
Added "encoding" command.
Diffstat (limited to 'generic')
-rw-r--r-- | generic/chr.h | 48 | ||||
-rw-r--r-- | generic/locale.c | 675 | ||||
-rw-r--r-- | generic/regc_color.c (renamed from generic/color.c) | 298 | ||||
-rw-r--r-- | generic/regc_cvec.c | 143 | ||||
-rw-r--r-- | generic/regc_lex.c (renamed from generic/lex.c) | 336 | ||||
-rw-r--r-- | generic/regc_locale.c | 426 | ||||
-rw-r--r-- | generic/regc_nfa.c (renamed from generic/nfa.c) | 410 | ||||
-rw-r--r-- | generic/regcomp.c (renamed from generic/compile.c) | 610 | ||||
-rw-r--r-- | generic/regcustom.h | 90 | ||||
-rw-r--r-- | generic/regerror.c | 82 | ||||
-rw-r--r-- | generic/regerrs.h | 19 | ||||
-rw-r--r-- | generic/regex.h | 299 | ||||
-rw-r--r-- | generic/regexec.c (renamed from generic/exec.c) | 459 | ||||
-rw-r--r-- | generic/regfree.c | 25 | ||||
-rw-r--r-- | generic/regfronts.c | 56 | ||||
-rw-r--r-- | generic/regguts.h (renamed from generic/guts.h) | 260 | ||||
-rw-r--r-- | generic/tclBasic.c | 4 | ||||
-rw-r--r-- | generic/tclCmdAH.c | 121 | ||||
-rw-r--r-- | generic/tclCmdIL.c | 3 | ||||
-rw-r--r-- | generic/tclCmdMZ.c | 51 | ||||
-rw-r--r-- | generic/tclEncoding.c | 60 | ||||
-rw-r--r-- | generic/tclFileName.c | 3 | ||||
-rw-r--r-- | generic/tclInt.h | 46 | ||||
-rw-r--r-- | generic/tclRegexp.c | 283 | ||||
-rw-r--r-- | generic/tclRegexp.h | 219 | ||||
-rw-r--r-- | generic/tclTest.c | 334 |
26 files changed, 2972 insertions, 2388 deletions
diff --git a/generic/chr.h b/generic/chr.h deleted file mode 100644 index 6a21159..0000000 --- a/generic/chr.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * chr.h -- - * - * Regexp package file: Unichar version of stuff related to the - * nature of a character. - * - * Copyright (c) 1998 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., and Sun Microsystems Inc., none of - * whom are responsible for the results. The author thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Copyright (c) 1998 by Sun Microsystems, Inc. - * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. 
- * - * RCS: @(#) $Id: chr.h,v 1.1.2.2 1998/10/03 01:56:40 stanton Exp $ - */ - -typedef Tcl_UniChar chr; /* internal character type */ -typedef int pchr; /* what it promotes to */ -typedef unsigned uchr; /* unsigned type big enough to hold a chr */ -#define CHRBITS (sizeof(Tcl_UniChar) * CHAR_BIT) /* bits in a chr */ -#define CHR(c) (UCHAR(c)) /* turn a char literal into a chr literal */ -#define DIGITVAL(c) ((c)-'0') /* turn a chr digit into its value */ - -/* - * char names for the externally-visible functions - */ -#define compile re_ucomp -#define exec re_uexec diff --git a/generic/locale.c b/generic/locale.c deleted file mode 100644 index ca56fc4..0000000 --- a/generic/locale.c +++ /dev/null @@ -1,675 +0,0 @@ -/* - * locale.c -- - * - * Regexp package file: - * collating-element handling and other locale-specific stuff - * - * Copyright (c) 1998 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., and Sun Microsystems Inc., none of - * whom are responsible for the results. The author thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Copyright (c) 1998 by Sun Microsystems, Inc. - * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: locale.c,v 1.1.2.2 1998/10/03 01:56:41 stanton Exp $ - */ - -/* - * This is largely dummy code, since it needs locale interfaces. The - * dummy code implements more or less the C locale. Parts of the code - * are marked "dummy" and "generic" in hopes of making the situation - * clearer. - * - * As a hack for testing, if REG_FAKE is turned on, we add a single - * collating element ch between c and d, and a single equivalence class - * containing x and y. - * - * The type "celt" is an entirely opaque non-array type -- it need not be - * an integer type, it could be (say) a pointer -- which has distinct values - * for all chrs and all collating elements. The only things the outside - * world does to celts are copying them around and comparing them for - * equality; everything else is done in this file. There need be no "null" - * value for celt. The dummy code uses wint_t as celt, with WEOF as the - * celt code for ch (ugh!). 
- */ - -/* - * dummy: - ^ #def MAXCE 2 // longest CE code is prepared to handle - ^ typedef wint_t celt; // type holding distinct codes for all chrs, all CEs - */ - -/* dummy: character-name table */ -static struct cname { - char *name; - char code; -} cnames[] = { - {"NUL", '\0'}, - {"SOH", '\001'}, - {"STX", '\002'}, - {"ETX", '\003'}, - {"EOT", '\004'}, - {"ENQ", '\005'}, - {"ACK", '\006'}, - {"BEL", '\007'}, - {"alert", '\007'}, - {"BS", '\010'}, - {"backspace", '\b'}, - {"HT", '\011'}, - {"tab", '\t'}, - {"LF", '\012'}, - {"newline", '\n'}, - {"VT", '\013'}, - {"vertical-tab", '\v'}, - {"FF", '\014'}, - {"form-feed", '\f'}, - {"CR", '\015'}, - {"carriage-return", '\r'}, - {"SO", '\016'}, - {"SI", '\017'}, - {"DLE", '\020'}, - {"DC1", '\021'}, - {"DC2", '\022'}, - {"DC3", '\023'}, - {"DC4", '\024'}, - {"NAK", '\025'}, - {"SYN", '\026'}, - {"ETB", '\027'}, - {"CAN", '\030'}, - {"EM", '\031'}, - {"SUB", '\032'}, - {"ESC", '\033'}, - {"IS4", '\034'}, - {"FS", '\034'}, - {"IS3", '\035'}, - {"GS", '\035'}, - {"IS2", '\036'}, - {"RS", '\036'}, - {"IS1", '\037'}, - {"US", '\037'}, - {"space", ' '}, - {"exclamation-mark", '!'}, - {"quotation-mark", '"'}, - {"number-sign", '#'}, - {"dollar-sign", '$'}, - {"percent-sign", '%'}, - {"ampersand", '&'}, - {"apostrophe", '\''}, - {"left-parenthesis", '('}, - {"right-parenthesis", ')'}, - {"asterisk", '*'}, - {"plus-sign", '+'}, - {"comma", ','}, - {"hyphen", '-'}, - {"hyphen-minus", '-'}, - {"period", '.'}, - {"full-stop", '.'}, - {"slash", '/'}, - {"solidus", '/'}, - {"zero", '0'}, - {"one", '1'}, - {"two", '2'}, - {"three", '3'}, - {"four", '4'}, - {"five", '5'}, - {"six", '6'}, - {"seven", '7'}, - {"eight", '8'}, - {"nine", '9'}, - {"colon", ':'}, - {"semicolon", ';'}, - {"less-than-sign", '<'}, - {"equals-sign", '='}, - {"greater-than-sign", '>'}, - {"question-mark", '?'}, - {"commercial-at", '@'}, - {"left-square-bracket", '['}, - {"backslash", '\\'}, - {"reverse-solidus", '\\'}, - {"right-square-bracket", ']'}, - 
{"circumflex", '^'}, - {"circumflex-accent", '^'}, - {"underscore", '_'}, - {"low-line", '_'}, - {"grave-accent", '`'}, - {"left-brace", '{'}, - {"left-curly-bracket", '{'}, - {"vertical-line", '|'}, - {"right-brace", '}'}, - {"right-curly-bracket", '}'}, - {"tilde", '~'}, - {"DEL", '\177'}, - {NULL, 0} -}; - -/* dummy: character-class table */ -static struct cclass { - char *name; - char *chars; - int hasch; -} cclasses[] = { - {"alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789", 1}, - {"alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", - 1}, - {"blank", " \t", 0}, - {"cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ -\25\26\27\30\31\32\33\34\35\36\37\177", 0}, - {"digit", "0123456789", 0}, - {"graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - 1}, - {"lower", "abcdefghijklmnopqrstuvwxyz", - 1}, - {"print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", - 1}, - {"punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - 0}, - {"space", "\t\n\v\f\r ", 0}, - {"upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", - 0}, - {"xdigit", "0123456789ABCDEFabcdef", - 0}, - {NULL, 0, 0} -}; - -#define CH WEOF /* dummy */ - -/* - - nces - how many distinct collating elements are there? - * This is pure dummy code, although a straight "return 0" is definitely - * what's wanted for all locales lucky enough not to have these stupid - * things. Case counterparts should be included. - ^ static int nces(struct vars *); - */ -static int -nces(v) -struct vars *v; -{ - return (v->cflags&REG_FAKE) ? 1 : 0; -} - -/* - - nleaders - how many chrs can be first chrs of collating elements? - * This is pure dummy code, although a straight "return 0" is definitely - * what's wanted for all locales lucky enough not to have these stupid - * things. Case counterparts should be included. 
- ^ static int nleaders(struct vars *); - */ -static int -nleaders(v) -struct vars *v; -{ - return (v->cflags&REG_FAKE) ? 1 : 0; -} - -/* - - allces - return a cvec with all the collating elements of the locale - * This would be kind of costly if there were large numbers of them; with - * any luck, that case does not occur in reality. Note that case variants - * should be included; "all" means *all*. - * This is pure dummy code. - ^ static struct cvec *allces(struct vars *, struct cvec *); - */ -static struct cvec * -allces(v, cv) -struct vars *v; -struct cvec *cv; /* this is supposed to have enough room */ -{ - assert(cv->cespace > 0); - (VOID) clearcvec(cv); - if (v->cflags&REG_FAKE) - addce(cv, ch()); - return cv; -} - -/* - - element - map collating-element name to celt - ^ static celt element(struct vars *, chr *, chr *); - */ -static celt -element(v, startp, endp) -struct vars *v; -chr *startp; /* points to start of name */ -chr *endp; /* points just past end of name */ -{ - register struct cname *cn; - register size_t len; - Tcl_DString ds; - char *name; - - /* generic: one-chr names stand for themselves */ - assert(startp < endp); - len = endp - startp; - if (len == 1) - return *startp; - - NOTE(REG_ULOCALE); - - /* - * INTL: ISO only, search table - */ - - Tcl_DStringInit(&ds); - name = TclUniCharToUtfDString(startp, (int) len, &ds); - - for (cn = cnames; cn->name != NULL; cn++) { - if (strlen(cn->name) == len && strncmp(cn->name, name, len) == 0) { - return UCHAR(cn->code); - } - } - Tcl_DStringFree(&ds); - - /* - * Special case for testing. - */ - - if ((v->cflags&REG_FAKE) && len == 2) { - if (*startp == 'c' && *(startp+1) == 'h') - return (celt) CH; - } - - /* generic: couldn't find it */ - ERR(REG_ECOLLATE); - return 0; -} - -/* - - range - supply cvec for a range, including legality check - * Must include case counterparts on request. 
- ^ static struct cvec *range(struct vars *, celt, celt, int); - */ -static struct cvec * -range(v, a, b, cases) -struct vars *v; -celt a; -celt b; /* might equal a */ -int cases; /* case-independent? */ -{ - int nchrs; - int appendch; - struct cvec *cv; - celt c; - - /* generic: legality check */ - if (a != b && !before(a, b)) { - ERR(REG_ERANGE); - return NULL; - } - - /* mostly dummy: compute vector length, note presence of ch */ - appendch = 0; - if (a == (celt) CH) { - if (b == (celt) CH) { - a = 'c'; - b = a - 1; /* kludge to get no chrs */ - appendch = 1; - } else { - a = 'd'; - appendch = 1; - } - } else { - if (b == CH) { - appendch = 1; - b = 'c'; - } else { - if ((v->cflags&REG_FAKE) && a <= 'c' && b >= 'd') - appendch = 1; - } - } - nchrs = b - a + 1; - if (cases) - nchrs *= 2; - cv = getcvec(v, nchrs, appendch); - NOERRN(); - - /* mostly dummy: fill in vector */ - for (c = a; c <= b; c++) { - addchr(cv, c); - if (cases) { - if (TclUniCharIsUpper((Tcl_UniChar)c)) - addchr(cv, (chr)Tcl_UniCharToLower( - (Tcl_UniChar)c)); - else if (TclUniCharIsLower((Tcl_UniChar)c)) - addchr(cv, (chr)Tcl_UniCharToUpper( - (Tcl_UniChar)c)); - } - } - if (appendch) - addce(cv, ch()); - - return cv; -} - -/* - - before - is celt x before celt y, for purposes of range legality? - * This is all dummy code. - ^ static int before(celt, celt); - */ -static int /* predicate */ -before(x, y) -celt x; -celt y; -{ - int isxch = (x == CH); - int isych = (y == CH); - - if (!isxch && !isych && x < y) - return 1; - if (isxch && !isych && y >= 'd') - return 1; - if (!isxch && isych && x <= 'c') - return 1; - return 0; -} - -/* - - eclass - supply cvec for an equivalence class - * Must include case counterparts on request. - * This is all dummy code. - ^ static struct cvec *eclass(struct vars *, celt, int); - */ -static struct cvec * -eclass(v, c, cases) -struct vars *v; -celt c; -int cases; /* all cases? 
*/ -{ - struct cvec *cv; - - if (c == CH) { - cv = getcvec(v, 0, 1); - assert(cv != NULL); - addce(cv, ch()); - return cv; - } - - if ((v->cflags&REG_FAKE) && (c == 'x' || c == 'y')) { - cv = getcvec(v, 4, 0); - assert(cv != NULL); - addchr(cv, (chr)'x'); - addchr(cv, (chr)'y'); - if (cases) { - addchr(cv, (chr)'X'); - addchr(cv, (chr)'Y'); - } - return cv; - } - - /* no equivalence class by that name */ - if (cases) - return allcases(v, c); - cv = getcvec(v, 1, 0); - assert(cv != NULL); - addchr(cv, (chr)c); - return cv; -} - -/* - - cclass - supply cvec for a character class - * Must include case counterparts on request. - * This is all dummy code. - ^ static struct cvec *cclass(struct vars *, chr *, chr *, int); - */ -static struct cvec * -cclass(v, startp, endp, cases) -struct vars *v; -chr *startp; /* where the name starts */ -chr *endp; /* just past the end of the name */ -int cases; /* case-independent? */ -{ - size_t len; - register char *p; - register struct cclass *cc; - int hasch; - struct cvec *cv; - Tcl_DString ds; - char *name; - - /* check out the name */ - len = endp - startp; - - Tcl_DStringInit(&ds); - name = TclUniCharToUtfDString(startp, (int) len, &ds); - - if (cases && len == 5 && (strncmp("lower", name, 5) == 0 || - strncmp("upper", name, 5) == 0)) - name = "alpha"; - for (cc = cclasses; cc->name != NULL; cc++) { - if (strlen(cc->name) == len && strncmp(cc->name, name, len) == 0) { - break; - } - } - Tcl_DStringFree(&ds); - - if (cc->name == NULL) { - ERR(REG_ECTYPE); - return NULL; - } - - /* set up vector */ - hasch = (v->cflags&REG_FAKE) ? 
cc->hasch : 0; - cv = getcvec(v, (int) strlen(cc->chars), hasch); - if (cv == NULL) { - ERR(REG_ESPACE); - return NULL; - } - - /* fill it in */ - for (p = cc->chars; *p != '\0'; p++) - addchr(cv, (chr)*p); - if (hasch) - addce(cv, ch()); - - return cv; -} - -/* - - allcases - supply cvec for all case counterparts of a chr (including itself) - * This is a shortcut, preferably an efficient one, for simple characters; - * messy cases are done via range(). - * This is all dummy code. - ^ static struct cvec *allcases(struct vars *, pchr); - */ -static struct cvec * -allcases(v, c) -struct vars *v; -pchr c; -{ - struct cvec *cv = getcvec(v, 2, 0); - - assert(cv != NULL); - addchr(cv, c); - if (TclUniCharIsUpper((Tcl_UniChar)c)) - addchr(cv, (chr)Tcl_UniCharToLower((Tcl_UniChar)c)); - else if (TclUniCharIsLower((Tcl_UniChar)c)) - addchr(cv, (chr)Tcl_UniCharToUpper((Tcl_UniChar)c)); - - return cv; -} - -/* - - sncmp - case-independent chr-string compare - * REG_ICASE backrefs need this. It should preferably be efficient. - * This is all dummy code. - ^ static int sncmp(CONST chr *, CONST chr *, size_t); - */ -static int /* -1, 0, 1 for <, =, > */ -sncmp(x, y, len) -CONST chr *x; -CONST chr *y; -size_t len; /* maximum length of comparison */ -{ - int diff; - size_t i; - - for (i = 0; i < len; i++) { - diff = Tcl_UniCharToLower(x[i]) - Tcl_UniCharToLower(y[i]); - if (diff) { - return diff; - } - } - return 0; -} - -/* - * Utility functions for handling cvecs - */ - -/* - - newcvec - allocate a new cvec - ^ static struct cvec *newcvec(int, int); - */ -static struct cvec * -newcvec(nchrs, nces) -int nchrs; /* to hold this many chrs... */ -int nces; /* ... 
and this many CEs */ -{ - size_t n; - size_t nc; - struct cvec *cv; - - nc = (size_t)nchrs + (size_t)nces*(MAXCE+1); - n = sizeof(struct cvec) + (size_t)(nces-1)*sizeof(chr *) + - nc*sizeof(chr); - cv = (struct cvec *)ckalloc(n); - if (cv == NULL) - return NULL; - cv->chrspace = nc; - cv->chrs = (chr *)&cv->ces[nces]; /* chrs just after CE ptrs */ - cv->cespace = nces; - return clearcvec(cv); -} - -/* - - clearcvec - clear a possibly-new cvec - * Returns pointer as convenience. - ^ static struct cvec *clearcvec(struct cvec *); - */ -static struct cvec * -clearcvec(cv) -struct cvec *cv; -{ - int i; - - assert(cv != NULL); - cv->nchrs = 0; - assert(cv->chrs == (chr *)&cv->ces[cv->cespace]); - cv->nces = 0; - cv->ncechrs = 0; - for (i = 0; i < cv->cespace; i++) - cv->ces[i] = NULL; - - return cv; -} - -/* - - addchr - add a chr to a cvec - ^ static VOID addchr(struct cvec *, pchr); - */ -static VOID -addchr(cv, c) -struct cvec *cv; -pchr c; -{ - assert(cv->nchrs < cv->chrspace - cv->ncechrs); - cv->chrs[cv->nchrs++] = (chr) c; -} - -/* - - addce - add a CE to a cvec - ^ static VOID addce(struct cvec *, chr *); - */ -static VOID -addce(cv, startp) -struct cvec *cv; -chr *startp; /* 0-terminated text */ -{ - int n = wcslen(startp); - int i; - chr *s; - chr *d; - - assert(n > 0); - assert(cv->nchrs + n < cv->chrspace - cv->ncechrs); - assert(cv->nces < cv->cespace); - d = &cv->chrs[cv->chrspace - cv->ncechrs - n - 1]; - cv->ces[cv->nces++] = d; - for (s = startp, i = n; i > 0; s++, i--) - *d++ = *s; - *d = 0; /* endmarker */ - assert(d == &cv->chrs[cv->chrspace - cv->ncechrs]); - cv->ncechrs += n + 1; -} - -/* - - haschr - does a cvec contain this chr? 
- ^ static int haschr(struct cvec *, pchr); - */ -static int /* predicate */ -haschr(cv, c) -struct cvec *cv; -pchr c; -{ - int i; - chr *p; - - for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) - if (*p == c) - return 1; - return 0; -} - -/* - - getcvec - get a cvec, remembering it as v->cv - ^ static struct cvec *getcvec(struct vars *, int, int); - */ -static struct cvec * -getcvec(v, nchrs, nces) -struct vars *v; -int nchrs; /* to hold this many chrs... */ -int nces; /* ... and this many CEs */ -{ - if (v->cv != NULL && nchrs <= v->cv->chrspace && nces <= v->cv->cespace) - return clearcvec(v->cv); - - if (v->cv != NULL) - freecvec(v->cv); - v->cv = newcvec(nchrs, nces); - if (v->cv == NULL) - ERR(REG_ESPACE); - - return v->cv; -} - -/* - - freecvec - free a cvec - ^ static VOID freecvec(struct cvec *); - */ -static VOID -freecvec(cv) -struct cvec *cv; -{ - ckfree((char *)cv); -} diff --git a/generic/color.c b/generic/regc_color.c index fa640f9..4a8a87c 100644 --- a/generic/color.c +++ b/generic/regc_color.c @@ -1,85 +1,25 @@ /* - * color.c -- + * colorings of characters + * This file is #included by regcomp.c. * - * Regexp package file: colorings of characters. - * Note that there are some incestuous relationships between this code and - * NFA arc maintenance, which perhaps ought to be cleaned up sometime. - * - * Copyright (c) 1998 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., and Sun Microsystems Inc., none of - * whom are responsible for the results. The author thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. 
- * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Copyright (c) 1998 by Sun Microsystems, Inc. - * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: color.c,v 1.1.2.2 1998/10/03 01:56:40 stanton Exp $ + * Note that there are some incestuous relationships between this code and + * NFA arc maintenance, which perhaps ought to be cleaned up sometime. */ + + /* - * The innards. - */ -struct colors { - color ccolor[BYTTAB]; -}; -struct ptrs { - union tree *pptr[BYTTAB]; -}; -union tree { - struct colors colors; - struct ptrs ptrs; -}; -#define tcolor colors.ccolor -#define tptr ptrs.pptr -/* - * Some of the function prototypes need this. - ^ union tree; + * If this declaration draws a complaint about a negative array size, + * then CHRBITS is defined incorrectly for the chr type. 
*/ +static char isCHRBITSright[NEGIFNOT(sizeof(chr)*CHAR_BIT == CHRBITS)]; + + + +#define CISERR() VISERR(cm->v) +#define CERR(e) VERR(cm->v, (e)) -struct colordesc { - uchr nchrs; /* number of chars of this color */ - color sub; /* open subcolor of this one, or NOSUB */ -# define NOSUB COLORLESS - struct arc *arcs; /* color chain */ -# define UNUSEDCOLOR(cd) ((cd)->nchrs == 0 && (cd)->sub == NOSUB) - int flags; -# define PSEUDO 1 /* pseudocolor, no real chars */ -}; - -struct colormap { - int magic; -# define CMMAGIC 0x876 - struct vars *v; /* for error reporting */ - color rest; - int filled; /* has it been filled? */ - int ncds; /* number of colordescs */ - struct colordesc *cd; -# define CDEND(cm) (&(cm)->cd[(cm)->ncds]) -# define NINLINECDS 10 - struct colordesc cds[NINLINECDS]; - union tree tree[NBYTS]; /* tree top, plus fill blocks */ -}; -#ifdef COMPILE /* - newcm - get new colormap @@ -96,7 +36,7 @@ struct vars *v; union tree *nextt; struct colordesc *cd; - cm = (struct colormap *)ckalloc(sizeof(struct colormap)); + cm = (struct colormap *)MALLOC(sizeof(struct colormap)); if (cm == NULL) { ERR(REG_ESPACE); return NULL; @@ -114,15 +54,13 @@ struct vars *v; cd->arcs = NULL; cd->flags = 0; } - cm->cd[WHITE].nchrs = WCHAR_MAX - WCHAR_MIN; + cm->cd[WHITE].nchrs = CHR_MAX - CHR_MIN + 1; /* treetop starts as NULLs if there are lower levels */ t = cm->tree; - if (NBYTS > 1) { - for (i = BYTTAB-1; i >= 0; i--) - t->tptr[i] = NULL; - } - + if (NBYTS > 1) + for (i = BYTTAB-1; i >= 0; i--) + t->tptr[i] = NULL; /* if no lower levels, treetop and last fill block are the same */ /* fill blocks point to next fill block... */ @@ -149,13 +87,11 @@ freecm(cm) struct colormap *cm; { cm->magic = 0; - if (NBYTS > 1) { - cmtreefree(cm, cm->tree, 0); - } - if (cm->cd != cm->cds) { - ckfree((char *)cm->cd); - } - ckfree((char *) cm); /* mem leak (CCS). 
*/ + if (NBYTS > 1) + cmtreefree(cm, cm->tree, 0); + if (cm->cd != cm->cds) + FREE(cm->cd); + FREE(cm); } /* @@ -176,10 +112,9 @@ int level; /* level number (top == 0) of this block */ for (i = BYTTAB-1; i >= 0; i--) { t = tree->tptr[i]; if (t != NULL && t != fillt) { - if ((int) level < (int) NBYTS-2) { /* more pointer blocks below */ + if (level < NBYTS-2) /* more pointer blocks below */ cmtreefree(cm, t, level+1); - } - ckfree((char *) t); + FREE(t); } } } @@ -221,17 +156,13 @@ int level; /* level number (top == 0) of this block */ t = tree->tptr[i]; if (t == fillt) /* oops */ {} - else if (t == NULL) { + else if (t == NULL) tree->tptr[i] = fillt; - } - else if ((int) level < (int) NBYTS-2) {/* more pointer blocks below */ + else if (level < NBYTS-2) /* more pointer blocks below */ cmtreefill(cm, t, level+1); - } } } -#endif /* ifdef COMPILE */ - /* - getcolor - get the color of a character from a colormap ^ static color getcolor(struct colormap *, pchr); @@ -261,8 +192,6 @@ pchr c; return cm->rest; } -#ifdef COMPILE - /* - setcolor - set the color of a character in a colormap ^ static color setcolor(struct colormap *, pchr, pcolor); @@ -283,7 +212,7 @@ pcolor co; color prev; assert(cm->magic == CMMAGIC); - if (VISERR(cm->v) || co == COLORLESS) + if (CISERR() || co == COLORLESS) return COLORLESS; t = cm->tree; @@ -293,10 +222,10 @@ pcolor co; t = t->tptr[b]; if (t == NULL) { /* fell off an incomplete part */ bottom = (shift <= BYTBITS) ? 1 : 0; - t = (union tree *)ckalloc((bottom) ? + t = (union tree *)MALLOC((bottom) ? 
sizeof(struct colors) : sizeof(struct ptrs)); if (t == NULL) { - VERR(cm->v, REG_ESPACE); + CERR(REG_ESPACE); return COLORLESS; } if (bottom) @@ -312,7 +241,7 @@ pcolor co; b = uc & BYTMASK; prev = t->tcolor[b]; - t->tcolor[b] = (color) co; + t->tcolor[b] = (color)co; return prev; } @@ -328,7 +257,7 @@ struct colormap *cm; struct colordesc *end; struct colordesc *lastused; - if (VISERR(cm->v)) + if (CISERR()) return COLORLESS; lastused = NULL; @@ -337,7 +266,7 @@ struct colormap *cm; if (!UNUSEDCOLOR(cd)) lastused = cd; assert(lastused != NULL); - return (color) (lastused - cm->cd); + return (color)(lastused - cm->cd); } /* @@ -352,31 +281,31 @@ struct colormap *cm; struct colordesc *cd; struct colordesc *end; struct colordesc *firstnew; - int n; + size_t n; - if (VISERR(cm->v)) + if (CISERR()) return COLORLESS; end = CDEND(cm); for (cd = cm->cd; cd < end; cd++) if (UNUSEDCOLOR(cd)) { assert(cd->arcs == NULL); - return (color) (cd - cm->cd); + return (color)(cd - cm->cd); } /* oops, must allocate more */ n = cm->ncds * 2; if (cm->cd == cm->cds) { - cd = (struct colordesc *)ckalloc(sizeof(struct colordesc) * n); + cd = (struct colordesc *)MALLOC(sizeof(struct colordesc) * n); if (cd != NULL) - memcpy((VOID *)cd, (VOID *)cm->cds, cm->ncds * + memcpy(VS(cd), VS(cm->cds), cm->ncds * sizeof(struct colordesc)); } else { - cd = (struct colordesc *)ckrealloc((VOID *)cm->cd, - sizeof(struct colordesc) * n); + cd = (struct colordesc *)REALLOC(cm->cd, + n * sizeof(struct colordesc)); } if (cd == NULL) { - VERR(cm->v, REG_ESPACE); + CERR(REG_ESPACE); return COLORLESS; } cm->cd = cd; @@ -390,7 +319,7 @@ struct colormap *cm; cd->flags = 0; } assert(firstnew < CDEND(cm) && UNUSEDCOLOR(firstnew)); - return (color) (firstnew - cm->cd); + return (color)(firstnew - cm->cd); } /* @@ -404,7 +333,7 @@ struct colormap *cm; color co; co = newcolor(cm); - if (VISERR(cm->v)) + if (CISERR()) return COLORLESS; cm->cd[co].nchrs = 1; cm->cd[co].flags = PSEUDO; @@ -459,22 +388,22 @@ struct 
colormap *cm; color co; color sco; - for (cd = cm->cd, co = 0; cd < end; cd++, co++) { - sco = cd->sub; - if (sco == NOSUB) { - /* has no subcolor, no further action */ - } else if (sco == co) { - /* is subcolor, let parent deal with it */ - } else if (cd->nchrs == 0) { - /* parent empty, its arcs change color to subcolor */ - cd->sub = NOSUB; - scd = &cm->cd[sco]; - assert(scd->nchrs > 0); - assert(scd->sub == sco); - scd->sub = NOSUB; - while ((a = cd->arcs) != NULL) { - assert(a->co == co); - /* uncolorchain(cm, a); */ + for (cd = cm->cd, co = 0; cd < end; cd++, co++) { + sco = cd->sub; + if (sco == NOSUB) { + /* has no subcolor, no further action */ + } else if (sco == co) { + /* is subcolor, let parent deal with it */ + } else if (cd->nchrs == 0) { + /* parent empty, its arcs change color to subcolor */ + cd->sub = NOSUB; + scd = &cm->cd[sco]; + assert(scd->nchrs > 0); + assert(scd->sub == sco); + scd->sub = NOSUB; + while ((a = cd->arcs) != NULL) { + assert(a->co == co); + /* uncolorchain(cm, a); */ cd->arcs = a->colorchain; a->co = sco; /* colorchain(cm, a); */ @@ -483,11 +412,11 @@ struct colormap *cm; } } else { /* parent's arcs must gain parallel subcolor arcs */ - cd->sub = NOSUB; - scd = &cm->cd[sco]; - assert(scd->nchrs > 0); - assert(scd->sub == sco); - scd->sub = NOSUB; + cd->sub = NOSUB; + scd = &cm->cd[sco]; + assert(scd->nchrs > 0); + assert(scd->sub == sco); + scd->sub = NOSUB; for (a = cd->arcs; a != NULL; a = a->colorchain) { assert(a->co == co); newarc(nfa, a->type, sco, a->from, a->to); @@ -558,11 +487,11 @@ pchr c; ^ struct state *, struct state *); */ static VOID -rainbow(nfa, cm, type, exc, from, to) +rainbow(nfa, cm, type, but, from, to) struct nfa *nfa; struct colormap *cm; int type; -pcolor exc; /* COLORLESS if no exceptions */ +pcolor but; /* COLORLESS if no exceptions */ struct state *from; struct state *to; { @@ -570,8 +499,8 @@ struct state *to; struct colordesc *end = CDEND(cm); color co; - for (cd = cm->cd, co = 0; cd < end && 
!VISERR(nfa->v); cd++, co++) - if (!UNUSEDCOLOR(cd) && cd->sub != co && co != exc && + for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++) + if (!UNUSEDCOLOR(cd) && cd->sub != co && co != but && !(cd->flags&PSEUDO)) newarc(nfa, type, co, from, to); } @@ -596,10 +525,95 @@ struct state *to; color co; assert(of != from); - for (cd = cm->cd, co = 0; cd < end && !VISERR(nfa->v); cd++, co++) + for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++) if (!UNUSEDCOLOR(cd) && !(cd->flags&PSEUDO)) if (findarc(of, PLAIN, co) == NULL) newarc(nfa, type, co, from, to); } -#endif /* ifdef COMPILE */ + + +#ifdef REG_DEBUG + +/* + - dumpcolors - debugging output + ^ static VOID dumpcolors(struct colormap *, FILE *); + */ +static VOID +dumpcolors(cm, f) +struct colormap *cm; +FILE *f; +{ + struct colordesc *cd; + struct colordesc *end; + color co; + chr c; + + if (cm->filled) { + fprintf(f, "filled\n"); + if (NBYTS > 1) + fillcheck(cm, cm->tree, 0, f); + } + end = CDEND(cm); + for (cd = cm->cd + 1, co = 1; cd < end; cd++, co++) /* skip 0 */ + if (cd->nchrs > 0) { + if (cd->flags&PSEUDO) + fprintf(f, "#%2ld(ps): ", (long)co); + else + fprintf(f, "#%2ld(%2d): ", (long)co, cd->nchrs); + for (c = CHR_MIN; c < CHR_MAX; c++) + if (getcolor(cm, c) == co) + dumpchr(c, f); + assert(c == CHR_MAX); + if (getcolor(cm, c) == co) + dumpchr(c, f); + fprintf(f, "\n"); + } +} + +/* + - fillcheck - check proper filling of a tree + ^ static VOID fillcheck(struct colormap *, union tree *, int, FILE *); + */ +static VOID +fillcheck(cm, tree, level, f) +struct colormap *cm; +union tree *tree; +int level; /* level number (top == 0) of this block */ +FILE *f; +{ + int i; + union tree *t; + union tree *fillt = &cm->tree[level+1]; + + assert(level < NBYTS-1); /* this level has pointers */ + for (i = BYTTAB-1; i >= 0; i--) { + t = tree->tptr[i]; + if (t == NULL) + fprintf(f, "NULL found in filled tree!\n"); + else if (t == fillt) + {} + else if (level < NBYTS-2) /* more pointer blocks below */ + 
fillcheck(cm, t, level+1, f); + } +} + +/* + - dumpchr - print a chr + * Kind of char-centric but works well enough for debug use. + ^ static VOID dumpchr(pchr, FILE *); + */ +static VOID +dumpchr(c, f) +pchr c; +FILE *f; +{ + if (c == '\\') + fprintf(f, "\\\\"); + else if (c > ' ' && c <= '~') + putc((char)c, f); + else + fprintf(f, "\\0%lo", (long)c); +} + +#endif /* ifdef REG_DEBUG */ diff --git a/generic/regc_cvec.c b/generic/regc_cvec.c new file mode 100644 index 0000000..0650883 --- /dev/null +++ b/generic/regc_cvec.c @@ -0,0 +1,143 @@ +/* + * Utility functions for handling cvecs + * This file is #included by regcomp.c. + */ + +/* + - newcvec - allocate a new cvec + ^ static struct cvec *newcvec(int, int); + */ +static struct cvec * +newcvec(nchrs, nmcces) +int nchrs; /* to hold this many chrs... */ +int nmcces; /* ... and this many MCCEs */ +{ + size_t n; + size_t nc; + struct cvec *cv; + + nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1); + n = sizeof(struct cvec) + (size_t)(nmcces-1)*sizeof(chr *) + + nc*sizeof(chr); + cv = (struct cvec *)MALLOC(n); + if (cv == NULL) + return NULL; + cv->chrspace = nc; + cv->chrs = (chr *)&cv->mcces[nmcces]; /* chrs just after MCCE ptrs */ + cv->mccespace = nmcces; + return clearcvec(cv); +} + +/* + - clearcvec - clear a possibly-new cvec + * Returns pointer as convenience. 
+ ^ static struct cvec *clearcvec(struct cvec *); + */ +static struct cvec * +clearcvec(cv) +struct cvec *cv; +{ + int i; + + assert(cv != NULL); + cv->nchrs = 0; + assert(cv->chrs == (chr *)&cv->mcces[cv->mccespace]); + cv->nmcces = 0; + cv->nmccechrs = 0; + for (i = 0; i < cv->mccespace; i++) + cv->mcces[i] = NULL; + + return cv; +} + +/* + - addchr - add a chr to a cvec + ^ static VOID addchr(struct cvec *, pchr); + */ +static VOID +addchr(cv, c) +struct cvec *cv; +pchr c; +{ + assert(cv->nchrs < cv->chrspace - cv->nmccechrs); + cv->chrs[cv->nchrs++] = (chr)c; +} + +/* + - addmcce - add an MCCE to a cvec + ^ static VOID addmcce(struct cvec *, chr *, chr *); + */ +static VOID +addmcce(cv, startp, endp) +struct cvec *cv; +chr *startp; /* beginning of text */ +chr *endp; /* just past end of text */ +{ + int n = endp - startp; + int i; + chr *s; + chr *d; + + assert(n > 0); + assert(cv->nchrs + n < cv->chrspace - cv->nmccechrs); + assert(cv->nmcces < cv->mccespace); + d = &cv->chrs[cv->chrspace - cv->nmccechrs - n - 1]; + cv->mcces[cv->nmcces++] = d; + for (s = startp, i = n; i > 0; s++, i--) + *d++ = *s; + *d++ = 0; /* endmarker */ + assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]); + cv->nmccechrs += n + 1; +} + +/* + - haschr - does a cvec contain this chr? + ^ static int haschr(struct cvec *, pchr); + */ +static int /* predicate */ +haschr(cv, c) +struct cvec *cv; +pchr c; +{ + int i; + chr *p; + + for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) + if (*p == c) + return 1; + return 0; +} + +/* + - getcvec - get a cvec, remembering it as v->cv + ^ static struct cvec *getcvec(struct vars *, int, int); + */ +static struct cvec * +getcvec(v, nchrs, nmcces) +struct vars *v; +int nchrs; /* to hold this many chrs... */ +int nmcces; /* ... 
and this many MCCEs */ +{ + if (v->cv != NULL && nchrs <= v->cv->chrspace && + nmcces <= v->cv->mccespace) + return clearcvec(v->cv); + + if (v->cv != NULL) + freecvec(v->cv); + v->cv = newcvec(nchrs, nmcces); + if (v->cv == NULL) + ERR(REG_ESPACE); + + return v->cv; +} + +/* + - freecvec - free a cvec + ^ static VOID freecvec(struct cvec *); + */ +static VOID +freecvec(cv) +struct cvec *cv; +{ + FREE(cv); +} diff --git a/generic/lex.c b/generic/regc_lex.c index 7ae3ccc..820b404 100644 --- a/generic/lex.c +++ b/generic/regc_lex.c @@ -1,36 +1,6 @@ /* - * lex -- - * - * Regexp package file: lexical analyzer - #included in other source - * - * Copyright (c) 1998 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., and Sun Microsystems Inc., none of - * whom are responsible for the results. The author thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Copyright (c) 1998 by Sun Microsystems, Inc. 
- * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: lex.c,v 1.1.2.2 1998/10/03 01:56:40 stanton Exp $ + * lexical analyzer + * This file is #included by regcomp.c. */ /* scanning macros (know about v) */ @@ -58,8 +28,11 @@ #define L_CEL 7 /* collating element */ #define L_ECL 8 /* equivalence class */ #define L_CCL 9 /* character class */ -#define INTO(c) (v->lexcon = (c)) -#define _IN(con) (v->lexcon == (con)) +#define INTOCON(c) (v->lexcon = (c)) +#define INCON(con) (v->lexcon == (con)) + +/* construct pointer past end of chr array */ +#define ENDOF(array) ((array) + sizeof(array)/sizeof(chr)) /* - lexstart - set up lexical stuff, scan leading options @@ -67,19 +40,20 @@ */ static VOID lexstart(v) -register struct vars *v; +struct vars *v; { prefixes(v); /* may turn on new type bits etc. */ NOERR(); if (v->cflags&REG_QUOTE) { - v->cflags &= ~(REG_EXTENDED|REG_ADVF|REG_EXPANDED); - INTO(L_Q); - } else if (v->cflags&REG_EXTENDED) - INTO(L_ERE); - else { - v->cflags &= ~REG_ADVF; - INTO(L_BRE); + assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE))); + INTOCON(L_Q); + } else if (v->cflags&REG_EXTENDED) { + assert(!(v->cflags&REG_QUOTE)); + INTOCON(L_ERE); + } else { + assert(!(v->cflags&(REG_QUOTE|REG_ADVF))); + INTOCON(L_BRE); } v->nexttype = EMPTY; /* remember we were at the start */ @@ -104,11 +78,14 @@ struct vars *v; case CHR('?'): /* "***?" 
error, msg shows version */ ERR(REG_BADPAT); return; /* proceed no further */ + break; case CHR('='): /* "***=" shifts to literal string */ NOTE(REG_UNONPOSIX); v->cflags |= REG_QUOTE; + v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE); v->now += 4; return; /* and there can be no more prefixes */ + break; case CHR(':'): /* "***:" shifts to AREs */ NOTE(REG_UNONPOSIX); v->cflags |= REG_ADVANCED; @@ -117,26 +94,28 @@ struct vars *v; default: /* otherwise *** is just an error */ ERR(REG_BADRPT); return; + break; } - /* BREs and plain EREs don't get any other favors */ + /* BREs and EREs don't get embedded options */ if ((v->cflags&REG_ADVANCED) != REG_ADVANCED) return; - /* embedded options */ - if (HAVE(3) && NEXT2('(', '?') && iswalpha(*(v->now + 2))) { + /* embedded options (AREs only) */ + if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) { NOTE(REG_UNONPOSIX); v->now += 2; - for (; !ATEOS() && iswalpha(*v->now); v->now++) + for (; !ATEOS() && iscalpha(*v->now); v->now++) switch (*v->now) { case CHR('b'): /* BREs (but why???) */ - v->cflags &= ~REG_EXTENDED; + v->cflags &= ~(REG_ADVANCED|REG_QUOTE); break; case CHR('c'): /* case sensitive */ v->cflags &= ~REG_ICASE; break; case CHR('e'): /* plain EREs */ - v->cflags &= ~REG_ADVF; + v->cflags |= REG_EXTENDED; + v->cflags &= ~(REG_ADVF|REG_QUOTE); break; case CHR('i'): /* case insensitive */ v->cflags |= REG_ICASE; @@ -151,6 +130,7 @@ struct vars *v; break; case CHR('q'): /* literal string */ v->cflags |= REG_QUOTE; + v->cflags &= ~REG_ADVANCED; break; case CHR('s'): /* single line, \n ordinary */ v->cflags &= ~REG_NEWLINE; @@ -174,6 +154,8 @@ struct vars *v; return; } v->now++; + if (v->cflags&REG_QUOTE) + v->cflags &= ~(REG_EXPANDED|REG_NEWLINE); } } @@ -181,67 +163,68 @@ struct vars *v; - lexnest - "call a subroutine", interpolating string at the lexical level * Note, this is not a very general facility. There are a number of * implicit assumptions about what sorts of strings can be subroutines. 
- ^ static VOID lexnest(struct vars *, chr *); + ^ static VOID lexnest(struct vars *, chr *, chr *); */ static VOID -lexnest(v, s) +lexnest(v, beginp, endp) struct vars *v; -chr *s; +chr *beginp; /* start of interpolation */ +chr *endp; /* one past end of interpolation */ { assert(v->savenow == NULL); /* only one level of nesting */ v->savenow = v->now; v->savestop = v->stop; - v->now = s; - v->stop = s + wcslen(s); + v->now = beginp; + v->stop = endp; } /* - * string CONSTants to interpolate as expansions of things like \d + * string constants to interpolate as expansions of things like \d */ static chr backd[] = { /* \d */ CHR('['), CHR('['), CHR(':'), CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), - CHR(':'), CHR(']'), CHR(']'), CHR('\0') + CHR(':'), CHR(']'), CHR(']') }; static chr backD[] = { /* \D */ CHR('['), CHR('^'), CHR('['), CHR(':'), CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), - CHR(':'), CHR(']'), CHR(']'), CHR('\0') + CHR(':'), CHR(']'), CHR(']') }; static chr brbackd[] = { /* \d within brackets */ CHR('['), CHR(':'), CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), - CHR(':'), CHR(']'), CHR('\0') + CHR(':'), CHR(']') }; static chr backs[] = { /* \s */ CHR('['), CHR('['), CHR(':'), CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), - CHR(':'), CHR(']'), CHR(']'), CHR('\0') + CHR(':'), CHR(']'), CHR(']') }; static chr backS[] = { /* \S */ CHR('['), CHR('^'), CHR('['), CHR(':'), CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), - CHR(':'), CHR(']'), CHR(']'), CHR('\0') + CHR(':'), CHR(']'), CHR(']') }; static chr brbacks[] = { /* \s within brackets */ CHR('['), CHR(':'), CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), - CHR(':'), CHR(']'), CHR('\0') + CHR(':'), CHR(']') }; static chr backw[] = { /* \w */ CHR('['), CHR('['), CHR(':'), CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), - CHR(':'), CHR(']'), CHR('_'), CHR(']'), CHR('\0') + CHR(':'), CHR(']'), CHR('_'), CHR(']') }; static chr backW[] = { /* \W */ CHR('['), CHR('^'), CHR('['), 
CHR(':'), CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), - CHR(':'), CHR(']'), CHR('_'), CHR(']'), CHR('\0') + CHR(':'), CHR(']'), CHR('_'), CHR(']') }; static chr brbackw[] = { /* \w within brackets */ CHR('['), CHR(':'), CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), - CHR(':'), CHR(']'), CHR('_'), CHR('\0') + CHR(':'), CHR(']'), CHR('_') }; /* @@ -253,7 +236,7 @@ static VOID lexword(v) struct vars *v; { - lexnest(v, backw); + lexnest(v, backw, ENDOF(backw)); } /* @@ -262,9 +245,9 @@ struct vars *v; */ static int /* 1 normal, 0 failure */ next(v) -register struct vars *v; +struct vars *v; { - register chr c; + chr c; /* errors yield an infinite sequence of failures */ if (ISERR()) @@ -298,14 +281,17 @@ register struct vars *v; case L_BRE: case L_Q: RET(EOS); + break; case L_EBND: case L_BBND: FAILW(REG_EBRACE); + break; case L_BRACK: case L_CEL: case L_ECL: case L_CCL: FAILW(REG_EBRACK); + break; } assert(NOTREACHED); } @@ -317,22 +303,26 @@ register struct vars *v; switch (v->lexcon) { case L_BRE: /* punt BREs to separate function */ return brenext(v, c); + break; case L_ERE: /* see below */ break; case L_Q: /* literal strings are easy */ RETV(PLAIN, c); + break; case L_BBND: /* bounds are fairly simple */ case L_EBND: switch (c) { case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): case CHR('9'): - RETV(DIGIT, (chr) DIGITVAL(c)); + RETV(DIGIT, (chr)DIGITVAL(c)); + break; case CHR(','): RET(','); + break; case CHR('}'): /* ERE bound ends with } */ - if (_IN(L_EBND)) { - INTO(L_ERE); + if (INCON(L_EBND)) { + INTOCON(L_ERE); if ((v->cflags&REG_ADVF) && NEXT1('?')) { v->now++; NOTE(REG_UNONPOSIX); @@ -341,25 +331,32 @@ register struct vars *v; RETV('}', 1); } else FAILW(REG_BADBR); + break; case CHR('\\'): /* BRE bound ends with \} */ - if (_IN(L_BBND) && NEXT1('}')) { + if (INCON(L_BBND) && NEXT1('}')) { v->now++; - INTO(L_BRE); + INTOCON(L_BRE); RET('}'); } else 
FAILW(REG_BADBR); + break; default: FAILW(REG_BADBR); + break; } + assert(NOTREACHED); + break; case L_BRACK: /* brackets are not too hard */ switch (c) { case CHR(']'): if (LASTTYPE('[')) RETV(PLAIN, c); else { - INTO((v->cflags&REG_EXTENDED) ? L_ERE : L_BRE); + INTOCON((v->cflags&REG_EXTENDED) ? + L_ERE : L_BRE); RET(']'); } + break; case CHR('\\'): NOTE(REG_UBBS); if (!(v->cflags&REG_ADVF)) @@ -367,85 +364,109 @@ register struct vars *v; NOTE(REG_UNONPOSIX); if (ATEOS()) FAILW(REG_EESCAPE); - (VOID) lexescape(v); + (DISCARD) lexescape(v); switch (v->nexttype) { /* not all escapes okay here */ case PLAIN: return 1; + break; case CCLASS: switch (v->nextvalue) { - case 'd': lexnest(v, brbackd); break; - case 's': lexnest(v, brbacks); break; - case 'w': lexnest(v, brbackw); break; + case 'd': + lexnest(v, brbackd, ENDOF(brbackd)); + break; + case 's': + lexnest(v, brbacks, ENDOF(brbacks)); + break; + case 'w': + lexnest(v, brbackw, ENDOF(brbackw)); + break; default: FAILW(REG_EESCAPE); + break; } /* lexnest done, back up and try again */ v->nexttype = v->lasttype; return next(v); + break; } /* not one of the acceptable escapes */ FAILW(REG_EESCAPE); + break; case CHR('-'): if (LASTTYPE('[') || NEXT1(']')) RETV(PLAIN, c); else RETV(RANGE, c); + break; case CHR('['): if (ATEOS()) FAILW(REG_EBRACK); switch (*v->now++) { case CHR('.'): - INTO(L_CEL); + INTOCON(L_CEL); /* might or might not be locale-specific */ RET(COLLEL); + break; case CHR('='): - INTO(L_ECL); + INTOCON(L_ECL); NOTE(REG_ULOCALE); RET(ECLASS); + break; case CHR(':'): - INTO(L_CCL); + INTOCON(L_CCL); NOTE(REG_ULOCALE); RET(CCLASS); + break; default: /* oops */ v->now--; RETV(PLAIN, c); + break; } + assert(NOTREACHED); + break; default: RETV(PLAIN, c); + break; } + assert(NOTREACHED); + break; case L_CEL: /* collating elements are easy */ if (c == CHR('.') && NEXT1(']')) { v->now++; - INTO(L_BRACK); + INTOCON(L_BRACK); RETV(END, '.'); } else RETV(PLAIN, c); + break; case L_ECL: /* ditto equivalence classes */ if 
(c == CHR('=') && NEXT1(']')) { v->now++; - INTO(L_BRACK); + INTOCON(L_BRACK); RETV(END, '='); } else RETV(PLAIN, c); + break; case L_CCL: /* ditto character classes */ if (c == CHR(':') && NEXT1(']')) { v->now++; - INTO(L_BRACK); + INTOCON(L_BRACK); RETV(END, ':'); } else RETV(PLAIN, c); + break; default: assert(NOTREACHED); break; } /* that got rid of everything except EREs */ - assert(_IN(L_ERE)); + assert(INCON(L_ERE)); /* deal with EREs, except for backslashes */ switch (c) { case CHR('|'): RET('|'); + break; case CHR('*'): if ((v->cflags&REG_ADVF) && NEXT1('?')) { v->now++; @@ -453,6 +474,7 @@ register struct vars *v; RETV('*', 0); } RETV('*', 1); + break; case CHR('+'): if ((v->cflags&REG_ADVF) && NEXT1('?')) { v->now++; @@ -460,6 +482,7 @@ register struct vars *v; RETV('+', 0); } RETV('+', 1); + break; case CHR('?'): if ((v->cflags&REG_ADVF) && NEXT1('?')) { v->now++; @@ -467,18 +490,21 @@ register struct vars *v; RETV('?', 0); } RETV('?', 1); + break; case CHR('{'): /* bounds start or plain character */ if (v->cflags&REG_EXPANDED) skip(v); - if (ATEOS() || !iswdigit(*v->now)) { + if (ATEOS() || !iscdigit(*v->now)) { NOTE(REG_UBRACES); NOTE(REG_UUNSPEC); RETV(PLAIN, c); } else { NOTE(REG_UBOUNDS); - INTO(L_EBND); + INTOCON(L_EBND); RET('{'); } + assert(NOTREACHED); + break; case CHR('('): /* parenthesis, or advanced extension */ if ((v->cflags&REG_ADVF) && NEXT1('?')) { NOTE(REG_UNONPOSIX); @@ -486,6 +512,7 @@ register struct vars *v; switch (*v->now++) { case CHR(':'): /* non-capturing paren */ RETV('(', 0); + break; case CHR('#'): /* comment */ while (!ATEOS() && *v->now != CHR(')')) v->now++; @@ -493,28 +520,37 @@ register struct vars *v; v->now++; assert(v->nexttype == v->lasttype); return next(v); + break; case CHR('='): /* positive lookahead */ NOTE(REG_ULOOKAHEAD); RETV(LACON, 1); + break; case CHR('!'): /* negative lookahead */ NOTE(REG_ULOOKAHEAD); RETV(LACON, 0); + break; case CHR('<'): /* prefer short */ RETV(PREFER, 0); + break; case CHR('>'): /* prefer long 
*/ RETV(PREFER, 1); + break; default: FAILW(REG_BADRPT); + break; } + assert(NOTREACHED); } - if (v->cflags&REG_NOSUB) { - RETV('(', 0); /* all parens non-capturing */ - } - RETV('(', 1); + if (v->cflags&REG_NOSUB) + RETV('(', 0); /* all parens non-capturing */ + else + RETV('(', 1); + break; case CHR(')'): if (LASTTYPE('(')) NOTE(REG_UUNSPEC); RETV(')', c); + break; case CHR('['): /* easy except for [[:<:]] and [[:>:]] */ if (HAVE(6) && *(v->now+0) == CHR('[') && *(v->now+1) == CHR(':') && @@ -528,49 +564,55 @@ register struct vars *v; NOTE(REG_UNONPOSIX); RET((c == CHR('<')) ? '<' : '>'); } - INTO(L_BRACK); + INTOCON(L_BRACK); if (NEXT1('^')) { v->now++; RETV('[', 0); } RETV('[', 1); + break; case CHR('.'): RET('.'); + break; case CHR('^'): RET('^'); + break; case CHR('$'): RET('$'); + break; case CHR('\\'): /* mostly punt backslashes to code below */ if (ATEOS()) FAILW(REG_EESCAPE); break; default: /* ordinary character */ RETV(PLAIN, c); + break; } /* ERE backslash handling; backslash already eaten */ assert(!ATEOS()); if (!(v->cflags&REG_ADVF)) { /* only AREs have non-trivial escapes */ - if (iswalnum(*v->now)) { + if (iscalnum(*v->now)) { NOTE(REG_UBSALNUM); NOTE(REG_UUNSPEC); } RETV(PLAIN, *v->now++); } - (VOID) lexescape(v); + (DISCARD) lexescape(v); if (ISERR()) FAILW(REG_EESCAPE); if (v->nexttype == CCLASS) { /* fudge at lexical level */ switch (v->nextvalue) { - case 'd': lexnest(v, backd); break; - case 'D': lexnest(v, backD); break; - case 's': lexnest(v, backs); break; - case 'S': lexnest(v, backS); break; - case 'w': lexnest(v, backw); break; - case 'W': lexnest(v, backW); break; + case 'd': lexnest(v, backd, ENDOF(backd)); break; + case 'D': lexnest(v, backD, ENDOF(backD)); break; + case 's': lexnest(v, backs, ENDOF(backs)); break; + case 'S': lexnest(v, backS, ENDOF(backS)); break; + case 'w': lexnest(v, backw, ENDOF(backw)); break; + case 'W': lexnest(v, backW, ENDOF(backW)); break; default: assert(NOTREACHED); FAILW(REG_ASSERT); + break; } /* lexnest 
done, back up and try again */ v->nexttype = v->lasttype; @@ -591,10 +633,10 @@ struct vars *v; { chr c; static chr alert[] = { - CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t'), CHR('\0') + CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t') }; static chr esc[] = { - CHR('E'), CHR('S'), CHR('C'), CHR('\0') + CHR('E'), CHR('S'), CHR('C') }; chr *save; @@ -602,79 +644,102 @@ struct vars *v; assert(!ATEOS()); c = *v->now++; - if (!iswalnum(c)) + if (!iscalnum(c)) RETV(PLAIN, c); NOTE(REG_UNONPOSIX); switch (c) { case CHR('a'): - RETV(PLAIN, chrnamed(v, alert, CHR('\007'))); + RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007'))); + break; case CHR('A'): RETV(SBEGIN, 0); + break; case CHR('b'): RETV(PLAIN, CHR('\b')); + break; + case CHR('B'): + RETV(PLAIN, CHR('\\')); + break; case CHR('c'): NOTE(REG_UUNPORT); if (ATEOS()) FAILW(REG_EESCAPE); - RETV(PLAIN, (chr) (*v->now++ & 037)); + RETV(PLAIN, (chr)(*v->now++ & 037)); + break; case CHR('d'): NOTE(REG_ULOCALE); RETV(CCLASS, 'd'); + break; case CHR('D'): NOTE(REG_ULOCALE); RETV(CCLASS, 'D'); + break; case CHR('e'): NOTE(REG_UUNPORT); - RETV(PLAIN, chrnamed(v, esc, CHR('\033'))); - case CHR('E'): - RETV(PLAIN, CHR('\\')); + RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033'))); + break; case CHR('f'): RETV(PLAIN, CHR('\f')); + break; case CHR('n'): RETV(PLAIN, CHR('\n')); + break; case CHR('r'): RETV(PLAIN, CHR('\r')); + break; case CHR('s'): NOTE(REG_ULOCALE); RETV(CCLASS, 's'); + break; case CHR('S'): NOTE(REG_ULOCALE); RETV(CCLASS, 'S'); + break; case CHR('t'): RETV(PLAIN, CHR('\t')); + break; case CHR('u'): c = lexdigits(v, 16, 4, 4); if (ISERR()) FAILW(REG_EESCAPE); RETV(PLAIN, c); + break; case CHR('U'): c = lexdigits(v, 16, 8, 8); if (ISERR()) FAILW(REG_EESCAPE); RETV(PLAIN, c); + break; case CHR('v'): RETV(PLAIN, CHR('\v')); + break; case CHR('w'): NOTE(REG_ULOCALE); RETV(CCLASS, 'w'); + break; case CHR('W'): NOTE(REG_ULOCALE); RETV(CCLASS, 'W'); + break; case CHR('x'): NOTE(REG_UUNPORT); c = lexdigits(v, 
16, 1, 255); /* REs >255 long outside spec */ if (ISERR()) FAILW(REG_EESCAPE); RETV(PLAIN, c); + break; case CHR('y'): NOTE(REG_ULOCALE); RETV(WBDRY, 0); + break; case CHR('Y'): NOTE(REG_ULOCALE); RETV(NWBDRY, 0); + break; case CHR('Z'): RETV(SEND, 0); + break; case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): case CHR('9'): @@ -686,7 +751,7 @@ struct vars *v; /* ugly heuristic (first test is "exactly 1 digit?") */ if (v->now - save == 0 || (int)c <= v->nsubexp) { NOTE(REG_UBACKREF); - RETV(BACKREF, (chr) c); + RETV(BACKREF, (chr)c); } /* oops, doesn't look like it's a backref after all... */ v->now = save; @@ -698,10 +763,13 @@ struct vars *v; if (ISERR()) FAILW(REG_EESCAPE); RETV(PLAIN, c); + break; default: - assert(iswalpha(c)); + assert(iscalpha(c)); FAILW(REG_EESCAPE); /* unknown alphabetic escape */ + break; } + assert(NOTREACHED); } /* @@ -715,7 +783,7 @@ int base; int minlen; int maxlen; { - uchr n; /* unsigned to aVOID overflow misbehavior */ + uchr n; /* unsigned to avoid overflow misbehavior */ int len; chr c; int d; @@ -764,16 +832,17 @@ int maxlen; */ static int /* 1 normal, 0 failure */ brenext(v, pc) -register struct vars *v; -register pchr pc; +struct vars *v; +pchr pc; { - register chr c = (chr) pc; + chr c = (chr)pc; switch (c) { case CHR('*'): if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^')) RETV(PLAIN, c); RET('*'); + break; case CHR('['): if (HAVE(6) && *(v->now+0) == CHR('[') && *(v->now+1) == CHR(':') && @@ -787,14 +856,16 @@ register pchr pc; NOTE(REG_UNONPOSIX); RET((c == CHR('<')) ? 
'<' : '>'); } - INTO(L_BRACK); + INTOCON(L_BRACK); if (NEXT1('^')) { v->now++; RETV('[', 0); } RETV('[', 1); + break; case CHR('.'): RET('.'); + break; case CHR('^'): if (LASTTYPE(EMPTY)) RET('^'); @@ -803,6 +874,7 @@ register pchr pc; RET('^'); } RETV(PLAIN, c); + break; case CHR('$'): if (v->cflags&REG_EXPANDED) skip(v); @@ -813,10 +885,12 @@ register pchr pc; RET('$'); } RETV(PLAIN, c); + break; case CHR('\\'): break; /* see below */ default: RETV(PLAIN, c); + break; } assert(c == CHR('\\')); @@ -827,31 +901,40 @@ register pchr pc; c = *v->now++; switch (c) { case CHR('{'): - INTO(L_BBND); + INTOCON(L_BBND); NOTE(REG_UBOUNDS); RET('{'); + break; case CHR('('): RETV('(', 1); + break; case CHR(')'): RETV(')', c); + break; case CHR('<'): NOTE(REG_UNONPOSIX); RET('<'); + break; case CHR('>'): NOTE(REG_UNONPOSIX); RET('>'); + break; case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): case CHR('9'): NOTE(REG_UBACKREF); - RETV(BACKREF, (chr) DIGITVAL(c)); + RETV(BACKREF, (chr)DIGITVAL(c)); + break; default: - if (iswalnum(c)) { + if (iscalnum(c)) { NOTE(REG_UBSALNUM); NOTE(REG_UUNSPEC); } RETV(PLAIN, c); + break; } + + assert(NOTREACHED); } /* @@ -867,14 +950,14 @@ struct vars *v; assert(v->cflags&REG_EXPANDED); for (;;) { - while (!ATEOS() && iswspace(*v->now)) + while (!ATEOS() && iscspace(*v->now)) v->now++; if (ATEOS() || *v->now != CHR('#')) break; /* NOTE BREAK OUT */ assert(NEXT1('#')); while (!ATEOS() && *v->now != CHR('\n')) v->now++; - /* leave the newline to be picked up by the iswspace loop */ + /* leave the newline to be picked up by the iscspace loop */ } if (v->now != start) @@ -884,7 +967,7 @@ struct vars *v; /* - newline - return the chr for a newline * This helps confine use of CHR to this source file. 
- ^ static chr newline(VOID); + ^ static chr newline(NOPARMS); */ static chr newline() @@ -895,7 +978,7 @@ newline() /* - ch - return the chr sequence for locale.c's fake collating element ch * This helps confine use of CHR to this source file. - ^ static chr *ch(VOID); + ^ static chr *ch(NOPARMS); */ static chr * ch() @@ -909,12 +992,13 @@ ch() - chrnamed - return the chr known by a given (chr string) name * The code is a bit clumsy, but this routine gets only such specialized * use that it hardly matters. - ^ static chr chrnamed(struct vars *, chr *, pchr); + ^ static chr chrnamed(struct vars *, chr *, chr *, pchr); */ static chr -chrnamed(v, name, lastresort) +chrnamed(v, startp, endp, lastresort) struct vars *v; -chr *name; +chr *startp; /* start of name */ +chr *endp; /* just past end of name */ pchr lastresort; /* what to return if name lookup fails */ { celt c; @@ -924,15 +1008,15 @@ pchr lastresort; /* what to return if name lookup fails */ errsave = v->err; v->err = 0; - c = element(v, name, name+wcslen(name)); + c = element(v, startp, endp); e = v->err; v->err = errsave; if (e != 0) - return (chr) lastresort; + return (chr)lastresort; cv = range(v, c, c, 0); if (cv->nchrs == 0) - return (chr) lastresort; + return (chr)lastresort; return cv->chrs[0]; } diff --git a/generic/regc_locale.c b/generic/regc_locale.c new file mode 100644 index 0000000..769241f --- /dev/null +++ b/generic/regc_locale.c @@ -0,0 +1,426 @@ +/* + * locale-specific stuff, including MCCE handling + * This file is #included by regcomp.c. + * + * No MCCEs for Tcl. The handling of character names and classes is + * still ASCII-centric, and needs to be extended to handle full Unicode. 
+ */ + +/* ASCII character-name table */ +static struct cname { + char *name; + char code; +} cnames[] = { + {"NUL", '\0'}, + {"SOH", '\001'}, + {"STX", '\002'}, + {"ETX", '\003'}, + {"EOT", '\004'}, + {"ENQ", '\005'}, + {"ACK", '\006'}, + {"BEL", '\007'}, + {"alert", '\007'}, + {"BS", '\010'}, + {"backspace", '\b'}, + {"HT", '\011'}, + {"tab", '\t'}, + {"LF", '\012'}, + {"newline", '\n'}, + {"VT", '\013'}, + {"vertical-tab", '\v'}, + {"FF", '\014'}, + {"form-feed", '\f'}, + {"CR", '\015'}, + {"carriage-return", '\r'}, + {"SO", '\016'}, + {"SI", '\017'}, + {"DLE", '\020'}, + {"DC1", '\021'}, + {"DC2", '\022'}, + {"DC3", '\023'}, + {"DC4", '\024'}, + {"NAK", '\025'}, + {"SYN", '\026'}, + {"ETB", '\027'}, + {"CAN", '\030'}, + {"EM", '\031'}, + {"SUB", '\032'}, + {"ESC", '\033'}, + {"IS4", '\034'}, + {"FS", '\034'}, + {"IS3", '\035'}, + {"GS", '\035'}, + {"IS2", '\036'}, + {"RS", '\036'}, + {"IS1", '\037'}, + {"US", '\037'}, + {"space", ' '}, + {"exclamation-mark", '!'}, + {"quotation-mark", '"'}, + {"number-sign", '#'}, + {"dollar-sign", '$'}, + {"percent-sign", '%'}, + {"ampersand", '&'}, + {"apostrophe", '\''}, + {"left-parenthesis", '('}, + {"right-parenthesis", ')'}, + {"asterisk", '*'}, + {"plus-sign", '+'}, + {"comma", ','}, + {"hyphen", '-'}, + {"hyphen-minus", '-'}, + {"period", '.'}, + {"full-stop", '.'}, + {"slash", '/'}, + {"solidus", '/'}, + {"zero", '0'}, + {"one", '1'}, + {"two", '2'}, + {"three", '3'}, + {"four", '4'}, + {"five", '5'}, + {"six", '6'}, + {"seven", '7'}, + {"eight", '8'}, + {"nine", '9'}, + {"colon", ':'}, + {"semicolon", ';'}, + {"less-than-sign", '<'}, + {"equals-sign", '='}, + {"greater-than-sign", '>'}, + {"question-mark", '?'}, + {"commercial-at", '@'}, + {"left-square-bracket", '['}, + {"backslash", '\\'}, + {"reverse-solidus", '\\'}, + {"right-square-bracket", ']'}, + {"circumflex", '^'}, + {"circumflex-accent", '^'}, + {"underscore", '_'}, + {"low-line", '_'}, + {"grave-accent", '`'}, + {"left-brace", '{'}, + 
{"left-curly-bracket", '{'}, + {"vertical-line", '|'}, + {"right-brace", '}'}, + {"right-curly-bracket", '}'}, + {"tilde", '~'}, + {"DEL", '\177'}, + {NULL, 0} +}; + +/* ASCII character-class table */ +static struct cclass { + char *name; + char *chars; + int hasch; +} cclasses[] = { + {"alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789", 1}, + {"alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", + 1}, + {"blank", " \t", 0}, + {"cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ +\25\26\27\30\31\32\33\34\35\36\37\177", 0}, + {"digit", "0123456789", 0}, + {"graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + 1}, + {"lower", "abcdefghijklmnopqrstuvwxyz", + 1}, + {"print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", + 1}, + {"punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + 0}, + {"space", "\t\n\v\f\r ", 0}, + {"upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", + 0}, + {"xdigit", "0123456789ABCDEFabcdef", + 0}, + {NULL, 0, 0} +}; + +#define CH NOCELT + +/* + - nmcces - how many distinct MCCEs are there? + ^ static int nmcces(struct vars *); + */ +static int +nmcces(v) +struct vars *v; +{ + return 0; +} + +/* + - nleaders - how many chrs can be first chrs of MCCEs? 
+ ^ static int nleaders(struct vars *); + */ +static int +nleaders(v) +struct vars *v; +{ + return 0; +} + +/* + - allmcces - return a cvec with all the MCCEs of the locale + ^ static struct cvec *allmcces(struct vars *, struct cvec *); + */ +static struct cvec * +allmcces(v, cv) +struct vars *v; +struct cvec *cv; /* this is supposed to have enough room */ +{ + return clearcvec(cv); +} + +/* + - element - map collating-element name to celt + ^ static celt element(struct vars *, chr *, chr *); + */ +static celt +element(v, startp, endp) +struct vars *v; +chr *startp; /* points to start of name */ +chr *endp; /* points just past end of name */ +{ + struct cname *cn; + size_t len; + Tcl_DString ds; + char *np; + + /* generic: one-chr names stand for themselves */ + assert(startp < endp); + len = endp - startp; + if (len == 1) + return *startp; + + NOTE(REG_ULOCALE); + + /* search table */ + Tcl_DStringInit(&ds); + np = TclUniCharToUtfDString(startp, (int)len, &ds); + for (cn = cnames; cn->name != NULL; cn++) + if (strlen(cn->name) == len && strncmp(cn->name, np, len) == 0) + break; /* NOTE BREAK OUT */ + Tcl_DStringFree(&ds); + if (cn->name != NULL) + return CHR(cn->code); + + /* couldn't find it */ + ERR(REG_ECOLLATE); + return 0; +} + +/* + - range - supply cvec for a range, including legality check + ^ static struct cvec *range(struct vars *, celt, celt, int); + */ +static struct cvec * +range(v, a, b, cases) +struct vars *v; +celt a; +celt b; /* might equal a */ +int cases; /* case-independent? 
*/ +{ + int nchrs; + struct cvec *cv; + celt c, lc, uc, tc; + + if (a != b && !before(a, b)) { + ERR(REG_ERANGE); + return NULL; + } + + nchrs = b - a + 1; + if (cases) + nchrs *= 2; + cv = getcvec(v, nchrs, 0); + NOERRN(); + + for (c = a; c <= b; c++) { + addchr(cv, c); + if (cases) { + lc = Tcl_UniCharToLower((chr)c); + uc = Tcl_UniCharToUpper((chr)c); + tc = Tcl_UniCharToTitle((chr)c); + if (c != lc) { + addchr(cv, lc); + } + if (c != uc) { + addchr(cv, uc); + } + if (c != tc && tc != uc) { + addchr(cv, tc); + } + } + } + + return cv; +} + +/* + - before - is celt x before celt y, for purposes of range legality? + ^ static int before(celt, celt); + */ +static int /* predicate */ +before(x, y) +celt x; +celt y; +{ + /* trivial because no MCCEs */ + if (x < y) + return 1; + return 0; +} + +/* + - eclass - supply cvec for an equivalence class + * Must include case counterparts on request. + ^ static struct cvec *eclass(struct vars *, celt, int); + */ +static struct cvec * +eclass(v, c, cases) +struct vars *v; +celt c; +int cases; /* all cases? */ +{ + struct cvec *cv; + + /* crude fake equivalence class for testing */ + if ((v->cflags&REG_FAKEEC) && c == 'x') { + cv = getcvec(v, 4, 0); + addchr(cv, (chr)'x'); + addchr(cv, (chr)'y'); + if (cases) { + addchr(cv, (chr)'X'); + addchr(cv, (chr)'Y'); + } + return cv; + } + + /* otherwise, none */ + if (cases) + return allcases(v, c); + cv = getcvec(v, 1, 0); + assert(cv != NULL); + addchr(cv, (chr)c); + return cv; +} + +/* + - cclass - supply cvec for a character class + * Must include case counterparts on request. + ^ static struct cvec *cclass(struct vars *, chr *, chr *, int); + */ +static struct cvec * +cclass(v, startp, endp, cases) +struct vars *v; +chr *startp; /* where the name starts */ +chr *endp; /* just past the end of the name */ +int cases; /* case-independent? 
*/ +{ + size_t len; + char *p; + struct cclass *cc; + struct cvec *cv; + Tcl_DString ds; + char *np; + + /* find the name */ + len = endp - startp; + Tcl_DStringInit(&ds); + np = TclUniCharToUtfDString(startp, (int)len, &ds); + if (cases && len == 5 && (strncmp("lower", np, 5) == 0 || + strncmp("upper", np, 5) == 0)) + np = "alpha"; + for (cc = cclasses; cc->name != NULL; cc++) + if (strlen(cc->name) == len && strncmp(cc->name, np, len) == 0) + break; /* NOTE BREAK OUT */ + Tcl_DStringFree(&ds); + if (cc->name == NULL) { + ERR(REG_ECTYPE); + return NULL; + } + + /* set up vector */ + cv = getcvec(v, (int)strlen(cc->chars), 0); + if (cv == NULL) { + ERR(REG_ESPACE); + return NULL; + } + + /* fill it in */ + for (p = cc->chars; *p != '\0'; p++) + addchr(cv, (chr)*p); + + return cv; +} + +/* + - allcases - supply cvec for all case counterparts of a chr (including itself) + * This is a shortcut, preferably an efficient one, for simple characters; + * messy cases are done via range(). + ^ static struct cvec *allcases(struct vars *, pchr); + */ +static struct cvec * +allcases(v, pc) +struct vars *v; +pchr pc; +{ + struct cvec *cv = getcvec(v, 2, 0); + chr c = (chr)pc; + + assert(cv != NULL); + addchr(cv, c); + if (TclUniCharIsUpper(c)) + addchr(cv, Tcl_UniCharToLower(c)); + else if (TclUniCharIsLower(c)) + addchr(cv, Tcl_UniCharToUpper(c)); + + return cv; +} + +/* + - cmp - chr-substring compare + * Backrefs need this. It should preferably be efficient. + * Note that it does not need to report anything except equal/unequal. + * Note also that the length is exact, and the comparison should not + * stop at embedded NULs! + ^ static int cmp(CONST chr *, CONST chr *, size_t); + */ +static int /* 0 for equal, nonzero for unequal */ +cmp(x, y, len) +CONST chr *x; +CONST chr *y; +size_t len; /* exact length of comparison */ +{ + return memcmp(VS(x), VS(y), len*sizeof(chr)); +} + +/* + - casecmp - case-independent chr-substring compare + * REG_ICASE backrefs need this. 
It should preferably be efficient. + * Note that it does not need to report anything except equal/unequal. + * Note also that the length is exact, and the comparison should not + * stop at embedded NULs! + ^ static int casecmp(CONST chr *, CONST chr *, size_t); + */ +static int /* 0 for equal, nonzero for unequal */ +casecmp(x, y, len) +CONST chr *x; +CONST chr *y; +size_t len; /* exact length of comparison */ +{ + size_t i; + CONST chr *xp; + CONST chr *yp; + + for (xp = x, yp = y, i = len; i > 0; i--) + if (Tcl_UniCharToLower(*xp++) != Tcl_UniCharToLower(*yp++)) + return 1; + return 0; +} diff --git a/generic/nfa.c b/generic/regc_nfa.c index f6b8967..14ee077 100644 --- a/generic/nfa.c +++ b/generic/regc_nfa.c @@ -1,57 +1,29 @@ /* - * nfa.c -- + * NFA utilities. + * This file is #included by regcomp.c. * - * Regexp package file: - * NFA utilities. One or two things that technically ought to be - * in here are actually in color.c, thanks to some incestuous - * relationships in the color chains. - * - * Copyright (c) 1998 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., and Sun Microsystems Inc., none of - * whom are responsible for the results. The author thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Copyright (c) 1998 by Sun Microsystems, Inc. - * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: nfa.c,v 1.1.2.2 1998/10/03 01:56:41 stanton Exp $ + * One or two things that technically ought to be in here + * are actually in color.c, thanks to some incestuous relationships in + * the color chains. */ #define NISERR() VISERR(nfa->v) +#define NERR(e) VERR(nfa->v, (e)) /* - newnfa - set up an NFA - * Caution: colormap must be set up already. 
- ^ static struct nfa *newnfa(struct vars *, struct nfa *); + ^ static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *); */ static struct nfa * /* the NFA, or NULL */ -newnfa(v, parent) +newnfa(v, cm, parent) struct vars *v; +struct colormap *cm; struct nfa *parent; /* NULL if primary NFA */ { struct nfa *nfa; - nfa = (struct nfa *)ckalloc(sizeof(struct nfa)); + nfa = (struct nfa *)MALLOC(sizeof(struct nfa)); if (nfa == NULL) return NULL; @@ -59,6 +31,7 @@ struct nfa *parent; /* NULL if primary NFA */ nfa->slast = NULL; nfa->free = NULL; nfa->nstates = 0; + nfa->cm = cm; nfa->v = v; nfa->bos[0] = nfa->bos[1] = COLORLESS; nfa->eos[0] = nfa->eos[1] = COLORLESS; @@ -72,10 +45,10 @@ struct nfa *parent; /* NULL if primary NFA */ freenfa(nfa); return NULL; } - rainbow(nfa, nfa->v->cm, PLAIN, COLORLESS, nfa->pre, nfa->init); + rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init); newarc(nfa, '^', 1, nfa->pre, nfa->init); newarc(nfa, '^', 0, nfa->pre, nfa->init); - rainbow(nfa, nfa->v->cm, PLAIN, COLORLESS, nfa->final, nfa->post); + rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post); newarc(nfa, '$', 1, nfa->final, nfa->post); newarc(nfa, '$', 0, nfa->final, nfa->post); @@ -109,7 +82,7 @@ struct nfa *nfa; nfa->nstates = -1; nfa->pre = NULL; nfa->post = NULL; - ckfree((char *)nfa); + FREE(nfa); } /* @@ -128,14 +101,11 @@ int flag; s = nfa->free; nfa->free = s->next; } else { - s = (struct state *)ckalloc(sizeof(struct state)); + s = (struct state *)MALLOC(sizeof(struct state)); if (s == NULL) { - VERR(nfa->v, REG_ESPACE); + NERR(REG_ESPACE); return NULL; } - - /* memleak (CCS). 
*/ - s->oas.next = NULL; s->free = &s->oas.a[0]; for (i = 0; i < ABSIZE; i++) { @@ -240,12 +210,12 @@ struct state *s; assert(s->no == FREESTATE); for (ab = s->oas.next; ab != NULL; ab = abnext) { abnext = ab->next; - ckfree((char *)ab); + FREE(ab); } s->ins = NULL; s->outs = NULL; s->next = NULL; - ckfree((char *)s); + FREE(s); } /* @@ -276,7 +246,7 @@ struct state *to; assert(a != NULL); a->type = t; - a->co = (color) co; + a->co = (color)co; a->to = to; a->from = from; @@ -295,7 +265,7 @@ struct state *to; to->nins++; if (COLORED(a) && nfa->parent == NULL) - colorchain(nfa->v->cm, a); + colorchain(nfa->cm, a); return; } @@ -315,9 +285,9 @@ struct state *s; /* if none at hand, get more */ if (s->free == NULL) { - new = (struct arcbatch *)ckalloc(sizeof(struct arcbatch)); + new = (struct arcbatch *)MALLOC(sizeof(struct arcbatch)); if (new == NULL) { - VERR(nfa->v, REG_ESPACE); + NERR(REG_ESPACE); return NULL; } new->next = s->oas.next; @@ -354,7 +324,7 @@ struct arc *victim; /* take it off color chain if necessary */ if (COLORED(victim) && nfa->parent == NULL) - uncolorchain(nfa->v->cm, victim); + uncolorchain(nfa->cm, victim); /* take it off source's out-chain */ assert(from != NULL); @@ -680,10 +650,10 @@ struct nfa *nfa; { /* false colors for BOS, BOL, EOS, EOL */ if (nfa->parent == NULL) { - nfa->bos[0] = pseudocolor(nfa->v->cm); - nfa->bos[1] = pseudocolor(nfa->v->cm); - nfa->eos[0] = pseudocolor(nfa->v->cm); - nfa->eos[1] = pseudocolor(nfa->v->cm); + nfa->bos[0] = pseudocolor(nfa->cm); + nfa->bos[1] = pseudocolor(nfa->cm); + nfa->eos[0] = pseudocolor(nfa->cm); + nfa->eos[1] = pseudocolor(nfa->cm); } else { assert(nfa->parent->bos[0] != COLORLESS); nfa->bos[0] = nfa->parent->bos[0]; @@ -698,42 +668,41 @@ struct nfa *nfa; /* - optimize - optimize an NFA - ^ static VOID optimize(struct nfa *); + ^ static int optimize(struct nfa *, FILE *); */ -static VOID -optimize(nfa) +static int /* re_info bits */ +optimize(nfa, f) struct nfa *nfa; +FILE *f; /* for debug 
output; NULL none */ +{ - int verbose = (nfa->v->cflags&REG_PROGRESS) ? 1 : 0; - int info; + int verbose = (f != NULL) ? 1 : 0; if (verbose) - printf("\ninitial cleanup:\n"); + fprintf(f, "\ninitial cleanup:\n"); cleanup(nfa); /* may simplify situation */ - if (nfa->v->cflags&REG_PROGRESS) - dumpnfa(nfa, stdout); if (verbose) - printf("\nempties:\n"); - fixempties(nfa); /* get rid of EMPTY arcs */ + dumpnfa(nfa, f); + if (verbose) + fprintf(f, "\nempties:\n"); + fixempties(nfa, f); /* get rid of EMPTY arcs */ if (verbose) - printf("\nconstraints:\n"); - pullback(nfa); /* pull back constraints backward */ - pushfwd(nfa); /* push fwd constraints forward */ + fprintf(f, "\nconstraints:\n"); + pullback(nfa, f); /* pull back constraints backward */ + pushfwd(nfa, f); /* push fwd constraints forward */ if (verbose) - printf("\nfinal cleanup:\n"); + fprintf(f, "\nfinal cleanup:\n"); cleanup(nfa); /* final tidying */ - info = analyze(nfa->v, nfa); /* and analysis */ - if (nfa->parent == NULL) - nfa->v->re->re_info |= info; + return analyze(nfa); /* and analysis */ } /* - pullback - pull back constraints backward to (with luck) eliminate them - ^ static VOID pullback(struct nfa *); + ^ static VOID pullback(struct nfa *, FILE *); */ static VOID -pullback(nfa) +pullback(nfa, f) struct nfa *nfa; +FILE *f; /* for debug output; NULL none */ { struct state *s; struct state *nexts; @@ -754,8 +723,8 @@ struct nfa *nfa; assert(nexta == NULL || s->no != FREESTATE); } } - if (progress && (nfa->v->cflags&REG_PROGRESS)) - dumpnfa(nfa, stdout); + if (progress && f != NULL) + dumpnfa(nfa, f); } while (progress && !NISERR()); if (NISERR()) return; @@ -799,7 +768,7 @@ struct arc *con; return 1; } - /* first, clone from state if necessary to aVOID other outarcs */ + /* first, clone from state if necessary to avoid other outarcs */ if (from->nouts > 1) { s = newstate(nfa); if (NISERR()) @@ -846,11 +815,12 @@ struct arc *con; /* - pushfwd - push forward constraints forward to (with luck) eliminate them 
- ^ static VOID pushfwd(struct nfa *); + ^ static VOID pushfwd(struct nfa *, FILE *); */ static VOID -pushfwd(nfa) +pushfwd(nfa, f) struct nfa *nfa; +FILE *f; /* for debug output; NULL none */ { struct state *s; struct state *nexts; @@ -871,8 +841,8 @@ struct nfa *nfa; assert(nexta == NULL || s->no != FREESTATE); } } - if (progress && (nfa->v->cflags&REG_PROGRESS)) - dumpnfa(nfa, stdout); + if (progress && f != NULL) + dumpnfa(nfa, f); } while (progress && !NISERR()); if (NISERR()) return; @@ -916,7 +886,7 @@ struct arc *con; return 1; } - /* first, clone to state if necessary to aVOID other inarcs */ + /* first, clone to state if necessary to avoid other inarcs */ if (to->nins > 1) { s = newstate(nfa); if (NISERR()) @@ -978,11 +948,13 @@ struct arc *a; case CA('^', PLAIN): /* newlines are handled separately */ case CA('$', PLAIN): return INCOMPATIBLE; + break; case CA(AHEAD, PLAIN): /* color constraints meet colors */ case CA(BEHIND, PLAIN): if (con->co == a->co) return SATISFIED; return INCOMPATIBLE; + break; case CA('^', '^'): /* collision, similar constraints */ case CA('$', '$'): case CA(AHEAD, AHEAD): @@ -990,11 +962,13 @@ struct arc *a; if (con->co == a->co) /* true duplication */ return SATISFIED; return INCOMPATIBLE; + break; case CA('^', BEHIND): /* collision, dissimilar constraints */ case CA(BEHIND, '^'): case CA('$', AHEAD): case CA(AHEAD, '$'): return INCOMPATIBLE; + break; case CA('^', '$'): /* constraints passing each other */ case CA('^', AHEAD): case CA(BEHIND, '$'): @@ -1008,18 +982,20 @@ struct arc *a; case CA('$', LACON): case CA(AHEAD, LACON): return COMPATIBLE; + break; } assert(NOTREACHED); - return INCOMPATIBLE; /* keep compiler from complaining */ + return INCOMPATIBLE; /* for benefit of blind compilers */ } /* - fixempties - get rid of EMPTY arcs - ^ static VOID fixempties(struct nfa *); + ^ static VOID fixempties(struct nfa *, FILE *); */ static VOID -fixempties(nfa) +fixempties(nfa, f) struct nfa *nfa; +FILE *f; /* for debug output; NULL 
none */ { struct state *s; struct state *nexts; @@ -1039,8 +1015,8 @@ struct nfa *nfa; assert(nexta == NULL || s->no != FREESTATE); } } - if (progress && (nfa->v->cflags&REG_PROGRESS)) - dumpnfa(nfa, stdout); + if (progress && f != NULL) + dumpnfa(nfa, f); } while (progress && !NISERR()); } @@ -1176,11 +1152,10 @@ struct state *mark; /* the value to mark with */ /* - analyze - ascertain potentially-useful facts about an optimized NFA - ^ static int analyze(struct vars *, struct nfa *); + ^ static int analyze(struct nfa *); */ static int /* re_info bits to be ORed in */ -analyze(v, nfa) -struct vars *v; +analyze(nfa) struct nfa *nfa; { struct arc *a; @@ -1219,11 +1194,10 @@ struct state *end; /* - compact - compact an NFA - ^ static VOID compact(struct vars *, struct nfa *, struct cnfa *); + ^ static VOID compact(struct nfa *, struct cnfa *); */ static VOID -compact(v, nfa, cnfa) -struct vars *v; +compact(nfa, cnfa) struct nfa *nfa; struct cnfa *cnfa; { @@ -1234,7 +1208,7 @@ struct cnfa *cnfa; struct carc *ca; struct carc *first; - assert (!ISERR()); + assert (!NISERR()); nstates = 0; narcs = 0; @@ -1243,14 +1217,14 @@ struct cnfa *cnfa; narcs += s->nouts + 1; } - cnfa->states = (struct carc **)ckalloc(nstates * sizeof(struct carc *)); - cnfa->arcs = (struct carc *)ckalloc(narcs * sizeof(struct carc)); + cnfa->states = (struct carc **)MALLOC(nstates * sizeof(struct carc *)); + cnfa->arcs = (struct carc *)MALLOC(narcs * sizeof(struct carc)); if (cnfa->states == NULL || cnfa->arcs == NULL) { if (cnfa->states != NULL) - ckfree((char *)cnfa->states); + FREE(cnfa->states); if (cnfa->arcs != NULL) - ckfree((char *)cnfa->arcs); - ERR(REG_ESPACE); + FREE(cnfa->arcs); + NERR(REG_ESPACE); return; } cnfa->nstates = nstates; @@ -1260,13 +1234,12 @@ struct cnfa *cnfa; cnfa->bos[1] = nfa->bos[1]; cnfa->eos[0] = nfa->eos[0]; cnfa->eos[1] = nfa->eos[1]; - cnfa->ncolors = maxcolor(v->cm) + 1; - cnfa->haslacons = 0; - cnfa->leftanch = 1; /* tentatively */ + cnfa->ncolors = 
maxcolor(nfa->cm) + 1; + cnfa->flags = LEFTANCH; /* tentatively */ ca = cnfa->arcs; for (s = nfa->states; s != NULL; s = s->next) { - assert((size_t) s->no < nstates); + assert((size_t)s->no < nstates); cnfa->states[s->no] = ca; first = ca; for (a = s->outs; a != NULL; a = a->outchain) @@ -1278,10 +1251,10 @@ struct cnfa *cnfa; break; case LACON: assert(s->no != cnfa->pre); - ca->co = (color) (a->co + cnfa->ncolors); + ca->co = (color)(cnfa->ncolors + a->co); ca->to = a->to->no; ca++; - cnfa->haslacons = 1; + cnfa->flags |= HASLACONS; break; default: assert(NOTREACHED); @@ -1297,9 +1270,9 @@ struct cnfa *cnfa; for (a = nfa->pre->outs; a != NULL; a = a->outchain) if (a->type == PLAIN && a->co != nfa->bos[0] && - a->co != nfa->bos[1]) - cnfa->leftanch = 0; - } + a->co != nfa->bos[1]) + cnfa->flags &= ~LEFTANCH; +} /* - carcsort - sort compacted-NFA arcs by color @@ -1341,11 +1314,12 @@ int dynalloc; /* is the cnfa struct itself dynamic? */ { assert(cnfa->nstates != 0); /* not empty already */ cnfa->nstates = 0; - ckfree((char *)cnfa->states); - ckfree((char *)cnfa->arcs); + FREE(cnfa->states); + FREE(cnfa->arcs); if (dynalloc) - ckfree((char *)cnfa); + FREE(cnfa); } + /* - dumpnfa - dump an NFA in human-readable form ^ static VOID dumpnfa(struct nfa *, FILE *); @@ -1355,7 +1329,159 @@ dumpnfa(nfa, f) struct nfa *nfa; FILE *f; { +#ifdef REG_DEBUG + struct state *s; + + fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no); + if (nfa->bos[0] != COLORLESS) + fprintf(f, ", bos [%ld]", (long)nfa->bos[0]); + if (nfa->bos[1] != COLORLESS) + fprintf(f, ", bol [%ld]", (long)nfa->bos[1]); + if (nfa->eos[0] != COLORLESS) + fprintf(f, ", eos [%ld]", (long)nfa->eos[0]); + if (nfa->eos[1] != COLORLESS) + fprintf(f, ", eol [%ld]", (long)nfa->eos[1]); + fprintf(f, "\n"); + for (s = nfa->states; s != NULL; s = s->next) + dumpstate(s, f); + if (nfa->parent == NULL) + dumpcolors(nfa->cm, f); + fflush(f); +#endif } + +#ifdef REG_DEBUG /* subordinates of dumpnfa */ + +/* + - 
dumpstate - dump an NFA state in human-readable form + ^ static VOID dumpstate(struct state *, FILE *); + */ +static VOID +dumpstate(s, f) +struct state *s; +FILE *f; +{ + struct arc *a; + + fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "", + (s->flag) ? s->flag : '.'); + if (s->prev != NULL && s->prev->next != s) + fprintf(f, "\tstate chain bad\n"); + if (s->nouts == 0) + fprintf(f, "\tno out arcs\n"); + else + dumparcs(s, f); + fflush(f); + for (a = s->ins; a != NULL; a = a->inchain) { + if (a->to != s) + fprintf(f, "\tlink from %d to %d on %d's in-chain\n", + a->from->no, a->to->no, s->no); + } +} + +/* + - dumparcs - dump out-arcs in human-readable form + ^ static VOID dumparcs(struct state *, FILE *); + */ +static VOID +dumparcs(s, f) +struct state *s; +FILE *f; +{ + int pos; + + assert(s->nouts > 0); + /* printing arcs in reverse order is usually clearer */ + pos = dumprarcs(s->outs, s, f, 1); + if (pos != 1) + fprintf(f, "\n"); +} + +/* + - dumprarcs - dump remaining outarcs, recursively, in reverse order + ^ static int dumprarcs(struct arc *, struct state *, FILE *, int); + */ +static int /* resulting print position */ +dumprarcs(a, s, f, pos) +struct arc *a; +struct state *s; +FILE *f; +int pos; /* initial print position */ +{ + if (a->outchain != NULL) + pos = dumprarcs(a->outchain, s, f, pos); + dumparc(a, s, f); + if (pos == 5) { + fprintf(f, "\n"); + pos = 1; + } else + pos++; + return pos; +} + +/* + - dumparc - dump one outarc in readable form, including prefixing tab + ^ static VOID dumparc(struct arc *, struct state *, FILE *); + */ +static VOID +dumparc(a, s, f) +struct arc *a; +struct state *s; +FILE *f; +{ + struct arc *aa; + struct arcbatch *ab; + + fprintf(f, "\t"); + switch (a->type) { + case PLAIN: + fprintf(f, "[%ld]", (long)a->co); + break; + case AHEAD: + fprintf(f, ">%ld>", (long)a->co); + break; + case BEHIND: + fprintf(f, "<%ld<", (long)a->co); + break; + case LACON: + fprintf(f, ":%ld:", (long)a->co); + break; + case '^': + 
case '$': + fprintf(f, "%c%d", a->type, (int)a->co); + break; + case EMPTY: + break; + default: + fprintf(f, "0x%x/0%lo", a->type, (long)a->co); + break; + } + if (a->from != s) + fprintf(f, "?%d?", a->from->no); + for (ab = &a->from->oas; ab != NULL; ab = ab->next) { + for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++) + if (aa == a) + break; /* NOTE BREAK OUT */ + if (aa < &ab->a[ABSIZE]) /* propagate break */ + break; /* NOTE BREAK OUT */ + } + if (ab == NULL) + fprintf(f, "?!?"); /* not in allocated space */ + fprintf(f, "->"); + if (a->to == NULL) { + fprintf(f, "NULL"); + return; + } + fprintf(f, "%d", a->to->no); + for (aa = a->to->ins; aa != NULL; aa = aa->inchain) + if (aa == a) + break; /* NOTE BREAK OUT */ + if (aa == NULL) + fprintf(f, "?!?"); /* missing from in-chain */ +} + +#endif /* ifdef REG_DEBUG */ + /* - dumpcnfa - dump a compacted NFA in human-readable form ^ static VOID dumpcnfa(struct cnfa *, FILE *); @@ -1365,4 +1491,62 @@ dumpcnfa(cnfa, f) struct cnfa *cnfa; FILE *f; { +#ifdef REG_DEBUG + int st; + + fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post); + if (cnfa->bos[0] != COLORLESS) + fprintf(f, ", bos [%ld]", (long)cnfa->bos[0]); + if (cnfa->bos[1] != COLORLESS) + fprintf(f, ", bol [%ld]", (long)cnfa->bos[1]); + if (cnfa->eos[0] != COLORLESS) + fprintf(f, ", eos [%ld]", (long)cnfa->eos[0]); + if (cnfa->eos[1] != COLORLESS) + fprintf(f, ", eol [%ld]", (long)cnfa->eos[1]); + if (cnfa->flags&HASLACONS) + fprintf(f, ", haslacons"); + if (cnfa->flags&LEFTANCH) + fprintf(f, ", leftanch"); + fprintf(f, "\n"); + for (st = 0; st < cnfa->nstates; st++) + dumpcstate(st, cnfa->states[st], cnfa, f); + fflush(f); +#endif } + +#ifdef REG_DEBUG /* subordinates of dumpcnfa */ + +/* + - dumpcstate - dump a compacted-NFA state in human-readable form + ^ static VOID dumpcstate(int, struct carc *, struct cnfa *, FILE *); + */ +static VOID +dumpcstate(st, ca, cnfa, f) +int st; +struct carc *ca; +struct cnfa *cnfa; +FILE *f; +{ + int i; + int pos; + + fprintf(f, 
"%d.", st); + pos = 1; + for (i = 0; ca[i].co != COLORLESS; i++) { + if (ca[i].co < cnfa->ncolors) + fprintf(f, "\t[%ld]->%d", (long)ca[i].co, ca[i].to); + else + fprintf(f, "\t:%ld:->%d", (long)ca[i].co-cnfa->ncolors, + ca[i].to); + if (pos == 5) { + fprintf(f, "\n"); + pos = 1; + } else + pos++; + } + if (i == 0 || pos != 1) + fprintf(f, "\n"); + fflush(f); +} + +#endif /* ifdef REG_DEBUG */ diff --git a/generic/compile.c b/generic/regcomp.c index ee12d04..2a13172 100644 --- a/generic/compile.c +++ b/generic/regcomp.c @@ -1,51 +1,16 @@ /* - * compile.c -- - * - * Regexp package file: re_*comp and friends - compile REs - * - * Copyright (c) 1998 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., and Sun Microsystems Inc., none of - * whom are responsible for the results. The author thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Copyright (c) 1998 by Sun Microsystems, Inc. 
- * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: compile.c,v 1.1.2.2 1998/10/03 01:56:40 stanton Exp $ + * re_*comp and friends - compile REs + * This file #includes several others (see the bottom). */ -#include "tclInt.h" -#include <assert.h> -#include "tclPort.h" -#include "tclRegexp.h" -#include "chr.h" -#include "guts.h" +#include "regguts.h" /* * forward declarations, up here so forward datatypes etc. are defined early */ /* =====^!^===== begin forwards =====^!^===== */ /* automatically gathered by fwd; do not hand-edit */ -/* === compile.c === */ +/* === regcomp.c === */ int compile _ANSI_ARGS_((regex_t *, CONST chr *, size_t, int)); static VOID moresubs _ANSI_ARGS_((struct vars *, int)); static int freev _ANSI_ARGS_((struct vars *, int)); @@ -63,53 +28,34 @@ static color nlcolor _ANSI_ARGS_((struct vars *)); static VOID wordchrs _ANSI_ARGS_((struct vars *)); static struct subre subre _ANSI_ARGS_((struct state *, struct state *, int, int, struct rtree *)); static struct rtree *newrt _ANSI_ARGS_((struct vars *)); -static VOID freert _ANSI_ARGS_((struct rtree *)); -static VOID freertnode _ANSI_ARGS_((struct rtree *)); +static VOID freert _ANSI_ARGS_((struct vars *, struct rtree *)); +static VOID freertnode _ANSI_ARGS_((struct vars *, struct rtree *)); static VOID optrt _ANSI_ARGS_((struct vars *, struct rtree *)); static int numrt _ANSI_ARGS_((struct rtree *, int)); -static VOID nfatree _ANSI_ARGS_((struct vars *, struct rtree *)); -static VOID nfanode _ANSI_ARGS_((struct vars *, struct subre *)); +static VOID markrt _ANSI_ARGS_((struct rtree *)); +static VOID cleanrt _ANSI_ARGS_((struct vars *)); +static VOID nfatree _ANSI_ARGS_((struct vars *, struct rtree *, FILE *)); +static VOID nfanode _ANSI_ARGS_((struct vars *, struct subre *, FILE *)); static int newlacon _ANSI_ARGS_((struct vars *, struct state *, struct state *, int)); static VOID 
freelacons _ANSI_ARGS_((struct subre *, int)); static VOID rfree _ANSI_ARGS_((regex_t *)); static VOID dump _ANSI_ARGS_((regex_t *, FILE *)); static VOID dumprt _ANSI_ARGS_((struct rtree *, FILE *, int)); static VOID rtdump _ANSI_ARGS_((struct rtree *, FILE *, int, int)); -/* === lex.c === */ +/* === regc_lex.c === */ static VOID lexstart _ANSI_ARGS_((struct vars *)); static VOID prefixes _ANSI_ARGS_((struct vars *)); -static VOID lexnest _ANSI_ARGS_((struct vars *, chr *)); +static VOID lexnest _ANSI_ARGS_((struct vars *, chr *, chr *)); static VOID lexword _ANSI_ARGS_((struct vars *)); static int next _ANSI_ARGS_((struct vars *)); static int lexescape _ANSI_ARGS_((struct vars *)); static chr lexdigits _ANSI_ARGS_((struct vars *, int, int, int)); static int brenext _ANSI_ARGS_((struct vars *, pchr)); static VOID skip _ANSI_ARGS_((struct vars *)); -static chr newline _ANSI_ARGS_((VOID)); -static chr *ch _ANSI_ARGS_((VOID)); -static chr chrnamed _ANSI_ARGS_((struct vars *, chr *, pchr)); -/* === locale.c === */ -#define MAXCE 2 /* longest CE code is prepared to handle */ -typedef wint_t celt; /* type holding distinct codes for all chrs, all CEs */ -static int nces _ANSI_ARGS_((struct vars *)); -static int nleaders _ANSI_ARGS_((struct vars *)); -static struct cvec *allces _ANSI_ARGS_((struct vars *, struct cvec *)); -static celt element _ANSI_ARGS_((struct vars *, chr *, chr *)); -static struct cvec *range _ANSI_ARGS_((struct vars *, celt, celt, int)); -static int before _ANSI_ARGS_((celt, celt)); -static struct cvec *eclass _ANSI_ARGS_((struct vars *, celt, int)); -static struct cvec *cclass _ANSI_ARGS_((struct vars *, chr *, chr *, int)); -static struct cvec *allcases _ANSI_ARGS_((struct vars *, pchr)); -static int sncmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t)); -static struct cvec *newcvec _ANSI_ARGS_((int, int)); -static struct cvec *clearcvec _ANSI_ARGS_((struct cvec *)); -static VOID addchr _ANSI_ARGS_((struct cvec *, pchr)); -static VOID addce 
_ANSI_ARGS_((struct cvec *, chr *)); -static int haschr _ANSI_ARGS_((struct cvec *, pchr)); -static struct cvec *getcvec _ANSI_ARGS_((struct vars *, int, int)); -static VOID freecvec _ANSI_ARGS_((struct cvec *)); -/* === color.c === */ -union tree; +static chr newline _ANSI_ARGS_((NOPARMS)); +static chr *ch _ANSI_ARGS_((NOPARMS)); +static chr chrnamed _ANSI_ARGS_((struct vars *, chr *, chr *, pchr)); +/* === regc_color.c === */ static struct colormap *newcm _ANSI_ARGS_((struct vars *)); static VOID freecm _ANSI_ARGS_((struct colormap *)); static VOID cmtreefree _ANSI_ARGS_((struct colormap *, union tree *, int)); @@ -127,8 +73,11 @@ static VOID uncolorchain _ANSI_ARGS_((struct colormap *, struct arc *)); static int singleton _ANSI_ARGS_((struct colormap *, pchr c)); static VOID rainbow _ANSI_ARGS_((struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *)); static VOID colorcomplement _ANSI_ARGS_((struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *)); -/* === nfa.c === */ -static struct nfa *newnfa _ANSI_ARGS_((struct vars *, struct nfa *)); +static VOID dumpcolors _ANSI_ARGS_((struct colormap *, FILE *)); +static VOID fillcheck _ANSI_ARGS_((struct colormap *, union tree *, int, FILE *)); +static VOID dumpchr _ANSI_ARGS_((pchr, FILE *)); +/* === regc_nfa.c === */ +static struct nfa *newnfa _ANSI_ARGS_((struct vars *, struct colormap *, struct nfa *)); static VOID freenfa _ANSI_ARGS_((struct nfa *)); static struct state *newfstate _ANSI_ARGS_((struct nfa *, int flag)); static struct state *newstate _ANSI_ARGS_((struct nfa *)); @@ -151,27 +100,52 @@ static VOID dupnfa _ANSI_ARGS_((struct nfa *, struct state *, struct state *, st static VOID duptraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); static VOID cleartraverse _ANSI_ARGS_((struct nfa *, struct state *)); static VOID specialcolors _ANSI_ARGS_((struct nfa *)); -static VOID optimize _ANSI_ARGS_((struct nfa *)); -static VOID pullback 
_ANSI_ARGS_((struct nfa *)); +static int optimize _ANSI_ARGS_((struct nfa *, FILE *)); +static VOID pullback _ANSI_ARGS_((struct nfa *, FILE *)); static int pull _ANSI_ARGS_((struct nfa *, struct arc *)); -static VOID pushfwd _ANSI_ARGS_((struct nfa *)); +static VOID pushfwd _ANSI_ARGS_((struct nfa *, FILE *)); static int push _ANSI_ARGS_((struct nfa *, struct arc *)); #define INCOMPATIBLE 1 /* destroys arc */ #define SATISFIED 2 /* constraint satisfied */ #define COMPATIBLE 3 /* compatible but not satisfied yet */ static int combine _ANSI_ARGS_((struct arc *, struct arc *)); -static VOID fixempties _ANSI_ARGS_((struct nfa *)); +static VOID fixempties _ANSI_ARGS_((struct nfa *, FILE *)); static int unempty _ANSI_ARGS_((struct nfa *, struct arc *)); static VOID cleanup _ANSI_ARGS_((struct nfa *)); static VOID markreachable _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *)); static VOID markcanreach _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *)); -static int analyze _ANSI_ARGS_((struct vars *, struct nfa *)); +static int analyze _ANSI_ARGS_((struct nfa *)); static int isempty _ANSI_ARGS_((struct state *, struct state *)); -static VOID compact _ANSI_ARGS_((struct vars *, struct nfa *, struct cnfa *)); +static VOID compact _ANSI_ARGS_((struct nfa *, struct cnfa *)); static VOID carcsort _ANSI_ARGS_((struct carc *, struct carc *)); static VOID freecnfa _ANSI_ARGS_((struct cnfa *, int)); static VOID dumpnfa _ANSI_ARGS_((struct nfa *, FILE *)); +static VOID dumpstate _ANSI_ARGS_((struct state *, FILE *)); +static VOID dumparcs _ANSI_ARGS_((struct state *, FILE *)); +static int dumprarcs _ANSI_ARGS_((struct arc *, struct state *, FILE *, int)); +static VOID dumparc _ANSI_ARGS_((struct arc *, struct state *, FILE *)); static VOID dumpcnfa _ANSI_ARGS_((struct cnfa *, FILE *)); +static VOID dumpcstate _ANSI_ARGS_((int, struct carc *, struct cnfa *, FILE *)); +/* === regc_cvec.c === */ +static struct cvec *newcvec 
_ANSI_ARGS_((int, int)); +static struct cvec *clearcvec _ANSI_ARGS_((struct cvec *)); +static VOID addchr _ANSI_ARGS_((struct cvec *, pchr)); +static VOID addmcce _ANSI_ARGS_((struct cvec *, chr *, chr *)); +static int haschr _ANSI_ARGS_((struct cvec *, pchr)); +static struct cvec *getcvec _ANSI_ARGS_((struct vars *, int, int)); +static VOID freecvec _ANSI_ARGS_((struct cvec *)); +/* === regc_locale.c === */ +static int nmcces _ANSI_ARGS_((struct vars *)); +static int nleaders _ANSI_ARGS_((struct vars *)); +static struct cvec *allmcces _ANSI_ARGS_((struct vars *, struct cvec *)); +static celt element _ANSI_ARGS_((struct vars *, chr *, chr *)); +static struct cvec *range _ANSI_ARGS_((struct vars *, celt, celt, int)); +static int before _ANSI_ARGS_((celt, celt)); +static struct cvec *eclass _ANSI_ARGS_((struct vars *, celt, int)); +static struct cvec *cclass _ANSI_ARGS_((struct vars *, chr *, chr *, int)); +static struct cvec *allcases _ANSI_ARGS_((struct vars *, pchr)); +static int cmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t)); +static int casecmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t)); /* automatically gathered by fwd; do not hand-edit */ /* =====^!^===== end forwards =====^!^===== */ @@ -199,12 +173,14 @@ struct vars { color nlcolor; /* color of newline */ struct state *wordchrs; /* state in nfa holding word-char outarcs */ struct rtree *tree; /* subexpression tree */ + struct rtree *treechain; /* all tree nodes allocated */ + struct rtree *treefree; /* any free tree nodes */ int ntree; /* number of tree nodes */ struct cvec *cv; /* utility cvec */ - struct cvec *ces; /* collating-element information */ -# define ISCELEADER(v,c) (v->ces != NULL && haschr(v->ces, (c))) - struct state *cepbegin; /* state in nfa, start of CE prototypes */ - struct state *cepend; /* state in nfa, end of CE prototypes */ + struct cvec *mcces; /* collating-element information */ +# define ISCELEADER(v,c) (v->mcces != NULL && haschr(v->mcces, (c))) + struct state 
*mccepbegin; /* in nfa, start of MCCE prototypes */ + struct state *mccepend; /* in nfa, end of MCCE prototypes */ struct subre *lacons; /* lookahead-constraint vector */ int nlacons; /* size of lacons */ int usedshorter; /* used short-preferring quantifiers */ @@ -220,7 +196,7 @@ struct vars { ((vv)->err = (e))) #define ERR(e) VERR(v, e) /* record an error */ #define NOERR() {if (ISERR()) return;} /* if error seen, return */ -#define NOERRN() {if (ISERR()) goto end;} /* NOERR with retval */ +#define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */ #define INSIST(c, e) ((c) ? 0 : ERR(e)) /* if condition false, error */ #define NOTE(b) (v->re->re_info |= (b)) /* note visible condition */ #define EMPTYARC(x, y) newarc(v->nfa, EMPTY, 0, x, y) @@ -259,22 +235,6 @@ static struct fns functions = { /* - - regfree - free an RE (actually, just overall coordination) - */ -VOID -regfree(re) -regex_t *re; -{ - if (re == NULL || re->re_magic != REMAGIC) - return; /* no way we can report it, really */ - - /* free it, calling internal routine that knows details */ - (*((struct fns *)re->re_fns)->free)(re); - - re->re_magic = 0; -} - -/* - compile - compile regular expression ^ int compile(regex_t *, CONST chr *, size_t, int); */ @@ -289,25 +249,20 @@ int flags; struct vars *v = &var; struct guts *g; int i; + size_t j; + FILE *debug = (flags&REG_PROGRESS) ? stdout : (FILE *)NULL; # define CNOERR() { if (ISERR()) return freev(v, v->err); } - if (re == NULL) { - return REG_INVARG; - } - - /* - * Init re to known state, because we will try to free it if - * compilation fails. 
- */ - - re->re_magic = 0; - /* sanity checks */ - if (string == NULL || - ((flags&REG_EXTENDED) && (flags&REG_QUOTE)) || - (!(flags&REG_EXTENDED) && (flags&REG_ADVF))) { - return REG_INVARG; - } + + if (re == NULL || string == NULL) + return REG_INVARG; + assert(REG_ADVANCED == (REG_EXTENDED|REG_ADVF)); + if ((flags&REG_QUOTE) && + (flags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE))) + return REG_INVARG; + if (!(flags&REG_EXTENDED) && (flags&REG_ADVF)) + return REG_INVARG; /* initial setup (after which freev() is callable) */ v->re = re; @@ -319,27 +274,31 @@ int flags; v->nsubexp = 0; v->subs = v->sub10; v->nsubs = 10; - for (i = 0; (size_t) i < v->nsubs; i++) - v->subs[i] = NULL; + for (j = 0; j < v->nsubs; j++) + v->subs[j] = NULL; v->nfa = NULL; v->cm = NULL; v->nlcolor = COLORLESS; v->wordchrs = NULL; v->tree = NULL; + v->treechain = NULL; + v->treefree = NULL; v->cv = NULL; - v->ces = NULL; + v->mcces = NULL; v->lacons = NULL; v->nlacons = 0; + re->re_magic = REMAGIC; re->re_info = 0; /* bits get set during parse */ + re->re_csize = sizeof(chr); re->re_guts = NULL; - re->re_fns = NULL; + re->re_fns = VS(&functions); /* more complex setup, malloced things */ - v->cm = newcm(v); /* colormap must precede nfa... 
*/ + v->cm = newcm(v); CNOERR(); - v->nfa = newnfa(v, (struct nfa *)NULL); /* ...newnfa() uses it */ + v->nfa = newnfa(v, v->cm, (struct nfa *)NULL); CNOERR(); - re->re_guts = ckalloc(sizeof(struct guts)); + re->re_guts = VS(MALLOC(sizeof(struct guts))); if (re->re_guts == NULL) return freev(v, REG_ESPACE); g = (struct guts *)re->re_guts; @@ -351,19 +310,17 @@ int flags; v->cv = newcvec(100, 10); if (v->cv == NULL) return freev(v, REG_ESPACE); - i = nces(v); + i = nmcces(v); if (i > 0) { - v->ces = newcvec(nleaders(v), i); + v->mcces = newcvec(nleaders(v), i); CNOERR(); - v->ces = allces(v, v->ces); - leaders(v, v->ces); + v->mcces = allmcces(v, v->mcces); + leaders(v, v->mcces); } CNOERR(); /* parsing */ lexstart(v); /* also handles prefixes */ - if (SEE(EOS)) /* empty RE is illegal */ - return freev(v, REG_EMPTY); v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final, NONEYET); assert(SEE(EOS)); /* even if error; ISERR() => SEE(EOS) */ CNOERR(); @@ -371,38 +328,40 @@ int flags; /* finish setup of nfa and its subre tree */ specialcolors(v->nfa); CNOERR(); - if (flags&REG_PROGRESS) { - dumpnfa(v->nfa, stdout); - dumprt(v->tree, stdout, 1); + if (debug != NULL) { + dumpnfa(v->nfa, debug); + dumprt(v->tree, debug, 1); } v->usedshorter = 0; optrt(v, v->tree); - if (v->tree != NULL) + if (v->tree != NULL) { v->ntree = numrt(v->tree, 1); - else + markrt(v->tree); + } else v->ntree = 0; - if (flags&REG_PROGRESS) { - printf("-->\n"); - dumprt(v->tree, stdout, 1); + cleanrt(v); + if (debug != NULL) { + fprintf(debug, "-->\n"); + dumprt(v->tree, debug, 1); } /* build compacted NFAs for tree, lacons, main nfa */ - nfatree(v, v->tree); - if (flags&REG_PROGRESS) { - printf("---->\n"); - dumprt(v->tree, stdout, 1); + nfatree(v, v->tree, debug); + if (debug != NULL) { + fprintf(debug, "---->\n"); + dumprt(v->tree, debug, 1); } CNOERR(); assert(v->nlacons == 0 || v->lacons != NULL); for (i = 1; i < v->nlacons; i++) - nfanode(v, &v->lacons[i]); + nfanode(v, 
&v->lacons[i], debug); 
CNOERR(); - optimize(v->nfa); /* removes unreachable states */ + re->re_info |= optimize(v->nfa, debug); CNOERR(); if (v->nfa->post->nins <= 0) return freev(v, REG_IMPOSS); /* end unreachable! */ assert(v->nfa->pre->nouts > 0); - compact(v, v->nfa, &g->cnfa); + compact(v->nfa, &g->cnfa); CNOERR(); freenfa(v->nfa); v->nfa = NULL; @@ -412,13 +371,8 @@ int flags; CNOERR(); /* looks okay, package it up */ - re->re_magic = REMAGIC; re->re_nsub = v->nsubexp; - /* re_info is already set */ - re->re_csize = sizeof(chr); - re->re_guts = (VOID *)g; - re->re_fns = (VOID *)&functions; - v->re = NULL; + v->re = NULL; /* freev no longer frees re */ g->magic = GUTSMAGIC; g->cflags = v->cflags; g->info = re->re_info; @@ -428,7 +382,7 @@ int flags; g->tree = v->tree; v->tree = NULL; g->ntree = v->ntree; - g->compare = (v->cflags&REG_ICASE) ? sncmp : wcsncmp; + g->compare = (v->cflags&REG_ICASE) ? casecmp : cmp; g->lacons = v->lacons; v->lacons = NULL; g->nlacons = v->nlacons; @@ -453,16 +407,15 @@ int wanted; /* want enough room for this one */ struct subre **p; size_t n; - assert((size_t)wanted >= v->nsubs); + assert(wanted > 0 && (size_t)wanted >= v->nsubs); n = (size_t)wanted * 3 / 2 + 1; if (v->subs == v->sub10) { - p = (struct subre **)ckalloc(n * sizeof(struct subre *)); + p = (struct subre **)MALLOC(n * sizeof(struct subre *)); if (p != NULL) - memcpy((VOID *)p, (VOID *)v->subs, + memcpy(VS(p), VS(v->subs), v->nsubs * sizeof(struct subre *)); } else - p = (struct subre **) ckrealloc((VOID *)v->subs, - n * sizeof(struct subre *)); + p = REALLOC(v->subs, n * sizeof(struct subre *)); if (p == NULL) { ERR(REG_ESPACE); return; @@ -476,8 +429,8 @@ int wanted; /* want enough room for this one */ /* - freev - free vars struct's substructures where necessary - * Does optional error-number setting, and returns error code, to make - * error code terser. + * Optionally does error-number setting, and always returns error code + * (if any), to make error-handling code terser. 
^ static int freev(struct vars *, int); */ static int @@ -488,20 +441,22 @@ int err; if (v->re != NULL) rfree(v->re); if (v->subs != v->sub10) - ckfree((char *)v->subs); + FREE(v->subs); if (v->nfa != NULL) freenfa(v->nfa); if (v->cm != NULL) freecm(v->cm); if (v->tree != NULL) - freert(v->tree); + freert(v, v->tree); + if (v->treechain != NULL) + cleanrt(v); if (v->cv != NULL) freecvec(v->cv); - if (v->ces != NULL) - freecvec(v->ces); + if (v->mcces != NULL) + freecvec(v->mcces); if (v->lacons != NULL) freelacons(v->lacons, v->nlacons); - ERR(err); + ERR(err); /* nop if err==0 */ return v->err; } @@ -510,6 +465,9 @@ int err; - parse - parse an RE * Arguably this is too big and too complex and ought to be divided up. * However, the code is somewhat intertwined... + * + * Note that it is no longer necessary to be rigorous about freeing tree + * nodes on error exits, as the tree machinery keeps track of them. ^ static struct rtree *parse(struct vars *, int, int, struct state *, ^ struct state *, int); */ @@ -531,7 +489,6 @@ int pprefer; /* parent's short/long preference */ # define ARCV(t, val) newarc(v->nfa, t, val, lp, rp) int m, n; int emptybranch; /* is there anything in this branch yet? */ - color co; struct rtree *branches; /* top level */ struct rtree *branch; /* current branch */ struct subre *now; /* current subtree's top */ @@ -545,11 +502,10 @@ int pprefer; /* parent's short/long preference */ assert(stopper == ')' || stopper == EOS); - branch = NULL; /* lint. */ - rt1 = NULL; /* lint. */ - capture = 0; branches = newrt(v); + branch = branches; + rt1 = NULL; /* shut up lint */ firstbranch = 1; NOERRN(); do { @@ -557,27 +513,17 @@ int pprefer; /* parent's short/long preference */ emptybranch = 1; /* tentatively */ left = newstate(v->nfa); right = newstate(v->nfa); - if (!firstbranch) + NOERRN(); + if (!firstbranch) { rt1 = newrt(v); -#if 1 - if (ISERR()) { - freert(rt1); - freert(branches); /* mem leak (CCS). 
*/ - return NULL; + NOERRN(); + branch->next = rt1; + branch = rt1; } -#else - NOERRN(); -#endif EMPTYARC(init, left); EMPTYARC(right, final); lp = left; rp = right; - if (firstbranch) - branch = branches; - else { - branch->next = rt1; - branch = rt1; - } branch->op = '|'; now = &branch->left; *now = subre(left, right, NONEYET, 0, (struct rtree *)NULL); @@ -609,7 +555,7 @@ int pprefer; /* parent's short/long preference */ sub.subno = v->nsubexp; if ((size_t)sub.subno >= v->nsubs) moresubs(v, sub.subno); - assert((size_t) sub.subno < v->nsubs); + assert((size_t)sub.subno < v->nsubs); } else sub.subno = 0; NEXT(); @@ -661,7 +607,7 @@ int pprefer; /* parent's short/long preference */ assert(SEE(')') || ISERR()); NEXT(); m = newlacon(v, s, s2, m); - freert(rt1); + freert(v, rt1); NOERRN(); ARCV(LACON, m); constraint = 1; @@ -696,10 +642,10 @@ int pprefer; /* parent's short/long preference */ NEXT(); break; case '.': - co = (color) ((v->cflags&REG_NLSTOP) - ? nlcolor(v) - : COLORLESS); - rainbow(v->nfa, v->cm, PLAIN, co, lp, rp); + rainbow(v->nfa, v->cm, PLAIN, + (v->cflags&REG_NLSTOP) ? 
+ nlcolor(v) : COLORLESS, + lp, rp); NEXT(); break; case '^': @@ -804,13 +750,19 @@ int pprefer; /* parent's short/long preference */ constraint = 1; break; case ')': /* unbalanced paren */ +#ifdef POSIX_MISTAKE if (!(v->cflags&REG_EXTENDED) || (v->cflags&REG_ADVF)) { - ERR(REG_EPAREN); - goto end; + ERR(REG_EPAREN); + return NULL; } NOTE(REG_UPBOTCH); /* fallthrough into case PLAIN */ +#else + ERR(REG_EPAREN); + return NULL; + break; +#endif case PLAIN: onechr(v, v->nextvalue, lp, rp); okcolors(v->nfa, v->cm); @@ -822,10 +774,12 @@ int pprefer; /* parent's short/long preference */ case '?': case '{': ERR(REG_BADRPT); - goto end; + return NULL; + break; default: ERR(REG_ASSERT); - goto end; + return NULL; + break; } /* ...possibly followed by a quantifier */ @@ -858,13 +812,13 @@ int pprefer; /* parent's short/long preference */ n = INFINITY; if (m > n) { ERR(REG_BADBR); - goto end; + return NULL; } } else n = m; if (!SEE('}')) { /* gets errors too */ ERR(REG_BADBR); - goto end; + return NULL; } if (m != n) sub.prefer = (v->nextvalue) ? 
LONGER : @@ -880,19 +834,19 @@ int pprefer; /* parent's short/long preference */ /* constraints may not be quantified */ if (constraint) { ERR(REG_BADRPT); - goto end; + return NULL; } /* annoying special case: {0,0} cancels everything */ if (m == 0 && n == 0 && sub.begin != NULL) { - freert(now->tree); + freert(v, now->tree); now->tree = NULL; sub.begin = NULL; /* no substructure */ sub.prefer = NONEYET; /* the repeat() below will do the rest */ } - /* if no substructure, aVOID hard part */ + /* if no substructure, avoid hard part */ if (now->prefer == NONEYET) now->prefer = sub.prefer; if (sub.begin == NULL && (sub.prefer == NONEYET || @@ -983,8 +937,8 @@ int pprefer; /* parent's short/long preference */ t->tree = rt1; rt1->op = 'b'; rt1->left.subno = sub.subno; - rt1->left.min = (short) m; - rt1->left.max = (short) n; + rt1->left.min = (short)m; + rt1->left.max = (short)n; rt1->left.prefer = sub.prefer; continue; /* NOTE CONTINUE */ } @@ -1036,14 +990,13 @@ int pprefer; /* parent's short/long preference */ branch->op = ','; else { branches = branch->left.tree; /* might be NULL */ - freertnode(branch); + freertnode(v, branch); } } if (capture) /* actually a catchall flag */ return branches; - end: /* mem leak (CCS) */ - freert(branches); + freert(v, branches); return NULL; } @@ -1197,7 +1150,7 @@ struct state *rp; struct state *s; struct arc *a; /* arc from lp */ struct arc *ba; /* arc from left, from bracket() */ - struct arc *pa; /* CE-prototype arc */ + struct arc *pa; /* MCCE-prototype arc */ color co; chr *p; int i; @@ -1213,16 +1166,16 @@ struct state *rp; /* easy part of complementing */ colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp); NOERR(); - if (v->ces == NULL) { /* no CEs -- we're done */ + if (v->mcces == NULL) { /* no MCCEs -- we're done */ dropstate(v->nfa, left); assert(right->nins == 0); freestate(v->nfa, right); return; } - /* but complementing gets messy in the presence of CEs... 
*/ + /* but complementing gets messy in the presence of MCCEs... */ NOTE(REG_ULOCALE); - for (p = v->ces->chrs, i = v->ces->nchrs; i > 0; p++, i--) { + for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) { co = getcolor(v->cm, *p); a = findarc(lp, PLAIN, co); ba = findarc(left, PLAIN, co); @@ -1236,7 +1189,7 @@ struct state *rp; NOERR(); newarc(v->nfa, PLAIN, co, lp, s); NOERR(); - pa = findarc(v->cepbegin, PLAIN, co); + pa = findarc(v->mccepbegin, PLAIN, co); assert(pa != NULL); if (ba == NULL) { /* easy case, need all of them */ cloneouts(v->nfa, pa->to, s, rp, PLAIN); @@ -1288,10 +1241,11 @@ struct state *rp; case RANGE: /* a-b-c or other botch */ ERR(REG_ERANGE); return; + break; case PLAIN: c[0] = v->nextvalue; NEXT(); - /* shortcut for ordinary chr (not range, not CE leader) */ + /* shortcut for ordinary chr (not range, not MCCE leader) */ if (!SEE(RANGE) && !ISCELEADER(v, c[0])) { onechr(v, c[0], lp, rp); return; @@ -1318,6 +1272,7 @@ struct state *rp; NOERR(); dovec(v, cv, lp, rp); return; + break; case CCLASS: startp = v->now; endp = scanplain(v); @@ -1327,9 +1282,11 @@ struct state *rp; NOERR(); dovec(v, cv, lp, rp); return; + break; default: ERR(REG_ASSERT); return; + break; } if (SEE(RANGE)) { @@ -1353,6 +1310,7 @@ struct state *rp; default: ERR(REG_ERANGE); return; + break; } } else endc = startc; @@ -1407,35 +1365,35 @@ leaders(v, cv) struct vars *v; struct cvec *cv; { - int ce; + int mcce; chr *p; chr leader; struct state *s; struct arc *a; - v->cepbegin = newstate(v->nfa); - v->cepend = newstate(v->nfa); + v->mccepbegin = newstate(v->nfa); + v->mccepend = newstate(v->nfa); NOERR(); - for (ce = 0; ce < cv->nces; ce++) { - p = cv->ces[ce]; + for (mcce = 0; mcce < cv->nmcces; mcce++) { + p = cv->mcces[mcce]; leader = *p; if (!haschr(cv, leader)) { addchr(cv, leader); s = newstate(v->nfa); newarc(v->nfa, PLAIN, subcolor(v->cm, leader), - v->cepbegin, s); + v->mccepbegin, s); okcolors(v->nfa, v->cm); } else { - a = findarc(v->cepbegin, PLAIN, 
+ a = findarc(v->mccepbegin, PLAIN, getcolor(v->cm, leader)); assert(a != NULL); s = a->to; - assert(s != v->cepend); + assert(s != v->mccepend); } p++; - assert(*p != 0 && *(p+1) == 0); /* only 2-char CEs at present */ - newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->cepend); + assert(*p != 0 && *(p+1) == 0); /* only 2-char MCCEs for now */ + newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend); okcolors(v->nfa, v->cm); } } @@ -1463,7 +1421,7 @@ struct state *rp; /* - dovec - fill in arcs for each element of a cvec - * This one has to handle the messy cases, like CEs and CE leaders. + * This one has to handle the messy cases, like MCCEs and MCCE leaders. ^ static VOID dovec(struct vars *, struct cvec *, struct state *, ^ struct state *); */ @@ -1493,11 +1451,11 @@ struct state *rp; assert(singleton(v->cm, *p)); *np++ = *p; } - cv->nchrs = np - cv->chrs; /* only CE leaders remain */ - if (cv->nchrs == 0 && cv->nces == 0) + cv->nchrs = np - cv->chrs; /* only MCCE leaders remain */ + if (cv->nchrs == 0 && cv->nmcces == 0) return; - /* deal with the CE leaders */ + /* deal with the MCCE leaders */ NOTE(REG_ULOCALE); for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) { co = getcolor(v->cm, *p); @@ -1510,7 +1468,7 @@ struct state *rp; newarc(v->nfa, PLAIN, co, lp, s); NOERR(); } - pa = findarc(v->cepbegin, PLAIN, co); + pa = findarc(v->mccepbegin, PLAIN, co); assert(pa != NULL); ps = pa->to; newarc(v->nfa, '$', 1, s, rp); @@ -1519,9 +1477,9 @@ struct state *rp; NOERR(); } - /* and the CEs */ - for (i = 0; i < cv->nces; i++) { - p = cv->ces[i]; + /* and the MCCEs */ + for (i = 0; i < cv->nmcces; i++) { + p = cv->mcces[i]; assert(singleton(v->cm, *p)); co = getcolor(v->cm, *p++); a = findarc(lp, PLAIN, co); @@ -1587,7 +1545,7 @@ struct vars *v; NEXT(); assert(v->savenow != NULL && SEE('[')); bracket(v, left, right); - assert(((v->savenow != NULL) && SEE(']')) || ISERR()); + assert((v->savenow != NULL && SEE(']')) || ISERR()); NEXT(); NOERR(); v->wordchrs = left; 
@@ -1626,14 +1584,23 @@ static struct rtree * newrt(v) struct vars *v; { - struct rtree *rt = (struct rtree *)ckalloc(sizeof(struct rtree)); - - if (rt == NULL) { - ERR(REG_ESPACE); - return NULL; + struct rtree *rt; + + rt = v->treefree; + if (rt != NULL) + v->treefree = rt->next; + else { + rt = (struct rtree *)MALLOC(sizeof(struct rtree)); + if (rt == NULL) { + ERR(REG_ESPACE); + return NULL; + } + rt->chain = v->treechain; + v->treechain = rt; } rt->op = '?'; /* invalid */ + rt->flags = 0; rt->no = 0; rt->left.begin = NULL; rt->left.end = NULL; @@ -1650,36 +1617,39 @@ struct vars *v; rt->right.tree = NULL; ZAPCNFA(rt->right.cnfa); rt->next = NULL; + return rt; } /* - freert - free a subRE subtree - ^ static VOID freert(struct rtree *); + ^ static VOID freert(struct vars *, struct rtree *); */ static VOID -freert(rt) +freert(v, rt) +struct vars *v; /* might be NULL */ struct rtree *rt; { if (rt == NULL) return; if (rt->left.tree != NULL) - freert(rt->left.tree); + freert(v, rt->left.tree); if (rt->right.tree != NULL) - freert(rt->right.tree); + freert(v, rt->right.tree); if (rt->next != NULL) - freert(rt->next); + freert(v, rt->next); - freertnode(rt); + freertnode(v, rt); } /* - freertnode - free one node in a subRE subtree - ^ static VOID freertnode(struct rtree *); + ^ static VOID freertnode(struct vars *, struct rtree *); */ static VOID -freertnode(rt) +freertnode(v, rt) +struct vars *v; /* might be NULL */ struct rtree *rt; { if (rt == NULL) @@ -1689,8 +1659,13 @@ struct rtree *rt; freecnfa(&rt->left.cnfa, 0); if (!NULLCNFA(rt->right.cnfa)) freecnfa(&rt->right.cnfa, 0); + rt->flags = 0; - ckfree((char *)rt); + if (v != NULL) { + rt->next = v->treefree; + v->treefree = rt; + } else + FREE(rt); } /* @@ -1721,7 +1696,7 @@ struct rtree *rt; subno = rt->left.subno; rt->left = t->left; assert(NULLCNFA(t->left.cnfa)); - freertnode(t); + freertnode(v, t); if (subno != 0) { assert(rt->left.subno == 0 && subno > 0); rt->left.subno = subno; @@ -1739,7 +1714,7 @@ 
struct rtree *rt; subno = rt->right.subno; rt->right = t->left; assert(NULLCNFA(t->right.cnfa)); - freertnode(t); + freertnode(v, t); if (subno != 0) { assert(rt->right.subno == 0 && subno > 0); rt->right.subno = subno; @@ -1800,7 +1775,7 @@ int start; /* starting point for subtree numbers */ assert(rt != NULL); i = start; - rt->no = (short) i++; + rt->no = (short)i++; if (rt->left.tree != NULL) i = numrt(rt->left.tree, i); if (rt->right.tree != NULL) @@ -1811,54 +1786,95 @@ int start; /* starting point for subtree numbers */ } /* + - markrt - mark tree nodes as INUSE + ^ static VOID markrt(struct rtree *); + */ +static VOID +markrt(rt) +struct rtree *rt; +{ + assert(rt != NULL); + + rt->flags |= INUSE; + if (rt->left.tree != NULL) + markrt(rt->left.tree); + if (rt->right.tree != NULL) + markrt(rt->right.tree); + if (rt->next != NULL) + markrt(rt->next); +} + +/* + - cleanrt - free any tree nodes not marked INUSE + ^ static VOID cleanrt(struct vars *); + */ +static VOID +cleanrt(v) +struct vars *v; +{ + struct rtree *rt; + struct rtree *next; + + for (rt = v->treechain; rt != NULL; rt = next) { + next = rt->next; + if (!(rt->flags&INUSE)) + FREE(rt); + } + v->treechain = NULL; + v->treefree = NULL; /* just on general principles */ +} + +/* - nfatree - turn a subRE subtree into a tree of compacted NFAs - ^ static VOID nfatree(struct vars *, struct rtree *); + ^ static VOID nfatree(struct vars *, struct rtree *, FILE *); */ static VOID -nfatree(v, rt) +nfatree(v, rt, f) struct vars *v; struct rtree *rt; +FILE *f; /* for debug output */ { if (rt == NULL) return; if (rt->left.begin != NULL) - nfanode(v, &rt->left); + nfanode(v, &rt->left, f); if (rt->left.tree != NULL) - nfatree(v, rt->left.tree); + nfatree(v, rt->left.tree, f); if (rt->right.begin != NULL) - nfanode(v, &rt->right); + nfanode(v, &rt->right, f); if (rt->right.tree != NULL) - nfatree(v, rt->right.tree); + nfatree(v, rt->right.tree, f); if (rt->next != NULL) - nfatree(v, rt->next); + nfatree(v, rt->next, 
f); } /* - nfanode - do one NFA for nfatree - ^ static VOID nfanode(struct vars *, struct subre *); + ^ static VOID nfanode(struct vars *, struct subre *, FILE *); */ static VOID -nfanode(v, sub) +nfanode(v, sub, f) struct vars *v; struct subre *sub; +FILE *f; /* for debug output */ { struct nfa *nfa; if (sub->begin == NULL) return; - nfa = newnfa(v, v->nfa); + nfa = newnfa(v, v->cm, v->nfa); NOERR(); dupnfa(nfa, sub->begin, sub->end, nfa->init, nfa->final); if (!ISERR()) { specialcolors(nfa); - optimize(nfa); + (DISCARD) optimize(nfa, f); } if (!ISERR()) - compact(v, nfa, &sub->cnfa); + compact(nfa, &sub->cnfa); freenfa(nfa); } @@ -1877,11 +1893,11 @@ int pos; struct subre *sub; if (v->nlacons == 0) { - v->lacons = (struct subre *)ckalloc(2 * sizeof(struct subre)); + v->lacons = (struct subre *)MALLOC(2 * sizeof(struct subre)); n = 1; /* skip 0th */ v->nlacons = 2; } else { - v->lacons = (struct subre *)ckrealloc((VOID *) v->lacons, + v->lacons = (struct subre *)REALLOC(v->lacons, (v->nlacons+1)*sizeof(struct subre)); n = v->nlacons++; } @@ -1909,10 +1925,11 @@ int n; struct subre *sub; int i; + assert(n > 0); for (sub = subs + 1, i = n - 1; i > 0; sub++, i--) if (!NULLCNFA(sub->cnfa)) freecnfa(&sub->cnfa, 0); - ckfree((char *)subs); + FREE(subs); } /* @@ -1921,11 +1938,15 @@ int n; */ static VOID rfree(re) -regex_t *re; /* regfree has validated it */ +regex_t *re; { - struct guts *g = (struct guts *)re->re_guts; + struct guts *g; - re->re_magic = 0; /* invalidate it */ + if (re == NULL || re->re_magic != REMAGIC) + return; + + re->re_magic = 0; /* invalidate RE */ + g = (struct guts *)re->re_guts; re->re_guts = NULL; re->re_fns = NULL; g->magic = 0; @@ -1934,10 +1955,50 @@ regex_t *re; /* regfree has validated it */ if (g->cm != NULL) freecm(g->cm); if (g->tree != NULL) - freert(g->tree); + freert((struct vars *)NULL, g->tree); if (g->lacons != NULL) freelacons(g->lacons, g->nlacons); - ckfree((char *)g); + FREE(g); +} + +/* + - dump - dump an RE in 
human-readable form + ^ static VOID dump(regex_t *, FILE *); + */ +static VOID +dump(re, f) +regex_t *re; +FILE *f; +{ +#ifdef REG_DEBUG + struct guts *g; + int i; + + if (re->re_magic != REMAGIC) + fprintf(f, "bad magic number (0x%x not 0x%x)\n", re->re_magic, + REMAGIC); + if (re->re_guts == NULL) { + fprintf(f, "NULL guts!!!\n"); + return; + } + g = (struct guts *)re->re_guts; + if (g->magic != GUTSMAGIC) + fprintf(f, "bad guts magic number (0x%x not 0x%x)\n", g->magic, + GUTSMAGIC); + + fprintf(f, "nsub %d, info 0%o, csize %d, ntree %d, usedshort %d\n", + re->re_nsub, re->re_info, re->re_csize, g->ntree, + g->usedshorter); + + dumpcolors(g->cm, f); + dumpcnfa(&g->cnfa, f); + for (i = 1; i < g->nlacons; i++) { + fprintf(f, "la%d (%s):\n", i, + (g->lacons[i].subno) ? "positive" : "negative"); + dumpcnfa(&g->lacons[i].cnfa, f); + } + dumprt(g->tree, f, 0); +#endif } /* @@ -2068,22 +2129,9 @@ int level; } } -/* - - dump - dump an RE in human-readable form - ^ static VOID dump(regex_t *, FILE *); - */ -static VOID -dump(re, f) -regex_t *re; -FILE *f; -{ -} - -#undef NOERRN -#define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */ - #define COMPILE 1 -#include "lex.c" -#include "color.c" -#include "locale.c" -#include "nfa.c" +#include "regc_lex.c" +#include "regc_color.c" +#include "regc_nfa.c" +#include "regc_cvec.c" +#include "regc_locale.c" diff --git a/generic/regcustom.h b/generic/regcustom.h new file mode 100644 index 0000000..0fda25f --- /dev/null +++ b/generic/regcustom.h @@ -0,0 +1,90 @@ +/* headers (which also pick up the standard ones, or equivalents) */ +#include "tclInt.h" +#include "tclPort.h" + +/* overrides for regguts.h definitions */ +/* function-pointer declarations */ +#define FUNCPTR(name, args) (*name) _ANSI_ARGS_(args) +#define MALLOC(n) ckalloc(n) +#define FREE(p) ckfree(VS(p)) +#define REALLOC(p,n) ckrealloc(VS(p),n) + + + +/* + * Do not insert extras between the "begin" and "end" lines -- this + * chunk is automatically 
extracted to be fitted into regex.h. + */ +/* --- begin --- */ +/* ensure certain things don't sneak in from system headers */ +#ifdef __REG_WIDE_T +#undef __REG_WIDE_T +#endif +#ifdef __REG_WIDE_COMPILE +#undef __REG_WIDE_COMPILE +#endif +#ifdef __REG_WIDE_EXEC +#undef __REG_WIDE_EXEC +#endif +#ifdef __REG_REGOFF_T +#undef __REG_REGOFF_T +#endif +#ifdef __REG_VOID_T +#undef __REG_VOID_T +#endif +#ifdef __REG_CONST +#undef __REG_CONST +#endif +/* interface types */ +#define __REG_WIDE_T Tcl_UniChar +#define __REG_WIDE_COMPILE re_ucomp +#define __REG_WIDE_EXEC re_uexec +#define __REG_REGOFF_T long /* not really right, but good enough... */ +#define __REG_VOID_T VOID +#define __REG_CONST CONST +#ifndef __REG_NOFRONT +#define __REG_NOFRONT /* don't want regcomp() and regexec() */ +#endif +#ifndef __REG_NOCHAR +#define __REG_NOCHAR /* or the char versions */ +#endif +/* --- end --- */ + + + +/* internal character type and related */ +typedef Tcl_UniChar chr; /* the type itself */ +typedef int pchr; /* what it promotes to */ +typedef unsigned uchr; /* unsigned type that will hold a chr */ +typedef int celt; /* type to hold chr, MCCE number, or NOCELT */ +#define NOCELT (-1) /* celt value which is not valid chr or MCCE */ +#define CHR(c) (UCHAR(c)) /* turn char literal into chr literal */ +#define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */ +#define CHRBITS 16 /* bits in a chr; must not use sizeof */ +#define CHR_MIN 0x0000 /* smallest and largest chr; the value */ +#define CHR_MAX 0xffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */ + +/* functions operating on chr */ +#define iscalnum(x) TclUniCharIsAlnum(x) +#define iscalpha(x) TclUniCharIsAlpha(x) +#define iscdigit(x) TclUniCharIsDigit(x) +#define iscspace(x) TclUniCharIsSpace(x) + +/* name the external functions */ +#define compile re_ucomp +#define exec re_uexec +#ifdef notdef +#define regfree re_ufree +#define regerror re_uerror +#endif + +/* + * Implement a mistake in the original POSIX.2: in EREs, and 
only in EREs + * (AREs do not support this botch), an unbalanced right parenthesis is an + * ordinary character rather than an error. This was unintentional, and + * will be fixed someday. + */ +#define POSIX_MISTAKE /* sigh */ + +/* and pick up the standard header */ +#include "regex.h" diff --git a/generic/regerror.c b/generic/regerror.c new file mode 100644 index 0000000..5eb67a7 --- /dev/null +++ b/generic/regerror.c @@ -0,0 +1,82 @@ +/* + * regerror - error-code expansion + */ + +#include "regguts.h" + +/* unknown-error explanation */ +static char unk[] = "*** unknown regex error code 0x%x ***"; + +/* struct to map among codes, code names, and explanations */ +static struct rerr { + int code; + char *name; + char *explain; +} rerrs[] = { + /* the actual table is built from regex.h */ +# include "regerrs.h" + -1, "", "oops", /* explanation special-cased in code */ +}; + +/* + - regerror - the interface to error numbers + */ +/* ARGSUSED */ +size_t /* actual space needed (including NUL) */ +regerror(errcode, preg, errbuf, errbuf_size) +int errcode; /* error code, or REG_ATOI or REG_ITOA */ +const regex_t *preg; /* associated regex_t (unused at present) */ +char *errbuf; /* result buffer (unless errbuf_size==0) */ +size_t errbuf_size; /* available space in errbuf, can be 0 */ +{ + struct rerr *r; + char *msg; + char convbuf[sizeof(unk)+50]; /* 50 = plenty for int */ + size_t len; + int icode; + + switch (errcode) { + case REG_ATOI: /* convert name to number */ + for (r = rerrs; r->code >= 0; r++) + if (strcmp(r->name, errbuf) == 0) + break; + sprintf(convbuf, "%d", r->code); /* -1 for unknown */ + msg = convbuf; + break; + case REG_ITOA: /* convert number to name */ + icode = atoi(errbuf); /* not our problem if this fails */ + for (r = rerrs; r->code >= 0; r++) + if (r->code == icode) + break; + if (r->code >= 0) + msg = r->name; + else { /* unknown; tell him the number */ + sprintf(convbuf, "REG_%u", (unsigned)icode); + msg = convbuf; + } + break; + default: /* 
a real, normal error code */ + for (r = rerrs; r->code >= 0; r++) + if (r->code == errcode) + break; + if (r->code >= 0) + msg = r->explain; + else { /* unknown; say so */ + sprintf(convbuf, unk, errcode); + msg = convbuf; + } + break; + } + + len = strlen(msg) + 1; /* space needed, including NUL */ + if (errbuf_size > 0) { + if (errbuf_size > len) + strcpy(errbuf, msg); + else { /* truncate to fit */ + strncpy(errbuf, msg, errbuf_size-1); + errbuf[errbuf_size-1] = '\0'; + } + } + + return len; +} diff --git a/generic/regerrs.h b/generic/regerrs.h new file mode 100644 index 0000000..8298597 --- /dev/null +++ b/generic/regerrs.h @@ -0,0 +1,19 @@ +REG_OKAY, "REG_OKAY", "no errors detected", +REG_NOMATCH, "REG_NOMATCH", "failed to match", +REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.1)", +REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element", +REG_ECTYPE, "REG_ECTYPE", "invalid character class", +REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence", +REG_ESUBREG, "REG_ESUBREG", "invalid backreference number", +REG_EBRACK, "REG_EBRACK", "brackets [] not balanced", +REG_EPAREN, "REG_EPAREN", "parentheses () not balanced", +REG_EBRACE, "REG_EBRACE", "braces {} not balanced", +REG_BADBR, "REG_BADBR", "invalid repetition count(s)", +REG_ERANGE, "REG_ERANGE", "invalid character range", +REG_ESPACE, "REG_ESPACE", "out of memory", +REG_BADRPT, "REG_BADRPT", "quantifier operand invalid", +REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug", +REG_INVARG, "REG_INVARG", "invalid argument to regex function", +REG_MIXED, "REG_MIXED", "character widths of regex and string differ", +REG_BADOPT, "REG_BADOPT", "invalid embedded option", +REG_IMPOSS, "REG_IMPOSS", "can never match", diff --git a/generic/regex.h b/generic/regex.h new file mode 100644 index 0000000..6f61dd3 --- /dev/null +++ b/generic/regex.h @@ -0,0 +1,299 @@ +#ifndef _REGEX_H_ +#define _REGEX_H_ /* never again */ +/* + * regular expressions + * + * Prototypes etc. 
marked with "^" within comments get gathered up (and + * possibly edited) by the regfwd program and inserted near the bottom of + * this file. + * + * We offer the option of declaring one wide-character version of the + * RE functions as well as the char versions. To do that, define + * __REG_WIDE_T to the type of wide characters (unfortunately, there + * is no consensus that wchar_t is suitable) and __REG_WIDE_COMPILE and + * __REG_WIDE_EXEC to the names to be used for the compile and execute + * functions (suggestion: re_Xcomp and re_Xexec, where X is a letter + * suggestive of the wide type, e.g. re_ucomp and re_uexec for Unicode). + * For cranky old compilers, it may be necessary to do something like: + * #define __REG_WIDE_COMPILE(a,b,c,d) re_Xcomp(a,b,c,d) + * #define __REG_WIDE_EXEC(a,b,c,d,e,f) re_Xexec(a,b,c,d,e,f) + * rather than just #defining the names as parameterless macros. + * + * For some specialized purposes, it may be desirable to suppress the + * declarations of the "front end" functions, regcomp() and regexec(), + * or of the char versions of the compile and execute functions. To + * suppress the front-end functions, define __REG_NOFRONT. To suppress + * the char versions, define __REG_NOCHAR. + * + * The right place to do those defines (and some others you may want, see + * below) would be <sys/types.h>. If you don't have control of that file, + * the right place to add your own defines to this file is marked below. + * This is normally done automatically, by the makefile and regmkhdr, based + * on the contents of regcustom.h. + */ + + + +/* + * voodoo for C++ + */ +#ifdef __cplusplus +extern "C" { +#endif + + + +/* + * Add your own defines, if needed, here. The --- stuff is for automatic + * generation of this file from regproto.h and regcustom.h. 
+ */ +/* --- begin --- */ +/* ensure certain things don't sneak in from system headers */ +#ifdef __REG_WIDE_T +#undef __REG_WIDE_T +#endif +#ifdef __REG_WIDE_COMPILE +#undef __REG_WIDE_COMPILE +#endif +#ifdef __REG_WIDE_EXEC +#undef __REG_WIDE_EXEC +#endif +#ifdef __REG_REGOFF_T +#undef __REG_REGOFF_T +#endif +#ifdef __REG_VOID_T +#undef __REG_VOID_T +#endif +#ifdef __REG_CONST +#undef __REG_CONST +#endif +/* interface types */ +#define __REG_WIDE_T Tcl_UniChar +#define __REG_WIDE_COMPILE re_ucomp +#define __REG_WIDE_EXEC re_uexec +#define __REG_REGOFF_T long /* not really right, but good enough... */ +#define __REG_VOID_T VOID +#define __REG_CONST CONST +#ifndef __REG_NOFRONT +#define __REG_NOFRONT /* don't want regcomp() and regexec() */ +#endif +#ifndef __REG_NOCHAR +#define __REG_NOCHAR /* or the char versions */ +#endif +/* --- end --- */ + + +/* + * interface types etc. + */ + +/* + * regoff_t has to be large enough to hold either off_t or ssize_t, + * and must be signed; it's only a guess that long is suitable, so we + * offer <sys/types.h> an override. + */ +#ifdef __REG_REGOFF_T +typedef __REG_REGOFF_T regoff_t; +#else +typedef long regoff_t; +#endif + +/* + * For benefit of old compilers, we offer <sys/types.h> the option of + * overriding the `void' type used to declare nonexistent return types. + */ +#ifdef __REG_VOID_T +typedef __REG_VOID_T re_void; +#else +typedef void re_void; +#endif + +/* + * Also for benefit of old compilers, <sys/types.h> can supply a macro + * which expands to a substitute for `const'. 
+ */ +#ifndef __REG_CONST +#define __REG_CONST const +#endif + + + +/* + * other interface types + */ + +/* the biggie, a compiled RE (or rather, a front end to same) */ +typedef struct { + int re_magic; /* magic number */ + size_t re_nsub; /* number of subexpressions */ + int re_info; /* information about RE */ +# define REG_UBACKREF 000001 +# define REG_ULOOKAHEAD 000002 +# define REG_UBOUNDS 000004 +# define REG_UBRACES 000010 +# define REG_UBSALNUM 000020 +# define REG_UPBOTCH 000040 +# define REG_UBBS 000100 +# define REG_UNONPOSIX 000200 +# define REG_UUNSPEC 000400 +# define REG_UUNPORT 001000 +# define REG_ULOCALE 002000 +# define REG_UEMPTYMATCH 004000 + int re_csize; /* sizeof(character) */ + char *re_endp; /* backward compatibility kludge */ + /* the rest is opaque pointers to hidden innards */ + char *re_guts; /* `char *' is more portable than `void *' */ + char *re_fns; +} regex_t; + +/* result reporting (may acquire more fields later) */ +typedef struct { + regoff_t rm_so; /* start of substring */ + regoff_t rm_eo; /* end of substring */ +} regmatch_t; + +/* supplementary control and reporting (placeholder for later work) */ +typedef struct { + int rm_dummy; +} rm_detail_t; + + + +/* + * compilation + ^ #ifndef __REG_NOCHAR + ^ int re_comp(regex_t *, __REG_CONST char *, size_t, int); + ^ #endif + ^ #ifndef __REG_NOFRONT + ^ int regcomp(regex_t *, __REG_CONST char *, int); + ^ #endif + ^ #ifdef __REG_WIDE_T + ^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int); + ^ #endif + */ +#define REG_BASIC 000000 /* BREs (convenience) */ +#define REG_EXTENDED 000001 /* EREs */ +#define REG_ADVF 000002 /* advanced features in EREs */ +#define REG_ADVANCED 000003 /* AREs (which are also EREs) */ +#define REG_QUOTE 000004 /* no special characters, none */ +#define REG_NOSPEC REG_QUOTE /* historical synonym */ +#define REG_ICASE 000010 /* ignore case */ +#define REG_NOSUB 000020 /* don't care about subexpressions */ +#define REG_EXPANDED 
000040 /* expanded format, white space & comments */ +#define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */ +#define REG_NLANCH 000200 /* ^ matches after \n, $ before */ +#define REG_NEWLINE 000300 /* newlines are line terminators */ +#define REG_PEND 000400 /* ugh -- backward-compatibility hack */ +#define REG_DUMP 004000 /* none of your business :-) */ +#define REG_FAKEEC 010000 /* none of your business :-) */ +#define REG_PROGRESS 020000 /* none of your business :-) */ + + + +/* + * execution + ^ #ifndef __REG_NOCHAR + ^ int re_exec(regex_t *, __REG_CONST char *, size_t, + ^ rm_detail_t *, size_t, regmatch_t [], int); + ^ #endif + ^ #ifndef __REG_NOFRONT + ^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int); + ^ #endif + ^ #ifdef __REG_WIDE_T + ^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, + ^ rm_detail_t *, size_t, regmatch_t [], int); + ^ #endif + */ +#define REG_NOTBOL 0001 /* BOS is not BOL */ +#define REG_NOTEOL 0002 /* EOS is not EOL */ +#define REG_STARTEND 0004 /* backward compatibility kludge */ +#define REG_FTRACE 0010 /* none of your business */ +#define REG_MTRACE 0020 /* none of your business */ +#define REG_SMALL 0040 /* none of your business */ + + + +/* + * misc generics (may be more functions here eventually) + ^ re_void regfree(regex_t *); + */ + + + +/* + * error reporting + * Be careful if modifying the list of error codes -- the table used by + * regerror() is generated automatically from this file! + * + * Note that there is no wide-char variant of regerror at this time; what + * kind of character is used for error reports is independent of what kind + * is used in matching. 
+ * + ^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t); + */ +#define REG_OKAY 0 /* no errors detected */ +#define REG_NOMATCH 1 /* failed to match */ +#define REG_BADPAT 2 /* invalid regexp */ +#define REG_ECOLLATE 3 /* invalid collating element */ +#define REG_ECTYPE 4 /* invalid character class */ +#define REG_EESCAPE 5 /* invalid escape \ sequence */ +#define REG_ESUBREG 6 /* invalid backreference number */ +#define REG_EBRACK 7 /* brackets [] not balanced */ +#define REG_EPAREN 8 /* parentheses () not balanced */ +#define REG_EBRACE 9 /* braces {} not balanced */ +#define REG_BADBR 10 /* invalid repetition count(s) */ +#define REG_ERANGE 11 /* invalid character range */ +#define REG_ESPACE 12 /* out of memory */ +#define REG_BADRPT 13 /* quantifier operand invalid */ +#define REG_ASSERT 15 /* "can't happen" -- you found a bug */ +#define REG_INVARG 16 /* invalid argument to regex function */ +#define REG_MIXED 17 /* character widths of regex and string differ */ +#define REG_BADOPT 18 /* invalid embedded option */ +#define REG_IMPOSS 19 /* can never match */ +/* two specials for debugging and testing */ +#define REG_ATOI 101 /* convert error-code name to number */ +#define REG_ITOA 102 /* convert error-code number to name */ + + + +/* + * the prototypes, as possibly munched by regfwd + */ +/* =====^!^===== begin forwards =====^!^===== */ +/* automatically gathered by fwd; do not hand-edit */ +/* === regproto.h === */ +#ifndef __REG_NOCHAR +int re_comp _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, int)); +#endif +#ifndef __REG_NOFRONT +int regcomp _ANSI_ARGS_((regex_t *, __REG_CONST char *, int)); +#endif +#ifdef __REG_WIDE_T +int __REG_WIDE_COMPILE _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int)); +#endif +#ifndef __REG_NOCHAR +int re_exec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, rm_detail_t *, size_t, regmatch_t [], int)); +#endif +#ifndef __REG_NOFRONT +int regexec _ANSI_ARGS_((regex_t *, __REG_CONST char 
*, size_t, regmatch_t [], int)); +#endif +#ifdef __REG_WIDE_T +int __REG_WIDE_EXEC _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int)); +#endif +re_void regfree _ANSI_ARGS_((regex_t *)); +extern size_t regerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t)); +/* automatically gathered by fwd; do not hand-edit */ +/* =====^!^===== end forwards =====^!^===== */ + + + +/* + * more C++ voodoo + */ +#ifdef __cplusplus +} +#endif + + + +#endif diff --git a/generic/exec.c b/generic/regexec.c index 92439aa..4220062 100644 --- a/generic/exec.c +++ b/generic/regexec.c @@ -1,43 +1,9 @@ /* - * exec.c -- - * - * Regexp package file: re_*exec and friends - match REs - * - * Copyright (c) 1998 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., and Sun Microsystems Inc., none of - * whom are responsible for the results. The author thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Copyright (c) 1998 by Sun Microsystems, Inc. - * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: exec.c,v 1.1.2.2 1998/10/05 17:38:26 stanton Exp $ + * re_*exec and friends - match REs */ -#include "tclInt.h" -#include <assert.h> -#include "tclRegexp.h" -#include "chr.h" -#include "guts.h" +#include "regguts.h" + /* internal variables, bundled for easy passing around */ @@ -75,6 +41,7 @@ struct sset { /* state set */ int flags; # define STARTER 01 /* the initial state set */ # define POSTSTATE 02 /* includes the goal state */ +# define LOCKED 04 /* locked in cache */ struct arcp ins; /* chain of inarcs pointing here */ chr *lastseen; /* last entered on arrival here */ struct sset **outs; /* outarc vector indexed by color */ @@ -95,6 +62,7 @@ struct dfa { struct cnfa *cnfa; struct colormap *cm; chr *lastpost; /* location of last cache-flushed success */ + struct sset *search; /* replacement-search-pointer memory */ }; #define CACHE 200 @@ -107,8 +75,8 @@ struct dfa { */ /* =====^!^===== begin forwards =====^!^===== */ /* automatically gathered by fwd; do not hand-edit */ -/* === exec.c === */ -int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, size_t, regmatch_t [], int)); +/* === regexec.c === */ +int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int)); static int find _ANSI_ARGS_((struct vars 
*, struct cnfa *, struct colormap *)); static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *)); static VOID zapmatches _ANSI_ARGS_((regmatch_t *, size_t)); @@ -130,13 +98,10 @@ static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colo static VOID freedfa _ANSI_ARGS_((struct dfa *)); static unsigned hash _ANSI_ARGS_((unsigned *, int)); static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *)); -static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *)); +static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *)); static int lacon _ANSI_ARGS_((struct vars *, struct cnfa *, chr *, pcolor)); -static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *)); -static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *)); -/* === color.c === */ -union tree; -static color getcolor _ANSI_ARGS_((struct colormap *, pchr)); +static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *)); +static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *)); /* automatically gathered by fwd; do not hand-edit */ /* =====^!^===== end forwards =====^!^===== */ @@ -144,13 +109,15 @@ static color getcolor _ANSI_ARGS_((struct colormap *, pchr)); /* - exec - match regular expression - ^ int exec(regex_t *, CONST chr *, size_t, size_t, regmatch_t [], int); + ^ int exec(regex_t *, CONST chr *, size_t, rm_detail_t *, + ^ size_t, regmatch_t [], int); */ int -exec(re, string, len, nmatch, pmatch, flags) +exec(re, string, len, details, nmatch, pmatch, flags) regex_t *re; CONST chr *string; size_t len; +rm_detail_t *details; /* hook for future elaboration */ size_t nmatch; regmatch_t pmatch[]; int flags; @@ -177,9 +144,9 @@ int flags; if (v->g->cflags&REG_NOSUB) nmatch = 0; /* override client */ v->nmatch = nmatch; - if (complications && v->nmatch < (size_t)(v->g->nsub + 1)) { + if
(complications && v->nmatch < v->g->nsub + 1) { /* need work area bigger than what user gave us */ - v->pmatch = (regmatch_t *)ckalloc((v->g->nsub + 1) * + v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) * sizeof(regmatch_t)); if (v->pmatch == NULL) return REG_ESPACE; @@ -190,10 +157,10 @@ int flags; v->stop = (chr *)string + len; v->err = 0; if (complications) { - v->mem1 = (regoff_t *)ckalloc(2*v->g->ntree*sizeof(regoff_t)); + v->mem1 = (regoff_t *)MALLOC(2*v->g->ntree*sizeof(regoff_t)); if (v->mem1 == NULL) { if (v->pmatch != pmatch) - ckfree((char *)v->pmatch); + FREE(v->pmatch); return REG_ESPACE; } v->mem2 = v->mem1 + v->g->ntree; @@ -208,12 +175,12 @@ int flags; if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) { zapmatches(pmatch, nmatch); n = (nmatch < v->nmatch) ? nmatch : v->nmatch; - memcpy((VOID *)pmatch, (VOID *)v->pmatch, n*sizeof(regmatch_t)); + memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t)); } if (v->pmatch != pmatch) - ckfree((char *)v->pmatch); + FREE(v->pmatch); if (v->mem1 != NULL) - ckfree((char *)v->mem1); + FREE(v->mem1); return st; } @@ -230,15 +197,14 @@ struct colormap *cm; struct dfa *d = newdfa(v, cnfa, cm); chr *begin; chr *end; - chr *stop = (cnfa->leftanch) ? v->start : v->stop; + chr *stop = (cnfa->flags&LEFTANCH) ? 
v->start : v->stop; if (d == NULL) return v->err; for (begin = v->start; begin <= stop; begin++) { - if (v->eflags&REG_MTRACE) - printf("\ntrying at %ld\n", (long)OFF(begin)); - end = longest(v, d, begin, v->stop); + MDEBUG(("\ntrying at %ld\n", (long)OFF(begin))); + end = longest(v, d, begin, v->stop); if (end != NULL) { if (v->nmatch > 0) { v->pmatch[0].rm_so = OFF(begin); @@ -249,11 +215,15 @@ struct colormap *cm; zapmatches(v->pmatch, v->nmatch); return dissect(v, v->g->tree, begin, end); } + if (ISERR()) + return v->err; return REG_OKAY; } } freedfa(d); + if (ISERR()) + return v->err; return REG_NOMATCH; } @@ -270,7 +240,7 @@ struct colormap *cm; struct dfa *d = newdfa(v, cnfa, cm); chr *begin; chr *end; - chr *stop = (cnfa->leftanch) ? v->start : v->stop; + chr *stop = (cnfa->flags&LEFTANCH) ? v->start : v->stop; chr *estop; int er; int usedis = (v->g->tree == NULL || v->g->tree->op == '|') ? 0 : 1; @@ -281,12 +251,11 @@ struct colormap *cm; if (!v->g->usedshorter) usedis = 0; for (begin = v->start; begin <= stop; begin++) { - if (v->eflags&REG_MTRACE) - printf("\ntrying at %ld\n", (long)OFF(begin)); + MDEBUG(("\ntrying at %ld\n", (long)OFF(begin))); if (usedis) { v->mem = v->mem1; zapmem(v, v->g->tree); - } + } estop = v->stop; for (;;) { if (usedis) { @@ -296,8 +265,7 @@ struct colormap *cm; end = longest(v, d, begin, estop); if (end == NULL) break; /* NOTE BREAK OUT */ - if (v->eflags&REG_MTRACE) - printf("tentative end %ld\n", (long)OFF(end)); + MDEBUG(("tentative end %ld\n", (long)OFF(end))); zapmatches(v->pmatch, v->nmatch); v->mem = v->mem2; zapmem(v, v->g->tree); @@ -309,7 +277,10 @@ struct colormap *cm; v->pmatch[0].rm_eo = OFF(end); } freedfa(d); + if (ISERR()) + return v->err; return REG_OKAY; + break; case REG_NOMATCH: /* go around and try again */ if (!usedis) { @@ -324,11 +295,14 @@ struct colormap *cm; default: freedfa(d); return er; + break; } } } freedfa(d); + if (ISERR()) + return v->err; return REG_NOMATCH; } @@ -343,7 +317,7 @@ size_t n; { size_t
i; - for (i = 1; i < n; i++) { + for (i = n-1; i > 0; i--) { p[i].rm_so = -1; p[i].rm_eo = -1; } @@ -399,8 +373,7 @@ chr *end; if ((size_t)n >= v->nmatch) return; - if (v->eflags&REG_MTRACE) - printf("setting %d\n", n); + MDEBUG(("setting %d\n", n)); v->pmatch[n].rm_so = OFF(begin); v->pmatch[n].rm_eo = OFF(end); } @@ -423,8 +396,7 @@ chr *end; /* end of same */ if (rt == NULL) return REG_OKAY; - if (v->eflags&REG_MTRACE) - printf("substring %ld-%ld\n", (long)OFF(begin), (long)OFF(end)); + MDEBUG(("substring %ld-%ld\n", (long)OFF(begin), (long)OFF(end))); /* alternatives -- punt to auxiliary */ if (rt->op == '|') @@ -439,8 +411,7 @@ chr *end; /* end of same */ /* in some cases, there may be no right side... */ if (rt->right.cnfa.nstates == 0) { - if (v->eflags&REG_MTRACE) - printf("singleton\n"); + MDEBUG(("singleton\n")); if (longest(v, d, begin, end) != end) { freedfa(d); return REG_ASSERT; @@ -466,16 +437,14 @@ chr *end; /* end of same */ freedfa(d2); return REG_ASSERT; } - if (v->eflags&REG_MTRACE) - printf("tentative midpoint %ld\n", (long)OFF(mid)); + MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid))); /* iterate until satisfaction or failure */ while (longest(v, d2, mid, end) != end) { /* that midpoint didn't work, find a new one */ if (mid == begin) { /* all possibilities exhausted! */ - if (v->eflags&REG_MTRACE) - printf("no midpoint!\n"); + MDEBUG(("no midpoint!\n")); freedfa(d); freedfa(d2); return REG_ASSERT; @@ -483,19 +452,16 @@ chr *end; /* end of same */ mid = longest(v, d, begin, mid-1); if (mid == NULL) { /* failed to find a new one! 
*/ - if (v->eflags&REG_MTRACE) - printf("failed midpoint!\n"); + MDEBUG(("failed midpoint!\n")); freedfa(d); freedfa(d2); return REG_ASSERT; } - if (v->eflags&REG_MTRACE) - printf("new midpoint %ld\n", (long)OFF(mid)); + MDEBUG(("new midpoint %ld\n", (long)OFF(mid))); } /* satisfaction */ - if (v->eflags&REG_MTRACE) - printf("successful\n"); + MDEBUG(("successful\n")); freedfa(d); freedfa(d2); assert(rt->left.subno >= 0); @@ -526,15 +492,13 @@ chr *end; /* end of same */ assert(rt->op == '|'); for (i = 0; rt != NULL; rt = rt->next, i++) { - if (v->eflags&REG_MTRACE) - printf("trying %dth\n", i); + MDEBUG(("trying %dth\n", i)); assert(rt->left.begin != NULL); d = newdfa(v, &rt->left.cnfa, v->g->cm); if (ISERR()) return v->err; if (longest(v, d, begin, end) == end) { - if (v->eflags&REG_MTRACE) - printf("success\n"); + MDEBUG(("success\n")); freedfa(d); assert(rt->left.subno >= 0); subset(v, &rt->left, begin, end); @@ -565,8 +529,7 @@ chr *end; /* end of same */ if (rt == NULL) return REG_OKAY; - if (v->eflags&REG_MTRACE) - printf("csubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end)); + MDEBUG(("csubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end))); /* punt various cases to auxiliaries */ if (rt->op == '|') /* alternatives */ @@ -590,8 +553,7 @@ chr *end; /* end of same */ freedfa(d); return v->err; } - if (v->eflags&REG_MTRACE) - printf("cconcat %d\n", rt->no); + MDEBUG(("cconcat %d\n", rt->no)); /* pick a tentative midpoint */ if (v->mem[rt->no] == 0) { @@ -601,14 +563,12 @@ chr *end; /* end of same */ freedfa(d2); return REG_NOMATCH; } - if (v->eflags&REG_MTRACE) - printf("tentative midpoint %ld\n", (long)OFF(mid)); + MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid))); subset(v, &rt->left, begin, mid); v->mem[rt->no] = (mid - begin) + 1; } else { mid = begin + (v->mem[rt->no] - 1); - if (v->eflags&REG_MTRACE) - printf("working midpoint %ld\n", (long)OFF(mid)); + MDEBUG(("working midpoint %ld\n", (long)OFF(mid))); } /* iterate until satisfaction or failure */ @@ -628,8 +588,7 @@ chr 
*end; /* end of same */ /* that midpoint didn't work, find a new one */ if (mid == begin) { /* all possibilities exhausted */ - if (v->eflags&REG_MTRACE) - printf("%d no midpoint\n", rt->no); + MDEBUG(("%d no midpoint\n", rt->no)); freedfa(d); freedfa(d2); return REG_NOMATCH; @@ -637,15 +596,12 @@ chr *end; /* end of same */ mid = longest(v, d, begin, mid-1); if (mid == NULL) { /* failed to find a new one */ - if (v->eflags&REG_MTRACE) - printf("%d failed midpoint\n", rt->no); + MDEBUG(("%d failed midpoint\n", rt->no)); freedfa(d); freedfa(d2); return REG_NOMATCH; } - if (v->eflags&REG_MTRACE) - printf("%d: new midpoint %ld\n", rt->no, - (long)OFF(mid)); + MDEBUG(("%d: new midpoint %ld\n", rt->no, (long)OFF(mid))); subset(v, &rt->left, begin, mid); v->mem[rt->no] = (mid - begin) + 1; zapmem(v, rt->left.tree); @@ -653,8 +609,7 @@ chr *end; /* end of same */ } /* satisfaction */ - if (v->eflags&REG_MTRACE) - printf("successful\n"); + MDEBUG(("successful\n")); freedfa(d); freedfa(d2); subset(v, &rt->right, mid, end); @@ -694,8 +649,7 @@ chr *end; /* end of same */ freedfa(d); return v->err; } - if (v->eflags&REG_MTRACE) - printf("crev %d\n", rt->no); + MDEBUG(("crev %d\n", rt->no)); /* pick a tentative midpoint */ if (v->mem[rt->no] == 0) { @@ -705,14 +659,12 @@ chr *end; /* end of same */ freedfa(d2); return REG_NOMATCH; } - if (v->eflags&REG_MTRACE) - printf("tentative midpoint %ld\n", (long)OFF(mid)); + MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid))); subset(v, &rt->left, begin, mid); v->mem[rt->no] = (mid - begin) + 1; } else { mid = begin + (v->mem[rt->no] - 1); - if (v->eflags&REG_MTRACE) - printf("working midpoint %ld\n", (long)OFF(mid)); + MDEBUG(("working midpoint %ld\n", (long)OFF(mid))); } /* iterate until satisfaction or failure */ @@ -732,8 +684,7 @@ chr *end; /* end of same */ /* that midpoint didn't work, find a new one */ if (mid == end) { /* all possibilities exhausted */ - if (v->eflags&REG_MTRACE) - printf("%d no midpoint\n", rt->no); + MDEBUG(("%d no midpoint\n", 
rt->no)); freedfa(d); freedfa(d2); return REG_NOMATCH; @@ -741,15 +692,12 @@ chr *end; /* end of same */ mid = shortest(v, d, begin, mid+1, end); if (mid == NULL) { /* failed to find a new one */ - if (v->eflags&REG_MTRACE) - printf("%d failed midpoint\n", rt->no); + MDEBUG(("%d failed midpoint\n", rt->no)); freedfa(d); freedfa(d2); return REG_NOMATCH; } - if (v->eflags&REG_MTRACE) - printf("%d: new midpoint %ld\n", rt->no, - (long)OFF(mid)); + MDEBUG(("%d: new midpoint %ld\n", rt->no, (long)OFF(mid))); subset(v, &rt->left, begin, mid); v->mem[rt->no] = (mid - begin) + 1; zapmem(v, rt->left.tree); @@ -757,8 +705,7 @@ chr *end; /* end of same */ } /* satisfaction */ - if (v->eflags&REG_MTRACE) - printf("successful\n"); + MDEBUG(("successful\n")); freedfa(d); freedfa(d2); subset(v, &rt->right, mid, end); @@ -782,8 +729,7 @@ chr *end; /* end of same */ assert(rt != NULL); assert(rt->op == ','); assert(rt->right.cnfa.nstates == 0); - if (v->eflags&REG_MTRACE) - printf("csingleton %d\n", rt->no); + MDEBUG(("csingleton %d\n", rt->no)); assert(rt->left.cnfa.nstates > 0); @@ -796,8 +742,7 @@ chr *end; /* end of same */ } freedfa(d); v->mem[rt->no] = 1; - if (v->eflags&REG_MTRACE) - printf("csingleton matched\n"); + MDEBUG(("csingleton matched\n")); } er = cdissect(v, rt->left.tree, begin, end); @@ -830,10 +775,10 @@ chr *end; /* end of same */ assert(rt != NULL); assert(rt->op == 'b'); assert(rt->right.cnfa.nstates == 0); + assert(n >= 0); assert((size_t)n < v->nmatch); - if (v->eflags&REG_MTRACE) - printf("cbackref n%d %d{%d-%d}\n", rt->no, n, min, max); + MDEBUG(("cbackref n%d %d{%d-%d}\n", rt->no, n, min, max)); if (v->pmatch[n].rm_so == -1) return REG_NOMATCH; @@ -853,6 +798,7 @@ chr *end; /* end of same */ } /* and too-short string */ + assert(end >= begin); if ((size_t)(end - begin) < len) return REG_NOMATCH; stop = end - len; @@ -864,8 +810,7 @@ chr *end; /* end of same */ break; i++; } - if (v->eflags&REG_MTRACE) - printf("cbackref found %d\n", i); + MDEBUG(("cbackref found %d\n", i)); 
/* and sort it out */ if (p != end) /* didn't consume all of it */ @@ -898,8 +843,7 @@ chr *end; /* end of same */ if (v->mem[rt->no] == TRIED) return caltdissect(v, rt->next, begin, end); - if (v->eflags&REG_MTRACE) - printf("calt n%d\n", rt->no); + MDEBUG(("calt n%d\n", rt->no)); assert(rt->left.begin != NULL); if (v->mem[rt->no] == UNTRIED) { @@ -912,8 +856,7 @@ chr *end; /* end of same */ return caltdissect(v, rt->next, begin, end); } freedfa(d); - if (v->eflags&REG_MTRACE) - printf("calt matched\n"); + MDEBUG(("calt matched\n")); v->mem[rt->no] = TRYING; } @@ -949,8 +892,7 @@ chr *end; /* end of same */ if (rt == NULL) return begin; - if (v->eflags&REG_MTRACE) - printf("dsubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end)); + MDEBUG(("dsubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end))); /* punt various cases to auxiliaries */ if (rt->right.cnfa.nstates == 0) /* no RHS */ @@ -970,8 +912,7 @@ chr *end; /* end of same */ freedfa(d); return NULL; } - if (v->eflags&REG_MTRACE) - printf("dconcat %d\n", rt->no); + MDEBUG(("dconcat %d\n", rt->no)); /* pick a tentative midpoint */ if (v->mem[rt->no] == 0) { @@ -981,13 +922,11 @@ chr *end; /* end of same */ freedfa(d2); return NULL; } - if (v->eflags&REG_MTRACE) - printf("tentative midpoint %ld\n", (long)OFF(mid)); + MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid))); v->mem[rt->no] = (mid - begin) + 1; } else { mid = begin + (v->mem[rt->no] - 1); - if (v->eflags&REG_MTRACE) - printf("working midpoint %ld\n", (long)OFF(mid)); + MDEBUG(("working midpoint %ld\n", (long)OFF(mid))); } /* iterate until satisfaction or failure */ @@ -1010,8 +949,7 @@ chr *end; /* end of same */ /* that midpoint didn't work, find a new one */ if (mid == begin) { /* all possibilities exhausted */ - if (v->eflags&REG_MTRACE) - printf("%d no midpoint\n", rt->no); + MDEBUG(("%d no midpoint\n", rt->no)); freedfa(d); freedfa(d2); return NULL; @@ -1019,22 +957,18 @@ chr *end; /* end of same */ mid = longest(v, d, begin, mid-1); if (mid == NULL) { /* failed to find a 
new one */ - if (v->eflags&REG_MTRACE) - printf("%d failed midpoint\n", rt->no); + MDEBUG(("%d failed midpoint\n", rt->no)); freedfa(d); freedfa(d2); return NULL; } - if (v->eflags&REG_MTRACE) - printf("%d: new midpoint %ld\n", rt->no, - (long)OFF(mid)); + MDEBUG(("%d: new midpoint %ld\n", rt->no, (long)OFF(mid))); v->mem[rt->no] = (mid - begin) + 1; zapmem(v, rt->right.tree); } /* satisfaction */ - if (v->eflags&REG_MTRACE) - printf("successful\n"); + MDEBUG(("successful\n")); freedfa(d); freedfa(d2); return ret; @@ -1060,8 +994,7 @@ chr *end; /* end of same */ if (rt == NULL) return begin; - if (v->eflags&REG_MTRACE) - printf("rsubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end)); + MDEBUG(("rsubstr %ld-%ld\n", (long)OFF(begin), (long)OFF(end))); /* concatenation -- need to split the substring between parts */ assert(rt->op == ','); @@ -1075,8 +1008,7 @@ chr *end; /* end of same */ freedfa(d); return NULL; } - if (v->eflags&REG_MTRACE) - printf("dconcat %d\n", rt->no); + MDEBUG(("dconcat %d\n", rt->no)); /* pick a tentative midpoint */ if (v->mem[rt->no] == 0) { @@ -1086,13 +1018,11 @@ chr *end; /* end of same */ freedfa(d2); return NULL; } - if (v->eflags&REG_MTRACE) - printf("tentative midpoint %ld\n", (long)OFF(mid)); + MDEBUG(("tentative midpoint %ld\n", (long)OFF(mid))); v->mem[rt->no] = (mid - begin) + 1; } else { mid = begin + (v->mem[rt->no] - 1); - if (v->eflags&REG_MTRACE) - printf("working midpoint %ld\n", (long)OFF(mid)); + MDEBUG(("working midpoint %ld\n", (long)OFF(mid))); } /* iterate until satisfaction or failure */ @@ -1115,8 +1045,7 @@ chr *end; /* end of same */ /* that midpoint didn't work, find a new one */ if (mid == end) { /* all possibilities exhausted */ - if (v->eflags&REG_MTRACE) - printf("%d no midpoint\n", rt->no); + MDEBUG(("%d no midpoint\n", rt->no)); freedfa(d); freedfa(d2); return NULL; @@ -1124,22 +1053,18 @@ chr *end; /* end of same */ mid = shortest(v, d, begin, mid+1, end); if (mid == NULL) { /* failed to find a new one */ - if (v->eflags&REG_MTRACE) - 
printf("%d failed midpoint\n", rt->no); + MDEBUG(("%d failed midpoint\n", rt->no)); freedfa(d); freedfa(d2); return NULL; } - if (v->eflags&REG_MTRACE) - printf("%d: new midpoint %ld\n", rt->no, - (long)OFF(mid)); + MDEBUG(("%d: new midpoint %ld\n", rt->no, (long)OFF(mid))); v->mem[rt->no] = (mid - begin) + 1; zapmem(v, rt->right.tree); } /* satisfaction */ - if (v->eflags&REG_MTRACE) - printf("successful\n"); + MDEBUG(("successful\n")); freedfa(d); freedfa(d2); return ret; @@ -1162,8 +1087,7 @@ chr *end; /* end of same */ assert(rt != NULL); assert(rt->op == ','); assert(rt->right.cnfa.nstates == 0); - if (v->eflags&REG_MTRACE) - printf("dsingleton %d\n", rt->no); + MDEBUG(("dsingleton %d\n", rt->no)); assert(rt->left.cnfa.nstates > 0); @@ -1180,8 +1104,8 @@ chr *end; /* end of same */ else ret = shortest(v, d, begin, begin, end); freedfa(d); - if (ret != NULL && (v->eflags&REG_MTRACE)) - printf("dsingleton matched\n"); + if (ret != NULL) + MDEBUG(("dsingleton matched\n")); return ret; } @@ -1210,18 +1134,15 @@ chr *stop; /* match must end at or before here */ cp = start; /* startup */ - if (v->eflags&REG_FTRACE) - printf("+++ startup +++\n"); + FDEBUG(("+++ startup +++\n")); if (cp == v->start) { co = d->cnfa->bos[(v->eflags&REG_NOTBOL) ? 
0 : 1]; - if (v->eflags&REG_FTRACE) - printf("color %ld\n", (long)co); + FDEBUG(("color %ld\n", (long)co)); } else { - co = getcolor(cm, *(cp - 1)); - if (v->eflags&REG_FTRACE) - printf("char %c, color %ld\n", (char)*(cp-1), (long)co); + co = GETCOLOR(cm, *(cp - 1)); + FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co)); } - css = miss(v, d, css, co, cp); + css = miss(v, d, css, co, cp, start); if (css == NULL) return NULL; css->lastseen = cp; @@ -1229,12 +1150,12 @@ chr *stop; /* match must end at or before here */ /* main loop */ if (v->eflags&REG_FTRACE) while (cp < realstop) { - printf("+++ at c%d +++\n", css - d->ssets); - co = getcolor(cm, *cp); - printf("char %c, color %ld\n", (char)*cp, (long)co); + FDEBUG(("+++ at c%d +++\n", css - d->ssets)); + co = GETCOLOR(cm, *cp); + FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); ss = css->outs[co]; if (ss == NULL) { - ss = miss(v, d, css, co, cp); + ss = miss(v, d, css, co, cp+1, start); if (ss == NULL) break; /* NOTE BREAK OUT */ } @@ -1244,10 +1165,10 @@ chr *stop; /* match must end at or before here */ } else while (cp < realstop) { - co = getcolor(cm, *cp); + co = GETCOLOR(cm, *cp); ss = css->outs[co]; if (ss == NULL) { - ss = miss(v, d, css, co, cp+1); + ss = miss(v, d, css, co, cp+1, start); if (ss == NULL) break; /* NOTE BREAK OUT */ } @@ -1257,13 +1178,11 @@ chr *stop; /* match must end at or before here */ } /* shutdown */ - if (v->eflags&REG_FTRACE) - printf("+++ shutdown at c%d +++\n", css - d->ssets); + FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets)); if (cp == v->stop && stop == v->stop) { co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1]; - if (v->eflags&REG_FTRACE) - printf("color %ld\n", (long)co); - ss = miss(v, d, css, co, cp); + FDEBUG(("color %ld\n", (long)co)); + ss = miss(v, d, css, co, cp, start); /* special case: match ended at eol? */ if (ss != NULL && (ss->flags&POSTSTATE)) return cp; @@ -1300,7 +1219,7 @@ chr *max; /* match must end at or before here */ chr *realmax = (max == v->stop) ? 
max : max + 1; color co; struct sset *css; - struct sset *ss = NULL; + struct sset *ss; struct colormap *cm = d->cm; /* initialize */ @@ -1308,31 +1227,29 @@ chr *max; /* match must end at or before here */ cp = start; /* startup */ - if (v->eflags&REG_FTRACE) - printf("--- startup ---\n"); + FDEBUG(("--- startup ---\n")); if (cp == v->start) { co = d->cnfa->bos[(v->eflags&REG_NOTBOL) ? 0 : 1]; - if (v->eflags&REG_FTRACE) - printf("color %ld\n", (long)co); + FDEBUG(("color %ld\n", (long)co)); } else { - co = getcolor(cm, *(cp - 1)); - if (v->eflags&REG_FTRACE) - printf("char %c, color %ld\n", (char)*(cp-1), (long)co); + co = GETCOLOR(cm, *(cp - 1)); + FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co)); } - css = miss(v, d, css, co, cp); + css = miss(v, d, css, co, cp, start); if (css == NULL) return NULL; css->lastseen = cp; + ss = css; /* main loop */ if (v->eflags&REG_FTRACE) while (cp < realmax) { - printf("--- at c%d ---\n", css - d->ssets); - co = getcolor(cm, *cp); - printf("char %c, color %ld\n", (char)*cp, (long)co); + FDEBUG(("--- at c%d ---\n", css - d->ssets)); + co = GETCOLOR(cm, *cp); + FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); ss = css->outs[co]; if (ss == NULL) { - ss = miss(v, d, css, co, cp); + ss = miss(v, d, css, co, cp+1, start); if (ss == NULL) break; /* NOTE BREAK OUT */ } @@ -1344,10 +1261,10 @@ chr *max; /* match must end at or before here */ } else while (cp < realmax) { - co = getcolor(cm, *cp); + co = GETCOLOR(cm, *cp); ss = css->outs[co]; if (ss == NULL) { - ss = miss(v, d, css, co, cp+1); + ss = miss(v, d, css, co, cp+1, start); if (ss == NULL) break; /* NOTE BREAK OUT */ } @@ -1366,13 +1283,11 @@ chr *max; /* match must end at or before here */ } /* shutdown */ - if (v->eflags&REG_FTRACE) - printf("--- shutdown at c%d ---\n", css - d->ssets); + FDEBUG(("--- shutdown at c%d ---\n", css - d->ssets)); if (cp == v->stop && max == v->stop) { co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 
0 : 1]; - if (v->eflags&REG_FTRACE) - printf("color %ld\n", (long)co); - ss = miss(v, d, css, co, cp); + FDEBUG(("color %ld\n", (long)co)); + ss = miss(v, d, css, co, cp, start); /* special case: match ended at eol? */ if (ss != NULL && (ss->flags&POSTSTATE)) return cp; @@ -1392,7 +1307,7 @@ struct vars *v; struct cnfa *cnfa; struct colormap *cm; { - struct dfa *d = (struct dfa *)ckalloc(sizeof(struct dfa)); + struct dfa *d = (struct dfa *)MALLOC(sizeof(struct dfa)); int wordsper = (cnfa->nstates + UBITS - 1) / UBITS; struct sset *ss; int i; @@ -1403,13 +1318,13 @@ struct colormap *cm; return NULL; } - d->ssets = (struct sset *)ckalloc(CACHE * sizeof(struct sset)); - d->statesarea = (unsigned *)ckalloc((CACHE+WORK) * wordsper * + d->ssets = (struct sset *)MALLOC(CACHE * sizeof(struct sset)); + d->statesarea = (unsigned *)MALLOC((CACHE+WORK) * wordsper * sizeof(unsigned)); d->work = &d->statesarea[CACHE * wordsper]; - d->outsarea = (struct sset **)ckalloc(CACHE * cnfa->ncolors * + d->outsarea = (struct sset **)MALLOC(CACHE * cnfa->ncolors * sizeof(struct sset *)); - d->incarea = (struct arcp *)ckalloc(CACHE * cnfa->ncolors * + d->incarea = (struct arcp *)MALLOC(CACHE * cnfa->ncolors * sizeof(struct arcp)); if (d->ssets == NULL || d->statesarea == NULL || d->outsarea == NULL || d->incarea == NULL) { @@ -1426,6 +1341,7 @@ struct colormap *cm; d->cnfa = cnfa; d->cm = cm; d->lastpost = NULL; + d->search = d->ssets; for (ss = d->ssets, i = 0; i < d->nssets; ss++, i++) { /* initialization of most fields is done as needed */ @@ -1446,14 +1362,14 @@ freedfa(d) struct dfa *d; { if (d->ssets != NULL) - ckfree((char *)d->ssets); + FREE(d->ssets); if (d->statesarea != NULL) - ckfree((char *)d->statesarea); + FREE(d->statesarea); if (d->outsarea != NULL) - ckfree((char *)d->outsarea); + FREE(d->outsarea); if (d->incarea != NULL) - ckfree((char *)d->incarea); - ckfree((char *)d); + FREE(d->incarea); + FREE(d); } /* @@ -1492,7 +1408,7 @@ chr *start; if (d->nssused > 0 && 
(d->ssets[0].flags&STARTER)) ss = &d->ssets[0]; else { /* no, must (re)build it */ - ss = getvacant(v, d); + ss = getvacant(v, d, start, start); for (i = 0; i < d->wordsper; i++) ss->states[i] = 0; BSET(ss->states, d->cnfa->pre); @@ -1512,15 +1428,16 @@ chr *start; /* - miss - handle a cache miss ^ static struct sset *miss(struct vars *, struct dfa *, struct sset *, - ^ pcolor, chr *); + ^ pcolor, chr *, chr *); */ static struct sset * /* NULL if goes to empty set */ -miss(v, d, css, co, cp) +miss(v, d, css, co, cp, start) struct vars *v; /* used only for debug flags */ struct dfa *d; struct sset *css; pcolor co; chr *cp; /* next chr */ +chr *start; /* where the attempt got started */ { struct cnfa *cnfa = d->cnfa; int i; @@ -1534,12 +1451,10 @@ chr *cp; /* next chr */ /* for convenience, we can be called even if it might not be a miss */ if (css->outs[co] != NULL) { - if (v->eflags&REG_FTRACE) - printf("hit\n"); + FDEBUG(("hit\n")); return css->outs[co]; } - if (v->eflags&REG_FTRACE) - printf("miss\n"); + FDEBUG(("miss\n")); /* first, what set of states would we end up in? */ for (i = 0; i < d->wordsper; i++) @@ -1554,10 +1469,9 @@ chr *cp; /* next chr */ gotstate = 1; if (ca->to == cnfa->post) ispost = 1; - if (v->eflags&REG_FTRACE) - printf("%d -> %d\n", i, ca->to); + FDEBUG(("%d -> %d\n", i, ca->to)); } - dolacons = (gotstate) ? cnfa->haslacons : 0; + dolacons = (gotstate) ? (cnfa->flags&HASLACONS) : 0; didlacons = 0; while (dolacons) { /* transitive closure */ dolacons = 0; @@ -1574,9 +1488,7 @@ chr *cp; /* next chr */ didlacons = 1; if (ca->to == cnfa->post) ispost = 1; - if (v->eflags&REG_FTRACE) - printf("%d :-> %d\n", - i, ca->to); + FDEBUG(("%d :> %d\n",i,ca->to)); } } if (!gotstate) @@ -1585,14 +1497,13 @@ chr *cp; /* next chr */ /* next, is that in the cache? 
*/ for (p = d->ssets, i = d->nssused; i > 0; p++, i--) - if (p->hash == h && memcmp((VOID *)d->work, (VOID *)p->states, + if (p->hash == h && memcmp(VS(d->work), VS(p->states), d->wordsper*sizeof(unsigned)) == 0) { - if (v->eflags&REG_FTRACE) - printf("cached c%d\n", p - d->ssets); + FDEBUG(("cached c%d\n", p - d->ssets)); break; /* NOTE BREAK OUT */ } if (i == 0) { /* nope, need a new cache entry */ - p = getvacant(v, d); + p = getvacant(v, d, cp, start); assert(p != css); for (i = 0; i < d->wordsper; i++) p->states[i] = d->work[i]; @@ -1605,7 +1516,7 @@ chr *cp; /* next chr */ css->outs[co] = p; css->inchain[co] = p->ins; p->ins.ss = css; - p->ins.co = (color) co; + p->ins.co = (color)co; } return p; } @@ -1615,10 +1526,10 @@ chr *cp; /* next chr */ ^ static int lacon(struct vars *, struct cnfa *, chr *, pcolor); */ static int /* predicate: constraint satisfied? */ -lacon(v, pcnfa, precp, co) +lacon(v, pcnfa, cp, co) struct vars *v; struct cnfa *pcnfa; /* parent cnfa */ -chr *precp; /* points to previous chr */ +chr *cp; pcolor co; /* "color" of the lookahead constraint */ { int n; @@ -1628,18 +1539,16 @@ pcolor co; /* "color" of the lookahead constraint */ n = co - pcnfa->ncolors; assert(n < v->g->nlacons && v->g->lacons != NULL); - if (v->eflags&REG_FTRACE) - printf("=== testing lacon %d\n", n); + FDEBUG(("=== testing lacon %d\n", n)); sub = &v->g->lacons[n]; d = newdfa(v, &sub->cnfa, v->g->cm); if (d == NULL) { ERR(REG_ESPACE); return 0; } - end = longest(v, d, precp, v->stop); + end = longest(v, d, cp, v->stop); freedfa(d); - if (v->eflags&REG_FTRACE) - printf("=== lacon %d match %d\n", n, (end != NULL)); + FDEBUG(("=== lacon %d match %d\n", n, (end != NULL))); return (sub->subno) ? (end != NULL) : (end == NULL); } @@ -1647,12 +1556,14 @@ pcolor co; /* "color" of the lookahead constraint */ - getvacant - get a vacant state set * This routine clears out the inarcs and outarcs, but does not otherwise * clear the innards of the state set -- that's up to the caller. 
- ^ static struct sset *getvacant(struct vars *, struct dfa *); + ^ static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *); */ static struct sset * -getvacant(v, d) +getvacant(v, d, cp, start) struct vars *v; /* used only for debug flags */ struct dfa *d; +chr *cp; +chr *start; { int i; struct sset *ss; @@ -1661,15 +1572,14 @@ struct dfa *d; struct arcp lastap; color co; - ss = pickss(v, d); + ss = pickss(v, d, cp, start); + assert(!(ss->flags&LOCKED)); /* clear out its inarcs, including self-referential ones */ ap = ss->ins; while ((p = ap.ss) != NULL) { co = ap.co; - if (v->eflags&REG_FTRACE) - printf("zapping c%d's %ld outarc\n", p - d->ssets, - (long)co); + FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long)co)); p->outs[co] = NULL; ap = p->inchain[co]; p->inchain[co].ss = NULL; /* paranoia */ @@ -1682,9 +1592,7 @@ struct dfa *d; assert(p != ss); /* not self-referential */ if (p == NULL) continue; /* NOTE CONTINUE */ - if (v->eflags&REG_FTRACE) - printf("deleting outarc %d from c%d's inarc chain\n", - i, p - d->ssets); + FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets)); if (p->ins.ss == ss && p->ins.co == i) p->ins = ss->inchain[i]; else { @@ -1710,23 +1618,25 @@ struct dfa *d; /* - pickss - pick the next stateset to be used - ^ static struct sset *pickss(struct vars *, struct dfa *); + ^ static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *); */ static struct sset * -pickss(v, d) +pickss(v, d, cp, start) struct vars *v; /* used only for debug flags */ struct dfa *d; +chr *cp; +chr *start; { int i; struct sset *ss; - struct sset *oldest; + struct sset *end; + chr *ancient; /* shortcut for cases where cache isn't full */ if (d->nssused < d->nssets) { ss = &d->ssets[d->nssused]; d->nssused++; - if (v->eflags&REG_FTRACE) - printf("new c%d\n", ss - d->ssets); + FDEBUG(("new c%d\n", ss - d->ssets)); /* must make innards consistent */ ss->ins.ss = NULL; for (i = 0; i < d->ncolors; i++) { @@ -1734,21 +1644,32 @@ struct dfa 
*d; ss->inchain[i].ss = NULL; } ss->flags = 0; - ss->ins.co = 0; return ss; } - /* look for oldest */ - oldest = d->ssets; - for (ss = d->ssets, i = d->nssets; i > 0; ss++, i--) { - if (ss->lastseen != oldest->lastseen && (ss->lastseen == NULL || - ss->lastseen < oldest->lastseen)) - oldest = ss; - } - if (v->eflags&REG_FTRACE) - printf("replacing c%d\n", oldest - d->ssets); - return oldest; -} + /* look for oldest, or old enough anyway */ + if (cp - start > d->nssets*3/4) /* oldest 25% are expendable */ + ancient = cp - d->nssets*3/4; + else + ancient = start; + for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++) + if ((ss->lastseen == NULL || ss->lastseen < ancient) && + !(ss->flags&LOCKED)) { + d->search = ss + 1; + FDEBUG(("replacing c%d\n", ss - d->ssets)); + return ss; + } + for (ss = d->ssets, end = d->search; ss < end; ss++) + if ((ss->lastseen == NULL || ss->lastseen < ancient) && + !(ss->flags&LOCKED)) { + d->search = ss + 1; + FDEBUG(("replacing c%d\n", ss - d->ssets)); + return ss; + } -#define EXEC 1 -#include "color.c" + /* nobody's old enough?!? -- something's really wrong */ + FDEBUG(("can't find victim to replace!\n")); + assert(NOTREACHED); + ERR(REG_ASSERT); + return d->ssets; +} diff --git a/generic/regfree.c b/generic/regfree.c new file mode 100644 index 0000000..a5c3f0b --- /dev/null +++ b/generic/regfree.c @@ -0,0 +1,25 @@ +/* + * regfree - free an RE + * + * You might think that this could be incorporated into regcomp.c, and + * that would be a reasonable idea... except that this is a generic + * function (with a generic name), applicable to all compiled REs + * regardless of the size of their characters, whereas the stuff in + * regcomp.c gets compiled once per character size. + */ + +#include "regguts.h" + +/* + - regfree - free an RE (generic function, punts to RE-specific function) + * + * Ignoring invocation with NULL is a convenience. 
+ */ +VOID +regfree(re) +regex_t *re; +{ + if (re == NULL) + return; + (*((struct fns *)re->re_fns)->free)(re); +} diff --git a/generic/regfronts.c b/generic/regfronts.c new file mode 100644 index 0000000..a9bd556 --- /dev/null +++ b/generic/regfronts.c @@ -0,0 +1,56 @@ +/* + * regcomp and regexec - front ends to re_ routines + * + * Mostly for implementation of backward-compatibility kludges. Note + * that these routines exist ONLY in char versions. + */ + +#include "regguts.h" + +/* + - regcomp - compile regular expression + */ +int +regcomp(re, str, flags) +regex_t *re; +CONST char *str; +int flags; +{ + size_t len; + int f = flags; + + if (f&REG_PEND) { + len = re->re_endp - str; + f &= ~REG_PEND; + } else + len = strlen(str); + + return re_comp(re, str, len, f); +} + +/* + - regexec - execute regular expression + */ +int +regexec(re, str, nmatch, pmatch, flags) +regex_t *re; +CONST char *str; +size_t nmatch; +regmatch_t pmatch[]; +int flags; +{ + CONST char *start; + size_t len; + int f = flags; + + if (f&REG_STARTEND) { + start = str + pmatch[0].rm_so; + len = pmatch[0].rm_eo - pmatch[0].rm_so; + f &= ~REG_STARTEND; + } else { + start = str; + len = strlen(str); + } + + return re_exec(re, start, len, nmatch, pmatch, f); +} diff --git a/generic/guts.h b/generic/regguts.h index 7b847ac..1490d44 100644 --- a/generic/guts.h +++ b/generic/regguts.h @@ -1,54 +1,119 @@ /* - * guts.h -- - * - * Regexp package file: Misc. utilities. - * - * Copyright (c) 1998 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., and Sun Microsystems Inc., none of - * whom are responsible for the results. The author thanks all of them. 
- * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Copyright (c) 1998 by Sun Microsystems, Inc. - * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: guts.h,v 1.1.2.2 1998/10/03 01:56:40 stanton Exp $ + * Internal interface definitions, etc., for the regex package */ -#include "tclInt.h" -#define NOTREACHED 0 -#define xxx 1 +/* + * Environmental customization. It should not (I hope) be necessary to + * alter the file you are now reading -- regcustom.h should handle it all, + * given care here and elsewhere. + */ +#include "regcustom.h" + + + +/* + * Things that regcustom.h might override. 
+ */ + +/* standard header files (NULL is a reasonable indicator for them) */ +#ifndef NULL +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> +#include <limits.h> +#include <string.h> +#endif + +/* assertions */ +#ifndef assert +#include <assert.h> +#endif + +/* voids */ +#ifndef VOID +#define VOID void /* for function return values */ +#endif +#ifndef DISCARD +#define DISCARD VOID /* for throwing values away */ +#endif +#ifndef PVOID +#define PVOID VOID * /* generic pointer */ +#endif +#ifndef VS +#define VS(x) ((PVOID)(x)) /* cast something to generic ptr */ +#endif +#ifndef NOPARMS +#define NOPARMS VOID /* for empty parm lists */ +#endif + +/* function-pointer declarator */ +#ifndef FUNCPTR +#if __STDC__ >= 1 +#define FUNCPTR(name, args) (*name)args +#else +#define FUNCPTR(name, args) (*name)() +#endif +#endif + +/* memory allocation */ +#ifndef MALLOC +#define MALLOC(n) malloc(n) +#endif +#ifndef REALLOC +#define REALLOC(p, n) realloc(VS(p), n) +#endif +#ifndef FREE +#define FREE(p) free(VS(p)) +#endif + +/* want size of a char in bits, and max value in bounded quantifiers */ +#ifndef CHAR_BIT +#include <limits.h> +#endif #ifndef _POSIX2_RE_DUP_MAX -#define _POSIX2_RE_DUP_MAX 255 +#define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */ #endif + + + +/* + * misc + */ + +#define NOTREACHED 0 +#define xxx 1 + #define DUPMAX _POSIX2_RE_DUP_MAX #define INFINITY (DUPMAX+1) -/* bitmap manipulation */ +#define REMAGIC 0xfed7 /* magic number for main struct */ + + + +/* + * debugging facilities + */ +#ifdef REG_DEBUG +#define FDEBUG(arglist) { if (v->eflags&REG_FTRACE) printf arglist; } +#define MDEBUG(arglist) { if (v->eflags&REG_MTRACE) printf arglist; } +#else +#define FDEBUG(arglist) {} +#define MDEBUG(arglist) {} +#endif + + + +/* + * bitmap manipulation + */ #define UBITS (CHAR_BIT * sizeof(unsigned)) #define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS)) #define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS))) + + /* * 
Map a truth value into -1 for false, 1 for true. This is so it is * possible to write compile-time assertions by declaring a dummy array @@ -56,14 +121,13 @@ */ #define NEGIFNOT(x) (2*!!(x) - 1) /* !! ensures 0 or 1 */ + + /* * We dissect a chr into byts for colormap table indexing. Here we define * a byt, which will be the same as a byte on most machines... The exact * size of a byt is not critical, but about 8 bits is good, and extraction * of 8-bit chunks is sometimes especially fast. - * - * Changes in several places are needed to handle an increase in MAXBYTS. - * Those places check whether MAXBYTS is larger than they expect. */ #ifndef BYTBITS #define BYTBITS 8 /* bits in a byt */ @@ -71,7 +135,9 @@ #define BYTTAB (1<<BYTBITS) /* size of table with one entry per byt value */ #define BYTMASK (BYTTAB-1) /* bit mask for byt */ #define NBYTS ((CHRBITS+BYTBITS-1)/BYTBITS) -#define MAXBYTS 8 /* maximum NBYTS the code can handle */ +/* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */ + + /* * As soon as possible, we map chrs into equivalence classes -- "colors" -- @@ -81,22 +147,96 @@ typedef short color; /* colors of characters */ typedef int pcolor; /* what color promotes to */ #define COLORLESS (-1) /* impossible color */ #define WHITE 0 /* default color, parent of all others */ -struct colormap; /* forward def for master type */ + + /* - * Interface definitions for locale-interface functions in locale.c + * A colormap is a tree -- more precisely, a DAG -- indexed at each level + * by a byt of the chr, to map the chr to a color efficiently. Because + * lower sections of the tree can be shared, it can exploit the usual + * sparseness of such a mapping table. The final tree is always NBYTS + * levels deep (at present it may be shallower during construction, but + * it is always "filled" to full depth at the end of that, using pointers + * to "fill blocks" which are entirely WHITE in color). 
+ */ + +/* the tree itself */ +struct colors { + color ccolor[BYTTAB]; +}; +struct ptrs { + union tree *pptr[BYTTAB]; +}; +union tree { + struct colors colors; + struct ptrs ptrs; +}; +#define tcolor colors.ccolor +#define tptr ptrs.pptr + +/* internal per-color structure for the color machinery */ +struct colordesc { + uchr nchrs; /* number of chars of this color */ + color sub; /* open subcolor of this one, or NOSUB */ +# define NOSUB COLORLESS + struct arc *arcs; /* color chain */ +# define UNUSEDCOLOR(cd) ((cd)->nchrs == 0 && (cd)->sub == NOSUB) + int flags; +# define PSEUDO 1 /* pseudocolor, no real chars */ +}; + +/* the color map itself */ +struct colormap { + int magic; +# define CMMAGIC 0x876 + struct vars *v; /* for compile error reporting */ + color rest; + int filled; /* has it been filled? */ + size_t ncds; /* number of colordescs */ + struct colordesc *cd; +# define CDEND(cm) (&(cm)->cd[(cm)->ncds]) +# define NINLINECDS ((size_t)10) + struct colordesc cds[NINLINECDS]; + union tree tree[NBYTS]; /* tree top, plus fill blocks */ +}; + +/* optimization magic to do fast chr->color mapping */ +#define B0(c) ((c) & BYTMASK) +#define B1(c) (((c)>>BYTBITS) & BYTMASK) +#define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK) +#define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK) +#if NBYTS == 1 +#define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)]) +#endif +#if NBYTS == 2 +#define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)]) +#endif +#if NBYTS == 4 +#define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)]) +#endif + + + +/* + * Interface definitions for locale-interface functions in locale.c. + * Multi-character collating elements (MCCEs) cause most of the trouble. 
*/ struct cvec { int nchrs; /* number of chrs */ int chrspace; /* number of chrs possible */ chr *chrs; /* pointer to vector of chrs */ - int nces; /* number of multichr collating elements */ - int cespace; /* number of CEs possible */ - int ncechrs; /* number of chrs used for CEs */ - chr *ces[1]; /* pointers to 0-terminated CEs */ + int nmcces; /* number of MCCEs */ + int mccespace; /* number of MCCEs possible */ + int nmccechrs; /* number of chrs used for MCCEs */ + chr *mcces[1]; /* pointers to 0-terminated MCCEs */ /* and both batches of chrs are on the end */ }; +/* caution: this value cannot be changed easily */ +#define MAXMCCE 2 /* length of longest MCCE */ + + + /* * definitions for NFA internal representation * @@ -147,12 +287,15 @@ struct nfa { struct state *states; /* state-chain header */ struct state *slast; /* tail of the chain */ struct state *free; /* free list */ + struct colormap *cm; /* the color map */ color bos[2]; /* colors, if any, assigned to BOS and BOL */ color eos[2]; /* colors, if any, assigned to EOS and EOL */ struct vars *v; /* simplifies compile error reporting */ struct nfa *parent; /* parent NFA, if any */ }; + + /* * definitions for compacted NFA */ @@ -164,8 +307,9 @@ struct carc { struct cnfa { int nstates; /* number of states */ int ncolors; /* number of colors */ - int haslacons; /* does it use lookahead constraints? */ - int leftanch; /* is it anchored on the left? 
*/ + int flags; +# define HASLACONS 01 /* uses lookahead constraints */ +# define LEFTANCH 02 /* anchored on left */ int pre; /* setup state number */ int post; /* teardown state number */ color bos[2]; /* colors, if any, assigned to BOS and BOL */ @@ -176,6 +320,8 @@ struct cnfa { #define ZAPCNFA(cnfa) ((cnfa).nstates = 0) #define NULLCNFA(cnfa) ((cnfa).nstates == 0) + + /* * definitions for subexpression tree * The intrepid code-reader is hereby warned that the subexpression tree @@ -198,20 +344,27 @@ struct subre { struct rtree { char op; /* operator: '|', ',' */ - short no; /* node numbering */ + char flags; +# define INUSE 01 /* in use in the tree */ + short no; /* index into retry memory */ struct subre left; struct rtree *next; /* for '|' */ struct subre right; /* for ',' */ + struct rtree *chain; /* for bookkeeping and error cleanup */ }; + + /* * table of function pointers for generic manipulation functions * A regex_t's re_fns points to one of these. */ struct fns { - VOID (*free) _ANSI_ARGS_((regex_t *)); + VOID FUNCPTR(free, (regex_t *)); }; + + /* * the insides of a regex_t, hidden behind a void * */ @@ -220,13 +373,12 @@ struct guts { # define GUTSMAGIC 0xfed9 int cflags; /* copy of compile flags */ int info; /* copy of re_info */ - int nsub; /* copy of re_nsub */ + size_t nsub; /* copy of re_nsub */ struct cnfa cnfa; struct rtree *tree; int ntree; struct colormap *cm; - int (*compare) _ANSI_ARGS_((CONST chr *, CONST chr *, size_t)); - /* string-compare function */ + int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t)); struct subre *lacons; /* lookahead-constraint vector */ int nlacons; /* size of lacons */ int usedshorter; /* used non-greedy quantifiers? */ diff --git a/generic/tclBasic.c b/generic/tclBasic.c index e8fa7ad..fcc1f93 100644 --- a/generic/tclBasic.c +++ b/generic/tclBasic.c @@ -12,7 +12,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. 
* - * RCS: @(#) $Id: tclBasic.c,v 1.1.2.3 1998/09/30 20:46:21 stanton Exp $ + * RCS: @(#) $Id: tclBasic.c,v 1.1.2.4 1998/10/21 20:40:02 stanton Exp $ */ #include "tclInt.h" @@ -77,6 +77,8 @@ static CmdInfo builtInCmds[] = { (CompileProc *) NULL, 1}, {"continue", (Tcl_CmdProc *) NULL, Tcl_ContinueObjCmd, TclCompileContinueCmd, 1}, + {"encoding", (Tcl_CmdProc *) NULL, Tcl_EncodingObjCmd, + (CompileProc *) NULL, 1}, {"error", (Tcl_CmdProc *) NULL, Tcl_ErrorObjCmd, (CompileProc *) NULL, 1}, {"eval", (Tcl_CmdProc *) NULL, Tcl_EvalObjCmd, diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index f17b8fc..54a3046 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -11,7 +11,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdAH.c,v 1.1.2.3 1998/09/28 20:24:18 stanton Exp $ + * RCS: @(#) $Id: tclCmdAH.c,v 1.1.2.4 1998/10/21 20:40:03 stanton Exp $ */ #include "tclInt.h" @@ -343,7 +343,7 @@ Tcl_CdObjCmd(dummy, interp, objc, objv) * Tcl_ConcatObjCmd -- * * This object-based procedure is invoked to process the "concat" Tcl - * command. See the user documentation for details on what it does/ + * command. See the user documentation for details on what it does. * * Results: * A standard Tcl object result. @@ -407,6 +407,123 @@ Tcl_ContinueObjCmd(dummy, interp, objc, objv) /* *---------------------------------------------------------------------- * + * Tcl_EncodingObjCmd -- + * + * This command manipulates encodings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_EncodingObjCmd(dummy, interp, objc, objv) + ClientData dummy; /* Not used. */ + Tcl_Interp *interp; /* Current interpreter. */ + int objc; /* Number of arguments. */ + Tcl_Obj *CONST objv[]; /* Argument objects. 
*/ +{ + int index, length; + Tcl_Encoding encoding; + char *string; + Tcl_DString ds; + Tcl_Obj *resultPtr; + + static char *optionStrings[] = { + "convertfrom", "convertto", "names", "system", + NULL + }; + enum options { + ENC_CONVERTFROM, ENC_CONVERTTO, ENC_NAMES, ENC_SYSTEM + }; + + if (objc < 2) { + Tcl_WrongNumArgs(interp, 1, objv, "option ?arg ...?"); + return TCL_ERROR; + } + if (Tcl_GetIndexFromObj(interp, objv[1], optionStrings, "option", 0, + &index) != TCL_OK) { + return TCL_ERROR; + } + + switch ((enum options) index) { + case ENC_CONVERTTO: + case ENC_CONVERTFROM: { + char *name; + Tcl_Obj *data; + if (objc == 3) { + name = NULL; + data = objv[2]; + } else if (objc == 4) { + name = Tcl_GetString(objv[2]); + data = objv[3]; + } else { + Tcl_WrongNumArgs(interp, 2, objv, "?encoding? data"); + return TCL_ERROR; + } + + encoding = Tcl_GetEncoding(interp, name); + if (!encoding) { + return TCL_ERROR; + } + + if ((enum options) index == ENC_CONVERTFROM) { + /* + * Treat the string as binary data. + */ + + string = (char *) Tcl_GetByteArrayFromObj(data, &length); + Tcl_ExternalToUtfDString(encoding, string, length, &ds); + Tcl_DStringResult(interp, &ds); + } else { + /* + * Store the result as binary data. 
+ */ + + string = Tcl_GetStringFromObj(data, &length); + Tcl_UtfToExternalDString(encoding, string, length, &ds); + resultPtr = Tcl_GetObjResult(interp); + Tcl_SetByteArrayObj(resultPtr, + (unsigned char *) Tcl_DStringValue(&ds), + Tcl_DStringLength(&ds)); + Tcl_DStringFree(&ds); + } + + Tcl_FreeEncoding(encoding); + break; + } + case ENC_NAMES: { + if (objc > 2) { + Tcl_WrongNumArgs(interp, 2, objv, NULL); + return TCL_ERROR; + } + Tcl_GetEncodingNames(interp); + break; + } + case ENC_SYSTEM: { + if (objc > 3) { + Tcl_WrongNumArgs(interp, 2, objv, "?encoding?"); + return TCL_ERROR; + } + if (objc == 2) { + Tcl_SetResult(interp, Tcl_GetEncodingName(NULL), TCL_STATIC); + } else { + return Tcl_SetSystemEncoding(interp, + Tcl_GetStringFromObj(objv[2], NULL)); + } + break; + } + } + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * * Tcl_ErrorObjCmd -- * * This procedure is invoked to process the "error" Tcl command. diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c index f47fb1e..6b4cc39 100644 --- a/generic/tclCmdIL.c +++ b/generic/tclCmdIL.c @@ -13,12 +13,13 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdIL.c,v 1.1.2.2 1998/09/24 23:58:42 stanton Exp $ + * RCS: @(#) $Id: tclCmdIL.c,v 1.1.2.3 1998/10/21 20:40:04 stanton Exp $ */ #include "tclInt.h" #include "tclPort.h" #include "tclCompile.h" +#include "tclRegexp.h" /* * During execution of the "lsort" command, structures of the following diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 8a3b6d5..9f46efc 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -12,12 +12,13 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. 
* - * RCS: @(#) $Id: tclCmdMZ.c,v 1.1.2.3 1998/10/16 01:16:57 stanton Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.1.2.4 1998/10/21 20:40:05 stanton Exp $ */ #include "tclInt.h" #include "tclPort.h" #include "tclCompile.h" +#include "tclRegexp.h" /* * Structure used to hold information about variable traces: @@ -108,20 +109,26 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv) int objc; /* Number of arguments. */ Tcl_Obj *CONST objv[]; /* Argument objects. */ { - int i, result, indices, flags, stringLength, wLen, match; + int i, result, indices, stringLength, wLen, match, about; + int hasxflags, cflags, eflags; Tcl_RegExp regExpr; char *string; Tcl_DString stringBuffer, valueBuffer; Tcl_UniChar *wStart; static char *options[] = { - "-indices", "-nocase", "--", (char *) NULL + "-indices", "-nocase", "-about", "-expanded", + "-unsupported0", "--", (char *) NULL }; enum options { - REGEXP_INDICES, REGEXP_NOCASE, REGEXP_LAST + REGEXP_INDICES, REGEXP_NOCASE, REGEXP_ABOUT, REGEXP_EXPANDED, + REGEXP_XFLAGS, REGEXP_LAST }; indices = 0; - flags = 0; + about = 0; + cflags = REG_ADVANCED; + eflags = 0; + hasxflags = 0; for (i = 1; i < objc; i++) { char *name; @@ -141,7 +148,19 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv) break; } case REGEXP_NOCASE: { - flags |= REG_ICASE; + cflags |= REG_ICASE; + break; + } + case REGEXP_ABOUT: { + about = 1; + break; + } + case REGEXP_EXPANDED: { + cflags |= REG_EXPANDED; + break; + } + case REGEXP_XFLAGS: { + hasxflags = 1; break; } case REGEXP_LAST: { @@ -152,7 +171,7 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv) } endOfForLoop: - if (objc - i < 2) { + if (objc - i < hasxflags + 2 - about) { Tcl_WrongNumArgs(interp, 1, objv, "?switches? exp string ?matchVar? 
?subMatchVar subMatchVar ...?"); return TCL_ERROR; @@ -160,11 +179,25 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv) objc -= i; objv += i; - regExpr = TclRegCompObj(interp, objv[0], flags | REG_ADVANCED); + if (hasxflags) { + string = Tcl_GetStringFromObj(objv[0], &stringLength); + TclRegXflags(string, stringLength, &cflags, &eflags); + objc--; + objv++; + } + + regExpr = TclRegCompObj(interp, objv[0], cflags); if (regExpr == NULL) { return TCL_ERROR; } + if (about) { + if (TclRegAbout(interp, regExpr) < 0) { + return TCL_ERROR; + } + return TCL_OK; + } + result = TCL_OK; string = Tcl_GetStringFromObj(objv[1], &stringLength); @@ -174,7 +207,7 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv) wStart = TclUtfToUniCharDString(string, stringLength, &stringBuffer); wLen = Tcl_DStringLength(&stringBuffer) / sizeof(Tcl_UniChar); - match = TclRegExpExecUniChar(interp, regExpr, wStart, wLen, 0); + match = TclRegExpExecUniChar(interp, regExpr, wStart, wLen, eflags); if (match < 0) { result = TCL_ERROR; goto done; diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 9b3f18d..06da42e 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclEncoding.c,v 1.1.2.2 1998/10/03 01:56:41 stanton Exp $ + * RCS: @(#) $Id: tclEncoding.c,v 1.1.2.3 1998/10/21 20:40:05 stanton Exp $ */ #include "tclInt.h" @@ -136,8 +136,8 @@ typedef struct EscapeEncodingData { #define ENCODING_ESCAPE 3 /* - * Hash table that keeps track of all loaded TextEncodings. Keys are - * the string names that represent the encoding, values are (TextEncoding *). + * Hash table that keeps track of all loaded Encodings. Keys are + * the string names that represent the encoding, values are (Encoding *). 
*/ static Tcl_HashTable encodingTable; @@ -277,6 +277,23 @@ TclInitEncodingSubsystem() Tcl_CreateEncoding(&type); } + +/* + *---------------------------------------------------------------------- + * + * TclFinalizeEncodingSubsystem -- + * + * Release the state associated with the encoding subsystem. + * + * Results: + * None. + * + * Side effects: + * Frees all of the encodings. + * + *---------------------------------------------------------------------- + */ + void TclFinalizeEncodingSubsystem() { @@ -515,6 +532,11 @@ Tcl_GetEncodingNames(interp) Tcl_DStringFree(&pwdString); } + /* + * Clear any values placed in the result by globbing. + */ + + Tcl_ResetResult(interp); resultPtr = Tcl_GetObjResult(interp); hPtr = Tcl_FirstHashEntry(&table, &search); @@ -573,9 +595,9 @@ Tcl_SetSystemEncoding(interp, name) return TCL_ERROR; } } - Tcl_FreeEncoding(systemEncoding); Tcl_MutexLock(&encodingMutex); + Tcl_FreeEncoding(systemEncoding); systemEncoding = encoding; Tcl_MutexUnlock(&encodingMutex); @@ -1009,7 +1031,7 @@ LoadEncodingFile(interp, name) pathPtr = TclGetLibraryPath(); if (pathPtr == NULL) { - return NULL; + goto unknown; } objc = 0; Tcl_ListObjGetElements(NULL, pathPtr, &objc, &objv); @@ -1023,10 +1045,7 @@ LoadEncodingFile(interp, name) } if (chan == NULL) { - if (interp != NULL) { - Tcl_AppendResult(interp, "unknown encoding \"", name, "\"", NULL); - } - return NULL; + goto unknown; } Tcl_SetChannelOption(NULL, chan, "-encoding", "utf-8"); @@ -1070,7 +1089,30 @@ LoadEncodingFile(interp, name) } Tcl_Close(NULL, chan); return encoding; + + unknown: + if (interp != NULL) { + Tcl_AppendResult(interp, "unknown encoding \"", name, "\"", NULL); + } + return NULL; } + +/* + *---------------------------------------------------------------------- + * + * OpenEncodingFile -- + * + * Look for the file encoding/<name>.enc in the specified + * directory. + * + * Results: + * Returns an open file channel if the file exists. + * + * Side effects: + * None. 
+ * + *---------------------------------------------------------------------- + */ static Tcl_Channel OpenEncodingFile(dir, name) diff --git a/generic/tclFileName.c b/generic/tclFileName.c index 01fefa7..55832ab 100644 --- a/generic/tclFileName.c +++ b/generic/tclFileName.c @@ -9,11 +9,12 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclFileName.c,v 1.1.2.3 1998/10/06 00:36:56 stanton Exp $ + * RCS: @(#) $Id: tclFileName.c,v 1.1.2.4 1998/10/21 20:40:05 stanton Exp $ */ #include "tclInt.h" #include "tclPort.h" +#include "tclRegexp.h" /* * This variable indicates whether the cleanup procedure has been diff --git a/generic/tclInt.h b/generic/tclInt.h index 140a2eb..0babdfd 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclInt.h,v 1.1.2.6 1998/10/16 01:16:57 stanton Exp $ + * RCS: @(#) $Id: tclInt.h,v 1.1.2.7 1998/10/21 20:40:06 stanton Exp $ */ #ifndef _TCLINT @@ -774,48 +774,6 @@ typedef struct MathFunc { } MathFunc; /* - *--------------------------------------------------------------------------- - * Definitions of flags used in regexp compilation and execution that need - * to be visible to the rest of the Tcl core. Definitions that are - * entirely private to the regexp package live in tclRegexp.h. - *--------------------------------------------------------------------------- - */ - -/* - *Compilation flags. 
- */ - -#define REG_BASIC 000000 /* BREs (convenience) */ -#define REG_EXTENDED 000001 /* EREs */ -#define REG_ADVF 000002 /* advanced features in EREs */ -#define REG_ADVANCED 000003 /* AREs (which are also EREs) */ -#define REG_QUOTE 000004 /* no special characters, none */ -#define REG_NOSPEC REG_QUOTE /* historical synonym */ -#define REG_ICASE 000010 /* ignore case */ -#define REG_NOSUB 000020 /* don't care about subexpressions */ -#define REG_EXPANDED 000040 /* expanded format, white space & comments */ -#define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */ -#define REG_NLANCH 000200 /* ^ matches after \n, $ before */ -#define REG_NEWLINE 000300 /* newlines are line terminators */ - -/* - * Execution flags. - */ - -#define REG_NOTBOL 0001 /* BOS is not BOL */ -#define REG_NOTEOL 0002 /* EOS is not EOL */ - -EXTERN Tcl_RegExp TclRegCompObj _ANSI_ARGS_((Tcl_Interp *interp, - Tcl_Obj *patObj, int flags)); -EXTERN int TclRegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp, - Tcl_RegExp re, CONST Tcl_UniChar *uniString, - int numChars, int flags)); -EXTERN int TclRegExpMatchObj _ANSI_ARGS_((Tcl_Interp *interp, - char *string, Tcl_Obj *patObj)); -EXTERN void TclRegExpRangeUniChar _ANSI_ARGS_((Tcl_RegExp re, - int index, int *startPtr, int *endPtr)); - -/* * Threads support. * These routines are used to implement Tcl_GetThreadData. 
*/ @@ -2161,6 +2119,8 @@ EXTERN int Tcl_ConcatObjCmd _ANSI_ARGS_((ClientData clientData, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[])); EXTERN int Tcl_ContinueObjCmd _ANSI_ARGS_((ClientData clientData, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[])); +EXTERN int Tcl_EncodingObjCmd _ANSI_ARGS_((ClientData clientData, + Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[])); EXTERN int Tcl_EofObjCmd _ANSI_ARGS_((ClientData clientData, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[])); EXTERN int Tcl_ErrorObjCmd _ANSI_ARGS_((ClientData clientData, diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c index 44b575c..d65b19a 100644 --- a/generic/tclRegexp.c +++ b/generic/tclRegexp.c @@ -4,12 +4,13 @@ * This file contains the public interfaces to the Tcl regular * expression mechanism. * + * Copyright (c) 1998 by Scriptics Corporation. * Copyright (c) 1998 by Sun Microsystems, Inc. * * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclRegexp.c,v 1.1.2.2 1998/10/03 01:56:41 stanton Exp $ + * RCS: @(#) $Id: tclRegexp.c,v 1.1.2.3 1998/10/21 20:40:06 stanton Exp $ */ #include "tclInt.h" @@ -337,6 +338,7 @@ TclRegExpExecUniChar(interp, re, wString, numChars, flags) TclRegexp *regexpPtr = (TclRegexp *) re; status = re_uexec(&regexpPtr->re, wString, (size_t) numChars, + (rm_detail_t *)NULL, regexpPtr->re.re_nsub + 1, regexpPtr->matches, flags); /* @@ -528,6 +530,83 @@ TclRegCompObj(interp, objPtr, flags) /* *---------------------------------------------------------------------- * + * TclRegAbout -- + * + * Return information about a compiled regular expression. + * + * Results: + * The return value is -1 for failure, 0 for success, although at + * the moment there's nothing that could fail. On success, a list + * is left in the interp's result: first element is the subexpression + * count, second is a list of re_info bit names. + * + * Side effects: + * None. 
+ * + *---------------------------------------------------------------------- + */ + +int +TclRegAbout(interp, re) + Tcl_Interp *interp; /* For use in variable assignment. */ + Tcl_RegExp re; /* The compiled regular expression. */ +{ + TclRegexp *regexpPtr = (TclRegexp *)re; + char buf[TCL_INTEGER_SPACE]; + static struct infoname { + int bit; + char *text; + } infonames[] = { + REG_UBACKREF, "REG_UBACKREF", + REG_ULOOKAHEAD, "REG_ULOOKAHEAD", + REG_UBOUNDS, "REG_UBOUNDS", + REG_UBRACES, "REG_UBRACES", + REG_UBSALNUM, "REG_UBSALNUM", + REG_UPBOTCH, "REG_UPBOTCH", + REG_UBBS, "REG_UBBS", + REG_UNONPOSIX, "REG_UNONPOSIX", + REG_UUNSPEC, "REG_UUNSPEC", + REG_UUNPORT, "REG_UUNPORT", + REG_ULOCALE, "REG_ULOCALE", + REG_UEMPTYMATCH, "REG_UEMPTYMATCH", + 0, "", + }; + struct infoname *inf; + int n; + + Tcl_ResetResult(interp); + + sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub)); + Tcl_AppendElement(interp, buf); + + /* + * Must count bits before generating list, because we must know + * whether {} are needed before we start appending names. + */ + n = 0; + for (inf = infonames; inf->bit != 0; inf++) { + if (regexpPtr->re.re_info&inf->bit) { + n++; + } + } + if (n != 1) { + Tcl_AppendResult(interp, " {", NULL); + } + for (inf = infonames; inf->bit != 0; inf++) { + if (regexpPtr->re.re_info&inf->bit) { + Tcl_AppendElement(interp, inf->text); + } + } + if (n != 1) { + Tcl_AppendResult(interp, "}", NULL); + } + + return 0; +} + +/* + *---------------------------------------------------------------------- + * * TclRegError -- * * Generate an error message based on the regexp status code. @@ -536,7 +615,7 @@ TclRegCompObj(interp, objPtr, flags) * Places an error in the interpreter. * * Side effects: - * None. + * Sets errorCode as well. * *---------------------------------------------------------------------- */ @@ -547,66 +626,19 @@ TclRegError(interp, msg, status) char *msg; /* Message to prepend to error. */ int status; /* Status code to report. 
*/ { - char *errMsg; - - switch(status) { - case REG_BADPAT: - errMsg = "invalid regular expression"; - break; - case REG_ECOLLATE: - errMsg = "invalid collating element"; - break; - case REG_ECTYPE: - errMsg = "invalid character class"; - break; - case REG_EESCAPE: - errMsg = "invalid escape sequence"; - break; - case REG_ESUBREG: - errMsg = "invalid backreference number"; - break; - case REG_EBRACK: - errMsg = "unmatched []"; - break; - case REG_EPAREN: - errMsg = "unmatched ()"; - break; - case REG_EBRACE: - errMsg = "unmatched {}"; - break; - case REG_BADBR: - errMsg = "invalid repetition count(s)"; - break; - case REG_ERANGE: - errMsg = "invalid character range"; - break; - case REG_ESPACE: - errMsg = "out of memory"; - break; - case REG_BADRPT: - errMsg = "?+* follows nothing"; - break; - case REG_ASSERT: - errMsg = "\"can't happen\" -- you found a bug"; - break; - case REG_INVARG: - errMsg = "invalid argument to regex routine"; - break; - case REG_MIXED: - errMsg = "char RE applied to wchar_t string (etc.)"; - break; - case REG_BADOPT: - errMsg = "invalid embedded option"; - break; - case REG_IMPOSS: - errMsg = "can never match"; - break; - default: - errMsg = "\"can't happen\" -- you found an undefined error code"; - break; - } + char buf[100]; /* ample in practice */ + char cbuf[100]; /* lots in practice */ + size_t n; + char *p; + Tcl_ResetResult(interp); - Tcl_AppendResult(interp, msg, errMsg, NULL); + n = regerror(status, (regex_t *)NULL, buf, sizeof(buf)); + p = (n > sizeof(buf)) ? "..." : ""; + Tcl_AppendResult(interp, msg, buf, p, NULL); + + sprintf(cbuf, "%d", status); + (VOID) regerror(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf)); + Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL); } @@ -749,36 +781,15 @@ CompileRegexp(interp, string, length, flags) if (status != REG_OKAY) { /* - * Warning, the following is a hack to allow empty regexp. - * The goal is to compile a non-empty regexp that will always - * find one empty match. 
If you use "(?:)" (an empty pair of - * non-capturing parentheses) instead, that will avoid both the - * overhead and the subexpression report. - */ - - if (status == REG_EMPTY) { - static Tcl_UniChar uniEmpty[] = {'(', '?', ':', ')', '\0'}; - - uniString = uniEmpty; - numChars = 4; - status = re_ucomp(&regexpPtr->re, uniString, (size_t) numChars, - REG_ADVANCED); - } - - /* * Clean up and report errors in the interpreter, if possible. */ - - if (status != REG_OKAY) { - regfree(&regexpPtr->re); - ckfree((char *)regexpPtr); - if (interp) { - TclRegError(interp, - "couldn't compile regular expression pattern: ", - status); - } - return NULL; + ckfree((char *)regexpPtr); + if (interp) { + TclRegError(interp, + "couldn't compile regular expression pattern: ", + status); } + return NULL; } /* @@ -791,4 +802,100 @@ CompileRegexp(interp, string, length, flags) return regexpPtr; } + +/* + *--------------------------------------------------------------------------- + * + * TclRegXflags -- + * + * Parse a string of extended regexp flag letters, for testing. + * + * Results: + * No return value (you're on your own for errors here). + * + * Side effects: + * Modifies *cflagsPtr, a regcomp flags word, and *eflagsPtr, a + * regexec flags word, as appropriate. + * + *---------------------------------------------------------------------- + */ +VOID +TclRegXflags(string, length, cflagsPtr, eflagsPtr) + char *string; /* The string of flags. */ + int length; /* The length of the string in bytes. 
*/ + int *cflagsPtr; /* compile flags word */ + int *eflagsPtr; /* exec flags word */ +{ + int i; + int cflags; + int eflags; + + cflags = *cflagsPtr; + eflags = *eflagsPtr; + for (i = 0; i < length; i++) { + switch (string[i]) { + case 'a': { + cflags |= REG_ADVF; + break; + } + case 'b': { + cflags &= ~REG_ADVANCED; + break; + } + case 'e': { + cflags &= ~REG_ADVANCED; + cflags |= REG_EXTENDED; + break; + } + case 'q': { + cflags &= ~REG_ADVANCED; + cflags |= REG_QUOTE; + break; + } + case 'i': { + cflags |= REG_ICASE; + break; + } + case 'o': { /* o for opaque */ + cflags |= REG_NOSUB; + break; + } + case 'x': { + cflags |= REG_EXPANDED; + break; + } + case 'p': { + cflags |= REG_NLSTOP; + break; + } + case 'w': { + cflags |= REG_NLANCH; + break; + } + case 'n': { + cflags |= REG_NEWLINE; + break; + } + case '+': { + cflags |= REG_FAKEEC; + break; + } + case '^': { + eflags |= REG_NOTBOL; + break; + } + case '$': { + eflags |= REG_NOTEOL; + break; + } + case '%': { + eflags |= REG_SMALL; + break; + } + } + } + + *cflagsPtr = cflags; + *eflagsPtr = eflags; +} diff --git a/generic/tclRegexp.h b/generic/tclRegexp.h index be5cb77..9e56730 100644 --- a/generic/tclRegexp.h +++ b/generic/tclRegexp.h @@ -7,8 +7,9 @@ * Copyright (c) 1998 Henry Spencer. All rights reserved. * * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., and Sun Microsystems Inc., none of - * whom are responsible for the results. The author thanks all of them. + * UUNET Communications Services Inc., Sun Microsystems Inc., and + * Scriptics Corporation, none of whom are responsible for the results. + * The author thanks all of them. * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that @@ -26,20 +27,19 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* + * Copyright (c) 1998 by Scriptics Corporation. * Copyright (c) 1998 by Sun Microsystems, Inc. * * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclRegexp.h,v 1.1.2.2 1998/09/24 23:59:02 stanton Exp $ + * RCS: @(#) $Id: tclRegexp.h,v 1.1.2.3 1998/10/21 20:40:06 stanton Exp $ */ #ifndef _TCLREGEXP #define _TCLREGEXP -#ifndef _TCLINT -#include "tclInt.h" -#endif +#include "regcustom.h" #ifdef BUILD_tcl # undef TCL_STORAGE_CLASS @@ -47,190 +47,6 @@ #endif /* - * The following definitions were culled from wctype.h and wchar.h. - * Those two header files are now gone. Eventually we should replace all - * instances of, e.g., iswalnum() with TclUniCharIsAlnum() in the regexp - * code. - */ - -#undef wint_t -#define wint_t int - -#undef WEOF -#undef WCHAR_MIN -#undef WCHAR_MAX - -#define WEOF -1 -#define WCHAR_MIN 0x0000 -#define WCHAR_MAX 0xffff - -#undef iswalnum -#undef iswalpha -#undef iswdigit -#undef iswspace - -#define iswalnum(x) TclUniCharIsAlnum(x) -#define iswalpha(x) TclUniCharIsAlpha(x) -#define iswdigit(x) TclUniCharIsDigit(x) -#define iswspace(x) TclUniCharIsSpace(x) - -#undef wcslen -#undef wcsncmp - -#define wcslen TclUniCharLen -#define wcsncmp TclUniCharNcmp - -/* - * The following definitions were added by JO to make Tcl compile - * under SunOS, where off_t and wchar_t aren't defined; perhaps all of - * the code below can be collapsed into a few simple definitions? - */ - -#ifndef __RE_REGOFF_T -# define __RE_REGOFF_T int -#endif -#ifndef __RE_WCHAR_T -# define __RE_WCHAR_T Tcl_UniChar -#endif - -/* - * regoff_t has to be large enough to hold either off_t or ssize_t, - * and must be signed; it's only a guess that off_t is big enough, so we - * offer an override. 
- */ -#ifdef __RE_REGOFF_T -typedef __RE_REGOFF_T regoff_t; /* offset type for result reporting */ -#else -typedef off_t regoff_t; -#endif - -/* - * We offer the option of using a non-wchar_t type in the w prototypes so - * that <regex.h> can be included without first including (e.g.) <wchar.h>. - * Note that __RE_WCHAR_T must in fact be the same type as wchar_t! - */ -#ifdef __RE_WCHAR_T -typedef __RE_WCHAR_T re_wchar; /* internal name for the type */ -#else -typedef wchar_t re_wchar; -#endif - -#define REMAGIC 0xfed7 - -/* - * other interface types - */ - -/* the biggie, a compiled RE (or rather, a front end to same) */ -typedef struct { - int re_magic; /* magic number */ - size_t re_nsub; /* number of subexpressions */ - int re_info; /* information about RE */ -# define REG_UBACKREF 000001 -# define REG_ULOOKAHEAD 000002 -# define REG_UBOUNDS 000004 -# define REG_UBRACES 000010 -# define REG_UBSALNUM 000020 -# define REG_UPBOTCH 000040 -# define REG_UBBS 000100 -# define REG_UNONPOSIX 000200 -# define REG_UUNSPEC 000400 -# define REG_UUNPORT 001000 -# define REG_ULOCALE 002000 -# define REG_UEMPTYMATCH 004000 - int re_csize; /* sizeof(character) */ - VOID *re_guts; /* none of your business :-) */ - VOID *re_fns; /* none of your business :-) */ -} regex_t; - -/* result reporting (may acquire more fields later) */ -typedef struct { - regoff_t rm_so; /* start of substring */ - regoff_t rm_eo; /* end of substring */ -} regmatch_t; - - - -/* - * compilation - ^ int regcomp(regex_t *, const char *, int); - ^ int re_comp(regex_t *, const char *, size_t, int); - ^ #ifndef __RE_NOWIDE - ^ int re_wcomp(regex_t *, const re_wchar *, size_t, int); - ^ #endif - */ - -#define REG_DUMP 004000 /* none of your business :-) */ -#define REG_FAKE 010000 /* none of your business :-) */ -#define REG_PROGRESS 020000 /* none of your business :-) */ - - - -/* - * execution - ^ int regexec(regex_t *, const char *, size_t, regmatch_t [], int); - ^ int re_exec(regex_t *, const char *, 
size_t, size_t, regmatch_t [], int); - ^ #ifndef __RE_NOWIDE - ^ int re_wexec(regex_t *, const re_wchar *, size_t, size_t, regmatch_t [], int); - ^ #endif - */ -#define REG_FTRACE 0010 /* none of your business */ -#define REG_MTRACE 0020 /* none of your business */ -#define REG_SMALL 0040 /* none of your business */ - -/* - * error reporting - * Be careful if modifying the list of error codes -- the table used by - * regerror() is generated automatically from this file! - * - * Note that there is no wchar_t variant of regerror at this time; what - * kind of character is used for error reports is independent of what kind - * is used in matching. - * - ^ extern size_t regerror(int, const regex_t *, char *, size_t); - */ -#define REG_OKAY 0 /* no errors detected */ -#define REG_NOMATCH 1 /* regexec() failed to match */ -#define REG_BADPAT 2 /* invalid regular expression */ -#define REG_ECOLLATE 3 /* invalid collating element */ -#define REG_ECTYPE 4 /* invalid character class */ -#define REG_EESCAPE 5 /* invalid escape \ sequence */ -#define REG_ESUBREG 6 /* invalid backreference number */ -#define REG_EBRACK 7 /* brackets [] not balanced */ -#define REG_EPAREN 8 /* parentheses () not balanced */ -#define REG_EBRACE 9 /* braces {} not balanced */ -#define REG_BADBR 10 /* invalid repetition count(s) */ -#define REG_ERANGE 11 /* invalid character range */ -#define REG_ESPACE 12 /* out of memory */ -#define REG_BADRPT 13 /* quantifier operand invalid */ -#define REG_EMPTY 14 /* empty regular expression */ -#define REG_ASSERT 15 /* "can't happen" -- you found a bug */ -#define REG_INVARG 16 /* invalid argument to regex routine */ -#define REG_MIXED 17 /* char RE applied to wchar_t string (etc.) 
*/ -#define REG_BADOPT 18 /* invalid embedded option */ -#define REG_IMPOSS 19 /* can never match */ -/* two specials for debugging and testing */ -#define REG_ATOI 101 /* convert error-code name to number */ -#define REG_ITOA 102 /* convert error-code number to name */ - - - -/* - * the prototypes, as possibly munched by fwd - */ -/* =====^!^===== begin forwards =====^!^===== */ -/* automatically gathered by fwd; do not hand-edit */ -/* === regex.h === */ -EXTERN int re_ucomp _ANSI_ARGS_((regex_t *, const Tcl_UniChar *, - size_t, int)); -EXTERN int re_uexec _ANSI_ARGS_((regex_t *, const Tcl_UniChar *, - size_t, size_t, regmatch_t [], int)); -EXTERN VOID regfree _ANSI_ARGS_((regex_t *)); -EXTERN size_t regerror _ANSI_ARGS_((int, const regex_t *, char *, size_t)); -/* automatically gathered by fwd; do not hand-edit */ -/* =====^!^===== end forwards =====^!^===== */ - -/* * The TclRegexp structure encapsulates a compiled regex_t, * the flags that were used to compile it, and an array of pointers * that are used to indicate subexpressions after a call to Tcl_RegExpExec. @@ -251,6 +67,24 @@ typedef struct TclRegexp { } TclRegexp; /* + * Functions exported for use within the rest of Tcl. + */ + +EXTERN Tcl_RegExp TclRegCompObj _ANSI_ARGS_((Tcl_Interp *interp, + Tcl_Obj *patObj, int flags)); +EXTERN int TclRegAbout _ANSI_ARGS_((Tcl_Interp *interp, + Tcl_RegExp re)); +EXTERN VOID TclRegXflags _ANSI_ARGS_((char *string, int length, + int *cflagsPtr, int *eflagsPtr)); +EXTERN int TclRegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp, + Tcl_RegExp re, CONST Tcl_UniChar *uniString, + int numChars, int flags)); +EXTERN int TclRegExpMatchObj _ANSI_ARGS_((Tcl_Interp *interp, + char *string, Tcl_Obj *patObj)); +EXTERN void TclRegExpRangeUniChar _ANSI_ARGS_((Tcl_RegExp re, + int index, int *startPtr, int *endPtr)); + +/* * Functions exported from the regexp package for the test package to use. 
*/ @@ -258,8 +92,3 @@ EXTERN void TclRegError _ANSI_ARGS_((Tcl_Interp *interp, char *msg, int status)); #endif /* _TCLREGEXP */ - - - - - diff --git a/generic/tclTest.c b/generic/tclTest.c index 8da6785..2136b7c 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -12,14 +12,13 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclTest.c,v 1.1.2.2 1998/09/24 23:59:02 stanton Exp $ + * RCS: @(#) $Id: tclTest.c,v 1.1.2.3 1998/10/21 20:40:07 stanton Exp $ */ #define TCL_TEST #include "tclInt.h" #include "tclPort.h" -#include "tclRegexp.h" /* To test internals of regexp package. */ #include <locale.h> /* @@ -245,9 +244,6 @@ static int TestparsevarObjCmd _ANSI_ARGS_((ClientData dummy, static int TestparsevarnameObjCmd _ANSI_ARGS_((ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[])); -static int TestregexpObjCmd _ANSI_ARGS_((ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *CONST objv[])); static int TestsaveresultCmd _ANSI_ARGS_((ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[])); @@ -398,8 +394,6 @@ Tcltest_Init(interp) (ClientData) 0, (Tcl_CmdDeleteProc *) NULL); Tcl_CreateObjCommand(interp, "testparsevarname", TestparsevarnameObjCmd, (ClientData) 0, (Tcl_CmdDeleteProc *) NULL); - Tcl_CreateObjCommand(interp, "testregexp", TestregexpObjCmd, - (ClientData) 0, (Tcl_CmdDeleteProc *) NULL); Tcl_CreateObjCommand(interp, "testsaveresult", TestsaveresultCmd, (ClientData) 0, (Tcl_CmdDeleteProc *) NULL); Tcl_CreateCommand(interp, "testsetassocdata", TestsetassocdataCmd, @@ -1317,19 +1311,15 @@ TestencodingObjCmd(dummy, interp, objc, objv) Tcl_Obj *CONST objv[]; /* Argument objects. 
*/ { Tcl_Encoding encoding; - Tcl_DString ds; int index, length; char *string; - Tcl_Obj *resultPtr; TclEncoding *encodingPtr; static char *optionStrings[] = { - "create", "delete", "toutf", "fromutf", - "names", "system", "path", + "create", "delete", "path", NULL }; enum options { - ENC_CREATE, ENC_DELETE, ENC_TOUTF, ENC_FROMUTF, - ENC_NAMES, ENC_SYSTEM, ENC_PATH + ENC_CREATE, ENC_DELETE, ENC_PATH }; if (Tcl_GetIndexFromObj(interp, objv[1], optionStrings, "option", 0, @@ -1376,79 +1366,6 @@ TestencodingObjCmd(dummy, interp, objc, objv) Tcl_FreeEncoding(encoding); break; } - case ENC_TOUTF: { - if (objc < 3) { - return TCL_ERROR; - } - if (objc == 3) { - string = "iso8859-1"; - } else { - string = Tcl_GetString(objv[3]); - } - encoding = Tcl_GetEncoding(NULL, string); - - string = (char *) Tcl_GetByteArrayFromObj(objv[2], &length); - Tcl_ExternalToUtfDString(encoding, string, length, &ds); - - /* - * If the encoding performs a Tcl_Eval() (which is the case for - * encodings created by the "encoding create" command, the - * resultPtr from the interp will be invalidated and we need to - * get it again. - */ - - resultPtr = Tcl_GetObjResult(interp); - Tcl_SetStringObj(resultPtr, Tcl_DStringValue(&ds), - Tcl_DStringLength(&ds)); - Tcl_DStringFree(&ds); - Tcl_FreeEncoding(encoding); - break; - } - case ENC_FROMUTF: { - if (objc < 3) { - return TCL_ERROR; - } - if (objc == 3) { - string = "iso8859-1"; - } else { - string = Tcl_GetString(objv[3]); - } - encoding = Tcl_GetEncoding(NULL, string); - - string = Tcl_GetStringFromObj(objv[2], &length); - Tcl_UtfToExternalDString(encoding, string, length, &ds); - - /* - * If the encoding performs a Tcl_Eval() (which is the case for - * encodings created by the "encoding create" command, the - * resultPtr from the interp will be invalidated and we need to - * get it again. 
- */ - - resultPtr = Tcl_GetObjResult(interp); - Tcl_SetByteArrayObj(resultPtr, - (unsigned char *) Tcl_DStringValue(&ds), - Tcl_DStringLength(&ds)); - Tcl_DStringFree(&ds); - Tcl_FreeEncoding(encoding); - break; - } - - case ENC_NAMES: { - Tcl_GetEncodingNames(interp); - break; - } - case ENC_SYSTEM: { - if (objc == 2) { - Tcl_SetResult(interp, Tcl_GetEncodingName(NULL), TCL_STATIC); - } else { - char *str; - - str = Tcl_GetStringFromObj(objv[2], NULL); - return Tcl_SetSystemEncoding(interp, str); - } - break; - } case ENC_PATH: { if (objc == 2) { Tcl_SetObjResult(interp, TclGetLibraryPath()); @@ -2584,251 +2501,6 @@ TestparsevarnameObjCmd(clientData, interp, objc, objv) /* *---------------------------------------------------------------------- * - * TestregexpObjCmd -- - * - * This procedure implements the "testregexp" command. It is - * used to give a direct interface for regexp flags. - * - * Results: - * A standard Tcl result. - * - * Side effects: - * None. - * - *---------------------------------------------------------------------- - */ - -static int -TestregexpObjCmd(dummy, interp, objc, objv) - ClientData dummy; /* Not used. */ - Tcl_Interp *interp; /* Current interpreter. */ - int objc; /* Number of arguments. */ - Tcl_Obj *CONST objv[]; /* Argument objects. */ -{ - TclRegexp *regExpr; - char *string, *flagString, *start, *end; - int flags, match, i, j; - - if (objc < 4) { - Tcl_WrongNumArgs(interp, 1, objv, - "flags exp string ?subMatchVar subMatchVar ...?"); - return TCL_ERROR; - } - flagString = Tcl_GetString(objv[1]); - string = Tcl_GetString(objv[3]); - - flags = RegGetCompFlags(flagString); - regExpr = (TclRegexp *) TclRegCompObj(interp, objv[2], flags); - if (regExpr == NULL) { - return TCL_ERROR; - } - - flags = RegGetExecFlags(flagString); - if (flags == -1) { - /* - * Do not try to match the string. 
- */ - - match = 0; - } else { - Tcl_DString stringBuffer; - Tcl_UniChar *uniString; - int numChars; - - /* - * Remember the UTF-8 string so Tcl_RegExpRange() can convert the - * matches from character to byte offsets. - */ - - regExpr->string = string; - - Tcl_DStringInit(&stringBuffer); - uniString = TclUtfToUniCharDString(string, -1, &stringBuffer); - numChars = Tcl_DStringLength(&stringBuffer) / sizeof(Tcl_UniChar); - - match = TclRegExpExecUniChar(interp, (Tcl_RegExp) regExpr, uniString, - numChars, flags); - Tcl_DStringFree(&stringBuffer); - - if (match < 0) { - return TCL_ERROR; - } - if (flags & REG_NOSUB) { - for (i = 0; i <= (int) regExpr->re.re_nsub; i++) { - regExpr->matches[i].rm_so = -1; - regExpr->matches[i].rm_eo = -1; - } - } - } - if (!match) { - /* - * Set the interpreter's object result to an integer object w/ value 0. - */ - - Tcl_SetIntObj(Tcl_GetObjResult(interp), 0); - return TCL_OK; - } - - /* - * If additional variable names have been specified, return - * index information in those variables. - */ - - for (i = 0, j = 4; j < objc; i++, j++) { - char *result; - char *currentString = Tcl_GetString(objv[j]); - - Tcl_RegExpRange((Tcl_RegExp) regExpr, i, &start, &end); - if (start == NULL) { - result = Tcl_SetVar(interp, currentString, "", 0); - } else { - char savedChar, *first, *last; - char *tempString = Tcl_GetString(objv[3]); - first = tempString + (start - string); - last = tempString + (end - string); - if (first == last) { /* don't modify argument */ - result = Tcl_SetVar(interp, currentString, "", 0); - } else { - savedChar = *last; - *last = 0; - result = Tcl_SetVar(interp, currentString, first, 0); - *last = savedChar; - } - } - if (result == NULL) { - Tcl_AppendResult(interp, "couldn't set variable \"", - currentString, "\"", (char *) NULL); - return TCL_ERROR; - } - } - - /* - * Set the interpreter's object result to an integer object w/ value 1. 
- */ - - Tcl_SetIntObj(Tcl_GetObjResult(interp), 1); - return TCL_OK; -} - -/* - *---------------------------------------------------------------------- - * - * RegGetCompFlags -- - * - * Internal interface to regular expression compile flags. - * Converts a string of chars to a single flag. - * - * Results: - * Returns a flags for regular expression compilation. - * - * Side effects: - * None. - * - *---------------------------------------------------------------------- - */ -static int -RegGetCompFlags(s) - char *s; -{ - char c; - register char *p; - int result = REG_ADVANCED; - - for (p = s; (c = *p) != '\0'; p++) - switch (c) { - case 'a': - result |= REG_ADVF; - break; - case 'b': - result &= ~REG_ADVANCED; - break; - case 'e': - result &= ~REG_ADVF; - result |= REG_EXTENDED; - break; - case 'i': - result |= REG_ICASE; - break; - case 'm': - case 'n': - result |= REG_NEWLINE; - break; - case 'p': - result |= REG_NLSTOP; - break; - case 'q': - result &= ~REG_ADVANCED; - result |= REG_QUOTE; - break; - case 's': - result |= REG_NOSUB; - break; - case 'w': - result |= REG_NLANCH; - break; - case 'x': - result |= REG_EXPANDED; - break; - case '+': - result |= REG_FAKE; - break; - case ',': - result |= REG_PROGRESS; - break; - } - return result; -} - -/* - *---------------------------------------------------------------------- - * - * RegGetExecFlags -- - * - * Internal interface to regular expression exec flags. - * Converts a string of chars to a single flag. - * - * Results: - * Returns a flags for regular expression matching. - * - * Side effects: - * None. 
- * - *---------------------------------------------------------------------- - */ -static int -RegGetExecFlags(s) - char *s; -{ - char c; - register char *p; - int result = 0; - - for (p = s; (c = *p) != '\0'; p++) - switch (c) { - case '^': - result |= REG_NOTBOL; - break; - case '$': - result |= REG_NOTEOL; - break; - case ';': - result |= REG_FTRACE; - break; - case ':': - result |= REG_MTRACE; - break; - case '.': - result |= REG_SMALL; - break; - case '/': - return -1; - } - return result; -} - -/* - *---------------------------------------------------------------------- - * * TestsetassocdataCmd -- * * This procedure implements the "testsetassocdata" command. It is used |