diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2005-11-09 16:36:14 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2005-11-09 16:36:14 (GMT) |
commit | 133fb4a09f75b408471dd65c981eb8842ae92c76 (patch) | |
tree | 31171954bc88cbb4b8756f647f2f878267ce4207 /generic | |
parent | 93b10873c6edf504fe3aa5358bf31b80a6bd97b1 (diff) | |
download | tcl-133fb4a09f75b408471dd65c981eb8842ae92c76.zip tcl-133fb4a09f75b408471dd65c981eb8842ae92c76.tar.gz tcl-133fb4a09f75b408471dd65c981eb8842ae92c76.tar.bz2 |
ANSIfy the RE compiler.
Diffstat (limited to 'generic')
-rw-r--r-- | generic/regc_color.c | 1255 | ||||
-rw-r--r-- | generic/regc_cvec.c | 111 | ||||
-rw-r--r-- | generic/regc_lex.c | 1804 | ||||
-rw-r--r-- | generic/regc_locale.c | 777 | ||||
-rw-r--r-- | generic/regc_nfa.c | 2436 | ||||
-rw-r--r-- | generic/regcomp.c | 3496 | ||||
-rw-r--r-- | generic/regex.h | 143 | ||||
-rw-r--r-- | generic/regguts.h | 353 |
8 files changed, 5523 insertions, 4852 deletions
diff --git a/generic/regc_color.c b/generic/regc_color.c index 5aed21c..2c4d97e 100644 --- a/generic/regc_color.c +++ b/generic/regc_color.c @@ -3,20 +3,20 @@ * This file is #included by regcomp.c. * * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * + * + * I'd appreciate being given credit for this package in the documentation of + * software which uses it, but that is not a requirement. + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -28,661 +28,734 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * - * - * Note that there are some incestuous relationships between this code and - * NFA arc maintenance, which perhaps ought to be cleaned up sometime. + * Note that there are some incestuous relationships between this code and NFA + * arc maintenance, which perhaps ought to be cleaned up sometime. 
*/ - - #define CISERR() VISERR(cm->v) #define CERR(e) VERR(cm->v, (e)) - - - + /* - initcm - set up new colormap ^ static VOID initcm(struct vars *, struct colormap *); */ -static VOID -initcm(v, cm) -struct vars *v; -struct colormap *cm; +static void +initcm( + struct vars *v, + struct colormap *cm) { - int i; - int j; - union tree *t; - union tree *nextt; - struct colordesc *cd; - - cm->magic = CMMAGIC; - cm->v = v; - - cm->ncds = NINLINECDS; - cm->cd = cm->cdspace; - cm->max = 0; - cm->free = 0; - - cd = cm->cd; /* cm->cd[WHITE] */ - cd->sub = NOSUB; - cd->arcs = NULL; - cd->flags = 0; - cd->nchrs = CHR_MAX - CHR_MIN + 1; - - /* upper levels of tree */ - for (t = &cm->tree[0], j = NBYTS-1; j > 0; t = nextt, j--) { - nextt = t + 1; - for (i = BYTTAB-1; i >= 0; i--) - t->tptr[i] = nextt; + int i; + int j; + union tree *t; + union tree *nextt; + struct colordesc *cd; + + cm->magic = CMMAGIC; + cm->v = v; + + cm->ncds = NINLINECDS; + cm->cd = cm->cdspace; + cm->max = 0; + cm->free = 0; + + cd = cm->cd; /* cm->cd[WHITE] */ + cd->sub = NOSUB; + cd->arcs = NULL; + cd->flags = 0; + cd->nchrs = CHR_MAX - CHR_MIN + 1; + + /* + * Upper levels of tree. + */ + + for (t = &cm->tree[0], j = NBYTS-1; j > 0; t = nextt, j--) { + nextt = t + 1; + for (i = BYTTAB-1; i >= 0; i--) { + t->tptr[i] = nextt; } - /* bottom level is solid white */ - t = &cm->tree[NBYTS-1]; - for (i = BYTTAB-1; i >= 0; i--) - t->tcolor[i] = WHITE; - cd->block = t; -} + } + + /* + * Bottom level is solid white. 
+ */ + t = &cm->tree[NBYTS-1]; + for (i = BYTTAB-1; i >= 0; i--) { + t->tcolor[i] = WHITE; + } + cd->block = t; +} + /* - freecm - free dynamically-allocated things in a colormap ^ static VOID freecm(struct colormap *); */ -static VOID -freecm(cm) -struct colormap *cm; +static void +freecm( + struct colormap *cm) { - size_t i; - union tree *cb; - - cm->magic = 0; - if (NBYTS > 1) - cmtreefree(cm, cm->tree, 0); - for (i = 1; i <= cm->max; i++) /* skip WHITE */ - if (!UNUSEDCOLOR(&cm->cd[i])) { - cb = cm->cd[i].block; - if (cb != NULL) - FREE(cb); - } - if (cm->cd != cm->cdspace) - FREE(cm->cd); + size_t i; + union tree *cb; + + cm->magic = 0; + if (NBYTS > 1) { + cmtreefree(cm, cm->tree, 0); + } + for (i = 1; i <= cm->max; i++) { /* skip WHITE */ + if (!UNUSEDCOLOR(&cm->cd[i])) { + cb = cm->cd[i].block; + if (cb != NULL) { + FREE(cb); + } + } + } + if (cm->cd != cm->cdspace) { + FREE(cm->cd); + } } - + /* - cmtreefree - free a non-terminal part of a colormap tree ^ static VOID cmtreefree(struct colormap *, union tree *, int); */ -static VOID -cmtreefree(cm, tree, level) -struct colormap *cm; -union tree *tree; -int level; /* level number (top == 0) of this block */ +static void +cmtreefree( + struct colormap *cm, + union tree *tree, + int level) /* level number (top == 0) of this + * block */ { - int i; - union tree *t; - union tree *fillt = &cm->tree[level+1]; - union tree *cb; - - assert(level < NBYTS-1); /* this level has pointers */ - for (i = BYTTAB-1; i >= 0; i--) { - t = tree->tptr[i]; - assert(t != NULL); - if (t != fillt) { - if (level < NBYTS-2) { /* more pointer blocks below */ - cmtreefree(cm, t, level+1); - FREE(t); - } else { /* color block below */ - cb = cm->cd[t->tcolor[0]].block; - if (t != cb) /* not a solid block */ - FREE(t); - } + int i; + union tree *t; + union tree *fillt = &cm->tree[level+1]; + union tree *cb; + + assert(level < NBYTS-1); /* this level has pointers */ + for (i = BYTTAB-1; i >= 0; i--) { + t = tree->tptr[i]; + assert(t != 
NULL); + if (t != fillt) { + if (level < NBYTS-2) { /* more pointer blocks below */ + cmtreefree(cm, t, level+1); + FREE(t); + } else { /* color block below */ + cb = cm->cd[t->tcolor[0]].block; + if (t != cb) { /* not a solid block */ + FREE(t); } + } } + } } - + /* - setcolor - set the color of a character in a colormap ^ static color setcolor(struct colormap *, pchr, pcolor); */ -static color /* previous color */ -setcolor(cm, c, co) -struct colormap *cm; -pchr c; -pcolor co; +static color /* previous color */ +setcolor( + struct colormap *cm, + pchr c, + pcolor co) { - uchr uc = c; - int shift; - int level; - int b; - int bottom; - union tree *t; - union tree *newt; - union tree *fillt; - union tree *lastt; - union tree *cb; - color prev; - - assert(cm->magic == CMMAGIC); - if (CISERR() || co == COLORLESS) + uchr uc = c; + int shift; + int level; + int b; + int bottom; + union tree *t; + union tree *newt; + union tree *fillt; + union tree *lastt; + union tree *cb; + color prev; + + assert(cm->magic == CMMAGIC); + if (CISERR() || co == COLORLESS) { + return COLORLESS; + } + + t = cm->tree; + for (level = 0, shift = BYTBITS * (NBYTS - 1); shift > 0; + level++, shift -= BYTBITS) { + b = (uc >> shift) & BYTMASK; + lastt = t; + t = lastt->tptr[b]; + assert(t != NULL); + fillt = &cm->tree[level+1]; + bottom = (shift <= BYTBITS) ? 1 : 0; + cb = (bottom) ? cm->cd[t->tcolor[0]].block : fillt; + if (t == fillt || t == cb) { /* must allocate a new block */ + newt = (union tree *)MALLOC((bottom) ? + sizeof(struct colors) : sizeof(struct ptrs)); + if (newt == NULL) { + CERR(REG_ESPACE); return COLORLESS; - - t = cm->tree; - for (level = 0, shift = BYTBITS * (NBYTS - 1); shift > 0; - level++, shift -= BYTBITS) { - b = (uc >> shift) & BYTMASK; - lastt = t; - t = lastt->tptr[b]; - assert(t != NULL); - fillt = &cm->tree[level+1]; - bottom = (shift <= BYTBITS) ? 1 : 0; - cb = (bottom) ? 
cm->cd[t->tcolor[0]].block : fillt; - if (t == fillt || t == cb) { /* must allocate a new block */ - newt = (union tree *)MALLOC((bottom) ? - sizeof(struct colors) : sizeof(struct ptrs)); - if (newt == NULL) { - CERR(REG_ESPACE); - return COLORLESS; - } - if (bottom) - memcpy(VS(newt->tcolor), VS(t->tcolor), - BYTTAB*sizeof(color)); - else - memcpy(VS(newt->tptr), VS(t->tptr), - BYTTAB*sizeof(union tree *)); - t = newt; - lastt->tptr[b] = t; - } + } + if (bottom) { + memcpy(VS(newt->tcolor), VS(t->tcolor), + BYTTAB*sizeof(color)); + } else { + memcpy(VS(newt->tptr), VS(t->tptr), + BYTTAB*sizeof(union tree *)); + } + t = newt; + lastt->tptr[b] = t; } + } - b = uc & BYTMASK; - prev = t->tcolor[b]; - t->tcolor[b] = (color)co; - return prev; + b = uc & BYTMASK; + prev = t->tcolor[b]; + t->tcolor[b] = (color)co; + return prev; } - + /* - maxcolor - report largest color number in use ^ static color maxcolor(struct colormap *); */ static color -maxcolor(cm) -struct colormap *cm; +maxcolor( + struct colormap *cm) { - if (CISERR()) - return COLORLESS; + if (CISERR()) { + return COLORLESS; + } - return (color)cm->max; + return (color)cm->max; } - + /* - newcolor - find a new color (must be subject of setcolor at once) * Beware: may relocate the colordescs. 
^ static color newcolor(struct colormap *); */ -static color /* COLORLESS for error */ -newcolor(cm) -struct colormap *cm; +static color /* COLORLESS for error */ +newcolor( + struct colormap *cm) { - struct colordesc *cd; - struct colordesc *new; - size_t n; - - if (CISERR()) - return COLORLESS; - - if (cm->free != 0) { - assert(cm->free > 0); - assert((size_t)cm->free < cm->ncds); - cd = &cm->cd[cm->free]; - assert(UNUSEDCOLOR(cd)); - assert(cd->arcs == NULL); - cm->free = cd->sub; - } else if (cm->max < cm->ncds - 1) { - cm->max++; - cd = &cm->cd[cm->max]; + struct colordesc *cd; + struct colordesc *new; + size_t n; + + if (CISERR()) { + return COLORLESS; + } + + if (cm->free != 0) { + assert(cm->free > 0); + assert((size_t)cm->free < cm->ncds); + cd = &cm->cd[cm->free]; + assert(UNUSEDCOLOR(cd)); + assert(cd->arcs == NULL); + cm->free = cd->sub; + } else if (cm->max < cm->ncds - 1) { + cm->max++; + cd = &cm->cd[cm->max]; + } else { + /* + * Oops, must allocate more. + */ + + n = cm->ncds * 2; + if (cm->cd == cm->cdspace) { + new = (struct colordesc *)MALLOC(n * sizeof(struct colordesc)); + if (new != NULL) { + memcpy(VS(new), VS(cm->cdspace), + cm->ncds * sizeof(struct colordesc)); + } } else { - /* oops, must allocate more */ - n = cm->ncds * 2; - if (cm->cd == cm->cdspace) { - new = (struct colordesc *)MALLOC(n * - sizeof(struct colordesc)); - if (new != NULL) - memcpy(VS(new), VS(cm->cdspace), cm->ncds * - sizeof(struct colordesc)); - } else - new = (struct colordesc *)REALLOC(cm->cd, - n * sizeof(struct colordesc)); - if (new == NULL) { - CERR(REG_ESPACE); - return COLORLESS; - } - cm->cd = new; - cm->ncds = n; - assert(cm->max < cm->ncds - 1); - cm->max++; - cd = &cm->cd[cm->max]; + new = (struct colordesc *)REALLOC(cm->cd, + n * sizeof(struct colordesc)); } - - cd->nchrs = 0; - cd->sub = NOSUB; - cd->arcs = NULL; - cd->flags = 0; - cd->block = NULL; - - return (color)(cd - cm->cd); + if (new == NULL) { + CERR(REG_ESPACE); + return COLORLESS; + } + cm->cd 
= new; + cm->ncds = n; + assert(cm->max < cm->ncds - 1); + cm->max++; + cd = &cm->cd[cm->max]; + } + + cd->nchrs = 0; + cd->sub = NOSUB; + cd->arcs = NULL; + cd->flags = 0; + cd->block = NULL; + + return (color)(cd - cm->cd); } - + /* - freecolor - free a color (must have no arcs or subcolor) ^ static VOID freecolor(struct colormap *, pcolor); */ -static VOID -freecolor(cm, co) -struct colormap *cm; -pcolor co; +static void +freecolor( + struct colormap *cm, + pcolor co) { - struct colordesc *cd = &cm->cd[co]; - color pco, nco; /* for freelist scan */ - - assert(co >= 0); - if (co == WHITE) - return; - - assert(cd->arcs == NULL); - assert(cd->sub == NOSUB); - assert(cd->nchrs == 0); - cd->flags = FREECOL; - if (cd->block != NULL) { - FREE(cd->block); - cd->block = NULL; /* just paranoia */ + struct colordesc *cd = &cm->cd[co]; + color pco, nco; /* for freelist scan */ + + assert(co >= 0); + if (co == WHITE) { + return; + } + + assert(cd->arcs == NULL); + assert(cd->sub == NOSUB); + assert(cd->nchrs == 0); + cd->flags = FREECOL; + if (cd->block != NULL) { + FREE(cd->block); + cd->block = NULL; /* just paranoia */ + } + + if ((size_t)co == cm->max) { + while (cm->max > WHITE && UNUSEDCOLOR(&cm->cd[cm->max])) { + cm->max--; } - - if ((size_t)co == cm->max) { - while (cm->max > WHITE && UNUSEDCOLOR(&cm->cd[cm->max])) - cm->max--; - assert(cm->free >= 0); - while ((size_t)cm->free > cm->max) - cm->free = cm->cd[cm->free].sub; - if (cm->free > 0) { - assert(cm->free < cm->max); - pco = cm->free; - nco = cm->cd[pco].sub; - while (nco > 0) - if ((size_t)nco > cm->max) { - /* take this one out of freelist */ - nco = cm->cd[nco].sub; - cm->cd[pco].sub = nco; - } else { - assert(nco < cm->max); - pco = nco; - nco = cm->cd[pco].sub; - } + assert(cm->free >= 0); + while ((size_t)cm->free > cm->max) { + cm->free = cm->cd[cm->free].sub; + } + if (cm->free > 0) { + assert(cm->free < cm->max); + pco = cm->free; + nco = cm->cd[pco].sub; + while (nco > 0) { + if ((size_t)nco > 
cm->max) { + /* + * Take this one out of freelist. + */ + + nco = cm->cd[nco].sub; + cm->cd[pco].sub = nco; + } else { + assert(nco < cm->max); + pco = nco; + nco = cm->cd[pco].sub; } - } else { - cd->sub = cm->free; - cm->free = (color)(cd - cm->cd); + } } + } else { + cd->sub = cm->free; + cm->free = (color)(cd - cm->cd); + } } - + /* - pseudocolor - allocate a false color, to be managed by other means ^ static color pseudocolor(struct colormap *); */ static color -pseudocolor(cm) -struct colormap *cm; +pseudocolor( + struct colormap *cm) { - color co; - - co = newcolor(cm); - if (CISERR()) - return COLORLESS; - cm->cd[co].nchrs = 1; - cm->cd[co].flags = PSEUDO; - return co; + color co; + + co = newcolor(cm); + if (CISERR()) { + return COLORLESS; + } + cm->cd[co].nchrs = 1; + cm->cd[co].flags = PSEUDO; + return co; } - + /* - subcolor - allocate a new subcolor (if necessary) to this chr ^ static color subcolor(struct colormap *, pchr c); */ static color -subcolor(cm, c) -struct colormap *cm; -pchr c; +subcolor( + struct colormap *cm, + pchr c) { - color co; /* current color of c */ - color sco; /* new subcolor */ - - co = GETCOLOR(cm, c); - sco = newsub(cm, co); - if (CISERR()) - return COLORLESS; - assert(sco != COLORLESS); - - if (co == sco) /* already in an open subcolor */ - return co; /* rest is redundant */ - cm->cd[co].nchrs--; - cm->cd[sco].nchrs++; - setcolor(cm, c, sco); - return sco; + color co; /* current color of c */ + color sco; /* new subcolor */ + + co = GETCOLOR(cm, c); + sco = newsub(cm, co); + if (CISERR()) { + return COLORLESS; + } + assert(sco != COLORLESS); + + if (co == sco) { /* already in an open subcolor */ + return co; /* rest is redundant */ + } + cm->cd[co].nchrs--; + cm->cd[sco].nchrs++; + setcolor(cm, c, sco); + return sco; } - + /* - newsub - allocate a new subcolor (if necessary) for a color ^ static color newsub(struct colormap *, pcolor); */ static color -newsub(cm, co) -struct colormap *cm; -pcolor co; +newsub( + struct 
colormap *cm, + pcolor co) { - color sco; /* new subcolor */ - - sco = cm->cd[co].sub; - if (sco == NOSUB) { /* color has no open subcolor */ - if (cm->cd[co].nchrs == 1) /* optimization */ - return co; - sco = newcolor(cm); /* must create subcolor */ - if (sco == COLORLESS) { - assert(CISERR()); - return COLORLESS; - } - cm->cd[co].sub = sco; - cm->cd[sco].sub = sco; /* open subcolor points to self */ + color sco; /* new subcolor */ + + sco = cm->cd[co].sub; + if (sco == NOSUB) { /* color has no open subcolor */ + if (cm->cd[co].nchrs == 1) { /* optimization */ + return co; } - assert(sco != NOSUB); + sco = newcolor(cm); /* must create subcolor */ + if (sco == COLORLESS) { + assert(CISERR()); + return COLORLESS; + } + cm->cd[co].sub = sco; + cm->cd[sco].sub = sco; /* open subcolor points to self */ + } + assert(sco != NOSUB); - return sco; + return sco; } - + /* - subrange - allocate new subcolors to this range of chrs, fill in arcs ^ static VOID subrange(struct vars *, pchr, pchr, struct state *, ^ struct state *); */ -static VOID -subrange(v, from, to, lp, rp) -struct vars *v; -pchr from; -pchr to; -struct state *lp; -struct state *rp; +static void +subrange( + struct vars *v, + pchr from, + pchr to, + struct state *lp, + struct state *rp) { - uchr uf; - int i; - - assert(from <= to); - - /* first, align "from" on a tree-block boundary */ - uf = (uchr)from; - i = (int)( ((uf + BYTTAB-1) & (uchr)~BYTMASK) - uf ); - for (; from <= to && i > 0; i--, from++) - newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp); - if (from > to) /* didn't reach a boundary */ - return; - - /* deal with whole blocks */ - for (; to - from >= BYTTAB; from += BYTTAB) - subblock(v, from, lp, rp); - - /* clean up any remaining partial table */ - for (; from <= to; from++) - newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp); + uchr uf; + int i; + + assert(from <= to); + + /* + * First, align "from" on a tree-block boundary + */ + + uf = (uchr)from; + i = (int)( ((uf + BYTTAB-1) & 
(uchr)~BYTMASK) - uf ); + for (; from <= to && i > 0; i--, from++) { + newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp); + } + if (from > to) { /* didn't reach a boundary */ + return; + } + + /* + * Deal with whole blocks. + */ + + for (; to - from >= BYTTAB; from += BYTTAB) { + subblock(v, from, lp, rp); + } + + /* + * Clean up any remaining partial table. + */ + + for (; from <= to; from++) { + newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp); + } } - + /* - subblock - allocate new subcolors for one tree block of chrs, fill in arcs ^ static VOID subblock(struct vars *, pchr, struct state *, struct state *); */ -static VOID -subblock(v, start, lp, rp) -struct vars *v; -pchr start; /* first of BYTTAB chrs */ -struct state *lp; -struct state *rp; +static void +subblock( + struct vars *v, + pchr start, /* first of BYTTAB chrs */ + struct state *lp, + struct state *rp) { - uchr uc = start; - struct colormap *cm = v->cm; - int shift; - int level; - int i; - int b; - union tree *t; - union tree *cb; - union tree *fillt; - union tree *lastt; - int previ; - int ndone; - color co; - color sco; - - assert((uc % BYTTAB) == 0); - - /* find its color block, making new pointer blocks as needed */ - t = cm->tree; - fillt = NULL; - for (level = 0, shift = BYTBITS * (NBYTS - 1); shift > 0; - level++, shift -= BYTBITS) { - b = (uc >> shift) & BYTMASK; - lastt = t; - t = lastt->tptr[b]; - assert(t != NULL); - fillt = &cm->tree[level+1]; - if (t == fillt && shift > BYTBITS) { /* need new ptr block */ - t = (union tree *)MALLOC(sizeof(struct ptrs)); - if (t == NULL) { - CERR(REG_ESPACE); - return; - } - memcpy(VS(t->tptr), VS(fillt->tptr), - BYTTAB*sizeof(union tree *)); - lastt->tptr[b] = t; - } + uchr uc = start; + struct colormap *cm = v->cm; + int shift; + int level; + int i; + int b; + union tree *t; + union tree *cb; + union tree *fillt; + union tree *lastt; + int previ; + int ndone; + color co; + color sco; + + assert((uc % BYTTAB) == 0); + + /* + * Find its color 
block, making new pointer blocks as needed. + */ + + t = cm->tree; + fillt = NULL; + for (level = 0, shift = BYTBITS * (NBYTS - 1); shift > 0; + level++, shift -= BYTBITS) { + b = (uc >> shift) & BYTMASK; + lastt = t; + t = lastt->tptr[b]; + assert(t != NULL); + fillt = &cm->tree[level+1]; + if (t == fillt && shift > BYTBITS) { /* need new ptr block */ + t = (union tree *)MALLOC(sizeof(struct ptrs)); + if (t == NULL) { + CERR(REG_ESPACE); + return; + } + memcpy(VS(t->tptr), VS(fillt->tptr), + BYTTAB*sizeof(union tree *)); + lastt->tptr[b] = t; } + } + + /* + * Special cases: fill block or solid block. + */ + co = t->tcolor[0]; + cb = cm->cd[co].block; + if (t == fillt || t == cb) { + /* + * Either way, we want a subcolor solid block. + */ - /* special cases: fill block or solid block */ - co = t->tcolor[0]; - cb = cm->cd[co].block; - if (t == fillt || t == cb) { - /* either way, we want a subcolor solid block */ - sco = newsub(cm, co); - t = cm->cd[sco].block; - if (t == NULL) { /* must set it up */ - t = (union tree *)MALLOC(sizeof(struct colors)); - if (t == NULL) { - CERR(REG_ESPACE); - return; - } - for (i = 0; i < BYTTAB; i++) - t->tcolor[i] = sco; - cm->cd[sco].block = t; - } - /* find loop must have run at least once */ - lastt->tptr[b] = t; - newarc(v->nfa, PLAIN, sco, lp, rp); - cm->cd[co].nchrs -= BYTTAB; - cm->cd[sco].nchrs += BYTTAB; + sco = newsub(cm, co); + t = cm->cd[sco].block; + if (t == NULL) { /* must set it up */ + t = (union tree *)MALLOC(sizeof(struct colors)); + if (t == NULL) { + CERR(REG_ESPACE); return; + } + for (i = 0; i < BYTTAB; i++) { + t->tcolor[i] = sco; + } + cm->cd[sco].block = t; } - /* general case, a mixed block to be altered */ - i = 0; - while (i < BYTTAB) { - co = t->tcolor[i]; - sco = newsub(cm, co); - newarc(v->nfa, PLAIN, sco, lp, rp); - previ = i; - do { - t->tcolor[i++] = sco; - } while (i < BYTTAB && t->tcolor[i] == co); - ndone = i - previ; - cm->cd[co].nchrs -= ndone; - cm->cd[sco].nchrs += ndone; - } -} + /* + * 
Find loop must have run at least once. + */ + + lastt->tptr[b] = t; + newarc(v->nfa, PLAIN, sco, lp, rp); + cm->cd[co].nchrs -= BYTTAB; + cm->cd[sco].nchrs += BYTTAB; + return; + } + /* + * General case, a mixed block to be altered. + */ + + i = 0; + while (i < BYTTAB) { + co = t->tcolor[i]; + sco = newsub(cm, co); + newarc(v->nfa, PLAIN, sco, lp, rp); + previ = i; + do { + t->tcolor[i++] = sco; + } while (i < BYTTAB && t->tcolor[i] == co); + ndone = i - previ; + cm->cd[co].nchrs -= ndone; + cm->cd[sco].nchrs += ndone; + } +} + /* - okcolors - promote subcolors to full colors ^ static VOID okcolors(struct nfa *, struct colormap *); */ -static VOID -okcolors(nfa, cm) -struct nfa *nfa; -struct colormap *cm; +static void +okcolors( + struct nfa *nfa, + struct colormap *cm) { - struct colordesc *cd; - struct colordesc *end = CDEND(cm); - struct colordesc *scd; - struct arc *a; - color co; - color sco; - - for (cd = cm->cd, co = 0; cd < end; cd++, co++) { - sco = cd->sub; - if (UNUSEDCOLOR(cd) || sco == NOSUB) { - /* has no subcolor, no further action */ - } else if (sco == co) { - /* is subcolor, let parent deal with it */ - } else if (cd->nchrs == 0) { - /* parent empty, its arcs change color to subcolor */ - cd->sub = NOSUB; - scd = &cm->cd[sco]; - assert(scd->nchrs > 0); - assert(scd->sub == sco); - scd->sub = NOSUB; - while ((a = cd->arcs) != NULL) { - assert(a->co == co); - /* uncolorchain(cm, a); */ - cd->arcs = a->colorchain; - a->co = sco; - /* colorchain(cm, a); */ - a->colorchain = scd->arcs; - scd->arcs = a; - } - freecolor(cm, co); - } else { - /* parent's arcs must gain parallel subcolor arcs */ - cd->sub = NOSUB; - scd = &cm->cd[sco]; - assert(scd->nchrs > 0); - assert(scd->sub == sco); - scd->sub = NOSUB; - for (a = cd->arcs; a != NULL; a = a->colorchain) { - assert(a->co == co); - newarc(nfa, a->type, sco, a->from, a->to); - } - } + struct colordesc *cd; + struct colordesc *end = CDEND(cm); + struct colordesc *scd; + struct arc *a; + color co; + color 
sco; + + for (cd = cm->cd, co = 0; cd < end; cd++, co++) { + sco = cd->sub; + if (UNUSEDCOLOR(cd) || sco == NOSUB) { + /* + * Has no subcolor, no further action. + */ + } else if (sco == co) { + /* + * Is subcolor, let parent deal with it. + */ + } else if (cd->nchrs == 0) { + /* + * Parent empty, its arcs change color to subcolor. + */ + + cd->sub = NOSUB; + scd = &cm->cd[sco]; + assert(scd->nchrs > 0); + assert(scd->sub == sco); + scd->sub = NOSUB; + while ((a = cd->arcs) != NULL) { + assert(a->co == co); + /* uncolorchain(cm, a); */ + cd->arcs = a->colorchain; + a->co = sco; + /* colorchain(cm, a); */ + a->colorchain = scd->arcs; + scd->arcs = a; + } + freecolor(cm, co); + } else { + /* + * Parent's arcs must gain parallel subcolor arcs. + */ + + cd->sub = NOSUB; + scd = &cm->cd[sco]; + assert(scd->nchrs > 0); + assert(scd->sub == sco); + scd->sub = NOSUB; + for (a = cd->arcs; a != NULL; a = a->colorchain) { + assert(a->co == co); + newarc(nfa, a->type, sco, a->from, a->to); + } } + } } - + /* - colorchain - add this arc to the color chain of its color ^ static VOID colorchain(struct colormap *, struct arc *); */ -static VOID -colorchain(cm, a) -struct colormap *cm; -struct arc *a; +static void +colorchain( + struct colormap *cm, + struct arc *a) { - struct colordesc *cd = &cm->cd[a->co]; + struct colordesc *cd = &cm->cd[a->co]; - a->colorchain = cd->arcs; - cd->arcs = a; + a->colorchain = cd->arcs; + cd->arcs = a; } - + /* - uncolorchain - delete this arc from the color chain of its color ^ static VOID uncolorchain(struct colormap *, struct arc *); */ -static VOID -uncolorchain(cm, a) -struct colormap *cm; -struct arc *a; +static void +uncolorchain( + struct colormap *cm, + struct arc *a) { - struct colordesc *cd = &cm->cd[a->co]; - struct arc *aa; - - aa = cd->arcs; - if (aa == a) /* easy case */ - cd->arcs = a->colorchain; - else { - for (; aa != NULL && aa->colorchain != a; aa = aa->colorchain) - continue; - assert(aa != NULL); - aa->colorchain = 
a->colorchain; + struct colordesc *cd = &cm->cd[a->co]; + struct arc *aa; + + aa = cd->arcs; + if (aa == a) { /* easy case */ + cd->arcs = a->colorchain; + } else { + for (; aa != NULL && aa->colorchain != a; aa = aa->colorchain) { + continue; } - a->colorchain = NULL; /* paranoia */ + assert(aa != NULL); + aa->colorchain = a->colorchain; + } + a->colorchain = NULL; /* paranoia */ } - + /* - singleton - is this character in its own color? ^ static int singleton(struct colormap *, pchr c); */ -static int /* predicate */ -singleton(cm, c) -struct colormap *cm; -pchr c; +static int /* predicate */ +singleton( + struct colormap *cm, + pchr c) { - color co; /* color of c */ + color co; /* color of c */ - co = GETCOLOR(cm, c); - if (cm->cd[co].nchrs == 1 && cm->cd[co].sub == NOSUB) - return 1; - return 0; + co = GETCOLOR(cm, c); + if (cm->cd[co].nchrs == 1 && cm->cd[co].sub == NOSUB) { + return 1; + } + return 0; } - + /* - rainbow - add arcs of all full colors (but one) between specified states ^ static VOID rainbow(struct nfa *, struct colormap *, int, pcolor, ^ struct state *, struct state *); */ -static VOID -rainbow(nfa, cm, type, but, from, to) -struct nfa *nfa; -struct colormap *cm; -int type; -pcolor but; /* COLORLESS if no exceptions */ -struct state *from; -struct state *to; +static void +rainbow( + struct nfa *nfa, + struct colormap *cm, + int type, + pcolor but, /* COLORLESS if no exceptions */ + struct state *from, + struct state *to) { - struct colordesc *cd; - struct colordesc *end = CDEND(cm); - color co; - - for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++) - if (!UNUSEDCOLOR(cd) && cd->sub != co && co != but && - !(cd->flags&PSEUDO)) - newarc(nfa, type, co, from, to); + struct colordesc *cd; + struct colordesc *end = CDEND(cm); + color co; + + for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++) { + if (!UNUSEDCOLOR(cd) && (cd->sub != co) && (co != but) + && !(cd->flags&PSEUDO)) { + newarc(nfa, type, co, from, to); + } + } } - + /* - 
colorcomplement - add arcs of complementary colors * The calling sequence ought to be reconciled with cloneouts(). ^ static VOID colorcomplement(struct nfa *, struct colormap *, int, ^ struct state *, struct state *, struct state *); */ -static VOID -colorcomplement(nfa, cm, type, of, from, to) -struct nfa *nfa; -struct colormap *cm; -int type; -struct state *of; /* complements of this guy's PLAIN outarcs */ -struct state *from; -struct state *to; +static void +colorcomplement( + struct nfa *nfa, + struct colormap *cm, + int type, + struct state *of, /* complements of this guy's PLAIN + * outarcs */ + struct state *from, + struct state *to) { - struct colordesc *cd; - struct colordesc *end = CDEND(cm); - color co; - - assert(of != from); - for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++) - if (!UNUSEDCOLOR(cd) && !(cd->flags&PSEUDO)) - if (findarc(of, PLAIN, co) == NULL) - newarc(nfa, type, co, from, to); + struct colordesc *cd; + struct colordesc *end = CDEND(cm); + color co; + + assert(of != from); + for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++) { + if (!UNUSEDCOLOR(cd) && !(cd->flags&PSEUDO)) { + if (findarc(of, PLAIN, co) == NULL) { + newarc(nfa, type, co, from, to); + } + } + } } - - - + #ifdef REG_DEBUG /* ^ #ifdef REG_DEBUG @@ -692,87 +765,107 @@ struct state *to; - dumpcolors - debugging output ^ static VOID dumpcolors(struct colormap *, FILE *); */ -static VOID -dumpcolors(cm, f) -struct colormap *cm; -FILE *f; +static void +dumpcolors( + struct colormap *cm, + FILE *f) { - struct colordesc *cd; - struct colordesc *end; - color co; - chr c; - char *has; - - fprintf(f, "max %ld\n", (long)cm->max); - if (NBYTS > 1) - fillcheck(cm, cm->tree, 0, f); - end = CDEND(cm); - for (cd = cm->cd + 1, co = 1; cd < end; cd++, co++) /* skip 0 */ - if (!UNUSEDCOLOR(cd)) { - assert(cd->nchrs > 0); - has = (cd->block != NULL) ? 
"#" : ""; - if (cd->flags&PSEUDO) - fprintf(f, "#%2ld%s(ps): ", (long)co, has); - else - fprintf(f, "#%2ld%s(%2d): ", (long)co, - has, cd->nchrs); - /* it's hard to do this more efficiently */ - for (c = CHR_MIN; c < CHR_MAX; c++) - if (GETCOLOR(cm, c) == co) - dumpchr(c, f); - assert(c == CHR_MAX); - if (GETCOLOR(cm, c) == co) - dumpchr(c, f); - fprintf(f, "\n"); + struct colordesc *cd; + struct colordesc *end; + color co; + chr c; + char *has; + + fprintf(f, "max %ld\n", (long)cm->max); + if (NBYTS > 1) { + fillcheck(cm, cm->tree, 0, f); + } + end = CDEND(cm); + for (cd = cm->cd + 1, co = 1; cd < end; cd++, co++) { /* skip 0 */ + if (!UNUSEDCOLOR(cd)) { + assert(cd->nchrs > 0); + has = (cd->block != NULL) ? "#" : ""; + if (cd->flags&PSEUDO) { + fprintf(f, "#%2ld%s(ps): ", (long)co, has); + } else { + fprintf(f, "#%2ld%s(%2d): ", (long)co, has, cd->nchrs); + } + + /* + * It's hard to do this more efficiently. + */ + + for (c = CHR_MIN; c < CHR_MAX; c++) { + if (GETCOLOR(cm, c) == co) { + dumpchr(c, f); } + } + assert(c == CHR_MAX); + if (GETCOLOR(cm, c) == co) { + dumpchr(c, f); + } + fprintf(f, "\n"); + } + } } - + /* - fillcheck - check proper filling of a tree ^ static VOID fillcheck(struct colormap *, union tree *, int, FILE *); */ -static VOID -fillcheck(cm, tree, level, f) -struct colormap *cm; -union tree *tree; -int level; /* level number (top == 0) of this block */ -FILE *f; +static void +fillcheck( + struct colormap *cm, + union tree *tree, + int level, /* level number (top == 0) of this + * block */ + FILE *f) { - int i; - union tree *t; - union tree *fillt = &cm->tree[level+1]; - - assert(level < NBYTS-1); /* this level has pointers */ - for (i = BYTTAB-1; i >= 0; i--) { - t = tree->tptr[i]; - if (t == NULL) - fprintf(f, "NULL found in filled tree!\n"); - else if (t == fillt) - {} - else if (level < NBYTS-2) /* more pointer blocks below */ - fillcheck(cm, t, level+1, f); + int i; + union tree *t; + union tree *fillt = &cm->tree[level+1]; + + 
assert(level < NBYTS-1); /* this level has pointers */ + for (i = BYTTAB-1; i >= 0; i--) { + t = tree->tptr[i]; + if (t == NULL) { + fprintf(f, "NULL found in filled tree!\n"); + } else if (t == fillt) { + /* empty body */ + } else if (level < NBYTS-2) { /* more pointer blocks below */ + fillcheck(cm, t, level+1, f); } + } } - + /* - dumpchr - print a chr * Kind of char-centric but works well enough for debug use. ^ static VOID dumpchr(pchr, FILE *); */ -static VOID -dumpchr(c, f) -pchr c; -FILE *f; +static void +dumpchr( + pchr c, + FILE *f) { - if (c == '\\') - fprintf(f, "\\\\"); - else if (c > ' ' && c <= '~') - putc((char)c, f); - else - fprintf(f, "\\u%04lx", (long)c); + if (c == '\\') { + fprintf(f, "\\\\"); + } else if (c > ' ' && c <= '~') { + putc((char)c, f); + } else { + fprintf(f, "\\u%04lx", (long)c); + } } /* ^ #endif */ #endif /* ifdef REG_DEBUG */ + +/* + * Local Variables: + * mode: c + * c-basic-offset: 4 + * fill-column: 78 + * End: + */ diff --git a/generic/regc_cvec.c b/generic/regc_cvec.c index d2d56fc..f4c6dd0 100644 --- a/generic/regc_cvec.c +++ b/generic/regc_cvec.c @@ -3,20 +3,20 @@ * This file is #included by regcomp.c. * * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. 
- * + * + * I'd appreciate being given credit for this package in the documentation of + * software which uses it, but that is not a requirement. + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -27,18 +27,17 @@ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ - + /* - newcvec - allocate a new cvec ^ static struct cvec *newcvec(int, int, int); */ static struct cvec * -newcvec(nchrs, nranges, nmcces) - int nchrs; /* to hold this many chrs... */ - int nranges; /* ... and this many ranges... */ - int nmcces; /* ... and this many MCCEs */ +newcvec( + int nchrs, /* to hold this many chrs... */ + int nranges, /* ... and this many ranges... */ + int nmcces) /* ... and this many MCCEs */ { size_t n; size_t nc; @@ -58,15 +57,15 @@ newcvec(nchrs, nranges, nmcces) cv->rangespace = nranges; return clearcvec(cv); } - + /* - clearcvec - clear a possibly-new cvec * Returns pointer as convenience. 
^ static struct cvec *clearcvec(struct cvec *); */ static struct cvec * -clearcvec(cv) - struct cvec *cv; /* character vector */ +clearcvec( + struct cvec *cv) /* character vector */ { int i; @@ -82,45 +81,45 @@ clearcvec(cv) return cv; } - + /* - addchr - add a chr to a cvec ^ static VOID addchr(struct cvec *, pchr); */ -static VOID -addchr(cv, c) - struct cvec *cv; /* character vector */ - pchr c; /* character to add */ +static void +addchr( + struct cvec *cv, /* character vector */ + pchr c) /* character to add */ { assert(cv->nchrs < cv->chrspace - cv->nmccechrs); cv->chrs[cv->nchrs++] = (chr)c; } - + /* - addrange - add a range to a cvec ^ static VOID addrange(struct cvec *, pchr, pchr); */ -static VOID -addrange(cv, from, to) - struct cvec *cv; /* character vector */ - pchr from; /* first character of range */ - pchr to; /* last character of range */ +static void +addrange( + struct cvec *cv, /* character vector */ + pchr from, /* first character of range */ + pchr to) /* last character of range */ { assert(cv->nranges < cv->rangespace); cv->ranges[cv->nranges*2] = (chr)from; cv->ranges[cv->nranges*2 + 1] = (chr)to; cv->nranges++; } - + /* - addmcce - add an MCCE to a cvec ^ static VOID addmcce(struct cvec *, chr *, chr *); */ -static VOID -addmcce(cv, startp, endp) - struct cvec *cv; /* character vector */ - chr *startp; /* beginning of text */ - chr *endp; /* just past end of text */ +static void +addmcce( + struct cvec *cv, /* character vector */ + chr *startp, /* beginning of text */ + chr *endp) /* just past end of text */ { int len; int i; @@ -139,19 +138,19 @@ addmcce(cv, startp, endp) for (s = startp, i = len; i > 0; s++, i--) { *d++ = *s; } - *d++ = 0; /* endmarker */ + *d++ = 0; /* endmarker */ assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]); cv->nmccechrs += len + 1; } - + /* - haschr - does a cvec contain this chr? 
^ static int haschr(struct cvec *, pchr); */ -static int /* predicate */ -haschr(cv, c) - struct cvec *cv; /* character vector */ - pchr c; /* character to test for */ +static int /* predicate */ +haschr( + struct cvec *cv, /* character vector */ + pchr c) /* character to test for */ { int i; chr *p; @@ -168,20 +167,20 @@ haschr(cv, c) } return 0; } - + /* - getcvec - get a cvec, remembering it as v->cv ^ static struct cvec *getcvec(struct vars *, int, int, int); */ static struct cvec * -getcvec(v, nchrs, nranges, nmcces) - struct vars *v; /* context */ - int nchrs; /* to hold this many chrs... */ - int nranges; /* ... and this many ranges... */ - int nmcces; /* ... and this many MCCEs */ +getcvec( + struct vars *v, /* context */ + int nchrs, /* to hold this many chrs... */ + int nranges, /* ... and this many ranges... */ + int nmcces) /* ... and this many MCCEs */ { - if (v->cv != NULL && nchrs <= v->cv->chrspace && - nranges <= v->cv->rangespace && nmcces <= v->cv->mccespace) { + if ((v->cv != NULL) && (nchrs <= v->cv->chrspace) && + (nranges <= v->cv->rangespace) && (nmcces <= v->cv->mccespace)) { return clearcvec(v->cv); } @@ -195,14 +194,22 @@ getcvec(v, nchrs, nranges, nmcces) return v->cv; } - + /* - freecvec - free a cvec ^ static VOID freecvec(struct cvec *); */ -static VOID -freecvec(cv) - struct cvec *cv; /* character vector */ +static void +freecvec( + struct cvec *cv) /* character vector */ { FREE(cv); } + +/* + * Local Variables: + * mode: c + * c-basic-offset: 4 + * fill-column: 78 + * End: + */ diff --git a/generic/regc_lex.c b/generic/regc_lex.c index 1acc3f4..cc02e9d 100644 --- a/generic/regc_lex.c +++ b/generic/regc_lex.c @@ -3,20 +3,20 @@ * This file is #included by regcomp.c. * * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. 
- * + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * + * + * I'd appreciate being given credit for this package in the documentation of + * software which uses it, but that is not a requirement. + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -27,7 +27,6 @@ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * */ /* scanning macros (know about v) */ @@ -35,9 +34,10 @@ #define HAVE(n) (v->stop - v->now >= (n)) #define NEXT1(c) (!ATEOS() && *v->now == CHR(c)) #define NEXT2(a,b) (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b)) -#define NEXT3(a,b,c) (HAVE(3) && *v->now == CHR(a) && \ - *(v->now+1) == CHR(b) && \ - *(v->now+2) == CHR(c)) +#define NEXT3(a,b,c) \ + (HAVE(3) && *v->now == CHR(a) && \ + *(v->now+1) == CHR(b) && \ + *(v->now+2) == CHR(c)) #define SET(c) (v->nexttype = (c)) #define SETV(c, n) (v->nexttype = (c), v->nextvalue = (n)) #define RET(c) return (SET(c), 1) @@ -60,804 +60,907 @@ /* construct pointer past end of chr array */ #define ENDOF(array) ((array) + sizeof(array)/sizeof(chr)) - + /* - lexstart - set up lexical stuff, scan leading options ^ static VOID lexstart(struct vars *); */ -static VOID -lexstart(v) -struct vars *v; +static void +lexstart( + struct vars *v) { - prefixes(v); /* may turn on new type bits etc. */ - NOERR(); + prefixes(v); /* may turn on new type bits etc. 
*/ + NOERR(); - if (v->cflags&REG_QUOTE) { - assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE))); - INTOCON(L_Q); - } else if (v->cflags&REG_EXTENDED) { - assert(!(v->cflags&REG_QUOTE)); - INTOCON(L_ERE); - } else { - assert(!(v->cflags&(REG_QUOTE|REG_ADVF))); - INTOCON(L_BRE); - } + if (v->cflags&REG_QUOTE) { + assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE))); + INTOCON(L_Q); + } else if (v->cflags&REG_EXTENDED) { + assert(!(v->cflags&REG_QUOTE)); + INTOCON(L_ERE); + } else { + assert(!(v->cflags&(REG_QUOTE|REG_ADVF))); + INTOCON(L_BRE); + } - v->nexttype = EMPTY; /* remember we were at the start */ - next(v); /* set up the first token */ + v->nexttype = EMPTY; /* remember we were at the start */ + next(v); /* set up the first token */ } - + /* - prefixes - implement various special prefixes ^ static VOID prefixes(struct vars *); */ -static VOID -prefixes(v) -struct vars *v; +static void +prefixes( + struct vars *v) { - /* literal string doesn't get any of this stuff */ - if (v->cflags&REG_QUOTE) - return; + /* + * Literal string doesn't get any of this stuff. + */ - /* initial "***" gets special things */ - if (HAVE(4) && NEXT3('*', '*', '*')) - switch (*(v->now + 3)) { - case CHR('?'): /* "***?" error, msg shows version */ - ERR(REG_BADPAT); - return; /* proceed no further */ - break; - case CHR('='): /* "***=" shifts to literal string */ - NOTE(REG_UNONPOSIX); - v->cflags |= REG_QUOTE; - v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE); - v->now += 4; - return; /* and there can be no more prefixes */ - break; - case CHR(':'): /* "***:" shifts to AREs */ - NOTE(REG_UNONPOSIX); - v->cflags |= REG_ADVANCED; - v->now += 4; - break; - default: /* otherwise *** is just an error */ - ERR(REG_BADRPT); - return; - break; - } + if (v->cflags&REG_QUOTE) { + return; + } - /* BREs and EREs don't get embedded options */ - if ((v->cflags&REG_ADVANCED) != REG_ADVANCED) - return; + /* + * Initial "***" gets special things.
+ */ - /* embedded options (AREs only) */ - if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) { - NOTE(REG_UNONPOSIX); - v->now += 2; - for (; !ATEOS() && iscalpha(*v->now); v->now++) - switch (*v->now) { - case CHR('b'): /* BREs (but why???) */ - v->cflags &= ~(REG_ADVANCED|REG_QUOTE); - break; - case CHR('c'): /* case sensitive */ - v->cflags &= ~REG_ICASE; - break; - case CHR('e'): /* plain EREs */ - v->cflags |= REG_EXTENDED; - v->cflags &= ~(REG_ADVF|REG_QUOTE); - break; - case CHR('i'): /* case insensitive */ - v->cflags |= REG_ICASE; - break; - case CHR('m'): /* Perloid synonym for n */ - case CHR('n'): /* \n affects ^ $ . [^ */ - v->cflags |= REG_NEWLINE; - break; - case CHR('p'): /* ~Perl, \n affects . [^ */ - v->cflags |= REG_NLSTOP; - v->cflags &= ~REG_NLANCH; - break; - case CHR('q'): /* literal string */ - v->cflags |= REG_QUOTE; - v->cflags &= ~REG_ADVANCED; - break; - case CHR('s'): /* single line, \n ordinary */ - v->cflags &= ~REG_NEWLINE; - break; - case CHR('t'): /* tight syntax */ - v->cflags &= ~REG_EXPANDED; - break; - case CHR('w'): /* weird, \n affects ^ $ only */ - v->cflags &= ~REG_NLSTOP; - v->cflags |= REG_NLANCH; - break; - case CHR('x'): /* expanded syntax */ - v->cflags |= REG_EXPANDED; - break; - default: - ERR(REG_BADOPT); - return; - } - if (!NEXT1(')')) { - ERR(REG_BADOPT); - return; - } - v->now++; - if (v->cflags&REG_QUOTE) - v->cflags &= ~(REG_EXPANDED|REG_NEWLINE); + if (HAVE(4) && NEXT3('*', '*', '*')) { + switch (*(v->now + 3)) { + case CHR('?'): /* "***?"
error, msg shows version */ + ERR(REG_BADPAT); + return; /* proceed no further */ + break; + case CHR('='): /* "***=" shifts to literal string */ + NOTE(REG_UNONPOSIX); + v->cflags |= REG_QUOTE; + v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE); + v->now += 4; + return; /* and there can be no more prefixes */ + break; + case CHR(':'): /* "***:" shifts to AREs */ + NOTE(REG_UNONPOSIX); + v->cflags |= REG_ADVANCED; + v->now += 4; + break; + default: /* otherwise *** is just an error */ + ERR(REG_BADRPT); + return; + break; } -} + } + /* + * BREs and EREs don't get embedded options. + */ + + if ((v->cflags&REG_ADVANCED) != REG_ADVANCED) { + return; + } + + /* + * Embedded options (AREs only). + */ + + if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) { + NOTE(REG_UNONPOSIX); + v->now += 2; + for (; !ATEOS() && iscalpha(*v->now); v->now++) { + switch (*v->now) { + case CHR('b'): /* BREs (but why???) */ + v->cflags &= ~(REG_ADVANCED|REG_QUOTE); + break; + case CHR('c'): /* case sensitive */ + v->cflags &= ~REG_ICASE; + break; + case CHR('e'): /* plain EREs */ + v->cflags |= REG_EXTENDED; + v->cflags &= ~(REG_ADVF|REG_QUOTE); + break; + case CHR('i'): /* case insensitive */ + v->cflags |= REG_ICASE; + break; + case CHR('m'): /* Perloid synonym for n */ + case CHR('n'): /* \n affects ^ $ . [^ */ + v->cflags |= REG_NEWLINE; + break; + case CHR('p'): /* ~Perl, \n affects .
[^ */ + v->cflags |= REG_NLSTOP; + v->cflags &= ~REG_NLANCH; + break; + case CHR('q'): /* literal string */ + v->cflags |= REG_QUOTE; + v->cflags &= ~REG_ADVANCED; + break; + case CHR('s'): /* single line, \n ordinary */ + v->cflags &= ~REG_NEWLINE; + break; + case CHR('t'): /* tight syntax */ + v->cflags &= ~REG_EXPANDED; + break; + case CHR('w'): /* weird, \n affects ^ $ only */ + v->cflags &= ~REG_NLSTOP; + v->cflags |= REG_NLANCH; + break; + case CHR('x'): /* expanded syntax */ + v->cflags |= REG_EXPANDED; + break; + default: + ERR(REG_BADOPT); + return; + } + } + if (!NEXT1(')')) { + ERR(REG_BADOPT); + return; + } + v->now++; + if (v->cflags&REG_QUOTE) { + v->cflags &= ~(REG_EXPANDED|REG_NEWLINE); + } + } +} + /* - lexnest - "call a subroutine", interpolating string at the lexical level * Note, this is not a very general facility. There are a number of * implicit assumptions about what sorts of strings can be subroutines. ^ static VOID lexnest(struct vars *, chr *, chr *); */ -static VOID -lexnest(v, beginp, endp) -struct vars *v; -chr *beginp; /* start of interpolation */ -chr *endp; /* one past end of interpolation */ +static void +lexnest( + struct vars *v, + chr *beginp, /* start of interpolation */ + chr *endp) /* one past end of interpolation */ { - assert(v->savenow == NULL); /* only one level of nesting */ - v->savenow = v->now; - v->savestop = v->stop; - v->now = beginp; - v->stop = endp; + assert(v->savenow == NULL); /* only one level of nesting */ + v->savenow = v->now; + v->savestop = v->stop; + v->now = beginp; + v->stop = endp; } - + /* * string constants to interpolate as expansions of things like \d */ + static chr backd[] = { /* \d */ - CHR('['), CHR('['), CHR(':'), - CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), - CHR(':'), CHR(']'), CHR(']') + CHR('['), CHR('['), CHR(':'), + CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), + CHR(':'), CHR(']'), CHR(']') }; static chr backD[] = { /* \D */ - CHR('['), CHR('^'), CHR('['), CHR(':'), -
CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), - CHR(':'), CHR(']'), CHR(']') + CHR('['), CHR('^'), CHR('['), CHR(':'), + CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), + CHR(':'), CHR(']'), CHR(']') }; static chr brbackd[] = { /* \d within brackets */ - CHR('['), CHR(':'), - CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), - CHR(':'), CHR(']') + CHR('['), CHR(':'), + CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), + CHR(':'), CHR(']') }; static chr backs[] = { /* \s */ - CHR('['), CHR('['), CHR(':'), - CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), - CHR(':'), CHR(']'), CHR(']') + CHR('['), CHR('['), CHR(':'), + CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), + CHR(':'), CHR(']'), CHR(']') }; static chr backS[] = { /* \S */ - CHR('['), CHR('^'), CHR('['), CHR(':'), - CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), - CHR(':'), CHR(']'), CHR(']') + CHR('['), CHR('^'), CHR('['), CHR(':'), + CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), + CHR(':'), CHR(']'), CHR(']') }; static chr brbacks[] = { /* \s within brackets */ - CHR('['), CHR(':'), - CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), - CHR(':'), CHR(']') + CHR('['), CHR(':'), + CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), + CHR(':'), CHR(']') }; static chr backw[] = { /* \w */ - CHR('['), CHR('['), CHR(':'), - CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), - CHR(':'), CHR(']'), CHR('_'), CHR(']') + CHR('['), CHR('['), CHR(':'), + CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), + CHR(':'), CHR(']'), CHR('_'), CHR(']') }; static chr backW[] = { /* \W */ - CHR('['), CHR('^'), CHR('['), CHR(':'), - CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), - CHR(':'), CHR(']'), CHR('_'), CHR(']') + CHR('['), CHR('^'), CHR('['), CHR(':'), + CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), + CHR(':'), CHR(']'), CHR('_'), CHR(']') }; static chr brbackw[] = { /* \w within brackets */ - CHR('['), CHR(':'), - CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), - CHR(':'), CHR(']'), CHR('_') + 
CHR('['), CHR(':'), + CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), + CHR(':'), CHR(']'), CHR('_') }; - + /* - lexword - interpolate a bracket expression for word characters * Possibly ought to inquire whether there is a "word" character class. ^ static VOID lexword(struct vars *); */ -static VOID -lexword(v) -struct vars *v; +static void +lexword( + struct vars *v) { - lexnest(v, backw, ENDOF(backw)); + lexnest(v, backw, ENDOF(backw)); } - + /* - next - get next token ^ static int next(struct vars *); */ static int /* 1 normal, 0 failure */ -next(v) -struct vars *v; +next( + struct vars *v) { - chr c; + chr c; - /* errors yield an infinite sequence of failures */ - if (ISERR()) - return 0; /* the error has set nexttype to EOS */ + /* + * Errors yield an infinite sequence of failures. + */ - /* remember flavor of last token */ - v->lasttype = v->nexttype; + if (ISERR()) { + return 0; /* the error has set nexttype to EOS */ + } - /* REG_BOSONLY */ - if (v->nexttype == EMPTY && (v->cflags&REG_BOSONLY)) { - /* at start of a REG_BOSONLY RE */ - RETV(SBEGIN, 0); /* same as \A */ - } + /* + * Remember flavor of last token. + */ - /* if we're nested and we've hit end, return to outer level */ - if (v->savenow != NULL && ATEOS()) { - v->now = v->savenow; - v->stop = v->savestop; - v->savenow = v->savestop = NULL; - } + v->lasttype = v->nexttype; - /* skip white space etc.
if appropriate (not in literal or []) */ - if (v->cflags&REG_EXPANDED) - switch (v->lexcon) { - case L_ERE: - case L_BRE: - case L_EBND: - case L_BBND: - skip(v); - break; - } + /* + * REG_BOSONLY + */ - /* handle EOS, depending on context */ - if (ATEOS()) { - switch (v->lexcon) { - case L_ERE: - case L_BRE: - case L_Q: - RET(EOS); - break; - case L_EBND: - case L_BBND: - FAILW(REG_EBRACE); - break; - case L_BRACK: - case L_CEL: - case L_ECL: - case L_CCL: - FAILW(REG_EBRACK); - break; - } - assert(NOTREACHED); + if (v->nexttype == EMPTY && (v->cflags&REG_BOSONLY)) { + /* at start of a REG_BOSONLY RE */ + RETV(SBEGIN, 0); /* same as \A */ + } + + /* + * If we're nested and we've hit end, return to outer level. + */ + + if (v->savenow != NULL && ATEOS()) { + v->now = v->savenow; + v->stop = v->savestop; + v->savenow = v->savestop = NULL; + } + + /* + * Skip white space etc. if appropriate (not in literal or []) + */ + + if (v->cflags&REG_EXPANDED) { + switch (v->lexcon) { + case L_ERE: + case L_BRE: + case L_EBND: + case L_BBND: + skip(v); + break; } + } - /* okay, time to actually get a character */ - c = *v->now++; + /* + * Handle EOS, depending on context.
+ */ - /* deal with the easy contexts, punt EREs to code below */ + if (ATEOS()) { switch (v->lexcon) { - case L_BRE: /* punt BREs to separate function */ - return brenext(v, c); - break; - case L_ERE: /* see below */ - break; - case L_Q: /* literal strings are easy */ - RETV(PLAIN, c); - break; - case L_BBND: /* bounds are fairly simple */ + case L_ERE: + case L_BRE: + case L_Q: + RET(EOS); + break; case L_EBND: - switch (c) { - case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'): - case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): - case CHR('8'): case CHR('9'): - RETV(DIGIT, (chr)DIGITVAL(c)); - break; - case CHR(','): - RET(','); - break; - case CHR('}'): /* ERE bound ends with } */ - if (INCON(L_EBND)) { - INTOCON(L_ERE); - if ((v->cflags&REG_ADVF) && NEXT1('?')) { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('}', 0); - } - RETV('}', 1); - } else - FAILW(REG_BADBR); - break; - case CHR('\\'): /* BRE bound ends with \} */ - if (INCON(L_BBND) && NEXT1('}')) { - v->now++; - INTOCON(L_BRE); - RET('}'); - } else - FAILW(REG_BADBR); - break; - default: - FAILW(REG_BADBR); - break; - } - assert(NOTREACHED); - break; - case L_BRACK: /* brackets are not too hard */ - switch (c) { - case CHR(']'): - if (LASTTYPE('[')) - RETV(PLAIN, c); - else { - INTOCON((v->cflags&REG_EXTENDED) ?
- L_ERE : L_BRE); - RET(']'); - } - break; - case CHR('\\'): - NOTE(REG_UBBS); - if (!(v->cflags&REG_ADVF)) - RETV(PLAIN, c); - NOTE(REG_UNONPOSIX); - if (ATEOS()) - FAILW(REG_EESCAPE); - (DISCARD)lexescape(v); - switch (v->nexttype) { /* not all escapes okay here */ - case PLAIN: - return 1; - break; - case CCLASS: - switch (v->nextvalue) { - case 'd': - lexnest(v, brbackd, ENDOF(brbackd)); - break; - case 's': - lexnest(v, brbacks, ENDOF(brbacks)); - break; - case 'w': - lexnest(v, brbackw, ENDOF(brbackw)); - break; - default: - FAILW(REG_EESCAPE); - break; - } - /* lexnest done, back up and try again */ - v->nexttype = v->lasttype; - return next(v); - break; - } - /* not one of the acceptable escapes */ - FAILW(REG_EESCAPE); - break; - case CHR('-'): - if (LASTTYPE('[') || NEXT1(']')) - RETV(PLAIN, c); - else - RETV(RANGE, c); - break; - case CHR('['): - if (ATEOS()) - FAILW(REG_EBRACK); - switch (*v->now++) { - case CHR('.'): - INTOCON(L_CEL); - /* might or might not be locale-specific */ - RET(COLLEL); - break; - case CHR('='): - INTOCON(L_ECL); - NOTE(REG_ULOCALE); - RET(ECLASS); - break; - case CHR(':'): - INTOCON(L_CCL); - NOTE(REG_ULOCALE); - RET(CCLASS); - break; - default: /* oops */ - v->now--; - RETV(PLAIN, c); - break; - } - assert(NOTREACHED); - break; - default: - RETV(PLAIN, c); - break; - } - assert(NOTREACHED); - break; - case L_CEL: /* collating elements are easy */ - if (c == CHR('.') && NEXT1(']')) { - v->now++; - INTOCON(L_BRACK); - RETV(END, '.'); - } else - RETV(PLAIN, c); - break; - case L_ECL: /* ditto equivalence classes */ - if (c == CHR('=') && NEXT1(']')) { - v->now++; - INTOCON(L_BRACK); - RETV(END, '='); - } else - RETV(PLAIN, c); - break; - case L_CCL: /* ditto character classes */ - if (c == CHR(':') && NEXT1(']')) { - v->now++; - INTOCON(L_BRACK); - RETV(END, ':'); - } else - RETV(PLAIN, c); - break; - default: - assert(NOTREACHED); - break; + case L_BBND: + FAILW(REG_EBRACE); + break; + case L_BRACK: + case L_CEL: + case L_ECL: +
case L_CCL: + FAILW(REG_EBRACK); + break; } + assert(NOTREACHED); + } + + /* + * Okay, time to actually get a character. + */ + + c = *v->now++; - /* that got rid of everything except EREs and AREs */ - assert(INCON(L_ERE)); + /* + * Deal with the easy contexts, punt EREs to code below. + */ - /* deal with EREs and AREs, except for backslashes */ + switch (v->lexcon) { + case L_BRE: /* punt BREs to separate function */ + return brenext(v, c); + break; + case L_ERE: /* see below */ + break; + case L_Q: /* literal strings are easy */ + RETV(PLAIN, c); + break; + case L_BBND: /* bounds are fairly simple */ + case L_EBND: switch (c) { - case CHR('|'): - RET('|'); - break; - case CHR('*'): + case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'): + case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): + case CHR('8'): case CHR('9'): + RETV(DIGIT, (chr)DIGITVAL(c)); + break; + case CHR(','): + RET(','); + break; + case CHR('}'): /* ERE bound ends with } */ + if (INCON(L_EBND)) { + INTOCON(L_ERE); if ((v->cflags&REG_ADVF) && NEXT1('?')) { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('*', 0); + v->now++; + NOTE(REG_UNONPOSIX); + RETV('}', 0); } - RETV('*', 1); + RETV('}', 1); + } else { + FAILW(REG_BADBR); + } + break; + case CHR('\\'): /* BRE bound ends with \} */ + if (INCON(L_BBND) && NEXT1('}')) { + v->now++; + INTOCON(L_BRE); + RET('}'); + } else { + FAILW(REG_BADBR); + } + break; + default: + FAILW(REG_BADBR); + break; + } + assert(NOTREACHED); + break; + case L_BRACK: /* brackets are not too hard */ + switch (c) { + case CHR(']'): + if (LASTTYPE('[')) { + RETV(PLAIN, c); + } else { + INTOCON((v->cflags&REG_EXTENDED) ?
L_ERE : L_BRE); + RET(']'); + } + break; + case CHR('\\'): + NOTE(REG_UBBS); + if (!(v->cflags&REG_ADVF)) { + RETV(PLAIN, c); + } + NOTE(REG_UNONPOSIX); + if (ATEOS()) { + FAILW(REG_EESCAPE); + } + (DISCARD)lexescape(v); + switch (v->nexttype) { /* not all escapes okay here */ + case PLAIN: + return 1; break; - case CHR('+'): - if ((v->cflags&REG_ADVF) && NEXT1('?')) { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('+', 0); + case CCLASS: + switch (v->nextvalue) { + case 'd': + lexnest(v, brbackd, ENDOF(brbackd)); + break; + case 's': + lexnest(v, brbacks, ENDOF(brbacks)); + break; + case 'w': + lexnest(v, brbackw, ENDOF(brbackw)); + break; + default: + FAILW(REG_EESCAPE); + break; } - RETV('+', 1); + + /* + * lexnest() done, back up and try again. + */ + + v->nexttype = v->lasttype; + return next(v); break; - case CHR('?'): - if ((v->cflags&REG_ADVF) && NEXT1('?')) { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('?', 0); - } - RETV('?', 1); + } + + /* + * Not one of the acceptable escapes. + */ + + FAILW(REG_EESCAPE); + break; + case CHR('-'): + if (LASTTYPE('[') || NEXT1(']')) { + RETV(PLAIN, c); + } else { + RETV(RANGE, c); + } + break; + case CHR('['): + if (ATEOS()) { + FAILW(REG_EBRACK); + } + switch (*v->now++) { + case CHR('.'): + INTOCON(L_CEL); + + /* + * Might or might not be locale-specific.
+ */ + + RET(COLLEL); break; - case CHR('{'): /* bounds start or plain character */ - if (v->cflags&REG_EXPANDED) - skip(v); - if (ATEOS() || !iscdigit(*v->now)) { - NOTE(REG_UBRACES); - NOTE(REG_UUNSPEC); - RETV(PLAIN, c); - } else { - NOTE(REG_UBOUNDS); - INTOCON(L_EBND); - RET('{'); - } - assert(NOTREACHED); + case CHR('='): + INTOCON(L_ECL); + NOTE(REG_ULOCALE); + RET(ECLASS); break; - case CHR('('): /* parenthesis, or advanced extension */ - if ((v->cflags&REG_ADVF) && NEXT1('?')) { - NOTE(REG_UNONPOSIX); - v->now++; - switch (*v->now++) { - case CHR(':'): /* non-capturing paren */ - RETV('(', 0); - break; - case CHR('#'): /* comment */ - while (!ATEOS() && *v->now != CHR(')')) - v->now++; - if (!ATEOS()) - v->now++; - assert(v->nexttype == v->lasttype); - return next(v); - break; - case CHR('='): /* positive lookahead */ - NOTE(REG_ULOOKAHEAD); - RETV(LACON, 1); - break; - case CHR('!'): /* negative lookahead */ - NOTE(REG_ULOOKAHEAD); - RETV(LACON, 0); - break; - default: - FAILW(REG_BADRPT); - break; - } - assert(NOTREACHED); - } - if (v->cflags&REG_NOSUB) - RETV('(', 0); /* all parens non-capturing */ - else - RETV('(', 1); + case CHR(':'): + INTOCON(L_CCL); + NOTE(REG_ULOCALE); + RET(CCLASS); break; - case CHR(')'): - if (LASTTYPE('(')) { - NOTE(REG_UUNSPEC); - } - RETV(')', c); + default: /* oops */ + v->now--; + RETV(PLAIN, c); break; - case CHR('['): /* easy except for [[:<:]] and [[:>:]] */ - if (HAVE(6) && *(v->now+0) == CHR('[') && - *(v->now+1) == CHR(':') && - (*(v->now+2) == CHR('<') || - *(v->now+2) == CHR('>')) && - *(v->now+3) == CHR(':') && - *(v->now+4) == CHR(']') && - *(v->now+5) == CHR(']')) { - c = *(v->now+2); - v->now += 6; - NOTE(REG_UNONPOSIX); - RET((c == CHR('<')) ?
'<' : '>'); + } + assert(NOTREACHED); + break; + default: + RETV(PLAIN, c); + break; + } + assert(NOTREACHED); + break; + case L_CEL: /* collating elements are easy */ + if (c == CHR('.') && NEXT1(']')) { + v->now++; + INTOCON(L_BRACK); + RETV(END, '.'); + } else { + RETV(PLAIN, c); + } + break; + case L_ECL: /* ditto equivalence classes */ + if (c == CHR('=') && NEXT1(']')) { + v->now++; + INTOCON(L_BRACK); + RETV(END, '='); + } else { + RETV(PLAIN, c); + } + break; + case L_CCL: /* ditto character classes */ + if (c == CHR(':') && NEXT1(']')) { + v->now++; + INTOCON(L_BRACK); + RETV(END, ':'); + } else { + RETV(PLAIN, c); + } + break; + default: + assert(NOTREACHED); + break; + } + + /* + * That got rid of everything except EREs and AREs. + */ + + assert(INCON(L_ERE)); + + /* + * Deal with EREs and AREs, except for backslashes. + */ + + switch (c) { + case CHR('|'): + RET('|'); + break; + case CHR('*'): + if ((v->cflags&REG_ADVF) && NEXT1('?')) { + v->now++; + NOTE(REG_UNONPOSIX); + RETV('*', 0); + } + RETV('*', 1); + break; + case CHR('+'): + if ((v->cflags&REG_ADVF) && NEXT1('?')) { + v->now++; + NOTE(REG_UNONPOSIX); + RETV('+', 0); + } + RETV('+', 1); + break; + case CHR('?'): + if ((v->cflags&REG_ADVF) && NEXT1('?')) { + v->now++; + NOTE(REG_UNONPOSIX); + RETV('?', 0); + } + RETV('?', 1); + break; + case CHR('{'): /* bounds start or plain character */ + if (v->cflags&REG_EXPANDED) { + skip(v); + } + if (ATEOS() || !iscdigit(*v->now)) { + NOTE(REG_UBRACES); + NOTE(REG_UUNSPEC); + RETV(PLAIN, c); + } else { + NOTE(REG_UBOUNDS); + INTOCON(L_EBND); + RET('{'); + } + assert(NOTREACHED); + break; + case CHR('('): /* parenthesis, or advanced extension */ + if ((v->cflags&REG_ADVF) && NEXT1('?')) { + NOTE(REG_UNONPOSIX); + v->now++; + switch (*v->now++) { + case CHR(':'): /* non-capturing paren */ + RETV('(', 0); + break; + case CHR('#'): /* comment */ + while (!ATEOS() && *v->now != CHR(')')) { + v->now++; } - INTOCON(L_BRACK); - if (NEXT1('^')) { - v->now++; - RETV('[', 0); + if
(!ATEOS()) { + v->now++; } - RETV('[', 1); - break; - case CHR('.'): - RET('.'); + assert(v->nexttype == v->lasttype); + return next(v); break; - case CHR('^'): - RET('^'); + case CHR('='): /* positive lookahead */ + NOTE(REG_ULOOKAHEAD); + RETV(LACON, 1); break; - case CHR('$'): - RET('$'); + case CHR('!'): /* negative lookahead */ + NOTE(REG_ULOOKAHEAD); + RETV(LACON, 0); break; - case CHR('\\'): /* mostly punt backslashes to code below */ - if (ATEOS()) - FAILW(REG_EESCAPE); - break; - default: /* ordinary character */ - RETV(PLAIN, c); + default: + FAILW(REG_BADRPT); break; + } + assert(NOTREACHED); + } + if (v->cflags&REG_NOSUB) { + RETV('(', 0); /* all parens non-capturing */ + } else { + RETV('(', 1); + } + break; + case CHR(')'): + if (LASTTYPE('(')) { + NOTE(REG_UUNSPEC); + } + RETV(')', c); + break; + case CHR('['): /* easy except for [[:<:]] and [[:>:]] */ + if (HAVE(6) && *(v->now+0) == CHR('[') && + *(v->now+1) == CHR(':') && + (*(v->now+2) == CHR('<') || *(v->now+2) == CHR('>')) && + *(v->now+3) == CHR(':') && + *(v->now+4) == CHR(']') && + *(v->now+5) == CHR(']')) { + c = *(v->now+2); + v->now += 6; + NOTE(REG_UNONPOSIX); + RET((c == CHR('<')) ? '<' : '>'); + } + INTOCON(L_BRACK); + if (NEXT1('^')) { + v->now++; + RETV('[', 0); + } + RETV('[', 1); + break; + case CHR('.'): + RET('.'); + break; + case CHR('^'): + RET('^'); + break; + case CHR('$'): + RET('$'); + break; + case CHR('\\'): /* mostly punt backslashes to code below */ + if (ATEOS()) { + FAILW(REG_EESCAPE); } + break; + default: /* ordinary character */ + RETV(PLAIN, c); + break; + } - /* ERE/ARE backslash handling; backslash already eaten */ - assert(!ATEOS()); - if (!(v->cflags&REG_ADVF)) { /* only AREs have non-trivial escapes */ - if (iscalnum(*v->now)) { - NOTE(REG_UBSALNUM); - NOTE(REG_UUNSPEC); - } - RETV(PLAIN, *v->now++); + /* + * ERE/ARE backslash handling; backslash already eaten.
+ */ + + assert(!ATEOS()); + if (!(v->cflags&REG_ADVF)) {/* only AREs have non-trivial escapes */ + if (iscalnum(*v->now)) { + NOTE(REG_UBSALNUM); + NOTE(REG_UUNSPEC); } - (DISCARD)lexescape(v); - if (ISERR()) - FAILW(REG_EESCAPE); - if (v->nexttype == CCLASS) { /* fudge at lexical level */ - switch (v->nextvalue) { - case 'd': lexnest(v, backd, ENDOF(backd)); break; - case 'D': lexnest(v, backD, ENDOF(backD)); break; - case 's': lexnest(v, backs, ENDOF(backs)); break; - case 'S': lexnest(v, backS, ENDOF(backS)); break; - case 'w': lexnest(v, backw, ENDOF(backw)); break; - case 'W': lexnest(v, backW, ENDOF(backW)); break; - default: - assert(NOTREACHED); - FAILW(REG_ASSERT); - break; - } - /* lexnest done, back up and try again */ - v->nexttype = v->lasttype; - return next(v); + RETV(PLAIN, *v->now++); + } + (DISCARD)lexescape(v); + if (ISERR()) { + FAILW(REG_EESCAPE); + } + if (v->nexttype == CCLASS) {/* fudge at lexical level */ + switch (v->nextvalue) { + case 'd': lexnest(v, backd, ENDOF(backd)); break; + case 'D': lexnest(v, backD, ENDOF(backD)); break; + case 's': lexnest(v, backs, ENDOF(backs)); break; + case 'S': lexnest(v, backS, ENDOF(backS)); break; + case 'w': lexnest(v, backw, ENDOF(backw)); break; + case 'W': lexnest(v, backW, ENDOF(backW)); break; + default: + assert(NOTREACHED); + FAILW(REG_ASSERT); + break; } - /* otherwise, lexescape has already done the work */ - return !ISERR(); -} + /* lexnest done, back up and try again */ + v->nexttype = v->lasttype; + return next(v); + } + + /* + * Otherwise, lexescape has already done the work. + */ + return !ISERR(); +} + /* - lexescape - parse an ARE backslash escape (backslash already eaten) * Note slightly nonstandard use of the CCLASS type code.
^ static int lexescape(struct vars *); */ static int /* not actually used, but convenient for RETV */ -lexescape(v) -struct vars *v; +lexescape( + struct vars *v) { - chr c; - static chr alert[] = { - CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t') - }; - static chr esc[] = { - CHR('E'), CHR('S'), CHR('C') - }; - chr *save; - - assert(v->cflags&REG_ADVF); - - assert(!ATEOS()); - c = *v->now++; - if (!iscalnum(c)) - RETV(PLAIN, c); + chr c; + static chr alert[] = { + CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t') + }; + static chr esc[] = { + CHR('E'), CHR('S'), CHR('C') + }; + chr *save; - NOTE(REG_UNONPOSIX); - switch (c) { - case CHR('a'): - RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007'))); - break; - case CHR('A'): - RETV(SBEGIN, 0); - break; - case CHR('b'): - RETV(PLAIN, CHR('\b')); - break; - case CHR('B'): - RETV(PLAIN, CHR('\\')); - break; - case CHR('c'): - NOTE(REG_UUNPORT); - if (ATEOS()) - FAILW(REG_EESCAPE); - RETV(PLAIN, (chr)(*v->now++ & 037)); - break; - case CHR('d'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'd'); - break; - case CHR('D'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'D'); - break; - case CHR('e'): - NOTE(REG_UUNPORT); - RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033'))); - break; - case CHR('f'): - RETV(PLAIN, CHR('\f')); - break; - case CHR('m'): - RET('<'); - break; - case CHR('M'): - RET('>'); - break; - case CHR('n'): - RETV(PLAIN, CHR('\n')); - break; - case CHR('r'): - RETV(PLAIN, CHR('\r')); - break; - case CHR('s'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 's'); - break; - case CHR('S'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'S'); - break; - case CHR('t'): - RETV(PLAIN, CHR('\t')); - break; - case CHR('u'): - c = lexdigits(v, 16, 4, 4); - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - case CHR('U'): - c = lexdigits(v, 16, 8, 8); - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - case CHR('v'): - RETV(PLAIN, CHR('\v')); - break; - case CHR('w'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'w'); - break; - 
case CHR('W'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'W'); - break; - case CHR('x'): - NOTE(REG_UUNPORT); - c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */ - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - case CHR('y'): - NOTE(REG_ULOCALE); - RETV(WBDRY, 0); - break; - case CHR('Y'): - NOTE(REG_ULOCALE); - RETV(NWBDRY, 0); - break; - case CHR('Z'): - RETV(SEND, 0); - break; - case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): - case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): - case CHR('9'): - save = v->now; - v->now--; /* put first digit back */ - c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */ - if (ISERR()) - FAILW(REG_EESCAPE); - /* ugly heuristic (first test is "exactly 1 digit?") */ - if (v->now - save == 0 || (int)c <= v->nsubexp) { - NOTE(REG_UBACKREF); - RETV(BACKREF, (chr)c); - } - /* oops, doesn't look like it's a backref after all... */ - v->now = save; - /* and fall through into octal number */ - case CHR('0'): - NOTE(REG_UUNPORT); - v->now--; /* put first digit back */ - c = lexdigits(v, 8, 1, 3); - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - default: - assert(iscalpha(c)); - FAILW(REG_EESCAPE); /* unknown alphabetic escape */ - break; + assert(v->cflags&REG_ADVF); + + assert(!ATEOS()); + c = *v->now++; + if (!iscalnum(c)) { + RETV(PLAIN, c); + } + + NOTE(REG_UNONPOSIX); + switch (c) { + case CHR('a'): + RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007'))); + break; + case CHR('A'): + RETV(SBEGIN, 0); + break; + case CHR('b'): + RETV(PLAIN, CHR('\b')); + break; + case CHR('B'): + RETV(PLAIN, CHR('\\')); + break; + case CHR('c'): + NOTE(REG_UUNPORT); + if (ATEOS()) { + FAILW(REG_EESCAPE); } - assert(NOTREACHED); -} + RETV(PLAIN, (chr)(*v->now++ & 037)); + break; + case CHR('d'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'd'); + break; + case CHR('D'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'D'); + break; + case CHR('e'): + NOTE(REG_UUNPORT); + RETV(PLAIN, chrnamed(v, esc, 
ENDOF(esc), CHR('\033'))); + break; + case CHR('f'): + RETV(PLAIN, CHR('\f')); + break; + case CHR('m'): + RET('<'); + break; + case CHR('M'): + RET('>'); + break; + case CHR('n'): + RETV(PLAIN, CHR('\n')); + break; + case CHR('r'): + RETV(PLAIN, CHR('\r')); + break; + case CHR('s'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 's'); + break; + case CHR('S'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'S'); + break; + case CHR('t'): + RETV(PLAIN, CHR('\t')); + break; + case CHR('u'): + c = lexdigits(v, 16, 4, 4); + if (ISERR()) { + FAILW(REG_EESCAPE); + } + RETV(PLAIN, c); + break; + case CHR('U'): + c = lexdigits(v, 16, 8, 8); + if (ISERR()) { + FAILW(REG_EESCAPE); + } + RETV(PLAIN, c); + break; + case CHR('v'): + RETV(PLAIN, CHR('\v')); + break; + case CHR('w'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'w'); + break; + case CHR('W'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'W'); + break; + case CHR('x'): + NOTE(REG_UUNPORT); + c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */ + if (ISERR()) { + FAILW(REG_EESCAPE); + } + RETV(PLAIN, c); + break; + case CHR('y'): + NOTE(REG_ULOCALE); + RETV(WBDRY, 0); + break; + case CHR('Y'): + NOTE(REG_ULOCALE); + RETV(NWBDRY, 0); + break; + case CHR('Z'): + RETV(SEND, 0); + break; + case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): + case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): + case CHR('9'): + save = v->now; + v->now--; /* put first digit back */ + c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */ + if (ISERR()) { + FAILW(REG_EESCAPE); + } + + /* + * Ugly heuristic (first test is "exactly 1 digit?") + */ + + if (v->now - save == 0 || (int)c <= v->nsubexp) { + NOTE(REG_UBACKREF); + RETV(BACKREF, (chr)c); + } + + /* + * Oops, doesn't look like it's a backref after all... + */ + v->now = save; + + /* + * And fall through into octal number. 
+ */ + + case CHR('0'): + NOTE(REG_UUNPORT); + v->now--; /* put first digit back */ + c = lexdigits(v, 8, 1, 3); + if (ISERR()) { + FAILW(REG_EESCAPE); + } + RETV(PLAIN, c); + break; + default: + assert(iscalpha(c)); + FAILW(REG_EESCAPE); /* unknown alphabetic escape */ + break; + } + assert(NOTREACHED); +} + /* - lexdigits - slurp up digits and return chr value ^ static chr lexdigits(struct vars *, int, int, int); */ static chr /* chr value; errors signalled via ERR */ -lexdigits(v, base, minlen, maxlen) -struct vars *v; -int base; -int minlen; -int maxlen; +lexdigits( + struct vars *v, + int base, + int minlen, + int maxlen) { - uchr n; /* unsigned to avoid overflow misbehavior */ - int len; - chr c; - int d; - CONST uchr ub = (uchr) base; - - n = 0; - for (len = 0; len < maxlen && !ATEOS(); len++) { - c = *v->now++; - switch (c) { - case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'): - case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): - case CHR('8'): case CHR('9'): - d = DIGITVAL(c); - break; - case CHR('a'): case CHR('A'): d = 10; break; - case CHR('b'): case CHR('B'): d = 11; break; - case CHR('c'): case CHR('C'): d = 12; break; - case CHR('d'): case CHR('D'): d = 13; break; - case CHR('e'): case CHR('E'): d = 14; break; - case CHR('f'): case CHR('F'): d = 15; break; - default: - v->now--; /* oops, not a digit at all */ - d = -1; - break; - } + uchr n; /* unsigned to avoid overflow misbehavior */ + int len; + chr c; + int d; + CONST uchr ub = (uchr) base; - if (d >= base) { /* not a plausible digit */ - v->now--; - d = -1; - } - if (d < 0) - break; /* NOTE BREAK OUT */ - n = n*ub + (uchr)d; + n = 0; + for (len = 0; len < maxlen && !ATEOS(); len++) { + c = *v->now++; + switch (c) { + case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'): + case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): + case CHR('8'): case CHR('9'): + d = DIGITVAL(c); + break; + case CHR('a'): case CHR('A'): d = 10; break; + case CHR('b'): case CHR('B'): d 
= 11; break; + case CHR('c'): case CHR('C'): d = 12; break; + case CHR('d'): case CHR('D'): d = 13; break; + case CHR('e'): case CHR('E'): d = 14; break; + case CHR('f'): case CHR('F'): d = 15; break; + default: + v->now--; /* oops, not a digit at all */ + d = -1; + break; } - if (len < minlen) - ERR(REG_EESCAPE); - return (chr)n; -} + if (d >= base) { /* not a plausible digit */ + v->now--; + d = -1; + } + if (d < 0) { + break; /* NOTE BREAK OUT */ + } + n = n*ub + (uchr)d; + } + if (len < minlen) { + ERR(REG_EESCAPE); + } + return (chr)n; +} + /* - brenext - get next BRE token * This is much like EREs except for all the stupid backslashes and the @@ -865,150 +968,161 @@ int maxlen; ^ static int brenext(struct vars *, pchr); */ static int /* 1 normal, 0 failure */ -brenext(v, pc) -struct vars *v; -pchr pc; +brenext( + struct vars *v, + pchr pc) { - chr c = (chr)pc; + chr c = (chr)pc; - switch (c) { - case CHR('*'): - if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^')) - RETV(PLAIN, c); - RET('*'); - break; - case CHR('['): - if (HAVE(6) && *(v->now+0) == CHR('[') && - *(v->now+1) == CHR(':') && - (*(v->now+2) == CHR('<') || - *(v->now+2) == CHR('>')) && - *(v->now+3) == CHR(':') && - *(v->now+4) == CHR(']') && - *(v->now+5) == CHR(']')) { - c = *(v->now+2); - v->now += 6; - NOTE(REG_UNONPOSIX); - RET((c == CHR('<')) ? 
'<' : '>'); - } - INTOCON(L_BRACK); - if (NEXT1('^')) { - v->now++; - RETV('[', 0); - } - RETV('[', 1); - break; - case CHR('.'): - RET('.'); - break; - case CHR('^'): - if (LASTTYPE(EMPTY)) - RET('^'); - if (LASTTYPE('(')) { - NOTE(REG_UUNSPEC); - RET('^'); - } - RETV(PLAIN, c); - break; - case CHR('$'): - if (v->cflags&REG_EXPANDED) - skip(v); - if (ATEOS()) - RET('$'); - if (NEXT2('\\', ')')) { - NOTE(REG_UUNSPEC); - RET('$'); - } - RETV(PLAIN, c); - break; - case CHR('\\'): - break; /* see below */ - default: - RETV(PLAIN, c); - break; + switch (c) { + case CHR('*'): + if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^')) { + RETV(PLAIN, c); + } + RET('*'); + break; + case CHR('['): + if (HAVE(6) && *(v->now+0) == CHR('[') && + *(v->now+1) == CHR(':') && + (*(v->now+2) == CHR('<') || *(v->now+2) == CHR('>')) && + *(v->now+3) == CHR(':') && + *(v->now+4) == CHR(']') && + *(v->now+5) == CHR(']')) { + c = *(v->now+2); + v->now += 6; + NOTE(REG_UNONPOSIX); + RET((c == CHR('<')) ? '<' : '>'); + } + INTOCON(L_BRACK); + if (NEXT1('^')) { + v->now++; + RETV('[', 0); + } + RETV('[', 1); + break; + case CHR('.'): + RET('.'); + break; + case CHR('^'): + if (LASTTYPE(EMPTY)) { + RET('^'); + } + if (LASTTYPE('(')) { + NOTE(REG_UUNSPEC); + RET('^'); } + RETV(PLAIN, c); + break; + case CHR('$'): + if (v->cflags&REG_EXPANDED) { + skip(v); + } + if (ATEOS()) { + RET('$'); + } + if (NEXT2('\\', ')')) { + NOTE(REG_UUNSPEC); + RET('$'); + } + RETV(PLAIN, c); + break; + case CHR('\\'): + break; /* see below */ + default: + RETV(PLAIN, c); + break; + } - assert(c == CHR('\\')); + assert(c == CHR('\\')); - if (ATEOS()) - FAILW(REG_EESCAPE); + if (ATEOS()) { + FAILW(REG_EESCAPE); + } - c = *v->now++; - switch (c) { - case CHR('{'): - INTOCON(L_BBND); - NOTE(REG_UBOUNDS); - RET('{'); - break; - case CHR('('): - RETV('(', 1); - break; - case CHR(')'): - RETV(')', c); - break; - case CHR('<'): - NOTE(REG_UNONPOSIX); - RET('<'); - break; - case CHR('>'): - NOTE(REG_UNONPOSIX); - RET('>'); - 
break; - case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): - case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): - case CHR('9'): - NOTE(REG_UBACKREF); - RETV(BACKREF, (chr)DIGITVAL(c)); - break; - default: - if (iscalnum(c)) { - NOTE(REG_UBSALNUM); - NOTE(REG_UUNSPEC); - } - RETV(PLAIN, c); - break; + c = *v->now++; + switch (c) { + case CHR('{'): + INTOCON(L_BBND); + NOTE(REG_UBOUNDS); + RET('{'); + break; + case CHR('('): + RETV('(', 1); + break; + case CHR(')'): + RETV(')', c); + break; + case CHR('<'): + NOTE(REG_UNONPOSIX); + RET('<'); + break; + case CHR('>'): + NOTE(REG_UNONPOSIX); + RET('>'); + break; + case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): + case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): + case CHR('9'): + NOTE(REG_UBACKREF); + RETV(BACKREF, (chr)DIGITVAL(c)); + break; + default: + if (iscalnum(c)) { + NOTE(REG_UBSALNUM); + NOTE(REG_UUNSPEC); } + RETV(PLAIN, c); + break; + } - assert(NOTREACHED); + assert(NOTREACHED); } - + /* - skip - skip white space and comments in expanded form ^ static VOID skip(struct vars *); */ -static VOID -skip(v) -struct vars *v; +static void +skip( + struct vars *v) { - chr *start = v->now; - - assert(v->cflags&REG_EXPANDED); - - for (;;) { - while (!ATEOS() && iscspace(*v->now)) - v->now++; - if (ATEOS() || *v->now != CHR('#')) - break; /* NOTE BREAK OUT */ - assert(NEXT1('#')); - while (!ATEOS() && *v->now != CHR('\n')) - v->now++; - /* leave the newline to be picked up by the iscspace loop */ + chr *start = v->now; + + assert(v->cflags&REG_EXPANDED); + + for (;;) { + while (!ATEOS() && iscspace(*v->now)) { + v->now++; + } + if (ATEOS() || *v->now != CHR('#')) { + break; /* NOTE BREAK OUT */ + } + assert(NEXT1('#')); + while (!ATEOS() && *v->now != CHR('\n')) { + v->now++; } - if (v->now != start) - NOTE(REG_UNONPOSIX); -} + /* + * Leave the newline to be picked up by the iscspace loop. 
+ */ + } + if (v->now != start) { + NOTE(REG_UNONPOSIX); + } +} + /* - newline - return the chr for a newline * This helps confine use of CHR to this source file. ^ static chr newline(NOPARMS); */ static chr -newline() +newline(void) { - return CHR('\n'); + return CHR('\n'); } - + /* - ch - return the chr sequence for regc_locale.c's fake collating element ch * This helps confine use of CHR to this source file. Beware that the caller @@ -1019,14 +1133,14 @@ newline() */ #ifdef REG_DEBUG static chr * -ch() +ch(void) { - static chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') }; + static chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') }; - return chstr; + return chstr; } #endif - + /* - chrnamed - return the chr known by a given (chr string) name * The code is a bit clumsy, but this routine gets only such specialized @@ -1034,28 +1148,38 @@ ch() ^ static chr chrnamed(struct vars *, chr *, chr *, pchr); */ static chr -chrnamed(v, startp, endp, lastresort) -struct vars *v; -chr *startp; /* start of name */ -chr *endp; /* just past end of name */ -pchr lastresort; /* what to return if name lookup fails */ +chrnamed( + struct vars *v, + chr *startp, /* start of name */ + chr *endp, /* just past end of name */ + pchr lastresort) /* what to return if name lookup fails */ { - celt c; - int errsave; - int e; - struct cvec *cv; - - errsave = v->err; - v->err = 0; - c = element(v, startp, endp); - e = v->err; - v->err = errsave; - - if (e != 0) - return (chr)lastresort; - - cv = range(v, c, c, 0); - if (cv->nchrs == 0) - return (chr)lastresort; - return cv->chrs[0]; + celt c; + int errsave; + int e; + struct cvec *cv; + + errsave = v->err; + v->err = 0; + c = element(v, startp, endp); + e = v->err; + v->err = errsave; + + if (e != 0) { + return (chr)lastresort; + } + + cv = range(v, c, c, 0); + if (cv->nchrs == 0) { + return (chr)lastresort; + } + return cv->chrs[0]; } + +/* + * Local Variables: + * mode: c + * c-basic-offset: 4 + * fill-column: 78 + * End: + */ diff --git 
a/generic/regc_locale.c b/generic/regc_locale.c index 50f4792..9652028 100644 --- a/generic/regc_locale.c +++ b/generic/regc_locale.c @@ -1,4 +1,4 @@ -/* +/* * regc_locale.c -- * * This file contains the Unicode locale specific regexp routines. @@ -6,12 +6,12 @@ * * Copyright (c) 1998 by Scriptics Corporation. * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. + * See the file "license.terms" for information on usage and redistribution of + * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: regc_locale.c,v 1.11 2004/02/23 10:43:23 dkf Exp $ + * RCS: @(#) $Id: regc_locale.c,v 1.12 2005/11/09 16:36:14 dkf Exp $ */ - + /* ASCII character-name table */ static struct cname { @@ -115,8 +115,10 @@ static struct cname { {"DEL", '\177'}, {NULL, 0} }; - -/* Unicode character-class tables */ + +/* + * Unicode character-class tables. + */ typedef struct crange { chr start; @@ -132,112 +134,120 @@ typedef struct crange { /* Unicode: alphabetic characters */ static crange alphaRangeTable[] = { - {0x0041, 0x005a}, {0x0061, 0x007a}, {0x00c0, 0x00d6}, {0x00d8, 0x00f6}, - {0x00f8, 0x021f}, {0x0222, 0x0233}, {0x0250, 0x02ad}, {0x02b0, 0x02b8}, - {0x02bb, 0x02c1}, {0x02e0, 0x02e4}, {0x0388, 0x038a}, {0x038e, 0x03a1}, - {0x03a3, 0x03ce}, {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x0481}, - {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0561, 0x0587}, - {0x05d0, 0x05ea}, {0x05f0, 0x05f2}, {0x0621, 0x063a}, {0x0640, 0x064a}, - {0x0671, 0x06d3}, {0x06fa, 0x06fc}, {0x0712, 0x072c}, {0x0780, 0x07a5}, - {0x0905, 0x0939}, {0x0958, 0x0961}, {0x0985, 0x098c}, {0x0993, 0x09a8}, - {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09df, 0x09e1}, {0x0a05, 0x0a0a}, - {0x0a13, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a59, 0x0a5c}, {0x0a72, 0x0a74}, - {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0}, - {0x0ab5, 0x0ab9}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b28}, {0x0b2a, 0x0b30}, - 
{0x0b36, 0x0b39}, {0x0b5f, 0x0b61}, {0x0b85, 0x0b8a}, {0x0b8e, 0x0b90}, - {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5}, {0x0bb7, 0x0bb9}, - {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, {0x0c12, 0x0c28}, {0x0c2a, 0x0c33}, - {0x0c35, 0x0c39}, {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8}, - {0x0caa, 0x0cb3}, {0x0cb5, 0x0cb9}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, - {0x0d12, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1}, - {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0e01, 0x0e30}, {0x0e40, 0x0e46}, - {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb0}, - {0x0ec0, 0x0ec4}, {0x0f40, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f88, 0x0f8b}, - {0x1000, 0x1021}, {0x1023, 0x1027}, {0x1050, 0x1055}, {0x10a0, 0x10c5}, - {0x10d0, 0x10f6}, {0x1100, 0x1159}, {0x115f, 0x11a2}, {0x11a8, 0x11f9}, - {0x1200, 0x1206}, {0x1208, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256}, - {0x125a, 0x125d}, {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae}, - {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce}, - {0x12d0, 0x12d6}, {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315}, - {0x1318, 0x131e}, {0x1320, 0x1346}, {0x1348, 0x135a}, {0x13a0, 0x13f4}, - {0x1401, 0x166c}, {0x166f, 0x1676}, {0x1681, 0x169a}, {0x16a0, 0x16ea}, - {0x1780, 0x17b3}, {0x1820, 0x1877}, {0x1880, 0x18a8}, {0x1e00, 0x1e9b}, - {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, - {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, - {0x1fb6, 0x1fbc}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3}, - {0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc}, - {0x210a, 0x2113}, {0x2119, 0x211d}, {0x212a, 0x212d}, {0x212f, 0x2131}, - {0x2133, 0x2139}, {0x3031, 0x3035}, {0x3041, 0x3094}, {0x30a1, 0x30fa}, - {0x30fc, 0x30fe}, {0x3105, 0x312c}, {0x3131, 0x318e}, {0x31a0, 0x31b7}, - {0x3400, 0x4db5}, {0x4e00, 0x9fa5}, {0xa000, 0xa48c}, {0xac00, 0xd7a3}, - {0xf900, 0xfa2d}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1f, 0xfb28}, - 
{0xfb2a, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfd3d}, - {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, {0xfe70, 0xfe72}, - {0xfe76, 0xfefc}, {0xff21, 0xff3a}, {0xff41, 0xff5a}, {0xff66, 0xffbe}, + {0x0041, 0x005a}, {0x0061, 0x007a}, {0x00c0, 0x00d6}, {0x00d8, 0x00f6}, + {0x00f8, 0x021f}, {0x0222, 0x0233}, {0x0250, 0x02ad}, {0x02b0, 0x02b8}, + {0x02bb, 0x02c1}, {0x02e0, 0x02e4}, {0x0388, 0x038a}, {0x038e, 0x03a1}, + {0x03a3, 0x03ce}, {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x0481}, + {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0561, 0x0587}, + {0x05d0, 0x05ea}, {0x05f0, 0x05f2}, {0x0621, 0x063a}, {0x0640, 0x064a}, + {0x0671, 0x06d3}, {0x06fa, 0x06fc}, {0x0712, 0x072c}, {0x0780, 0x07a5}, + {0x0905, 0x0939}, {0x0958, 0x0961}, {0x0985, 0x098c}, {0x0993, 0x09a8}, + {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09df, 0x09e1}, {0x0a05, 0x0a0a}, + {0x0a13, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a59, 0x0a5c}, {0x0a72, 0x0a74}, + {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0}, + {0x0ab5, 0x0ab9}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b28}, {0x0b2a, 0x0b30}, + {0x0b36, 0x0b39}, {0x0b5f, 0x0b61}, {0x0b85, 0x0b8a}, {0x0b8e, 0x0b90}, + {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5}, {0x0bb7, 0x0bb9}, + {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, {0x0c12, 0x0c28}, {0x0c2a, 0x0c33}, + {0x0c35, 0x0c39}, {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8}, + {0x0caa, 0x0cb3}, {0x0cb5, 0x0cb9}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, + {0x0d12, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1}, + {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0e01, 0x0e30}, {0x0e40, 0x0e46}, + {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb0}, + {0x0ec0, 0x0ec4}, {0x0f40, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f88, 0x0f8b}, + {0x1000, 0x1021}, {0x1023, 0x1027}, {0x1050, 0x1055}, {0x10a0, 0x10c5}, + {0x10d0, 0x10f6}, {0x1100, 0x1159}, {0x115f, 0x11a2}, {0x11a8, 0x11f9}, + {0x1200, 0x1206}, {0x1208, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256}, + 
{0x125a, 0x125d}, {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae}, + {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce}, + {0x12d0, 0x12d6}, {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315}, + {0x1318, 0x131e}, {0x1320, 0x1346}, {0x1348, 0x135a}, {0x13a0, 0x13f4}, + {0x1401, 0x166c}, {0x166f, 0x1676}, {0x1681, 0x169a}, {0x16a0, 0x16ea}, + {0x1780, 0x17b3}, {0x1820, 0x1877}, {0x1880, 0x18a8}, {0x1e00, 0x1e9b}, + {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, + {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, + {0x1fb6, 0x1fbc}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3}, + {0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc}, + {0x210a, 0x2113}, {0x2119, 0x211d}, {0x212a, 0x212d}, {0x212f, 0x2131}, + {0x2133, 0x2139}, {0x3031, 0x3035}, {0x3041, 0x3094}, {0x30a1, 0x30fa}, + {0x30fc, 0x30fe}, {0x3105, 0x312c}, {0x3131, 0x318e}, {0x31a0, 0x31b7}, + {0x3400, 0x4db5}, {0x4e00, 0x9fa5}, {0xa000, 0xa48c}, {0xac00, 0xd7a3}, + {0xf900, 0xfa2d}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1f, 0xfb28}, + {0xfb2a, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfd3d}, + {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, {0xfe70, 0xfe72}, + {0xfe76, 0xfefc}, {0xff21, 0xff3a}, {0xff41, 0xff5a}, {0xff66, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc} }; #define NUM_ALPHA_RANGE (sizeof(alphaRangeTable)/sizeof(crange)) static chr alphaCharTable[] = { - 0x00aa, 0x00b5, 0x00ba, 0x02d0, 0x02d1, 0x02ee, 0x037a, 0x0386, 0x038c, - 0x04c7, 0x04c8, 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0559, 0x06d5, 0x06e5, - 0x06e6, 0x0710, 0x093d, 0x0950, 0x098f, 0x0990, 0x09b2, 0x09dc, 0x09dd, - 0x09f0, 0x09f1, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38, - 0x0a39, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0abd, 0x0ad0, 0x0ae0, 0x0b0f, - 0x0b10, 0x0b32, 0x0b33, 0x0b3d, 0x0b5c, 0x0b5d, 0x0b99, 0x0b9a, 0x0b9c, - 0x0b9e, 0x0b9f, 0x0ba3, 0x0ba4, 0x0c60, 0x0c61, 
0x0cde, 0x0ce0, 0x0ce1, - 0x0d60, 0x0d61, 0x0dbd, 0x0e32, 0x0e33, 0x0e81, 0x0e82, 0x0e84, 0x0e87, - 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0eb2, 0x0eb3, - 0x0ebd, 0x0ec6, 0x0edc, 0x0edd, 0x0f00, 0x1029, 0x102a, 0x1248, 0x1258, - 0x1288, 0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe, 0x207f, - 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x3005, 0x3006, 0x309d, + 0x00aa, 0x00b5, 0x00ba, 0x02d0, 0x02d1, 0x02ee, 0x037a, 0x0386, 0x038c, + 0x04c7, 0x04c8, 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0559, 0x06d5, 0x06e5, + 0x06e6, 0x0710, 0x093d, 0x0950, 0x098f, 0x0990, 0x09b2, 0x09dc, 0x09dd, + 0x09f0, 0x09f1, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38, + 0x0a39, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0abd, 0x0ad0, 0x0ae0, 0x0b0f, + 0x0b10, 0x0b32, 0x0b33, 0x0b3d, 0x0b5c, 0x0b5d, 0x0b99, 0x0b9a, 0x0b9c, + 0x0b9e, 0x0b9f, 0x0ba3, 0x0ba4, 0x0c60, 0x0c61, 0x0cde, 0x0ce0, 0x0ce1, + 0x0d60, 0x0d61, 0x0dbd, 0x0e32, 0x0e33, 0x0e81, 0x0e82, 0x0e84, 0x0e87, + 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0eb2, 0x0eb3, + 0x0ebd, 0x0ec6, 0x0edc, 0x0edd, 0x0f00, 0x1029, 0x102a, 0x1248, 0x1258, + 0x1288, 0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe, 0x207f, + 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x3005, 0x3006, 0x309d, 0x309e, 0xfb1d, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74, 0xfffe }; #define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr)) -/* Unicode: decimal digit characters */ +/* + * Unicode: decimal digit characters + */ static crange digitRangeTable[] = { - {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06f0, 0x06f9}, {0x0966, 0x096f}, - {0x09e6, 0x09ef}, {0x0a66, 0x0a6f}, {0x0ae6, 0x0aef}, {0x0b66, 0x0b6f}, - {0x0be7, 0x0bef}, {0x0c66, 0x0c6f}, {0x0ce6, 0x0cef}, {0x0d66, 0x0d6f}, - {0x0e50, 0x0e59}, {0x0ed0, 0x0ed9}, {0x0f20, 0x0f29}, {0x1040, 0x1049}, + {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06f0, 0x06f9}, {0x0966, 0x096f}, + {0x09e6, 0x09ef}, {0x0a66, 0x0a6f}, {0x0ae6, 0x0aef}, {0x0b66, 0x0b6f}, + {0x0be7, 
0x0bef}, {0x0c66, 0x0c6f}, {0x0ce6, 0x0cef}, {0x0d66, 0x0d6f}, + {0x0e50, 0x0e59}, {0x0ed0, 0x0ed9}, {0x0f20, 0x0f29}, {0x1040, 0x1049}, {0x1369, 0x1371}, {0x17e0, 0x17e9}, {0x1810, 0x1819}, {0xff10, 0xff19} }; #define NUM_DIGIT_RANGE (sizeof(digitRangeTable)/sizeof(crange)) -/* no singletons of digit characters */ +/* + * no singletons of digit characters. + */ -/* Unicode: punctuation characters */ +/* + * Unicode: punctuation characters. + */ static crange punctRangeTable[] = { - {0x0021, 0x0023}, {0x0025, 0x002a}, {0x002c, 0x002f}, {0x005b, 0x005d}, - {0x055a, 0x055f}, {0x066a, 0x066d}, {0x0700, 0x070d}, {0x0f04, 0x0f12}, - {0x0f3a, 0x0f3d}, {0x104a, 0x104f}, {0x1361, 0x1368}, {0x16eb, 0x16ed}, - {0x17d4, 0x17da}, {0x1800, 0x180a}, {0x2010, 0x2027}, {0x2030, 0x2043}, - {0x2048, 0x204d}, {0x3001, 0x3003}, {0x3008, 0x3011}, {0x3014, 0x301f}, - {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe61}, {0xff01, 0xff03}, + {0x0021, 0x0023}, {0x0025, 0x002a}, {0x002c, 0x002f}, {0x005b, 0x005d}, + {0x055a, 0x055f}, {0x066a, 0x066d}, {0x0700, 0x070d}, {0x0f04, 0x0f12}, + {0x0f3a, 0x0f3d}, {0x104a, 0x104f}, {0x1361, 0x1368}, {0x16eb, 0x16ed}, + {0x17d4, 0x17da}, {0x1800, 0x180a}, {0x2010, 0x2027}, {0x2030, 0x2043}, + {0x2048, 0x204d}, {0x3001, 0x3003}, {0x3008, 0x3011}, {0x3014, 0x301f}, + {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe61}, {0xff01, 0xff03}, {0xff05, 0xff0a}, {0xff0c, 0xff0f}, {0xff3b, 0xff3d}, {0xff61, 0xff65} }; #define NUM_PUNCT_RANGE (sizeof(punctRangeTable)/sizeof(crange)) static chr punctCharTable[] = { - 0x003a, 0x003b, 0x003f, 0x0040, 0x005f, 0x007b, 0x007d, 0x00a1, 0x00ab, - 0x00ad, 0x00b7, 0x00bb, 0x00bf, 0x037e, 0x0387, 0x0589, 0x058a, 0x05be, - 0x05c0, 0x05c3, 0x05f3, 0x05f4, 0x060c, 0x061b, 0x061f, 0x06d4, 0x0964, - 0x0965, 0x0970, 0x0df4, 0x0e4f, 0x0e5a, 0x0e5b, 0x0f85, 0x10fb, 0x166d, - 0x166e, 0x169b, 0x169c, 0x17dc, 0x2045, 0x2046, 0x207d, 0x207e, 0x208d, - 0x208e, 0x2329, 0x232a, 0x3030, 0x30fb, 0xfd3e, 0xfd3f, 0xfe63, 0xfe68, + 0x003a, 
0x003b, 0x003f, 0x0040, 0x005f, 0x007b, 0x007d, 0x00a1, 0x00ab, + 0x00ad, 0x00b7, 0x00bb, 0x00bf, 0x037e, 0x0387, 0x0589, 0x058a, 0x05be, + 0x05c0, 0x05c3, 0x05f3, 0x05f4, 0x060c, 0x061b, 0x061f, 0x06d4, 0x0964, + 0x0965, 0x0970, 0x0df4, 0x0e4f, 0x0e5a, 0x0e5b, 0x0f85, 0x10fb, 0x166d, + 0x166e, 0x169b, 0x169c, 0x17dc, 0x2045, 0x2046, 0x207d, 0x207e, 0x208d, + 0x208e, 0x2329, 0x232a, 0x3030, 0x30fb, 0xfd3e, 0xfd3f, 0xfe63, 0xfe68, 0xfe6a, 0xfe6b, 0xff1a, 0xff1b, 0xff1f, 0xff20, 0xff3f, 0xff5b, 0xff5d }; #define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(chr)) -/* Unicode: white space characters */ +/* + * Unicode: white space characters. + */ static crange spaceRangeTable[] = { {0x0009, 0x000d}, {0x2000, 0x200b} @@ -251,264 +261,270 @@ static chr spaceCharTable[] = { #define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(chr)) -/* Unicode: lowercase characters */ +/* + * Unicode: lowercase characters + */ static crange lowerRangeTable[] = { - {0x0061, 0x007a}, {0x00df, 0x00f6}, {0x00f8, 0x00ff}, {0x017e, 0x0180}, - {0x0199, 0x019b}, {0x01bd, 0x01bf}, {0x0250, 0x02ad}, {0x03ac, 0x03ce}, - {0x03d5, 0x03d7}, {0x03ef, 0x03f3}, {0x0430, 0x045f}, {0x0561, 0x0587}, - {0x1e95, 0x1e9b}, {0x1f00, 0x1f07}, {0x1f10, 0x1f15}, {0x1f20, 0x1f27}, - {0x1f30, 0x1f37}, {0x1f40, 0x1f45}, {0x1f50, 0x1f57}, {0x1f60, 0x1f67}, - {0x1f70, 0x1f7d}, {0x1f80, 0x1f87}, {0x1f90, 0x1f97}, {0x1fa0, 0x1fa7}, - {0x1fb0, 0x1fb4}, {0x1fc2, 0x1fc4}, {0x1fd0, 0x1fd3}, {0x1fe0, 0x1fe7}, + {0x0061, 0x007a}, {0x00df, 0x00f6}, {0x00f8, 0x00ff}, {0x017e, 0x0180}, + {0x0199, 0x019b}, {0x01bd, 0x01bf}, {0x0250, 0x02ad}, {0x03ac, 0x03ce}, + {0x03d5, 0x03d7}, {0x03ef, 0x03f3}, {0x0430, 0x045f}, {0x0561, 0x0587}, + {0x1e95, 0x1e9b}, {0x1f00, 0x1f07}, {0x1f10, 0x1f15}, {0x1f20, 0x1f27}, + {0x1f30, 0x1f37}, {0x1f40, 0x1f45}, {0x1f50, 0x1f57}, {0x1f60, 0x1f67}, + {0x1f70, 0x1f7d}, {0x1f80, 0x1f87}, {0x1f90, 0x1f97}, {0x1fa0, 0x1fa7}, + {0x1fb0, 0x1fb4}, {0x1fc2, 0x1fc4}, {0x1fd0, 0x1fd3}, {0x1fe0, 0x1fe7}, 
{0x1ff2, 0x1ff4}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xff41, 0xff5a} }; #define NUM_LOWER_RANGE (sizeof(lowerRangeTable)/sizeof(crange)) static chr lowerCharTable[] = { - 0x00aa, 0x00b5, 0x00ba, 0x0101, 0x0103, 0x0105, 0x0107, 0x0109, 0x010b, - 0x010d, 0x010f, 0x0111, 0x0113, 0x0115, 0x0117, 0x0119, 0x011b, 0x011d, - 0x011f, 0x0121, 0x0123, 0x0125, 0x0127, 0x0129, 0x012b, 0x012d, 0x012f, - 0x0131, 0x0133, 0x0135, 0x0137, 0x0138, 0x013a, 0x013c, 0x013e, 0x0140, - 0x0142, 0x0144, 0x0146, 0x0148, 0x0149, 0x014b, 0x014d, 0x014f, 0x0151, - 0x0153, 0x0155, 0x0157, 0x0159, 0x015b, 0x015d, 0x015f, 0x0161, 0x0163, - 0x0165, 0x0167, 0x0169, 0x016b, 0x016d, 0x016f, 0x0171, 0x0173, 0x0175, - 0x0177, 0x017a, 0x017c, 0x0183, 0x0185, 0x0188, 0x018c, 0x018d, 0x0192, - 0x0195, 0x019e, 0x01a1, 0x01a3, 0x01a5, 0x01a8, 0x01aa, 0x01ab, 0x01ad, - 0x01b0, 0x01b4, 0x01b6, 0x01b9, 0x01ba, 0x01c6, 0x01c9, 0x01cc, 0x01ce, - 0x01d0, 0x01d2, 0x01d4, 0x01d6, 0x01d8, 0x01da, 0x01dc, 0x01dd, 0x01df, - 0x01e1, 0x01e3, 0x01e5, 0x01e7, 0x01e9, 0x01eb, 0x01ed, 0x01ef, 0x01f0, - 0x01f3, 0x01f5, 0x01f9, 0x01fb, 0x01fd, 0x01ff, 0x0201, 0x0203, 0x0205, - 0x0207, 0x0209, 0x020b, 0x020d, 0x020f, 0x0211, 0x0213, 0x0215, 0x0217, - 0x0219, 0x021b, 0x021d, 0x021f, 0x0223, 0x0225, 0x0227, 0x0229, 0x022b, - 0x022d, 0x022f, 0x0231, 0x0233, 0x0390, 0x03d0, 0x03d1, 0x03db, 0x03dd, - 0x03df, 0x03e1, 0x03e3, 0x03e5, 0x03e7, 0x03e9, 0x03eb, 0x03ed, 0x03f5, - 0x0461, 0x0463, 0x0465, 0x0467, 0x0469, 0x046b, 0x046d, 0x046f, 0x0471, - 0x0473, 0x0475, 0x0477, 0x0479, 0x047b, 0x047d, 0x047f, 0x0481, 0x048d, - 0x048f, 0x0491, 0x0493, 0x0495, 0x0497, 0x0499, 0x049b, 0x049d, 0x049f, - 0x04a1, 0x04a3, 0x04a5, 0x04a7, 0x04a9, 0x04ab, 0x04ad, 0x04af, 0x04b1, - 0x04b3, 0x04b5, 0x04b7, 0x04b9, 0x04bb, 0x04bd, 0x04bf, 0x04c2, 0x04c4, - 0x04c8, 0x04cc, 0x04d1, 0x04d3, 0x04d5, 0x04d7, 0x04d9, 0x04db, 0x04dd, - 0x04df, 0x04e1, 0x04e3, 0x04e5, 0x04e7, 0x04e9, 0x04eb, 0x04ed, 0x04ef, - 0x04f1, 0x04f3, 0x04f5, 0x04f9, 0x1e01, 0x1e03, 
0x1e05, 0x1e07, 0x1e09, - 0x1e0b, 0x1e0d, 0x1e0f, 0x1e11, 0x1e13, 0x1e15, 0x1e17, 0x1e19, 0x1e1b, - 0x1e1d, 0x1e1f, 0x1e21, 0x1e23, 0x1e25, 0x1e27, 0x1e29, 0x1e2b, 0x1e2d, - 0x1e2f, 0x1e31, 0x1e33, 0x1e35, 0x1e37, 0x1e39, 0x1e3b, 0x1e3d, 0x1e3f, - 0x1e41, 0x1e43, 0x1e45, 0x1e47, 0x1e49, 0x1e4b, 0x1e4d, 0x1e4f, 0x1e51, - 0x1e53, 0x1e55, 0x1e57, 0x1e59, 0x1e5b, 0x1e5d, 0x1e5f, 0x1e61, 0x1e63, - 0x1e65, 0x1e67, 0x1e69, 0x1e6b, 0x1e6d, 0x1e6f, 0x1e71, 0x1e73, 0x1e75, - 0x1e77, 0x1e79, 0x1e7b, 0x1e7d, 0x1e7f, 0x1e81, 0x1e83, 0x1e85, 0x1e87, - 0x1e89, 0x1e8b, 0x1e8d, 0x1e8f, 0x1e91, 0x1e93, 0x1ea1, 0x1ea3, 0x1ea5, - 0x1ea7, 0x1ea9, 0x1eab, 0x1ead, 0x1eaf, 0x1eb1, 0x1eb3, 0x1eb5, 0x1eb7, - 0x1eb9, 0x1ebb, 0x1ebd, 0x1ebf, 0x1ec1, 0x1ec3, 0x1ec5, 0x1ec7, 0x1ec9, - 0x1ecb, 0x1ecd, 0x1ecf, 0x1ed1, 0x1ed3, 0x1ed5, 0x1ed7, 0x1ed9, 0x1edb, - 0x1edd, 0x1edf, 0x1ee1, 0x1ee3, 0x1ee5, 0x1ee7, 0x1ee9, 0x1eeb, 0x1eed, - 0x1eef, 0x1ef1, 0x1ef3, 0x1ef5, 0x1ef7, 0x1ef9, 0x1fb6, 0x1fb7, 0x1fbe, - 0x1fc6, 0x1fc7, 0x1fd6, 0x1fd7, 0x1ff6, 0x1ff7, 0x207f, 0x210a, 0x210e, + 0x00aa, 0x00b5, 0x00ba, 0x0101, 0x0103, 0x0105, 0x0107, 0x0109, 0x010b, + 0x010d, 0x010f, 0x0111, 0x0113, 0x0115, 0x0117, 0x0119, 0x011b, 0x011d, + 0x011f, 0x0121, 0x0123, 0x0125, 0x0127, 0x0129, 0x012b, 0x012d, 0x012f, + 0x0131, 0x0133, 0x0135, 0x0137, 0x0138, 0x013a, 0x013c, 0x013e, 0x0140, + 0x0142, 0x0144, 0x0146, 0x0148, 0x0149, 0x014b, 0x014d, 0x014f, 0x0151, + 0x0153, 0x0155, 0x0157, 0x0159, 0x015b, 0x015d, 0x015f, 0x0161, 0x0163, + 0x0165, 0x0167, 0x0169, 0x016b, 0x016d, 0x016f, 0x0171, 0x0173, 0x0175, + 0x0177, 0x017a, 0x017c, 0x0183, 0x0185, 0x0188, 0x018c, 0x018d, 0x0192, + 0x0195, 0x019e, 0x01a1, 0x01a3, 0x01a5, 0x01a8, 0x01aa, 0x01ab, 0x01ad, + 0x01b0, 0x01b4, 0x01b6, 0x01b9, 0x01ba, 0x01c6, 0x01c9, 0x01cc, 0x01ce, + 0x01d0, 0x01d2, 0x01d4, 0x01d6, 0x01d8, 0x01da, 0x01dc, 0x01dd, 0x01df, + 0x01e1, 0x01e3, 0x01e5, 0x01e7, 0x01e9, 0x01eb, 0x01ed, 0x01ef, 0x01f0, + 0x01f3, 0x01f5, 0x01f9, 0x01fb, 0x01fd, 0x01ff, 
0x0201, 0x0203, 0x0205, + 0x0207, 0x0209, 0x020b, 0x020d, 0x020f, 0x0211, 0x0213, 0x0215, 0x0217, + 0x0219, 0x021b, 0x021d, 0x021f, 0x0223, 0x0225, 0x0227, 0x0229, 0x022b, + 0x022d, 0x022f, 0x0231, 0x0233, 0x0390, 0x03d0, 0x03d1, 0x03db, 0x03dd, + 0x03df, 0x03e1, 0x03e3, 0x03e5, 0x03e7, 0x03e9, 0x03eb, 0x03ed, 0x03f5, + 0x0461, 0x0463, 0x0465, 0x0467, 0x0469, 0x046b, 0x046d, 0x046f, 0x0471, + 0x0473, 0x0475, 0x0477, 0x0479, 0x047b, 0x047d, 0x047f, 0x0481, 0x048d, + 0x048f, 0x0491, 0x0493, 0x0495, 0x0497, 0x0499, 0x049b, 0x049d, 0x049f, + 0x04a1, 0x04a3, 0x04a5, 0x04a7, 0x04a9, 0x04ab, 0x04ad, 0x04af, 0x04b1, + 0x04b3, 0x04b5, 0x04b7, 0x04b9, 0x04bb, 0x04bd, 0x04bf, 0x04c2, 0x04c4, + 0x04c8, 0x04cc, 0x04d1, 0x04d3, 0x04d5, 0x04d7, 0x04d9, 0x04db, 0x04dd, + 0x04df, 0x04e1, 0x04e3, 0x04e5, 0x04e7, 0x04e9, 0x04eb, 0x04ed, 0x04ef, + 0x04f1, 0x04f3, 0x04f5, 0x04f9, 0x1e01, 0x1e03, 0x1e05, 0x1e07, 0x1e09, + 0x1e0b, 0x1e0d, 0x1e0f, 0x1e11, 0x1e13, 0x1e15, 0x1e17, 0x1e19, 0x1e1b, + 0x1e1d, 0x1e1f, 0x1e21, 0x1e23, 0x1e25, 0x1e27, 0x1e29, 0x1e2b, 0x1e2d, + 0x1e2f, 0x1e31, 0x1e33, 0x1e35, 0x1e37, 0x1e39, 0x1e3b, 0x1e3d, 0x1e3f, + 0x1e41, 0x1e43, 0x1e45, 0x1e47, 0x1e49, 0x1e4b, 0x1e4d, 0x1e4f, 0x1e51, + 0x1e53, 0x1e55, 0x1e57, 0x1e59, 0x1e5b, 0x1e5d, 0x1e5f, 0x1e61, 0x1e63, + 0x1e65, 0x1e67, 0x1e69, 0x1e6b, 0x1e6d, 0x1e6f, 0x1e71, 0x1e73, 0x1e75, + 0x1e77, 0x1e79, 0x1e7b, 0x1e7d, 0x1e7f, 0x1e81, 0x1e83, 0x1e85, 0x1e87, + 0x1e89, 0x1e8b, 0x1e8d, 0x1e8f, 0x1e91, 0x1e93, 0x1ea1, 0x1ea3, 0x1ea5, + 0x1ea7, 0x1ea9, 0x1eab, 0x1ead, 0x1eaf, 0x1eb1, 0x1eb3, 0x1eb5, 0x1eb7, + 0x1eb9, 0x1ebb, 0x1ebd, 0x1ebf, 0x1ec1, 0x1ec3, 0x1ec5, 0x1ec7, 0x1ec9, + 0x1ecb, 0x1ecd, 0x1ecf, 0x1ed1, 0x1ed3, 0x1ed5, 0x1ed7, 0x1ed9, 0x1edb, + 0x1edd, 0x1edf, 0x1ee1, 0x1ee3, 0x1ee5, 0x1ee7, 0x1ee9, 0x1eeb, 0x1eed, + 0x1eef, 0x1ef1, 0x1ef3, 0x1ef5, 0x1ef7, 0x1ef9, 0x1fb6, 0x1fb7, 0x1fbe, + 0x1fc6, 0x1fc7, 0x1fd6, 0x1fd7, 0x1ff6, 0x1ff7, 0x207f, 0x210a, 0x210e, 0x210f, 0x2113, 0x212f, 0x2134, 0x2139 }; #define 
NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(chr)) -/* Unicode: uppercase characters */ +/* + * Unicode: uppercase characters. + */ static crange upperRangeTable[] = { - {0x0041, 0x005a}, {0x00c0, 0x00d6}, {0x00d8, 0x00de}, {0x0189, 0x018b}, - {0x018e, 0x0191}, {0x0196, 0x0198}, {0x01b1, 0x01b3}, {0x01f6, 0x01f8}, - {0x0388, 0x038a}, {0x0391, 0x03a1}, {0x03a3, 0x03ab}, {0x03d2, 0x03d4}, - {0x0400, 0x042f}, {0x0531, 0x0556}, {0x10a0, 0x10c5}, {0x1f08, 0x1f0f}, - {0x1f18, 0x1f1d}, {0x1f28, 0x1f2f}, {0x1f38, 0x1f3f}, {0x1f48, 0x1f4d}, - {0x1f68, 0x1f6f}, {0x1fb8, 0x1fbb}, {0x1fc8, 0x1fcb}, {0x1fd8, 0x1fdb}, - {0x1fe8, 0x1fec}, {0x1ff8, 0x1ffb}, {0x210b, 0x210d}, {0x2110, 0x2112}, + {0x0041, 0x005a}, {0x00c0, 0x00d6}, {0x00d8, 0x00de}, {0x0189, 0x018b}, + {0x018e, 0x0191}, {0x0196, 0x0198}, {0x01b1, 0x01b3}, {0x01f6, 0x01f8}, + {0x0388, 0x038a}, {0x0391, 0x03a1}, {0x03a3, 0x03ab}, {0x03d2, 0x03d4}, + {0x0400, 0x042f}, {0x0531, 0x0556}, {0x10a0, 0x10c5}, {0x1f08, 0x1f0f}, + {0x1f18, 0x1f1d}, {0x1f28, 0x1f2f}, {0x1f38, 0x1f3f}, {0x1f48, 0x1f4d}, + {0x1f68, 0x1f6f}, {0x1fb8, 0x1fbb}, {0x1fc8, 0x1fcb}, {0x1fd8, 0x1fdb}, + {0x1fe8, 0x1fec}, {0x1ff8, 0x1ffb}, {0x210b, 0x210d}, {0x2110, 0x2112}, {0x2119, 0x211d}, {0x212a, 0x212d}, {0xff21, 0xff3a} }; #define NUM_UPPER_RANGE (sizeof(upperRangeTable)/sizeof(crange)) static chr upperCharTable[] = { - 0x0100, 0x0102, 0x0104, 0x0106, 0x0108, 0x010a, 0x010c, 0x010e, 0x0110, - 0x0112, 0x0114, 0x0116, 0x0118, 0x011a, 0x011c, 0x011e, 0x0120, 0x0122, - 0x0124, 0x0126, 0x0128, 0x012a, 0x012c, 0x012e, 0x0130, 0x0132, 0x0134, - 0x0136, 0x0139, 0x013b, 0x013d, 0x013f, 0x0141, 0x0143, 0x0145, 0x0147, - 0x014a, 0x014c, 0x014e, 0x0150, 0x0152, 0x0154, 0x0156, 0x0158, 0x015a, - 0x015c, 0x015e, 0x0160, 0x0162, 0x0164, 0x0166, 0x0168, 0x016a, 0x016c, - 0x016e, 0x0170, 0x0172, 0x0174, 0x0176, 0x0178, 0x0179, 0x017b, 0x017d, - 0x0181, 0x0182, 0x0184, 0x0186, 0x0187, 0x0193, 0x0194, 0x019c, 0x019d, - 0x019f, 0x01a0, 0x01a2, 0x01a4, 0x01a6, 0x01a7, 
0x01a9, 0x01ac, 0x01ae, - 0x01af, 0x01b5, 0x01b7, 0x01b8, 0x01bc, 0x01c4, 0x01c7, 0x01ca, 0x01cd, - 0x01cf, 0x01d1, 0x01d3, 0x01d5, 0x01d7, 0x01d9, 0x01db, 0x01de, 0x01e0, - 0x01e2, 0x01e4, 0x01e6, 0x01e8, 0x01ea, 0x01ec, 0x01ee, 0x01f1, 0x01f4, - 0x01fa, 0x01fc, 0x01fe, 0x0200, 0x0202, 0x0204, 0x0206, 0x0208, 0x020a, - 0x020c, 0x020e, 0x0210, 0x0212, 0x0214, 0x0216, 0x0218, 0x021a, 0x021c, - 0x021e, 0x0222, 0x0224, 0x0226, 0x0228, 0x022a, 0x022c, 0x022e, 0x0230, - 0x0232, 0x0386, 0x038c, 0x038e, 0x038f, 0x03da, 0x03dc, 0x03de, 0x03e0, - 0x03e2, 0x03e4, 0x03e6, 0x03e8, 0x03ea, 0x03ec, 0x03ee, 0x03f4, 0x0460, - 0x0462, 0x0464, 0x0466, 0x0468, 0x046a, 0x046c, 0x046e, 0x0470, 0x0472, - 0x0474, 0x0476, 0x0478, 0x047a, 0x047c, 0x047e, 0x0480, 0x048c, 0x048e, - 0x0490, 0x0492, 0x0494, 0x0496, 0x0498, 0x049a, 0x049c, 0x049e, 0x04a0, - 0x04a2, 0x04a4, 0x04a6, 0x04a8, 0x04aa, 0x04ac, 0x04ae, 0x04b0, 0x04b2, - 0x04b4, 0x04b6, 0x04b8, 0x04ba, 0x04bc, 0x04be, 0x04c0, 0x04c1, 0x04c3, - 0x04c7, 0x04cb, 0x04d0, 0x04d2, 0x04d4, 0x04d6, 0x04d8, 0x04da, 0x04dc, - 0x04de, 0x04e0, 0x04e2, 0x04e4, 0x04e6, 0x04e8, 0x04ea, 0x04ec, 0x04ee, - 0x04f0, 0x04f2, 0x04f4, 0x04f8, 0x1e00, 0x1e02, 0x1e04, 0x1e06, 0x1e08, - 0x1e0a, 0x1e0c, 0x1e0e, 0x1e10, 0x1e12, 0x1e14, 0x1e16, 0x1e18, 0x1e1a, - 0x1e1c, 0x1e1e, 0x1e20, 0x1e22, 0x1e24, 0x1e26, 0x1e28, 0x1e2a, 0x1e2c, - 0x1e2e, 0x1e30, 0x1e32, 0x1e34, 0x1e36, 0x1e38, 0x1e3a, 0x1e3c, 0x1e3e, - 0x1e40, 0x1e42, 0x1e44, 0x1e46, 0x1e48, 0x1e4a, 0x1e4c, 0x1e4e, 0x1e50, - 0x1e52, 0x1e54, 0x1e56, 0x1e58, 0x1e5a, 0x1e5c, 0x1e5e, 0x1e60, 0x1e62, - 0x1e64, 0x1e66, 0x1e68, 0x1e6a, 0x1e6c, 0x1e6e, 0x1e70, 0x1e72, 0x1e74, - 0x1e76, 0x1e78, 0x1e7a, 0x1e7c, 0x1e7e, 0x1e80, 0x1e82, 0x1e84, 0x1e86, - 0x1e88, 0x1e8a, 0x1e8c, 0x1e8e, 0x1e90, 0x1e92, 0x1e94, 0x1ea0, 0x1ea2, - 0x1ea4, 0x1ea6, 0x1ea8, 0x1eaa, 0x1eac, 0x1eae, 0x1eb0, 0x1eb2, 0x1eb4, - 0x1eb6, 0x1eb8, 0x1eba, 0x1ebc, 0x1ebe, 0x1ec0, 0x1ec2, 0x1ec4, 0x1ec6, - 0x1ec8, 0x1eca, 0x1ecc, 0x1ece, 0x1ed0, 0x1ed2, 
0x1ed4, 0x1ed6, 0x1ed8, - 0x1eda, 0x1edc, 0x1ede, 0x1ee0, 0x1ee2, 0x1ee4, 0x1ee6, 0x1ee8, 0x1eea, - 0x1eec, 0x1eee, 0x1ef0, 0x1ef2, 0x1ef4, 0x1ef6, 0x1ef8, 0x1f59, 0x1f5b, - 0x1f5d, 0x1f5f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x2130, + 0x0100, 0x0102, 0x0104, 0x0106, 0x0108, 0x010a, 0x010c, 0x010e, 0x0110, + 0x0112, 0x0114, 0x0116, 0x0118, 0x011a, 0x011c, 0x011e, 0x0120, 0x0122, + 0x0124, 0x0126, 0x0128, 0x012a, 0x012c, 0x012e, 0x0130, 0x0132, 0x0134, + 0x0136, 0x0139, 0x013b, 0x013d, 0x013f, 0x0141, 0x0143, 0x0145, 0x0147, + 0x014a, 0x014c, 0x014e, 0x0150, 0x0152, 0x0154, 0x0156, 0x0158, 0x015a, + 0x015c, 0x015e, 0x0160, 0x0162, 0x0164, 0x0166, 0x0168, 0x016a, 0x016c, + 0x016e, 0x0170, 0x0172, 0x0174, 0x0176, 0x0178, 0x0179, 0x017b, 0x017d, + 0x0181, 0x0182, 0x0184, 0x0186, 0x0187, 0x0193, 0x0194, 0x019c, 0x019d, + 0x019f, 0x01a0, 0x01a2, 0x01a4, 0x01a6, 0x01a7, 0x01a9, 0x01ac, 0x01ae, + 0x01af, 0x01b5, 0x01b7, 0x01b8, 0x01bc, 0x01c4, 0x01c7, 0x01ca, 0x01cd, + 0x01cf, 0x01d1, 0x01d3, 0x01d5, 0x01d7, 0x01d9, 0x01db, 0x01de, 0x01e0, + 0x01e2, 0x01e4, 0x01e6, 0x01e8, 0x01ea, 0x01ec, 0x01ee, 0x01f1, 0x01f4, + 0x01fa, 0x01fc, 0x01fe, 0x0200, 0x0202, 0x0204, 0x0206, 0x0208, 0x020a, + 0x020c, 0x020e, 0x0210, 0x0212, 0x0214, 0x0216, 0x0218, 0x021a, 0x021c, + 0x021e, 0x0222, 0x0224, 0x0226, 0x0228, 0x022a, 0x022c, 0x022e, 0x0230, + 0x0232, 0x0386, 0x038c, 0x038e, 0x038f, 0x03da, 0x03dc, 0x03de, 0x03e0, + 0x03e2, 0x03e4, 0x03e6, 0x03e8, 0x03ea, 0x03ec, 0x03ee, 0x03f4, 0x0460, + 0x0462, 0x0464, 0x0466, 0x0468, 0x046a, 0x046c, 0x046e, 0x0470, 0x0472, + 0x0474, 0x0476, 0x0478, 0x047a, 0x047c, 0x047e, 0x0480, 0x048c, 0x048e, + 0x0490, 0x0492, 0x0494, 0x0496, 0x0498, 0x049a, 0x049c, 0x049e, 0x04a0, + 0x04a2, 0x04a4, 0x04a6, 0x04a8, 0x04aa, 0x04ac, 0x04ae, 0x04b0, 0x04b2, + 0x04b4, 0x04b6, 0x04b8, 0x04ba, 0x04bc, 0x04be, 0x04c0, 0x04c1, 0x04c3, + 0x04c7, 0x04cb, 0x04d0, 0x04d2, 0x04d4, 0x04d6, 0x04d8, 0x04da, 0x04dc, + 0x04de, 0x04e0, 0x04e2, 0x04e4, 0x04e6, 0x04e8, 
0x04ea, 0x04ec, 0x04ee, + 0x04f0, 0x04f2, 0x04f4, 0x04f8, 0x1e00, 0x1e02, 0x1e04, 0x1e06, 0x1e08, + 0x1e0a, 0x1e0c, 0x1e0e, 0x1e10, 0x1e12, 0x1e14, 0x1e16, 0x1e18, 0x1e1a, + 0x1e1c, 0x1e1e, 0x1e20, 0x1e22, 0x1e24, 0x1e26, 0x1e28, 0x1e2a, 0x1e2c, + 0x1e2e, 0x1e30, 0x1e32, 0x1e34, 0x1e36, 0x1e38, 0x1e3a, 0x1e3c, 0x1e3e, + 0x1e40, 0x1e42, 0x1e44, 0x1e46, 0x1e48, 0x1e4a, 0x1e4c, 0x1e4e, 0x1e50, + 0x1e52, 0x1e54, 0x1e56, 0x1e58, 0x1e5a, 0x1e5c, 0x1e5e, 0x1e60, 0x1e62, + 0x1e64, 0x1e66, 0x1e68, 0x1e6a, 0x1e6c, 0x1e6e, 0x1e70, 0x1e72, 0x1e74, + 0x1e76, 0x1e78, 0x1e7a, 0x1e7c, 0x1e7e, 0x1e80, 0x1e82, 0x1e84, 0x1e86, + 0x1e88, 0x1e8a, 0x1e8c, 0x1e8e, 0x1e90, 0x1e92, 0x1e94, 0x1ea0, 0x1ea2, + 0x1ea4, 0x1ea6, 0x1ea8, 0x1eaa, 0x1eac, 0x1eae, 0x1eb0, 0x1eb2, 0x1eb4, + 0x1eb6, 0x1eb8, 0x1eba, 0x1ebc, 0x1ebe, 0x1ec0, 0x1ec2, 0x1ec4, 0x1ec6, + 0x1ec8, 0x1eca, 0x1ecc, 0x1ece, 0x1ed0, 0x1ed2, 0x1ed4, 0x1ed6, 0x1ed8, + 0x1eda, 0x1edc, 0x1ede, 0x1ee0, 0x1ee2, 0x1ee4, 0x1ee6, 0x1ee8, 0x1eea, + 0x1eec, 0x1eee, 0x1ef0, 0x1ef2, 0x1ef4, 0x1ef6, 0x1ef8, 0x1f59, 0x1f5b, + 0x1f5d, 0x1f5f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x2130, 0x2131, 0x2133 }; #define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr)) -/* Unicode: unicode print characters excluding space */ +/* + * Unicode: unicode print characters excluding space. 
+ */ static crange graphRangeTable[] = { - {0x0021, 0x007e}, {0x00a0, 0x011f}, {0x0121, 0x021f}, {0x0222, 0x0233}, - {0x0250, 0x02ad}, {0x02b0, 0x02ee}, {0x0300, 0x031f}, {0x0321, 0x034e}, - {0x0360, 0x0362}, {0x0384, 0x038a}, {0x038e, 0x03a1}, {0x03a3, 0x03ce}, - {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x041f}, {0x0421, 0x0486}, - {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0559, 0x055f}, - {0x0561, 0x0587}, {0x0591, 0x05a1}, {0x05a3, 0x05b9}, {0x05bb, 0x05c4}, - {0x05d0, 0x05ea}, {0x05f0, 0x05f4}, {0x0621, 0x063a}, {0x0640, 0x0655}, - {0x0660, 0x066d}, {0x0670, 0x06ed}, {0x06f0, 0x06fe}, {0x0700, 0x070d}, - {0x0710, 0x071f}, {0x0721, 0x072c}, {0x0730, 0x074a}, {0x0780, 0x07b0}, - {0x0901, 0x0903}, {0x0905, 0x091f}, {0x0921, 0x0939}, {0x093c, 0x094d}, - {0x0950, 0x0954}, {0x0958, 0x0970}, {0x0981, 0x0983}, {0x0985, 0x098c}, - {0x0993, 0x09a8}, {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09be, 0x09c4}, - {0x09cb, 0x09cd}, {0x09df, 0x09e3}, {0x09e6, 0x09fa}, {0x0a05, 0x0a0a}, - {0x0a13, 0x0a1f}, {0x0a21, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a3e, 0x0a42}, - {0x0a4b, 0x0a4d}, {0x0a59, 0x0a5c}, {0x0a66, 0x0a74}, {0x0a81, 0x0a83}, - {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0}, - {0x0ab5, 0x0ab9}, {0x0abc, 0x0ac5}, {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd}, - {0x0ae6, 0x0aef}, {0x0b01, 0x0b03}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b1f}, - {0x0b21, 0x0b28}, {0x0b2a, 0x0b30}, {0x0b36, 0x0b39}, {0x0b3c, 0x0b43}, - {0x0b4b, 0x0b4d}, {0x0b5f, 0x0b61}, {0x0b66, 0x0b70}, {0x0b85, 0x0b8a}, - {0x0b8e, 0x0b90}, {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5}, - {0x0bb7, 0x0bb9}, {0x0bbe, 0x0bc2}, {0x0bc6, 0x0bc8}, {0x0bca, 0x0bcd}, - {0x0be7, 0x0bf2}, {0x0c01, 0x0c03}, {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, - {0x0c12, 0x0c1f}, {0x0c21, 0x0c28}, {0x0c2a, 0x0c33}, {0x0c35, 0x0c39}, - {0x0c3e, 0x0c44}, {0x0c46, 0x0c48}, {0x0c4a, 0x0c4d}, {0x0c66, 0x0c6f}, - {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8}, {0x0caa, 0x0cb3}, - {0x0cb5, 0x0cb9}, {0x0cbe, 
0x0cc4}, {0x0cc6, 0x0cc8}, {0x0cca, 0x0ccd}, - {0x0ce6, 0x0cef}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, {0x0d12, 0x0d1f}, - {0x0d21, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d3e, 0x0d43}, {0x0d46, 0x0d48}, - {0x0d4a, 0x0d4d}, {0x0d66, 0x0d6f}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1}, - {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0dcf, 0x0dd4}, {0x0dd8, 0x0ddf}, - {0x0df2, 0x0df4}, {0x0e01, 0x0e1f}, {0x0e21, 0x0e3a}, {0x0e3f, 0x0e5b}, - {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb9}, - {0x0ebb, 0x0ebd}, {0x0ec0, 0x0ec4}, {0x0ec8, 0x0ecd}, {0x0ed0, 0x0ed9}, - {0x0f00, 0x0f1f}, {0x0f21, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f71, 0x0f8b}, - {0x0f90, 0x0f97}, {0x0f99, 0x0fbc}, {0x0fbe, 0x0fcc}, {0x1000, 0x101f}, - {0x1023, 0x1027}, {0x102c, 0x1032}, {0x1036, 0x1039}, {0x1040, 0x1059}, - {0x10a0, 0x10c5}, {0x10d0, 0x10f6}, {0x1100, 0x111f}, {0x1121, 0x1159}, - {0x115f, 0x11a2}, {0x11a8, 0x11f9}, {0x1200, 0x1206}, {0x1208, 0x121f}, - {0x1221, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256}, {0x125a, 0x125d}, - {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae}, {0x12b2, 0x12b5}, - {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce}, {0x12d0, 0x12d6}, - {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315}, {0x1318, 0x131e}, - {0x1321, 0x1346}, {0x1348, 0x135a}, {0x1361, 0x137c}, {0x13a0, 0x13f4}, - {0x1401, 0x141f}, {0x1421, 0x151f}, {0x1521, 0x161f}, {0x1621, 0x1676}, - {0x1680, 0x169c}, {0x16a0, 0x16f0}, {0x1780, 0x17dc}, {0x17e0, 0x17e9}, - {0x1800, 0x180a}, {0x1810, 0x1819}, {0x1821, 0x1877}, {0x1880, 0x18a9}, - {0x1e00, 0x1e1f}, {0x1e21, 0x1e9b}, {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, - {0x1f18, 0x1f1d}, {0x1f21, 0x1f45}, {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, - {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fc4}, {0x1fc6, 0x1fd3}, - {0x1fd6, 0x1fdb}, {0x1fdd, 0x1fef}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffe}, - {0x2000, 0x200b}, {0x2010, 0x201f}, {0x2021, 0x2029}, {0x202f, 0x2046}, - {0x2048, 0x204d}, {0x2074, 0x208e}, {0x20a0, 0x20af}, {0x20d0, 0x20e3}, - {0x2100, 0x211f}, {0x2121, 
0x213a}, {0x2153, 0x2183}, {0x2190, 0x21f3}, - {0x2200, 0x221f}, {0x2221, 0x22f1}, {0x2300, 0x231f}, {0x2321, 0x237b}, - {0x237d, 0x239a}, {0x2400, 0x241f}, {0x2421, 0x2426}, {0x2440, 0x244a}, - {0x2460, 0x24ea}, {0x2500, 0x251f}, {0x2521, 0x2595}, {0x25a0, 0x25f7}, - {0x2600, 0x2613}, {0x2619, 0x261f}, {0x2621, 0x2671}, {0x2701, 0x2704}, - {0x2706, 0x2709}, {0x270c, 0x271f}, {0x2721, 0x2727}, {0x2729, 0x274b}, - {0x274f, 0x2752}, {0x2758, 0x275e}, {0x2761, 0x2767}, {0x2776, 0x2794}, - {0x2798, 0x27af}, {0x27b1, 0x27be}, {0x2800, 0x281f}, {0x2821, 0x28ff}, - {0x2e80, 0x2e99}, {0x2e9b, 0x2ef3}, {0x2f00, 0x2f1f}, {0x2f21, 0x2fd5}, - {0x2ff0, 0x2ffb}, {0x3000, 0x301f}, {0x3021, 0x303a}, {0x3041, 0x3094}, - {0x3099, 0x309e}, {0x30a1, 0x30fe}, {0x3105, 0x311f}, {0x3121, 0x312c}, - {0x3131, 0x318e}, {0x3190, 0x31b7}, {0x3200, 0x321c}, {0x3221, 0x3243}, - {0x3260, 0x327b}, {0x327f, 0x32b0}, {0x32c0, 0x32cb}, {0x32d0, 0x32fe}, - {0x3300, 0x331f}, {0x3321, 0x3376}, {0x337b, 0x33dd}, {0x33e0, 0x33fe}, - {0x3400, 0x341f}, {0x3421, 0x351f}, {0x3521, 0x361f}, {0x3621, 0x371f}, - {0x3721, 0x381f}, {0x3821, 0x391f}, {0x3921, 0x3a1f}, {0x3a21, 0x3b1f}, - {0x3b21, 0x3c1f}, {0x3c21, 0x3d1f}, {0x3d21, 0x3e1f}, {0x3e21, 0x3f1f}, - {0x3f21, 0x401f}, {0x4021, 0x411f}, {0x4121, 0x421f}, {0x4221, 0x431f}, - {0x4321, 0x441f}, {0x4421, 0x451f}, {0x4521, 0x461f}, {0x4621, 0x471f}, - {0x4721, 0x481f}, {0x4821, 0x491f}, {0x4921, 0x4a1f}, {0x4a21, 0x4b1f}, - {0x4b21, 0x4c1f}, {0x4c21, 0x4d1f}, {0x4d21, 0x4db5}, {0x4e00, 0x4e1f}, - {0x4e21, 0x4f1f}, {0x4f21, 0x501f}, {0x5021, 0x511f}, {0x5121, 0x521f}, - {0x5221, 0x531f}, {0x5321, 0x541f}, {0x5421, 0x551f}, {0x5521, 0x561f}, - {0x5621, 0x571f}, {0x5721, 0x581f}, {0x5821, 0x591f}, {0x5921, 0x5a1f}, - {0x5a21, 0x5b1f}, {0x5b21, 0x5c1f}, {0x5c21, 0x5d1f}, {0x5d21, 0x5e1f}, - {0x5e21, 0x5f1f}, {0x5f21, 0x601f}, {0x6021, 0x611f}, {0x6121, 0x621f}, - {0x6221, 0x631f}, {0x6321, 0x641f}, {0x6421, 0x651f}, {0x6521, 0x661f}, - {0x6621, 0x671f}, {0x6721, 
0x681f}, {0x6821, 0x691f}, {0x6921, 0x6a1f}, - {0x6a21, 0x6b1f}, {0x6b21, 0x6c1f}, {0x6c21, 0x6d1f}, {0x6d21, 0x6e1f}, - {0x6e21, 0x6f1f}, {0x6f21, 0x701f}, {0x7021, 0x711f}, {0x7121, 0x721f}, - {0x7221, 0x731f}, {0x7321, 0x741f}, {0x7421, 0x751f}, {0x7521, 0x761f}, - {0x7621, 0x771f}, {0x7721, 0x781f}, {0x7821, 0x791f}, {0x7921, 0x7a1f}, - {0x7a21, 0x7b1f}, {0x7b21, 0x7c1f}, {0x7c21, 0x7d1f}, {0x7d21, 0x7e1f}, - {0x7e21, 0x7f1f}, {0x7f21, 0x801f}, {0x8021, 0x811f}, {0x8121, 0x821f}, - {0x8221, 0x831f}, {0x8321, 0x841f}, {0x8421, 0x851f}, {0x8521, 0x861f}, - {0x8621, 0x871f}, {0x8721, 0x881f}, {0x8821, 0x891f}, {0x8921, 0x8a1f}, - {0x8a21, 0x8b1f}, {0x8b21, 0x8c1f}, {0x8c21, 0x8d1f}, {0x8d21, 0x8e1f}, - {0x8e21, 0x8f1f}, {0x8f21, 0x901f}, {0x9021, 0x911f}, {0x9121, 0x921f}, - {0x9221, 0x931f}, {0x9321, 0x941f}, {0x9421, 0x951f}, {0x9521, 0x961f}, - {0x9621, 0x971f}, {0x9721, 0x981f}, {0x9821, 0x991f}, {0x9921, 0x9a1f}, - {0x9a21, 0x9b1f}, {0x9b21, 0x9c1f}, {0x9c21, 0x9d1f}, {0x9d21, 0x9e1f}, - {0x9e21, 0x9f1f}, {0x9f21, 0x9fa5}, {0xa000, 0xa01f}, {0xa021, 0xa11f}, - {0xa121, 0xa21f}, {0xa221, 0xa31f}, {0xa321, 0xa41f}, {0xa421, 0xa48c}, - {0xa490, 0xa4a1}, {0xa4a4, 0xa4b3}, {0xa4b5, 0xa4c0}, {0xa4c2, 0xa4c4}, - {0xac00, 0xac1f}, {0xac21, 0xad1f}, {0xad21, 0xae1f}, {0xae21, 0xaf1f}, - {0xaf21, 0xb01f}, {0xb021, 0xb11f}, {0xb121, 0xb21f}, {0xb221, 0xb31f}, - {0xb321, 0xb41f}, {0xb421, 0xb51f}, {0xb521, 0xb61f}, {0xb621, 0xb71f}, - {0xb721, 0xb81f}, {0xb821, 0xb91f}, {0xb921, 0xba1f}, {0xba21, 0xbb1f}, - {0xbb21, 0xbc1f}, {0xbc21, 0xbd1f}, {0xbd21, 0xbe1f}, {0xbe21, 0xbf1f}, - {0xbf21, 0xc01f}, {0xc021, 0xc11f}, {0xc121, 0xc21f}, {0xc221, 0xc31f}, - {0xc321, 0xc41f}, {0xc421, 0xc51f}, {0xc521, 0xc61f}, {0xc621, 0xc71f}, - {0xc721, 0xc81f}, {0xc821, 0xc91f}, {0xc921, 0xca1f}, {0xca21, 0xcb1f}, - {0xcb21, 0xcc1f}, {0xcc21, 0xcd1f}, {0xcd21, 0xce1f}, {0xce21, 0xcf1f}, - {0xcf21, 0xd01f}, {0xd021, 0xd11f}, {0xd121, 0xd21f}, {0xd221, 0xd31f}, - {0xd321, 0xd41f}, {0xd421, 
0xd51f}, {0xd521, 0xd61f}, {0xd621, 0xd71f}, - {0xd721, 0xd7a3}, {0xf900, 0xf91f}, {0xf921, 0xfa1f}, {0xfa21, 0xfa2d}, - {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1d, 0xfb1f}, {0xfb21, 0xfb36}, - {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfc1f}, {0xfc21, 0xfd1f}, - {0xfd21, 0xfd3f}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, - {0xfe21, 0xfe23}, {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe66}, - {0xfe68, 0xfe6b}, {0xfe70, 0xfe72}, {0xfe76, 0xfefc}, {0xff01, 0xff1f}, - {0xff21, 0xff5e}, {0xff61, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf}, - {0xffd2, 0xffd7}, {0xffda, 0xffdc}, {0xffe0, 0xffe6}, {0xffe8, 0xffee}, + {0x0021, 0x007e}, {0x00a0, 0x011f}, {0x0121, 0x021f}, {0x0222, 0x0233}, + {0x0250, 0x02ad}, {0x02b0, 0x02ee}, {0x0300, 0x031f}, {0x0321, 0x034e}, + {0x0360, 0x0362}, {0x0384, 0x038a}, {0x038e, 0x03a1}, {0x03a3, 0x03ce}, + {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x041f}, {0x0421, 0x0486}, + {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0559, 0x055f}, + {0x0561, 0x0587}, {0x0591, 0x05a1}, {0x05a3, 0x05b9}, {0x05bb, 0x05c4}, + {0x05d0, 0x05ea}, {0x05f0, 0x05f4}, {0x0621, 0x063a}, {0x0640, 0x0655}, + {0x0660, 0x066d}, {0x0670, 0x06ed}, {0x06f0, 0x06fe}, {0x0700, 0x070d}, + {0x0710, 0x071f}, {0x0721, 0x072c}, {0x0730, 0x074a}, {0x0780, 0x07b0}, + {0x0901, 0x0903}, {0x0905, 0x091f}, {0x0921, 0x0939}, {0x093c, 0x094d}, + {0x0950, 0x0954}, {0x0958, 0x0970}, {0x0981, 0x0983}, {0x0985, 0x098c}, + {0x0993, 0x09a8}, {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09be, 0x09c4}, + {0x09cb, 0x09cd}, {0x09df, 0x09e3}, {0x09e6, 0x09fa}, {0x0a05, 0x0a0a}, + {0x0a13, 0x0a1f}, {0x0a21, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a3e, 0x0a42}, + {0x0a4b, 0x0a4d}, {0x0a59, 0x0a5c}, {0x0a66, 0x0a74}, {0x0a81, 0x0a83}, + {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0}, + {0x0ab5, 0x0ab9}, {0x0abc, 0x0ac5}, {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd}, + {0x0ae6, 0x0aef}, {0x0b01, 0x0b03}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b1f}, + {0x0b21, 0x0b28}, {0x0b2a, 
0x0b30}, {0x0b36, 0x0b39}, {0x0b3c, 0x0b43}, + {0x0b4b, 0x0b4d}, {0x0b5f, 0x0b61}, {0x0b66, 0x0b70}, {0x0b85, 0x0b8a}, + {0x0b8e, 0x0b90}, {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5}, + {0x0bb7, 0x0bb9}, {0x0bbe, 0x0bc2}, {0x0bc6, 0x0bc8}, {0x0bca, 0x0bcd}, + {0x0be7, 0x0bf2}, {0x0c01, 0x0c03}, {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, + {0x0c12, 0x0c1f}, {0x0c21, 0x0c28}, {0x0c2a, 0x0c33}, {0x0c35, 0x0c39}, + {0x0c3e, 0x0c44}, {0x0c46, 0x0c48}, {0x0c4a, 0x0c4d}, {0x0c66, 0x0c6f}, + {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8}, {0x0caa, 0x0cb3}, + {0x0cb5, 0x0cb9}, {0x0cbe, 0x0cc4}, {0x0cc6, 0x0cc8}, {0x0cca, 0x0ccd}, + {0x0ce6, 0x0cef}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, {0x0d12, 0x0d1f}, + {0x0d21, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d3e, 0x0d43}, {0x0d46, 0x0d48}, + {0x0d4a, 0x0d4d}, {0x0d66, 0x0d6f}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1}, + {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0dcf, 0x0dd4}, {0x0dd8, 0x0ddf}, + {0x0df2, 0x0df4}, {0x0e01, 0x0e1f}, {0x0e21, 0x0e3a}, {0x0e3f, 0x0e5b}, + {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb9}, + {0x0ebb, 0x0ebd}, {0x0ec0, 0x0ec4}, {0x0ec8, 0x0ecd}, {0x0ed0, 0x0ed9}, + {0x0f00, 0x0f1f}, {0x0f21, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f71, 0x0f8b}, + {0x0f90, 0x0f97}, {0x0f99, 0x0fbc}, {0x0fbe, 0x0fcc}, {0x1000, 0x101f}, + {0x1023, 0x1027}, {0x102c, 0x1032}, {0x1036, 0x1039}, {0x1040, 0x1059}, + {0x10a0, 0x10c5}, {0x10d0, 0x10f6}, {0x1100, 0x111f}, {0x1121, 0x1159}, + {0x115f, 0x11a2}, {0x11a8, 0x11f9}, {0x1200, 0x1206}, {0x1208, 0x121f}, + {0x1221, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256}, {0x125a, 0x125d}, + {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae}, {0x12b2, 0x12b5}, + {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce}, {0x12d0, 0x12d6}, + {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315}, {0x1318, 0x131e}, + {0x1321, 0x1346}, {0x1348, 0x135a}, {0x1361, 0x137c}, {0x13a0, 0x13f4}, + {0x1401, 0x141f}, {0x1421, 0x151f}, {0x1521, 0x161f}, {0x1621, 0x1676}, + {0x1680, 0x169c}, {0x16a0, 
0x16f0}, {0x1780, 0x17dc}, {0x17e0, 0x17e9}, + {0x1800, 0x180a}, {0x1810, 0x1819}, {0x1821, 0x1877}, {0x1880, 0x18a9}, + {0x1e00, 0x1e1f}, {0x1e21, 0x1e9b}, {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, + {0x1f18, 0x1f1d}, {0x1f21, 0x1f45}, {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, + {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fc4}, {0x1fc6, 0x1fd3}, + {0x1fd6, 0x1fdb}, {0x1fdd, 0x1fef}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffe}, + {0x2000, 0x200b}, {0x2010, 0x201f}, {0x2021, 0x2029}, {0x202f, 0x2046}, + {0x2048, 0x204d}, {0x2074, 0x208e}, {0x20a0, 0x20af}, {0x20d0, 0x20e3}, + {0x2100, 0x211f}, {0x2121, 0x213a}, {0x2153, 0x2183}, {0x2190, 0x21f3}, + {0x2200, 0x221f}, {0x2221, 0x22f1}, {0x2300, 0x231f}, {0x2321, 0x237b}, + {0x237d, 0x239a}, {0x2400, 0x241f}, {0x2421, 0x2426}, {0x2440, 0x244a}, + {0x2460, 0x24ea}, {0x2500, 0x251f}, {0x2521, 0x2595}, {0x25a0, 0x25f7}, + {0x2600, 0x2613}, {0x2619, 0x261f}, {0x2621, 0x2671}, {0x2701, 0x2704}, + {0x2706, 0x2709}, {0x270c, 0x271f}, {0x2721, 0x2727}, {0x2729, 0x274b}, + {0x274f, 0x2752}, {0x2758, 0x275e}, {0x2761, 0x2767}, {0x2776, 0x2794}, + {0x2798, 0x27af}, {0x27b1, 0x27be}, {0x2800, 0x281f}, {0x2821, 0x28ff}, + {0x2e80, 0x2e99}, {0x2e9b, 0x2ef3}, {0x2f00, 0x2f1f}, {0x2f21, 0x2fd5}, + {0x2ff0, 0x2ffb}, {0x3000, 0x301f}, {0x3021, 0x303a}, {0x3041, 0x3094}, + {0x3099, 0x309e}, {0x30a1, 0x30fe}, {0x3105, 0x311f}, {0x3121, 0x312c}, + {0x3131, 0x318e}, {0x3190, 0x31b7}, {0x3200, 0x321c}, {0x3221, 0x3243}, + {0x3260, 0x327b}, {0x327f, 0x32b0}, {0x32c0, 0x32cb}, {0x32d0, 0x32fe}, + {0x3300, 0x331f}, {0x3321, 0x3376}, {0x337b, 0x33dd}, {0x33e0, 0x33fe}, + {0x3400, 0x341f}, {0x3421, 0x351f}, {0x3521, 0x361f}, {0x3621, 0x371f}, + {0x3721, 0x381f}, {0x3821, 0x391f}, {0x3921, 0x3a1f}, {0x3a21, 0x3b1f}, + {0x3b21, 0x3c1f}, {0x3c21, 0x3d1f}, {0x3d21, 0x3e1f}, {0x3e21, 0x3f1f}, + {0x3f21, 0x401f}, {0x4021, 0x411f}, {0x4121, 0x421f}, {0x4221, 0x431f}, + {0x4321, 0x441f}, {0x4421, 0x451f}, {0x4521, 0x461f}, {0x4621, 0x471f}, + {0x4721, 0x481f}, {0x4821, 
0x491f}, {0x4921, 0x4a1f}, {0x4a21, 0x4b1f}, + {0x4b21, 0x4c1f}, {0x4c21, 0x4d1f}, {0x4d21, 0x4db5}, {0x4e00, 0x4e1f}, + {0x4e21, 0x4f1f}, {0x4f21, 0x501f}, {0x5021, 0x511f}, {0x5121, 0x521f}, + {0x5221, 0x531f}, {0x5321, 0x541f}, {0x5421, 0x551f}, {0x5521, 0x561f}, + {0x5621, 0x571f}, {0x5721, 0x581f}, {0x5821, 0x591f}, {0x5921, 0x5a1f}, + {0x5a21, 0x5b1f}, {0x5b21, 0x5c1f}, {0x5c21, 0x5d1f}, {0x5d21, 0x5e1f}, + {0x5e21, 0x5f1f}, {0x5f21, 0x601f}, {0x6021, 0x611f}, {0x6121, 0x621f}, + {0x6221, 0x631f}, {0x6321, 0x641f}, {0x6421, 0x651f}, {0x6521, 0x661f}, + {0x6621, 0x671f}, {0x6721, 0x681f}, {0x6821, 0x691f}, {0x6921, 0x6a1f}, + {0x6a21, 0x6b1f}, {0x6b21, 0x6c1f}, {0x6c21, 0x6d1f}, {0x6d21, 0x6e1f}, + {0x6e21, 0x6f1f}, {0x6f21, 0x701f}, {0x7021, 0x711f}, {0x7121, 0x721f}, + {0x7221, 0x731f}, {0x7321, 0x741f}, {0x7421, 0x751f}, {0x7521, 0x761f}, + {0x7621, 0x771f}, {0x7721, 0x781f}, {0x7821, 0x791f}, {0x7921, 0x7a1f}, + {0x7a21, 0x7b1f}, {0x7b21, 0x7c1f}, {0x7c21, 0x7d1f}, {0x7d21, 0x7e1f}, + {0x7e21, 0x7f1f}, {0x7f21, 0x801f}, {0x8021, 0x811f}, {0x8121, 0x821f}, + {0x8221, 0x831f}, {0x8321, 0x841f}, {0x8421, 0x851f}, {0x8521, 0x861f}, + {0x8621, 0x871f}, {0x8721, 0x881f}, {0x8821, 0x891f}, {0x8921, 0x8a1f}, + {0x8a21, 0x8b1f}, {0x8b21, 0x8c1f}, {0x8c21, 0x8d1f}, {0x8d21, 0x8e1f}, + {0x8e21, 0x8f1f}, {0x8f21, 0x901f}, {0x9021, 0x911f}, {0x9121, 0x921f}, + {0x9221, 0x931f}, {0x9321, 0x941f}, {0x9421, 0x951f}, {0x9521, 0x961f}, + {0x9621, 0x971f}, {0x9721, 0x981f}, {0x9821, 0x991f}, {0x9921, 0x9a1f}, + {0x9a21, 0x9b1f}, {0x9b21, 0x9c1f}, {0x9c21, 0x9d1f}, {0x9d21, 0x9e1f}, + {0x9e21, 0x9f1f}, {0x9f21, 0x9fa5}, {0xa000, 0xa01f}, {0xa021, 0xa11f}, + {0xa121, 0xa21f}, {0xa221, 0xa31f}, {0xa321, 0xa41f}, {0xa421, 0xa48c}, + {0xa490, 0xa4a1}, {0xa4a4, 0xa4b3}, {0xa4b5, 0xa4c0}, {0xa4c2, 0xa4c4}, + {0xac00, 0xac1f}, {0xac21, 0xad1f}, {0xad21, 0xae1f}, {0xae21, 0xaf1f}, + {0xaf21, 0xb01f}, {0xb021, 0xb11f}, {0xb121, 0xb21f}, {0xb221, 0xb31f}, + {0xb321, 0xb41f}, {0xb421, 
0xb51f}, {0xb521, 0xb61f}, {0xb621, 0xb71f}, + {0xb721, 0xb81f}, {0xb821, 0xb91f}, {0xb921, 0xba1f}, {0xba21, 0xbb1f}, + {0xbb21, 0xbc1f}, {0xbc21, 0xbd1f}, {0xbd21, 0xbe1f}, {0xbe21, 0xbf1f}, + {0xbf21, 0xc01f}, {0xc021, 0xc11f}, {0xc121, 0xc21f}, {0xc221, 0xc31f}, + {0xc321, 0xc41f}, {0xc421, 0xc51f}, {0xc521, 0xc61f}, {0xc621, 0xc71f}, + {0xc721, 0xc81f}, {0xc821, 0xc91f}, {0xc921, 0xca1f}, {0xca21, 0xcb1f}, + {0xcb21, 0xcc1f}, {0xcc21, 0xcd1f}, {0xcd21, 0xce1f}, {0xce21, 0xcf1f}, + {0xcf21, 0xd01f}, {0xd021, 0xd11f}, {0xd121, 0xd21f}, {0xd221, 0xd31f}, + {0xd321, 0xd41f}, {0xd421, 0xd51f}, {0xd521, 0xd61f}, {0xd621, 0xd71f}, + {0xd721, 0xd7a3}, {0xf900, 0xf91f}, {0xf921, 0xfa1f}, {0xfa21, 0xfa2d}, + {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1d, 0xfb1f}, {0xfb21, 0xfb36}, + {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfc1f}, {0xfc21, 0xfd1f}, + {0xfd21, 0xfd3f}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, + {0xfe21, 0xfe23}, {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe66}, + {0xfe68, 0xfe6b}, {0xfe70, 0xfe72}, {0xfe76, 0xfefc}, {0xff01, 0xff1f}, + {0xff21, 0xff5e}, {0xff61, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf}, + {0xffd2, 0xffd7}, {0xffda, 0xffdc}, {0xffe0, 0xffe6}, {0xffe8, 0xffee}, {0xfffc, 0xffff} }; #define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange)) static chr graphCharTable[] = { - 0x0374, 0x0375, 0x037a, 0x037e, 0x038c, 0x0488, 0x0489, 0x04c7, 0x04c8, - 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0589, 0x058a, 0x060c, 0x061b, 0x061f, - 0x098f, 0x0990, 0x09b2, 0x09bc, 0x09c7, 0x09c8, 0x09d7, 0x09dc, 0x09dd, - 0x0a02, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38, 0x0a39, - 0x0a3c, 0x0a47, 0x0a48, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0ad0, 0x0ae0, - 0x0b0f, 0x0b10, 0x0b32, 0x0b33, 0x0b47, 0x0b48, 0x0b56, 0x0b57, 0x0b5c, - 0x0b5d, 0x0b82, 0x0b83, 0x0b99, 0x0b9a, 0x0b9c, 0x0b9e, 0x0b9f, 0x0ba3, - 0x0ba4, 0x0bd7, 0x0c55, 0x0c56, 0x0c60, 0x0c61, 0x0c82, 0x0c83, 0x0cd5, - 0x0cd6, 0x0cde, 0x0ce0, 0x0ce1, 0x0d02, 0x0d03, 0x0d57, 
0x0d60, 0x0d61, - 0x0d82, 0x0d83, 0x0dbd, 0x0dca, 0x0dd6, 0x0e81, 0x0e82, 0x0e84, 0x0e87, - 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0ec6, 0x0edc, - 0x0edd, 0x0fcf, 0x1021, 0x1029, 0x102a, 0x10fb, 0x1248, 0x1258, 0x1288, - 0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x2070, 0x274d, 0x2756, + 0x0374, 0x0375, 0x037a, 0x037e, 0x038c, 0x0488, 0x0489, 0x04c7, 0x04c8, + 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0589, 0x058a, 0x060c, 0x061b, 0x061f, + 0x098f, 0x0990, 0x09b2, 0x09bc, 0x09c7, 0x09c8, 0x09d7, 0x09dc, 0x09dd, + 0x0a02, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38, 0x0a39, + 0x0a3c, 0x0a47, 0x0a48, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0ad0, 0x0ae0, + 0x0b0f, 0x0b10, 0x0b32, 0x0b33, 0x0b47, 0x0b48, 0x0b56, 0x0b57, 0x0b5c, + 0x0b5d, 0x0b82, 0x0b83, 0x0b99, 0x0b9a, 0x0b9c, 0x0b9e, 0x0b9f, 0x0ba3, + 0x0ba4, 0x0bd7, 0x0c55, 0x0c56, 0x0c60, 0x0c61, 0x0c82, 0x0c83, 0x0cd5, + 0x0cd6, 0x0cde, 0x0ce0, 0x0ce1, 0x0d02, 0x0d03, 0x0d57, 0x0d60, 0x0d61, + 0x0d82, 0x0d83, 0x0dbd, 0x0dca, 0x0dd6, 0x0e81, 0x0e82, 0x0e84, 0x0e87, + 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0ec6, 0x0edc, + 0x0edd, 0x0fcf, 0x1021, 0x1029, 0x102a, 0x10fb, 0x1248, 0x1258, 0x1288, + 0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x2070, 0x274d, 0x2756, 0x303e, 0x303f, 0xa4c6, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74 }; @@ -519,60 +535,63 @@ static chr graphCharTable[] = { */ #define CH NOCELT - + /* - nmcces - how many distinct MCCEs are there? ^ static int nmcces(struct vars *); */ static int -nmcces(v) - struct vars *v; /* context */ +nmcces( + struct vars *v) /* context */ { /* * No multi-character collating elements defined at the moment. */ return 0; -} +} /* - nleaders - how many chrs can be first chrs of MCCEs? 
^ static int nleaders(struct vars *); */ static int -nleaders(v) - struct vars *v; /* context */ +nleaders( + struct vars *v) /* context */ { return 0; } - + /* - allmcces - return a cvec with all the MCCEs of the locale ^ static struct cvec *allmcces(struct vars *, struct cvec *); */ static struct cvec * -allmcces(v, cv) - struct vars *v; /* context */ - struct cvec *cv; /* this is supposed to have enough room */ +allmcces( + struct vars *v, /* context */ + struct cvec *cv) /* this is supposed to have enough room */ { return clearcvec(cv); } - + /* - element - map collating-element name to celt ^ static celt element(struct vars *, chr *, chr *); */ static celt -element(v, startp, endp) - struct vars *v; /* context */ - chr *startp; /* points to start of name */ - chr *endp; /* points just past end of name */ +element( + struct vars *v, /* context */ + chr *startp, /* points to start of name */ + chr *endp) /* points just past end of name */ { struct cname *cn; size_t len; Tcl_DString ds; CONST char *np; - /* generic: one-chr names stand for themselves */ + /* + * Generic: one-chr names stand for themselves. + */ + assert(startp < endp); len = endp - startp; if (len == 1) { @@ -581,7 +600,10 @@ element(v, startp, endp) NOTE(REG_ULOCALE); - /* search table */ + /* + * Search table. + */ + Tcl_DStringInit(&ds); np = Tcl_UniCharToUtfDString(startp, (int)len, &ds); for (cn=cnames; cn->name!=NULL; cn++) { @@ -594,21 +616,24 @@ element(v, startp, endp) return CHR(cn->code); } - /* couldn't find it */ + /* + * Couldn't find it. + */ + ERR(REG_ECOLLATE); return 0; } - + /* - range - supply cvec for a range, including legality check ^ static struct cvec *range(struct vars *, celt, celt, int); */ static struct cvec * -range(v, a, b, cases) - struct vars *v; /* context */ - celt a; /* range start */ - celt b; /* range end, might equal a */ - int cases; /* case-independent? 
*/ +range( + struct vars *v, /* context */ + celt a, /* range start */ + celt b, /* range end, might equal a */ + int cases) /* case-independent? */ { int nchrs; struct cvec *cv; @@ -627,10 +652,9 @@ range(v, a, b, cases) } /* - * When case-independent, it's hard to decide when cvec ranges are - * usable, so for now at least, we won't try. We allocate enough - * space for two case variants plus a little extra for the two - * title case variants. + * When case-independent, it's hard to decide when cvec ranges are usable, + * so for now at least, we won't try. We allocate enough space for two + * case variants plus a little extra for the two title case variants. */ nchrs = (b - a + 1)*2 + 4; @@ -656,7 +680,7 @@ range(v, a, b, cases) return cv; } - + /* - before - is celt x before celt y, for purposes of range legality? ^ static int before(celt, celt); @@ -665,13 +689,16 @@ static int /* predicate */ before(x, y) celt x, y; /* collating elements */ { - /* trivial because no MCCEs */ + /* + * trivial because no MCCEs. + */ + if (x < y) { return 1; } return 0; } - + /* - eclass - supply cvec for an equivalence class * Must include case counterparts on request. @@ -686,7 +713,10 @@ eclass(v, c, cases) { struct cvec *cv; - /* crude fake equivalence class for testing */ + /* + * Crude fake equivalence class for testing. + */ + if ((v->cflags&REG_FAKE) && c == 'x') { cv = getcvec(v, 4, 0, 0); addchr(cv, (chr)'x'); @@ -698,7 +728,10 @@ eclass(v, c, cases) return cv; } - /* otherwise, none */ + /* + * Otherwise, none. + */ + if (cases) { return allcases(v, c); } @@ -707,18 +740,18 @@ eclass(v, c, cases) addchr(cv, (chr)c); return cv; } - + /* - cclass - supply cvec for a character class * Must include case counterparts on request. 
^ static struct cvec *cclass(struct vars *, chr *, chr *, int); */ static struct cvec * -cclass(v, startp, endp, cases) - struct vars *v; /* context */ - chr *startp; /* where the name starts */ - chr *endp; /* just past the end of the name */ - int cases; /* case-independent? */ +cclass( + struct vars *v, /* context */ + chr *startp, /* where the name starts */ + chr *endp, /* just past the end of the name */ + int cases) /* case-independent? */ { size_t len; struct cvec *cv = NULL; @@ -740,7 +773,7 @@ cclass(v, startp, endp, cases) CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH, CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT }; - + /* * Extract the class name @@ -775,7 +808,7 @@ cclass(v, startp, endp, cases) ERR(REG_ECTYPE); return NULL; } - + /* * Now compute the character class contents. */ @@ -828,7 +861,7 @@ cclass(v, startp, endp, cases) break; case CC_DIGIT: cv = getcvec(v, 0, NUM_DIGIT_RANGE, 0); - if (cv) { + if (cv) { for (i=0 ; i<NUM_DIGIT_RANGE ; i++) { addrange(cv, digitRangeTable[i].start, digitRangeTable[i].end); @@ -849,16 +882,16 @@ cclass(v, startp, endp, cases) break; case CC_XDIGIT: /* - * This is a 3 instead of (NUM_DIGIT_RANGE+2) because I've no - * idea how to define the digits 'a' through 'f' in - * non-western locales. The concept is quite possibly non - * portable, or only used in contextx where the characters - * used would be the western ones anyway! Whatever is - * actually the case, the number of ranges is fixed (until + * This is a 3 instead of (NUM_DIGIT_RANGE+2) because I've no idea how + * to define the digits 'a' through 'f' in non-western locales. The + * concept is quite possibly non portable, or only used in contextx + * where the characters used would be the western ones anyway! + * Whatever is actually the case, the number of ranges is fixed (until * someone comes up with a better arrangement!) 
*/ + cv = getcvec(v, 0, 3, 0); - if (cv) { + if (cv) { addrange(cv, '0', '9'); addrange(cv, 'a', 'f'); addrange(cv, 'A', 'F'); @@ -918,7 +951,7 @@ cclass(v, startp, endp, cases) } return cv; } - + /* - allcases - supply cvec for all case counterparts of a chr (including itself) * This is a shortcut, preferably an efficient one, for simple characters; @@ -926,9 +959,9 @@ cclass(v, startp, endp, cases) ^ static struct cvec *allcases(struct vars *, pchr); */ static struct cvec * -allcases(v, pc) - struct vars *v; /* context */ - pchr pc; /* character to get case equivs of */ +allcases( + struct vars *v, /* context */ + pchr pc) /* character to get case equivs of */ { struct cvec *cv; chr c = (chr)pc; @@ -950,7 +983,7 @@ allcases(v, pc) } return cv; } - + /* - cmp - chr-substring compare * Backrefs need this. It should preferably be efficient. @@ -959,14 +992,14 @@ allcases(v, pc) * stop at embedded NULs! ^ static int cmp(CONST chr *, CONST chr *, size_t); */ -static int /* 0 for equal, nonzero for unequal */ -cmp(x, y, len) - CONST chr *x, *y; /* strings to compare */ - size_t len; /* exact length of comparison */ +static int /* 0 for equal, nonzero for unequal */ +cmp( + CONST chr *x, CONST chr *y, /* strings to compare */ + size_t len) /* exact length of comparison */ { return memcmp(VS(x), VS(y), len*sizeof(chr)); } - + /* - casecmp - case-independent chr-substring compare * REG_ICASE backrefs need this. It should preferably be efficient. @@ -975,10 +1008,10 @@ cmp(x, y, len) * stop at embedded NULs! 
^ static int casecmp(CONST chr *, CONST chr *, size_t); */ -static int /* 0 for equal, nonzero for unequal */ -casecmp(x, y, len) - CONST chr *x, *y; /* strings to compare */ - size_t len; /* exact length of comparison */ +static int /* 0 for equal, nonzero for unequal */ +casecmp( + CONST chr *x, CONST chr *y, /* strings to compare */ + size_t len) /* exact length of comparison */ { for (; len > 0; len--, x++, y++) { if ((*x!=*y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y))) { @@ -987,3 +1020,11 @@ casecmp(x, y, len) } return 0; } + +/* + * Local Variables: + * mode: c + * c-basic-offset: 4 + * fill-column: 78 + * End: + */ diff --git a/generic/regc_nfa.c b/generic/regc_nfa.c index 9881cd4..12a016e 100644 --- a/generic/regc_nfa.c +++ b/generic/regc_nfa.c @@ -3,20 +3,20 @@ * This file is #included by regcomp.c. * * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * + * + * I'd appreciate being given credit for this package in the documentation of + * software which uses it, but that is not a requirement. 
+ * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -28,411 +28,444 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * - * - * One or two things that technically ought to be in here - * are actually in color.c, thanks to some incestuous relationships in - * the color chains. + * One or two things that technically ought to be in here are actually in + * color.c, thanks to some incestuous relationships in the color chains. */ #define NISERR() VISERR(nfa->v) #define NERR(e) VERR(nfa->v, (e)) - - + /* - newnfa - set up an NFA ^ static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *); */ static struct nfa * /* the NFA, or NULL */ -newnfa(v, cm, parent) -struct vars *v; -struct colormap *cm; -struct nfa *parent; /* NULL if primary NFA */ +newnfa( + struct vars *v, + struct colormap *cm, + struct nfa *parent) /* NULL if primary NFA */ { - struct nfa *nfa; - - nfa = (struct nfa *)MALLOC(sizeof(struct nfa)); - if (nfa == NULL) - return NULL; - - nfa->states = NULL; - nfa->slast = NULL; - nfa->free = NULL; - nfa->nstates = 0; - nfa->cm = cm; - nfa->v = v; - nfa->bos[0] = nfa->bos[1] = COLORLESS; - nfa->eos[0] = nfa->eos[1] = COLORLESS; - nfa->post = newfstate(nfa, '@'); /* number 0 */ - nfa->pre = newfstate(nfa, '>'); /* number 1 */ - nfa->parent = parent; - - nfa->init = newstate(nfa); /* may become invalid later */ - nfa->final = newstate(nfa); - if (ISERR()) { - freenfa(nfa); - return NULL; - } - rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init); - newarc(nfa, '^', 1, nfa->pre, nfa->init); - newarc(nfa, '^', 0, nfa->pre, nfa->init); - rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post); - newarc(nfa, '$', 1, nfa->final, nfa->post); - newarc(nfa, '$', 0, nfa->final, nfa->post); - - 
if (ISERR()) { - freenfa(nfa); - return NULL; - } - return nfa; -} + struct nfa *nfa; + nfa = (struct nfa *)MALLOC(sizeof(struct nfa)); + if (nfa == NULL) { + return NULL; + } + + nfa->states = NULL; + nfa->slast = NULL; + nfa->free = NULL; + nfa->nstates = 0; + nfa->cm = cm; + nfa->v = v; + nfa->bos[0] = nfa->bos[1] = COLORLESS; + nfa->eos[0] = nfa->eos[1] = COLORLESS; + nfa->post = newfstate(nfa, '@'); /* number 0 */ + nfa->pre = newfstate(nfa, '>'); /* number 1 */ + nfa->parent = parent; + + nfa->init = newstate(nfa); /* may become invalid later */ + nfa->final = newstate(nfa); + if (ISERR()) { + freenfa(nfa); + return NULL; + } + rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init); + newarc(nfa, '^', 1, nfa->pre, nfa->init); + newarc(nfa, '^', 0, nfa->pre, nfa->init); + rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post); + newarc(nfa, '$', 1, nfa->final, nfa->post); + newarc(nfa, '$', 0, nfa->final, nfa->post); + + if (ISERR()) { + freenfa(nfa); + return NULL; + } + return nfa; +} + /* - freenfa - free an entire NFA ^ static VOID freenfa(struct nfa *); */ static VOID -freenfa(nfa) -struct nfa *nfa; +freenfa( + struct nfa *nfa) { - struct state *s; + struct state *s; - while ((s = nfa->states) != NULL) { - s->nins = s->nouts = 0; /* don't worry about arcs */ - freestate(nfa, s); - } - while ((s = nfa->free) != NULL) { - nfa->free = s->next; - destroystate(nfa, s); - } - - nfa->slast = NULL; - nfa->nstates = -1; - nfa->pre = NULL; - nfa->post = NULL; - FREE(nfa); + while ((s = nfa->states) != NULL) { + s->nins = s->nouts = 0; /* don't worry about arcs */ + freestate(nfa, s); + } + while ((s = nfa->free) != NULL) { + nfa->free = s->next; + destroystate(nfa, s); + } + + nfa->slast = NULL; + nfa->nstates = -1; + nfa->pre = NULL; + nfa->post = NULL; + FREE(nfa); } - + /* - newstate - allocate an NFA state, with zero flag value ^ static struct state *newstate(struct nfa *); */ static struct state * /* NULL on error */ -newstate(nfa) -struct nfa 
*nfa; +newstate( + struct nfa *nfa) { - struct state *s; - - if (nfa->free != NULL) { - s = nfa->free; - nfa->free = s->next; - } else { - s = (struct state *)MALLOC(sizeof(struct state)); - if (s == NULL) { - NERR(REG_ESPACE); - return NULL; - } - s->oas.next = NULL; - s->free = NULL; - s->noas = 0; + struct state *s; + + if (nfa->free != NULL) { + s = nfa->free; + nfa->free = s->next; + } else { + s = (struct state *)MALLOC(sizeof(struct state)); + if (s == NULL) { + NERR(REG_ESPACE); + return NULL; } - - assert(nfa->nstates >= 0); - s->no = nfa->nstates++; - s->flag = 0; - if (nfa->states == NULL) - nfa->states = s; - s->nins = 0; - s->ins = NULL; - s->nouts = 0; - s->outs = NULL; - s->tmp = NULL; - s->next = NULL; - if (nfa->slast != NULL) { - assert(nfa->slast->next == NULL); - nfa->slast->next = s; - } - s->prev = nfa->slast; - nfa->slast = s; - return s; + s->oas.next = NULL; + s->free = NULL; + s->noas = 0; + } + + assert(nfa->nstates >= 0); + s->no = nfa->nstates++; + s->flag = 0; + if (nfa->states == NULL) { + nfa->states = s; + } + s->nins = 0; + s->ins = NULL; + s->nouts = 0; + s->outs = NULL; + s->tmp = NULL; + s->next = NULL; + if (nfa->slast != NULL) { + assert(nfa->slast->next == NULL); + nfa->slast->next = s; + } + s->prev = nfa->slast; + nfa->slast = s; + return s; } - + /* - newfstate - allocate an NFA state with a specified flag value ^ static struct state *newfstate(struct nfa *, int flag); */ static struct state * /* NULL on error */ -newfstate(nfa, flag) -struct nfa *nfa; -int flag; +newfstate( + struct nfa *nfa, + int flag) { - struct state *s; + struct state *s; - s = newstate(nfa); - if (s != NULL) - s->flag = (char)flag; - return s; + s = newstate(nfa); + if (s != NULL) { + s->flag = (char)flag; + } + return s; } - + /* - dropstate - delete a state's inarcs and outarcs and free it ^ static VOID dropstate(struct nfa *, struct state *); */ -static VOID -dropstate(nfa, s) -struct nfa *nfa; -struct state *s; +static void +dropstate( + struct 
nfa *nfa, + struct state *s) { - struct arc *a; + struct arc *a; - while ((a = s->ins) != NULL) - freearc(nfa, a); - while ((a = s->outs) != NULL) - freearc(nfa, a); - freestate(nfa, s); + while ((a = s->ins) != NULL) { + freearc(nfa, a); + } + while ((a = s->outs) != NULL) { + freearc(nfa, a); + } + freestate(nfa, s); } - + /* - freestate - free a state, which has no in-arcs or out-arcs ^ static VOID freestate(struct nfa *, struct state *); */ -static VOID -freestate(nfa, s) -struct nfa *nfa; -struct state *s; +static void +freestate( + struct nfa *nfa, + struct state *s) { - assert(s != NULL); - assert(s->nins == 0 && s->nouts == 0); - - s->no = FREESTATE; - s->flag = 0; - if (s->next != NULL) - s->next->prev = s->prev; - else { - assert(s == nfa->slast); - nfa->slast = s->prev; - } - if (s->prev != NULL) - s->prev->next = s->next; - else { - assert(s == nfa->states); - nfa->states = s->next; - } - s->prev = NULL; - s->next = nfa->free; /* don't delete it, put it on the free list */ - nfa->free = s; + assert(s != NULL); + assert(s->nins == 0 && s->nouts == 0); + + s->no = FREESTATE; + s->flag = 0; + if (s->next != NULL) { + s->next->prev = s->prev; + } else { + assert(s == nfa->slast); + nfa->slast = s->prev; + } + if (s->prev != NULL) { + s->prev->next = s->next; + } else { + assert(s == nfa->states); + nfa->states = s->next; + } + s->prev = NULL; + s->next = nfa->free; /* don't delete it, put it on the free + * list */ + nfa->free = s; } - + /* - destroystate - really get rid of an already-freed state ^ static VOID destroystate(struct nfa *, struct state *); */ -static VOID -destroystate(nfa, s) -struct nfa *nfa; -struct state *s; +static void +destroystate( + struct nfa *nfa, + struct state *s) { - struct arcbatch *ab; - struct arcbatch *abnext; - - assert(s->no == FREESTATE); - for (ab = s->oas.next; ab != NULL; ab = abnext) { - abnext = ab->next; - FREE(ab); - } - s->ins = NULL; - s->outs = NULL; - s->next = NULL; - FREE(s); + struct arcbatch *ab; + struct 
arcbatch *abnext; + + assert(s->no == FREESTATE); + for (ab = s->oas.next; ab != NULL; ab = abnext) { + abnext = ab->next; + FREE(ab); + } + s->ins = NULL; + s->outs = NULL; + s->next = NULL; + FREE(s); } - + /* - newarc - set up a new arc within an NFA - ^ static VOID newarc(struct nfa *, int, pcolor, struct state *, + ^ static VOID newarc(struct nfa *, int, pcolor, struct state *, ^ struct state *); */ -static VOID -newarc(nfa, t, co, from, to) -struct nfa *nfa; -int t; -pcolor co; -struct state *from; -struct state *to; +static void +newarc( + struct nfa *nfa, + int t, + pcolor co, + struct state *from, + struct state *to) { - struct arc *a; - - assert(from != NULL && to != NULL); - - /* check for duplicates */ - for (a = from->outs; a != NULL; a = a->outchain) - if (a->to == to && a->co == co && a->type == t) - return; - - a = allocarc(nfa, from); - if (NISERR()) - return; - assert(a != NULL); - - a->type = t; - a->co = (color)co; - a->to = to; - a->from = from; + struct arc *a; - /* - * Put the new arc on the beginning, not the end, of the chains. - * Not only is this easier, it has the very useful side effect that - * deleting the most-recently-added arc is the cheapest case rather - * than the most expensive one. - */ - a->inchain = to->ins; - to->ins = a; - a->outchain = from->outs; - from->outs = a; + assert(from != NULL && to != NULL); - from->nouts++; - to->nins++; + /* + * Check for duplicates. + */ - if (COLORED(a) && nfa->parent == NULL) - colorchain(nfa->cm, a); + for (a = from->outs; a != NULL; a = a->outchain) { + if (a->to == to && a->co == co && a->type == t) { + return; + } + } + a = allocarc(nfa, from); + if (NISERR()) { return; + } + assert(a != NULL); + + a->type = t; + a->co = (color)co; + a->to = to; + a->from = from; + + /* + * Put the new arc on the beginning, not the end, of the chains. 
Not only + * is this easier, it has the very useful side effect that deleting the + * most-recently-added arc is the cheapest case rather than the most + * expensive one. + */ + + a->inchain = to->ins; + to->ins = a; + a->outchain = from->outs; + from->outs = a; + + from->nouts++; + to->nins++; + + if (COLORED(a) && nfa->parent == NULL) { + colorchain(nfa->cm, a); + } + + return; } - + /* - allocarc - allocate a new out-arc within a state ^ static struct arc *allocarc(struct nfa *, struct state *); */ static struct arc * /* NULL for failure */ -allocarc(nfa, s) -struct nfa *nfa; -struct state *s; +allocarc( + struct nfa *nfa, + struct state *s) { - struct arc *a; - struct arcbatch *new; - int i; - - /* shortcut */ - if (s->free == NULL && s->noas < ABSIZE) { - a = &s->oas.a[s->noas]; - s->noas++; - return a; - } + struct arc *a; + struct arcbatch *new; + int i; - /* if none at hand, get more */ - if (s->free == NULL) { - new = (struct arcbatch *)MALLOC(sizeof(struct arcbatch)); - if (new == NULL) { - NERR(REG_ESPACE); - return NULL; - } - new->next = s->oas.next; - s->oas.next = new; + /* + * Shortcut + */ - for (i = 0; i < ABSIZE; i++) { - new->a[i].type = 0; - new->a[i].freechain = &new->a[i+1]; - } - new->a[ABSIZE-1].freechain = NULL; - s->free = &new->a[0]; + if (s->free == NULL && s->noas < ABSIZE) { + a = &s->oas.a[s->noas]; + s->noas++; + return a; + } + + /* + * if none at hand, get more + */ + + if (s->free == NULL) { + new = (struct arcbatch *)MALLOC(sizeof(struct arcbatch)); + if (new == NULL) { + NERR(REG_ESPACE); + return NULL; } - assert(s->free != NULL); + new->next = s->oas.next; + s->oas.next = new; - a = s->free; - s->free = a->freechain; - return a; + for (i = 0; i < ABSIZE; i++) { + new->a[i].type = 0; + new->a[i].freechain = &new->a[i+1]; + } + new->a[ABSIZE-1].freechain = NULL; + s->free = &new->a[0]; + } + assert(s->free != NULL); + + a = s->free; + s->free = a->freechain; + return a; } - + /* - freearc - free an arc ^ static VOID 
freearc(struct nfa *, struct arc *); */ -static VOID -freearc(nfa, victim) -struct nfa *nfa; -struct arc *victim; +static void +freearc( + struct nfa *nfa, + struct arc *victim) { - struct state *from = victim->from; - struct state *to = victim->to; - struct arc *a; - - assert(victim->type != 0); - - /* take it off color chain if necessary */ - if (COLORED(victim) && nfa->parent == NULL) - uncolorchain(nfa->cm, victim); - - /* take it off source's out-chain */ - assert(from != NULL); - assert(from->outs != NULL); - a = from->outs; - if (a == victim) /* simple case: first in chain */ - from->outs = victim->outchain; - else { - for (; a != NULL && a->outchain != victim; a = a->outchain) - continue; - assert(a != NULL); - a->outchain = victim->outchain; + struct state *from = victim->from; + struct state *to = victim->to; + struct arc *a; + + assert(victim->type != 0); + + /* + * Take it off color chain if necessary. + */ + + if (COLORED(victim) && nfa->parent == NULL) { + uncolorchain(nfa->cm, victim); + } + + /* + * Take it off source's out-chain. + */ + + assert(from != NULL); + assert(from->outs != NULL); + a = from->outs; + if (a == victim) { /* simple case: first in chain */ + from->outs = victim->outchain; + } else { + for (; a != NULL && a->outchain != victim; a = a->outchain) { + continue; } - from->nouts--; - - /* take it off target's in-chain */ - assert(to != NULL); - assert(to->ins != NULL); - a = to->ins; - if (a == victim) /* simple case: first in chain */ - to->ins = victim->inchain; - else { - for (; a != NULL && a->inchain != victim; a = a->inchain) - continue; - assert(a != NULL); - a->inchain = victim->inchain; + assert(a != NULL); + a->outchain = victim->outchain; + } + from->nouts--; + + /* + * Take it off target's in-chain. 
+ */ + + assert(to != NULL); + assert(to->ins != NULL); + a = to->ins; + if (a == victim) { /* simple case: first in chain */ + to->ins = victim->inchain; + } else { + for (; a != NULL && a->inchain != victim; a = a->inchain) { + continue; } - to->nins--; - - /* clean up and place on free list */ - victim->type = 0; - victim->from = NULL; /* precautions... */ - victim->to = NULL; - victim->inchain = NULL; - victim->outchain = NULL; - victim->freechain = from->free; - from->free = victim; + assert(a != NULL); + a->inchain = victim->inchain; + } + to->nins--; + + /* + * Clean up and place on free list. + */ + + victim->type = 0; + victim->from = NULL; /* precautions... */ + victim->to = NULL; + victim->inchain = NULL; + victim->outchain = NULL; + victim->freechain = from->free; + from->free = victim; } - + /* - findarc - find arc, if any, from given source with given type and color * If there is more than one such arc, the result is random. ^ static struct arc *findarc(struct state *, int, pcolor); */ static struct arc * -findarc(s, type, co) -struct state *s; -int type; -pcolor co; +findarc( + struct state *s, + int type, + pcolor co) { - struct arc *a; + struct arc *a; - for (a = s->outs; a != NULL; a = a->outchain) - if (a->type == type && a->co == co) - return a; - return NULL; + for (a = s->outs; a != NULL; a = a->outchain) { + if (a->type == type && a->co == co) { + return a; + } + } + return NULL; } - + /* - cparc - allocate a new arc within an NFA, copying details from old one - ^ static VOID cparc(struct nfa *, struct arc *, struct state *, + ^ static VOID cparc(struct nfa *, struct arc *, struct state *, ^ struct state *); */ -static VOID -cparc(nfa, oa, from, to) -struct nfa *nfa; -struct arc *oa; -struct state *from; -struct state *to; +static void +cparc( + struct nfa *nfa, + struct arc *oa, + struct state *from, + struct state *to) { - newarc(nfa, oa->type, oa->co, from, to); + newarc(nfa, oa->type, oa->co, from, to); } - + /* - moveins - move all in 
arcs of a state to another state * You might think this could be done better by just updating the @@ -441,339 +474,362 @@ struct state *to; * ones to exploit the suppression built into newarc. ^ static VOID moveins(struct nfa *, struct state *, struct state *); */ -static VOID -moveins(nfa, old, new) -struct nfa *nfa; -struct state *old; -struct state *new; +static void +moveins( + struct nfa *nfa, + struct state *old, + struct state *new) { - struct arc *a; + struct arc *a; - assert(old != new); + assert(old != new); - while ((a = old->ins) != NULL) { - cparc(nfa, a, a->from, new); - freearc(nfa, a); - } - assert(old->nins == 0); - assert(old->ins == NULL); + while ((a = old->ins) != NULL) { + cparc(nfa, a, a->from, new); + freearc(nfa, a); + } + assert(old->nins == 0); + assert(old->ins == NULL); } - + /* - copyins - copy all in arcs of a state to another state ^ static VOID copyins(struct nfa *, struct state *, struct state *); */ -static VOID -copyins(nfa, old, new) -struct nfa *nfa; -struct state *old; -struct state *new; +static void +copyins( + struct nfa *nfa, + struct state *old, + struct state *new) { - struct arc *a; + struct arc *a; - assert(old != new); + assert(old != new); - for (a = old->ins; a != NULL; a = a->inchain) - cparc(nfa, a, a->from, new); + for (a = old->ins; a != NULL; a = a->inchain) { + cparc(nfa, a, a->from, new); + } } - + /* - moveouts - move all out arcs of a state to another state ^ static VOID moveouts(struct nfa *, struct state *, struct state *); */ -static VOID -moveouts(nfa, old, new) -struct nfa *nfa; -struct state *old; -struct state *new; +static void +moveouts( + struct nfa *nfa, + struct state *old, + struct state *new) { - struct arc *a; + struct arc *a; - assert(old != new); + assert(old != new); - while ((a = old->outs) != NULL) { - cparc(nfa, a, new, a->to); - freearc(nfa, a); - } + while ((a = old->outs) != NULL) { + cparc(nfa, a, new, a->to); + freearc(nfa, a); + } } - + /* - copyouts - copy all out arcs of a state 
to another state ^ static VOID copyouts(struct nfa *, struct state *, struct state *); */ -static VOID -copyouts(nfa, old, new) -struct nfa *nfa; -struct state *old; -struct state *new; +static void +copyouts( + struct nfa *nfa, + struct state *old, + struct state *new) { - struct arc *a; + struct arc *a; - assert(old != new); + assert(old != new); - for (a = old->outs; a != NULL; a = a->outchain) - cparc(nfa, a, new, a->to); + for (a = old->outs; a != NULL; a = a->outchain) { + cparc(nfa, a, new, a->to); + } } - + /* - cloneouts - copy out arcs of a state to another state pair, modifying type ^ static VOID cloneouts(struct nfa *, struct state *, struct state *, ^ struct state *, int); */ -static VOID -cloneouts(nfa, old, from, to, type) -struct nfa *nfa; -struct state *old; -struct state *from; -struct state *to; -int type; +static void +cloneouts( + struct nfa *nfa, + struct state *old, + struct state *from, + struct state *to, + int type) { - struct arc *a; + struct arc *a; - assert(old != from); + assert(old != from); - for (a = old->outs; a != NULL; a = a->outchain) - newarc(nfa, type, a->co, from, to); + for (a = old->outs; a != NULL; a = a->outchain) { + newarc(nfa, type, a->co, from, to); + } } - + /* - delsub - delete a sub-NFA, updating subre pointers if necessary * This uses a recursive traversal of the sub-NFA, marking already-seen * states using their tmp pointer. ^ static VOID delsub(struct nfa *, struct state *, struct state *); */ -static VOID -delsub(nfa, lp, rp) -struct nfa *nfa; -struct state *lp; /* the sub-NFA goes from here... */ -struct state *rp; /* ...to here, *not* inclusive */ +static void +delsub( + struct nfa *nfa, + struct state *lp, /* the sub-NFA goes from here... 
*/ + struct state *rp) /* ...to here, *not* inclusive */ { - assert(lp != rp); + assert(lp != rp); - rp->tmp = rp; /* mark end */ + rp->tmp = rp; /* mark end */ - deltraverse(nfa, lp, lp); - assert(lp->nouts == 0 && rp->nins == 0); /* did the job */ - assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */ + deltraverse(nfa, lp, lp); + assert(lp->nouts == 0 && rp->nins == 0); /* did the job */ + assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */ - rp->tmp = NULL; /* unmark end */ - lp->tmp = NULL; /* and begin, marked by deltraverse */ + rp->tmp = NULL; /* unmark end */ + lp->tmp = NULL; /* and begin, marked by deltraverse */ } - + /* - deltraverse - the recursive heart of delsub * This routine's basic job is to destroy all out-arcs of the state. ^ static VOID deltraverse(struct nfa *, struct state *, struct state *); */ -static VOID -deltraverse(nfa, leftend, s) -struct nfa *nfa; -struct state *leftend; -struct state *s; +static void +deltraverse( + struct nfa *nfa, + struct state *leftend, + struct state *s) { - struct arc *a; - struct state *to; - - if (s->nouts == 0) - return; /* nothing to do */ - if (s->tmp != NULL) - return; /* already in progress */ - - s->tmp = s; /* mark as in progress */ - - while ((a = s->outs) != NULL) { - to = a->to; - deltraverse(nfa, leftend, to); - assert(to->nouts == 0 || to->tmp != NULL); - freearc(nfa, a); - if (to->nins == 0 && to->tmp == NULL) { - assert(to->nouts == 0); - freestate(nfa, to); - } + struct arc *a; + struct state *to; + + if (s->nouts == 0) { + return; /* nothing to do */ + } + if (s->tmp != NULL) { + return; /* already in progress */ + } + + s->tmp = s; /* mark as in progress */ + + while ((a = s->outs) != NULL) { + to = a->to; + deltraverse(nfa, leftend, to); + assert(to->nouts == 0 || to->tmp != NULL); + freearc(nfa, a); + if (to->nins == 0 && to->tmp == NULL) { + assert(to->nouts == 0); + freestate(nfa, to); } + } - assert(s->no != FREESTATE); /* we're still here */ - assert(s == 
leftend || s->nins != 0); /* and still reachable */ - assert(s->nouts == 0); /* but have no outarcs */ + assert(s->no != FREESTATE); /* we're still here */ + assert(s == leftend || s->nins != 0); /* and still reachable */ + assert(s->nouts == 0); /* but have no outarcs */ - s->tmp = NULL; /* we're done here */ + s->tmp = NULL; /* we're done here */ } - + /* - dupnfa - duplicate sub-NFA * Another recursive traversal, this time using tmp to point to duplicates * as well as mark already-seen states. (You knew there was a reason why * it's a state pointer, didn't you? :-)) - ^ static VOID dupnfa(struct nfa *, struct state *, struct state *, + ^ static VOID dupnfa(struct nfa *, struct state *, struct state *, ^ struct state *, struct state *); */ -static VOID -dupnfa(nfa, start, stop, from, to) -struct nfa *nfa; -struct state *start; /* duplicate of subNFA starting here */ -struct state *stop; /* and stopping here */ -struct state *from; /* stringing duplicate from here */ -struct state *to; /* to here */ +static void +dupnfa( + struct nfa *nfa, + struct state *start, /* duplicate of subNFA starting here */ + struct state *stop, /* and stopping here */ + struct state *from, /* stringing duplicate from here */ + struct state *to) /* to here */ { - if (start == stop) { - newarc(nfa, EMPTY, 0, from, to); - return; - } + if (start == stop) { + newarc(nfa, EMPTY, 0, from, to); + return; + } - stop->tmp = to; - duptraverse(nfa, start, from); - /* done, except for clearing out the tmp pointers */ + stop->tmp = to; + duptraverse(nfa, start, from); + /* done, except for clearing out the tmp pointers */ - stop->tmp = NULL; - cleartraverse(nfa, start); + stop->tmp = NULL; + cleartraverse(nfa, start); } - + /* - duptraverse - recursive heart of dupnfa ^ static VOID duptraverse(struct nfa *, struct state *, struct state *); */ -static VOID -duptraverse(nfa, s, stmp) -struct nfa *nfa; -struct state *s; -struct state *stmp; /* s's duplicate, or NULL */ +static void +duptraverse( + 
struct nfa *nfa, + struct state *s, + struct state *stmp) /* s's duplicate, or NULL */ { - struct arc *a; + struct arc *a; - if (s->tmp != NULL) - return; /* already done */ + if (s->tmp != NULL) { + return; /* already done */ + } - s->tmp = (stmp == NULL) ? newstate(nfa) : stmp; - if (s->tmp == NULL) { - assert(NISERR()); - return; - } + s->tmp = (stmp == NULL) ? newstate(nfa) : stmp; + if (s->tmp == NULL) { + assert(NISERR()); + return; + } - for (a = s->outs; a != NULL && !NISERR(); a = a->outchain) { - duptraverse(nfa, a->to, (struct state *)NULL); - assert(a->to->tmp != NULL); - cparc(nfa, a, s->tmp, a->to->tmp); - } + for (a = s->outs; a != NULL && !NISERR(); a = a->outchain) { + duptraverse(nfa, a->to, (struct state *)NULL); + assert(a->to->tmp != NULL); + cparc(nfa, a, s->tmp, a->to->tmp); + } } - + /* - cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set ^ static VOID cleartraverse(struct nfa *, struct state *); */ -static VOID -cleartraverse(nfa, s) -struct nfa *nfa; -struct state *s; +static void +cleartraverse( + struct nfa *nfa, + struct state *s) { - struct arc *a; + struct arc *a; - if (s->tmp == NULL) - return; - s->tmp = NULL; + if (s->tmp == NULL) { + return; + } + s->tmp = NULL; - for (a = s->outs; a != NULL; a = a->outchain) - cleartraverse(nfa, a->to); + for (a = s->outs; a != NULL; a = a->outchain) { + cleartraverse(nfa, a->to); + } } - + /* - specialcolors - fill in special colors for an NFA ^ static VOID specialcolors(struct nfa *); */ -static VOID -specialcolors(nfa) -struct nfa *nfa; +static void +specialcolors( + struct nfa *nfa) { - /* false colors for BOS, BOL, EOS, EOL */ - if (nfa->parent == NULL) { - nfa->bos[0] = pseudocolor(nfa->cm); - nfa->bos[1] = pseudocolor(nfa->cm); - nfa->eos[0] = pseudocolor(nfa->cm); - nfa->eos[1] = pseudocolor(nfa->cm); - } else { - assert(nfa->parent->bos[0] != COLORLESS); - nfa->bos[0] = nfa->parent->bos[0]; - assert(nfa->parent->bos[1] != COLORLESS); - nfa->bos[1] = 
nfa->parent->bos[1]; - assert(nfa->parent->eos[0] != COLORLESS); - nfa->eos[0] = nfa->parent->eos[0]; - assert(nfa->parent->eos[1] != COLORLESS); - nfa->eos[1] = nfa->parent->eos[1]; - } + /* + * False colors for BOS, BOL, EOS, EOL + */ + + if (nfa->parent == NULL) { + nfa->bos[0] = pseudocolor(nfa->cm); + nfa->bos[1] = pseudocolor(nfa->cm); + nfa->eos[0] = pseudocolor(nfa->cm); + nfa->eos[1] = pseudocolor(nfa->cm); + } else { + assert(nfa->parent->bos[0] != COLORLESS); + nfa->bos[0] = nfa->parent->bos[0]; + assert(nfa->parent->bos[1] != COLORLESS); + nfa->bos[1] = nfa->parent->bos[1]; + assert(nfa->parent->eos[0] != COLORLESS); + nfa->eos[0] = nfa->parent->eos[0]; + assert(nfa->parent->eos[1] != COLORLESS); + nfa->eos[1] = nfa->parent->eos[1]; + } } - + /* - optimize - optimize an NFA ^ static long optimize(struct nfa *, FILE *); */ static long /* re_info bits */ -optimize(nfa, f) -struct nfa *nfa; -FILE *f; /* for debug output; NULL none */ +optimize( + struct nfa *nfa, + FILE *f) /* for debug output; NULL none */ { - int verbose = (f != NULL) ? 1 : 0; - - if (verbose) - fprintf(f, "\ninitial cleanup:\n"); - cleanup(nfa); /* may simplify situation */ - if (verbose) - dumpnfa(nfa, f); - if (verbose) - fprintf(f, "\nempties:\n"); - fixempties(nfa, f); /* get rid of EMPTY arcs */ - if (verbose) - fprintf(f, "\nconstraints:\n"); - pullback(nfa, f); /* pull back constraints backward */ - pushfwd(nfa, f); /* push fwd constraints forward */ - if (verbose) - fprintf(f, "\nfinal cleanup:\n"); - cleanup(nfa); /* final tidying */ - return analyze(nfa); /* and analysis */ + int verbose = (f != NULL) ? 
1 : 0; + + if (verbose) { + fprintf(f, "\ninitial cleanup:\n"); + } + cleanup(nfa); /* may simplify situation */ + if (verbose) { + dumpnfa(nfa, f); + } + if (verbose) { + fprintf(f, "\nempties:\n"); + } + fixempties(nfa, f); /* get rid of EMPTY arcs */ + if (verbose) { + fprintf(f, "\nconstraints:\n"); + } + pullback(nfa, f); /* pull back constraints backward */ + pushfwd(nfa, f); /* push fwd constraints forward */ + if (verbose) { + fprintf(f, "\nfinal cleanup:\n"); + } + cleanup(nfa); /* final tidying */ + return analyze(nfa); /* and analysis */ } - + /* - pullback - pull back constraints backward to (with luck) eliminate them ^ static VOID pullback(struct nfa *, FILE *); */ -static VOID -pullback(nfa, f) -struct nfa *nfa; -FILE *f; /* for debug output; NULL none */ +static void +pullback( + struct nfa *nfa, + FILE *f) /* for debug output; NULL none */ { - struct state *s; - struct state *nexts; - struct arc *a; - struct arc *nexta; - int progress; - - /* find and pull until there are no more */ - do { - progress = 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { - nexts = s->next; - for (a = s->outs; a != NULL && !NISERR(); a = nexta) { - nexta = a->outchain; - if (a->type == '^' || a->type == BEHIND) - if (pull(nfa, a)) - progress = 1; - assert(nexta == NULL || s->no != FREESTATE); - } - } - if (progress && f != NULL) - dumpnfa(nfa, f); - } while (progress && !NISERR()); - if (NISERR()) - return; - - for (a = nfa->pre->outs; a != NULL; a = nexta) { + struct state *s; + struct state *nexts; + struct arc *a; + struct arc *nexta; + int progress; + + /* + * Find and pull until there are no more. 
+ */ + + do { + progress = 0; + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + nexts = s->next; + for (a = s->outs; a != NULL && !NISERR(); a = nexta) { nexta = a->outchain; - if (a->type == '^') { - assert(a->co == 0 || a->co == 1); - newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to); - freearc(nfa, a); + if (a->type == '^' || a->type == BEHIND) { + if (pull(nfa, a)) { + progress = 1; + } } + assert(nexta == NULL || s->no != FREESTATE); + } + } + if (progress && f != NULL) { + dumpnfa(nfa, f); + } + } while (progress && !NISERR()); + if (NISERR()) { + return; + } + + for (a = nfa->pre->outs; a != NULL; a = nexta) { + nexta = a->outchain; + if (a->type == '^') { + assert(a->co == 0 || a->co == 1); + newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to); + freearc(nfa, a); } + } } - + /* - pull - pull a back constraint backward past its source state * A significant property of this function is that it deletes at most @@ -782,116 +838,136 @@ FILE *f; /* for debug output; NULL none */ ^ static int pull(struct nfa *, struct arc *); */ static int /* 0 couldn't, 1 could */ -pull(nfa, con) -struct nfa *nfa; -struct arc *con; +pull( + struct nfa *nfa, + struct arc *con) { - struct state *from = con->from; - struct state *to = con->to; - struct arc *a; - struct arc *nexta; - struct state *s; - - if (from == to) { /* circular constraint is pointless */ - freearc(nfa, con); - return 1; + struct state *from = con->from; + struct state *to = con->to; + struct arc *a; + struct arc *nexta; + struct state *s; + + if (from == to) { /* circular constraint is pointless */ + freearc(nfa, con); + return 1; + } + if (from->flag) { /* can't pull back beyond start */ + return 0; + } + if (from->nins == 0) { /* unreachable */ + freearc(nfa, con); + return 1; + } + + /* + * First, clone from state if necessary to avoid other outarcs. 
+ */ + + if (from->nouts > 1) { + s = newstate(nfa); + if (NISERR()) { + return 0; } - if (from->flag) /* can't pull back beyond start */ + assert(to != from); /* con is not an inarc */ + copyins(nfa, from, s); /* duplicate inarcs */ + cparc(nfa, con, s, to); /* move constraint arc */ + freearc(nfa, con); + from = s; + con = from->outs; + } + assert(from->nouts == 1); + + /* + * Propagate the constraint into the from state's inarcs. + */ + + for (a = from->ins; a != NULL; a = nexta) { + nexta = a->inchain; + switch (combine(con, a)) { + case INCOMPATIBLE: /* destroy the arc */ + freearc(nfa, a); + break; + case SATISFIED: /* no action needed */ + break; + case COMPATIBLE: /* swap the two arcs, more or less */ + s = newstate(nfa); + if (NISERR()) { return 0; - if (from->nins == 0) { /* unreachable */ - freearc(nfa, con); - return 1; - } - - /* first, clone from state if necessary to avoid other outarcs */ - if (from->nouts > 1) { - s = newstate(nfa); - if (NISERR()) - return 0; - assert(to != from); /* con is not an inarc */ - copyins(nfa, from, s); /* duplicate inarcs */ - cparc(nfa, con, s, to); /* move constraint arc */ - freearc(nfa, con); - from = s; - con = from->outs; + } + cparc(nfa, a, s, to); /* anticipate move */ + cparc(nfa, con, a->from, s); + if (NISERR()) { + return 0; + } + freearc(nfa, a); + break; + default: + assert(NOTREACHED); + break; } - assert(from->nouts == 1); + } - /* propagate the constraint into the from state's inarcs */ - for (a = from->ins; a != NULL; a = nexta) { - nexta = a->inchain; - switch (combine(con, a)) { - case INCOMPATIBLE: /* destroy the arc */ - freearc(nfa, a); - break; - case SATISFIED: /* no action needed */ - break; - case COMPATIBLE: /* swap the two arcs, more or less */ - s = newstate(nfa); - if (NISERR()) - return 0; - cparc(nfa, a, s, to); /* anticipate move */ - cparc(nfa, con, a->from, s); - if (NISERR()) - return 0; - freearc(nfa, a); - break; - default: - assert(NOTREACHED); - break; - } - } + /* + * Remaining 
inarcs, if any, incorporate the constraint. + */ - /* remaining inarcs, if any, incorporate the constraint */ - moveins(nfa, from, to); - dropstate(nfa, from); /* will free the constraint */ - return 1; + moveins(nfa, from, to); + dropstate(nfa, from); /* will free the constraint */ + return 1; } - + /* - pushfwd - push forward constraints forward to (with luck) eliminate them ^ static VOID pushfwd(struct nfa *, FILE *); */ -static VOID -pushfwd(nfa, f) -struct nfa *nfa; -FILE *f; /* for debug output; NULL none */ +static void +pushfwd( + struct nfa *nfa, + FILE *f) /* for debug output; NULL none */ { - struct state *s; - struct state *nexts; - struct arc *a; - struct arc *nexta; - int progress; - - /* find and push until there are no more */ - do { - progress = 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { - nexts = s->next; - for (a = s->ins; a != NULL && !NISERR(); a = nexta) { - nexta = a->inchain; - if (a->type == '$' || a->type == AHEAD) - if (push(nfa, a)) - progress = 1; - assert(nexta == NULL || s->no != FREESTATE); - } - } - if (progress && f != NULL) - dumpnfa(nfa, f); - } while (progress && !NISERR()); - if (NISERR()) - return; - - for (a = nfa->post->ins; a != NULL; a = nexta) { + struct state *s; + struct state *nexts; + struct arc *a; + struct arc *nexta; + int progress; + + /* + * Find and push until there are no more. 
+ */ + + do { + progress = 0; + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + nexts = s->next; + for (a = s->ins; a != NULL && !NISERR(); a = nexta) { nexta = a->inchain; - if (a->type == '$') { - assert(a->co == 0 || a->co == 1); - newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to); - freearc(nfa, a); + if (a->type == '$' || a->type == AHEAD) { + if (push(nfa, a)) { + progress = 1; + } } + assert(nexta == NULL || s->no != FREESTATE); + } + } + if (progress && f != NULL) { + dumpnfa(nfa, f); + } + } while (progress && !NISERR()); + if (NISERR()) { + return; + } + + for (a = nfa->post->ins; a != NULL; a = nexta) { + nexta = a->inchain; + if (a->type == '$') { + assert(a->co == 0 || a->co == 1); + newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to); + freearc(nfa, a); } + } } - + /* - push - push a forward constraint forward past its destination state * A significant property of this function is that it deletes at most @@ -900,71 +976,84 @@ FILE *f; /* for debug output; NULL none */ ^ static int push(struct nfa *, struct arc *); */ static int /* 0 couldn't, 1 could */ -push(nfa, con) -struct nfa *nfa; -struct arc *con; +push( + struct nfa *nfa, + struct arc *con) { - struct state *from = con->from; - struct state *to = con->to; - struct arc *a; - struct arc *nexta; - struct state *s; - - if (to == from) { /* circular constraint is pointless */ - freearc(nfa, con); - return 1; + struct state *from = con->from; + struct state *to = con->to; + struct arc *a; + struct arc *nexta; + struct state *s; + + if (to == from) { /* circular constraint is pointless */ + freearc(nfa, con); + return 1; + } + if (to->flag) { /* can't push forward beyond end */ + return 0; + } + if (to->nouts == 0) { /* dead end */ + freearc(nfa, con); + return 1; + } + + /* + * First, clone to state if necessary to avoid other inarcs. 
+ */ + + if (to->nins > 1) { + s = newstate(nfa); + if (NISERR()) { + return 0; } - if (to->flag) /* can't push forward beyond end */ + copyouts(nfa, to, s); /* duplicate outarcs */ + cparc(nfa, con, from, s); /* move constraint */ + freearc(nfa, con); + to = s; + con = to->ins; + } + assert(to->nins == 1); + + /* + * Propagate the constraint into the to state's outarcs. + */ + + for (a = to->outs; a != NULL; a = nexta) { + nexta = a->outchain; + switch (combine(con, a)) { + case INCOMPATIBLE: /* destroy the arc */ + freearc(nfa, a); + break; + case SATISFIED: /* no action needed */ + break; + case COMPATIBLE: /* swap the two arcs, more or less */ + s = newstate(nfa); + if (NISERR()) { return 0; - if (to->nouts == 0) { /* dead end */ - freearc(nfa, con); - return 1; - } - - /* first, clone to state if necessary to avoid other inarcs */ - if (to->nins > 1) { - s = newstate(nfa); - if (NISERR()) - return 0; - copyouts(nfa, to, s); /* duplicate outarcs */ - cparc(nfa, con, from, s); /* move constraint */ - freearc(nfa, con); - to = s; - con = to->ins; + } + cparc(nfa, con, s, a->to); /* anticipate move */ + cparc(nfa, a, from, s); + if (NISERR()) { + return 0; + } + freearc(nfa, a); + break; + default: + assert(NOTREACHED); + break; } - assert(to->nins == 1); + } - /* propagate the constraint into the to state's outarcs */ - for (a = to->outs; a != NULL; a = nexta) { - nexta = a->outchain; - switch (combine(con, a)) { - case INCOMPATIBLE: /* destroy the arc */ - freearc(nfa, a); - break; - case SATISFIED: /* no action needed */ - break; - case COMPATIBLE: /* swap the two arcs, more or less */ - s = newstate(nfa); - if (NISERR()) - return 0; - cparc(nfa, con, s, a->to); /* anticipate move */ - cparc(nfa, a, from, s); - if (NISERR()) - return 0; - freearc(nfa, a); - break; - default: - assert(NOTREACHED); - break; - } - } + /* + * Remaining outarcs, if any, incorporate the constraint. 
+ */ - /* remaining outarcs, if any, incorporate the constraint */ - moveouts(nfa, to, from); - dropstate(nfa, to); /* will free the constraint */ - return 1; + moveouts(nfa, to, from); + dropstate(nfa, to); /* will free the constraint */ + return 1; } - + /* - combine - constraint lands on an arc, what happens? ^ #def INCOMPATIBLE 1 // destroys arc @@ -973,88 +1062,95 @@ struct arc *con; ^ static int combine(struct arc *, struct arc *); */ static int -combine(con, a) -struct arc *con; -struct arc *a; +combine( + struct arc *con, + struct arc *a) { -# define CA(ct,at) (((ct)<<CHAR_BIT) | (at)) - - switch (CA(con->type, a->type)) { - case CA('^', PLAIN): /* newlines are handled separately */ - case CA('$', PLAIN): - return INCOMPATIBLE; - break; - case CA(AHEAD, PLAIN): /* color constraints meet colors */ - case CA(BEHIND, PLAIN): - if (con->co == a->co) - return SATISFIED; - return INCOMPATIBLE; - break; - case CA('^', '^'): /* collision, similar constraints */ - case CA('$', '$'): - case CA(AHEAD, AHEAD): - case CA(BEHIND, BEHIND): - if (con->co == a->co) /* true duplication */ - return SATISFIED; - return INCOMPATIBLE; - break; - case CA('^', BEHIND): /* collision, dissimilar constraints */ - case CA(BEHIND, '^'): - case CA('$', AHEAD): - case CA(AHEAD, '$'): - return INCOMPATIBLE; - break; - case CA('^', '$'): /* constraints passing each other */ - case CA('^', AHEAD): - case CA(BEHIND, '$'): - case CA(BEHIND, AHEAD): - case CA('$', '^'): - case CA('$', BEHIND): - case CA(AHEAD, '^'): - case CA(AHEAD, BEHIND): - case CA('^', LACON): - case CA(BEHIND, LACON): - case CA('$', LACON): - case CA(AHEAD, LACON): - return COMPATIBLE; - break; +#define CA(ct,at) (((ct)<<CHAR_BIT) | (at)) + + switch (CA(con->type, a->type)) { + case CA('^', PLAIN): /* newlines are handled separately */ + case CA('$', PLAIN): + return INCOMPATIBLE; + break; + case CA(AHEAD, PLAIN): /* color constraints meet colors */ + case CA(BEHIND, PLAIN): + if (con->co == a->co) { + return SATISFIED; + 
} + return INCOMPATIBLE; + break; + case CA('^', '^'): /* collision, similar constraints */ + case CA('$', '$'): + case CA(AHEAD, AHEAD): + case CA(BEHIND, BEHIND): + if (con->co == a->co) { /* true duplication */ + return SATISFIED; } - assert(NOTREACHED); - return INCOMPATIBLE; /* for benefit of blind compilers */ + return INCOMPATIBLE; + break; + case CA('^', BEHIND): /* collision, dissimilar constraints */ + case CA(BEHIND, '^'): + case CA('$', AHEAD): + case CA(AHEAD, '$'): + return INCOMPATIBLE; + break; + case CA('^', '$'): /* constraints passing each other */ + case CA('^', AHEAD): + case CA(BEHIND, '$'): + case CA(BEHIND, AHEAD): + case CA('$', '^'): + case CA('$', BEHIND): + case CA(AHEAD, '^'): + case CA(AHEAD, BEHIND): + case CA('^', LACON): + case CA(BEHIND, LACON): + case CA('$', LACON): + case CA(AHEAD, LACON): + return COMPATIBLE; + break; + } + assert(NOTREACHED); + return INCOMPATIBLE; /* for benefit of blind compilers */ } - + /* - fixempties - get rid of EMPTY arcs ^ static VOID fixempties(struct nfa *, FILE *); */ -static VOID -fixempties(nfa, f) -struct nfa *nfa; -FILE *f; /* for debug output; NULL none */ +static void +fixempties( + struct nfa *nfa, + FILE *f) /* for debug output; NULL none */ { - struct state *s; - struct state *nexts; - struct arc *a; - struct arc *nexta; - int progress; - - /* find and eliminate empties until there are no more */ - do { - progress = 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { - nexts = s->next; - for (a = s->outs; a != NULL && !NISERR(); a = nexta) { - nexta = a->outchain; - if (a->type == EMPTY && unempty(nfa, a)) - progress = 1; - assert(nexta == NULL || s->no != FREESTATE); - } + struct state *s; + struct state *nexts; + struct arc *a; + struct arc *nexta; + int progress; + + /* + * Find and eliminate empties until there are no more. 
+ */ + + do { + progress = 0; + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { + nexts = s->next; + for (a = s->outs; a != NULL && !NISERR(); a = nexta) { + nexta = a->outchain; + if (a->type == EMPTY && unempty(nfa, a)) { + progress = 1; } - if (progress && f != NULL) - dumpnfa(nfa, f); - } while (progress && !NISERR()); + assert(nexta == NULL || s->no != FREESTATE); + } + } + if (progress && f != NULL) { + dumpnfa(nfa, f); + } + } while (progress && !NISERR()); } - + /* - unempty - optimize out an EMPTY arc, if possible * Actually, as it stands this function always succeeds, but the return @@ -1062,307 +1158,347 @@ FILE *f; /* for debug output; NULL none */ ^ static int unempty(struct nfa *, struct arc *); */ static int /* 0 couldn't, 1 could */ -unempty(nfa, a) -struct nfa *nfa; -struct arc *a; +unempty( + struct nfa *nfa, + struct arc *a) { - struct state *from = a->from; - struct state *to = a->to; - int usefrom; /* work on from, as opposed to to? */ + struct state *from = a->from; + struct state *to = a->to; + int usefrom; /* work on from, as opposed to to? */ - assert(a->type == EMPTY); - assert(from != nfa->pre && to != nfa->post); + assert(a->type == EMPTY); + assert(from != nfa->pre && to != nfa->post); - if (from == to) { /* vacuous loop */ - freearc(nfa, a); - return 1; - } + if (from == to) { /* vacuous loop */ + freearc(nfa, a); + return 1; + } + + /* + * Decide which end to work on. + */ + + usefrom = 1; /* default: attack from */ + if (from->nouts > to->nins) { + usefrom = 0; + } else if (from->nouts == to->nins) { + /* + * Decide on secondary issue: move/copy fewest arcs. 
+ */ - /* decide which end to work on */ - usefrom = 1; /* default: attack from */ - if (from->nouts > to->nins) - usefrom = 0; - else if (from->nouts == to->nins) { - /* decide on secondary issue: move/copy fewest arcs */ - if (from->nins > to->nouts) - usefrom = 0; + if (from->nins > to->nouts) { + usefrom = 0; } - - freearc(nfa, a); - if (usefrom) { - if (from->nouts == 0) { - /* was the state's only outarc */ - moveins(nfa, from, to); - freestate(nfa, from); - } else - copyins(nfa, from, to); + } + + freearc(nfa, a); + if (usefrom) { + if (from->nouts == 0) { + /* was the state's only outarc */ + moveins(nfa, from, to); + freestate(nfa, from); } else { - if (to->nins == 0) { - /* was the state's only inarc */ - moveouts(nfa, to, from); - freestate(nfa, to); - } else - copyouts(nfa, to, from); + copyins(nfa, from, to); } + } else { + if (to->nins == 0) { + /* was the state's only inarc */ + moveouts(nfa, to, from); + freestate(nfa, to); + } else { + copyouts(nfa, to, from); + } + } - return 1; + return 1; } - + /* - cleanup - clean up NFA after optimizations ^ static VOID cleanup(struct nfa *); */ -static VOID -cleanup(nfa) -struct nfa *nfa; +static void +cleanup( + struct nfa *nfa) { - struct state *s; - struct state *nexts; - int n; - - /* clear out unreachable or dead-end states */ - /* use pre to mark reachable, then post to mark can-reach-post */ - markreachable(nfa, nfa->pre, (struct state *)NULL, nfa->pre); - markcanreach(nfa, nfa->post, nfa->pre, nfa->post); - for (s = nfa->states; s != NULL; s = nexts) { - nexts = s->next; - if (s->tmp != nfa->post && !s->flag) - dropstate(nfa, s); + struct state *s; + struct state *nexts; + int n; + + /* + * Clear out unreachable or dead-end states. Use pre to mark reachable, + * then post to mark can-reach-post. 
+ */ + + markreachable(nfa, nfa->pre, (struct state *)NULL, nfa->pre); + markcanreach(nfa, nfa->post, nfa->pre, nfa->post); + for (s = nfa->states; s != NULL; s = nexts) { + nexts = s->next; + if (s->tmp != nfa->post && !s->flag) { + dropstate(nfa, s); } - assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post); - cleartraverse(nfa, nfa->pre); - assert(nfa->post->nins == 0 || nfa->post->tmp == NULL); - /* the nins==0 (final unreachable) case will be caught later */ - - /* renumber surviving states */ - n = 0; - for (s = nfa->states; s != NULL; s = s->next) - s->no = n++; - nfa->nstates = n; + } + assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post); + cleartraverse(nfa, nfa->pre); + assert(nfa->post->nins == 0 || nfa->post->tmp == NULL); + /* the nins==0 (final unreachable) case will be caught later */ + + /* + * Renumber surviving states. + */ + + n = 0; + for (s = nfa->states; s != NULL; s = s->next) { + s->no = n++; + } + nfa->nstates = n; } - + /* - markreachable - recursive marking of reachable states ^ static VOID markreachable(struct nfa *, struct state *, struct state *, ^ struct state *); */ -static VOID -markreachable(nfa, s, okay, mark) -struct nfa *nfa; -struct state *s; -struct state *okay; /* consider only states with this mark */ -struct state *mark; /* the value to mark with */ +static void +markreachable( + struct nfa *nfa, + struct state *s, + struct state *okay, /* consider only states with this mark */ + struct state *mark) /* the value to mark with */ { - struct arc *a; + struct arc *a; - if (s->tmp != okay) - return; - s->tmp = mark; + if (s->tmp != okay) { + return; + } + s->tmp = mark; - for (a = s->outs; a != NULL; a = a->outchain) - markreachable(nfa, a->to, okay, mark); + for (a = s->outs; a != NULL; a = a->outchain) { + markreachable(nfa, a->to, okay, mark); + } } - + /* - markcanreach - recursive marking of states which can reach here ^ static VOID markcanreach(struct nfa *, struct state *, struct state *, ^ struct state *); 
*/ -static VOID -markcanreach(nfa, s, okay, mark) -struct nfa *nfa; -struct state *s; -struct state *okay; /* consider only states with this mark */ -struct state *mark; /* the value to mark with */ +static void +markcanreach( + struct nfa *nfa, + struct state *s, + struct state *okay, /* consider only states with this mark */ + struct state *mark) /* the value to mark with */ { - struct arc *a; + struct arc *a; - if (s->tmp != okay) - return; - s->tmp = mark; + if (s->tmp != okay) { + return; + } + s->tmp = mark; - for (a = s->ins; a != NULL; a = a->inchain) - markcanreach(nfa, a->from, okay, mark); + for (a = s->ins; a != NULL; a = a->inchain) { + markcanreach(nfa, a->from, okay, mark); + } } - + /* - analyze - ascertain potentially-useful facts about an optimized NFA ^ static long analyze(struct nfa *); */ static long /* re_info bits to be ORed in */ -analyze(nfa) -struct nfa *nfa; +analyze( + struct nfa *nfa) { - struct arc *a; - struct arc *aa; - - if (nfa->pre->outs == NULL) - return REG_UIMPOSSIBLE; - for (a = nfa->pre->outs; a != NULL; a = a->outchain) - for (aa = a->to->outs; aa != NULL; aa = aa->outchain) - if (aa->to == nfa->post) - return REG_UEMPTYMATCH; - return 0; + struct arc *a; + struct arc *aa; + + if (nfa->pre->outs == NULL) { + return REG_UIMPOSSIBLE; + } + for (a = nfa->pre->outs; a != NULL; a = a->outchain) { + for (aa = a->to->outs; aa != NULL; aa = aa->outchain) { + if (aa->to == nfa->post) { + return REG_UEMPTYMATCH; + } + } + } + return 0; } - + /* - compact - compact an NFA ^ static VOID compact(struct nfa *, struct cnfa *); */ -static VOID -compact(nfa, cnfa) -struct nfa *nfa; -struct cnfa *cnfa; +static void +compact( + struct nfa *nfa, + struct cnfa *cnfa) { - struct state *s; - struct arc *a; - size_t nstates; - size_t narcs; - struct carc *ca; - struct carc *first; - - assert (!NISERR()); - - nstates = 0; - narcs = 0; - for (s = nfa->states; s != NULL; s = s->next) { - nstates++; - narcs += 1 + s->nouts + 1; - /* 1 as a fake for 
flags, nouts for arcs, 1 as endmarker */ + struct state *s; + struct arc *a; + size_t nstates; + size_t narcs; + struct carc *ca; + struct carc *first; + + assert (!NISERR()); + + nstates = 0; + narcs = 0; + for (s = nfa->states; s != NULL; s = s->next) { + nstates++; + narcs += 1 + s->nouts + 1; + /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */ + } + + cnfa->states = (struct carc **)MALLOC(nstates * sizeof(struct carc *)); + cnfa->arcs = (struct carc *)MALLOC(narcs * sizeof(struct carc)); + if (cnfa->states == NULL || cnfa->arcs == NULL) { + if (cnfa->states != NULL) { + FREE(cnfa->states); } - - cnfa->states = (struct carc **)MALLOC(nstates * sizeof(struct carc *)); - cnfa->arcs = (struct carc *)MALLOC(narcs * sizeof(struct carc)); - if (cnfa->states == NULL || cnfa->arcs == NULL) { - if (cnfa->states != NULL) - FREE(cnfa->states); - if (cnfa->arcs != NULL) - FREE(cnfa->arcs); - NERR(REG_ESPACE); - return; + if (cnfa->arcs != NULL) { + FREE(cnfa->arcs); } - cnfa->nstates = nstates; - cnfa->pre = nfa->pre->no; - cnfa->post = nfa->post->no; - cnfa->bos[0] = nfa->bos[0]; - cnfa->bos[1] = nfa->bos[1]; - cnfa->eos[0] = nfa->eos[0]; - cnfa->eos[1] = nfa->eos[1]; - cnfa->ncolors = maxcolor(nfa->cm) + 1; - cnfa->flags = 0; - - ca = cnfa->arcs; - for (s = nfa->states; s != NULL; s = s->next) { - assert((size_t)s->no < nstates); - cnfa->states[s->no] = ca; - ca->co = 0; /* clear and skip flags "arc" */ + NERR(REG_ESPACE); + return; + } + cnfa->nstates = nstates; + cnfa->pre = nfa->pre->no; + cnfa->post = nfa->post->no; + cnfa->bos[0] = nfa->bos[0]; + cnfa->bos[1] = nfa->bos[1]; + cnfa->eos[0] = nfa->eos[0]; + cnfa->eos[1] = nfa->eos[1]; + cnfa->ncolors = maxcolor(nfa->cm) + 1; + cnfa->flags = 0; + + ca = cnfa->arcs; + for (s = nfa->states; s != NULL; s = s->next) { + assert((size_t)s->no < nstates); + cnfa->states[s->no] = ca; + ca->co = 0; /* clear and skip flags "arc" */ + ca++; + first = ca; + for (a = s->outs; a != NULL; a = a->outchain) { + switch 
(a->type) { + case PLAIN: + ca->co = a->co; + ca->to = a->to->no; ca++; - first = ca; - for (a = s->outs; a != NULL; a = a->outchain) - switch (a->type) { - case PLAIN: - ca->co = a->co; - ca->to = a->to->no; - ca++; - break; - case LACON: - assert(s->no != cnfa->pre); - ca->co = (color)(cnfa->ncolors + a->co); - ca->to = a->to->no; - ca++; - cnfa->flags |= HASLACONS; - break; - default: - assert(NOTREACHED); - break; - } - carcsort(first, ca-1); - ca->co = COLORLESS; - ca->to = 0; + break; + case LACON: + assert(s->no != cnfa->pre); + ca->co = (color)(cnfa->ncolors + a->co); + ca->to = a->to->no; ca++; + cnfa->flags |= HASLACONS; + break; + default: + assert(NOTREACHED); + break; + } } - assert(ca == &cnfa->arcs[narcs]); - assert(cnfa->nstates != 0); - - /* mark no-progress states */ - for (a = nfa->pre->outs; a != NULL; a = a->outchain) - cnfa->states[a->to->no]->co = 1; - cnfa->states[nfa->pre->no]->co = 1; + carcsort(first, ca-1); + ca->co = COLORLESS; + ca->to = 0; + ca++; + } + assert(ca == &cnfa->arcs[narcs]); + assert(cnfa->nstates != 0); + + /* + * Mark no-progress states. + */ + + for (a = nfa->pre->outs; a != NULL; a = a->outchain) { + cnfa->states[a->to->no]->co = 1; + } + cnfa->states[nfa->pre->no]->co = 1; } - + /* - carcsort - sort compacted-NFA arcs by color * Really dumb algorithm, but if the list is long enough for that to matter, * you're in real trouble anyway. 
^ static VOID carcsort(struct carc *, struct carc *); */ -static VOID -carcsort(first, last) -struct carc *first; -struct carc *last; +static void +carcsort( + struct carc *first, + struct carc *last) { - struct carc *p; - struct carc *q; - struct carc tmp; - - if (last - first <= 1) - return; - - for (p = first; p <= last; p++) - for (q = p; q <= last; q++) - if (p->co > q->co || - (p->co == q->co && p->to > q->to)) { - assert(p != q); - tmp = *p; - *p = *q; - *q = tmp; - } -} + struct carc *p; + struct carc *q; + struct carc tmp; + if (last - first <= 1) { + return; + } + + for (p = first; p <= last; p++) { + for (q = p; q <= last; q++) { + if (p->co > q->co || (p->co == q->co && p->to > q->to)) { + assert(p != q); + tmp = *p; + *p = *q; + *q = tmp; + } + } + } +} + /* - freecnfa - free a compacted NFA ^ static VOID freecnfa(struct cnfa *); */ -static VOID -freecnfa(cnfa) -struct cnfa *cnfa; +static void +freecnfa( + struct cnfa *cnfa) { - assert(cnfa->nstates != 0); /* not empty already */ - cnfa->nstates = 0; - FREE(cnfa->states); - FREE(cnfa->arcs); + assert(cnfa->nstates != 0); /* not empty already */ + cnfa->nstates = 0; + FREE(cnfa->states); + FREE(cnfa->arcs); } - + /* - dumpnfa - dump an NFA in human-readable form ^ static VOID dumpnfa(struct nfa *, FILE *); */ -static VOID -dumpnfa(nfa, f) -struct nfa *nfa; -FILE *f; +static void +dumpnfa( + struct nfa *nfa, + FILE *f) { #ifdef REG_DEBUG - struct state *s; - - fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no); - if (nfa->bos[0] != COLORLESS) - fprintf(f, ", bos [%ld]", (long)nfa->bos[0]); - if (nfa->bos[1] != COLORLESS) - fprintf(f, ", bol [%ld]", (long)nfa->bos[1]); - if (nfa->eos[0] != COLORLESS) - fprintf(f, ", eos [%ld]", (long)nfa->eos[0]); - if (nfa->eos[1] != COLORLESS) - fprintf(f, ", eol [%ld]", (long)nfa->eos[1]); - fprintf(f, "\n"); - for (s = nfa->states; s != NULL; s = s->next) - dumpstate(s, f); - if (nfa->parent == NULL) - dumpcolors(nfa->cm, f); - fflush(f); + struct state *s; + 
+ fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no); + if (nfa->bos[0] != COLORLESS) { + fprintf(f, ", bos [%ld]", (long)nfa->bos[0]); + } + if (nfa->bos[1] != COLORLESS) { + fprintf(f, ", bol [%ld]", (long)nfa->bos[1]); + } + if (nfa->eos[0] != COLORLESS) { + fprintf(f, ", eos [%ld]", (long)nfa->eos[0]); + } + if (nfa->eos[1] != COLORLESS) { + fprintf(f, ", eol [%ld]", (long)nfa->eos[1]); + } + fprintf(f, "\n"); + for (s = nfa->states; s != NULL; s = s->next) { + dumpstate(s, f); + } + if (nfa->parent == NULL) { + dumpcolors(nfa->cm, f); + } + fflush(f); #endif } - + #ifdef REG_DEBUG /* subordinates of dumpnfa */ /* ^ #ifdef REG_DEBUG @@ -1372,165 +1508,185 @@ FILE *f; - dumpstate - dump an NFA state in human-readable form ^ static VOID dumpstate(struct state *, FILE *); */ -static VOID -dumpstate(s, f) -struct state *s; -FILE *f; +static void +dumpstate( + struct state *s, + FILE *f) { - struct arc *a; - - fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "", - (s->flag) ? s->flag : '.'); - if (s->prev != NULL && s->prev->next != s) - fprintf(f, "\tstate chain bad\n"); - if (s->nouts == 0) - fprintf(f, "\tno out arcs\n"); - else - dumparcs(s, f); - fflush(f); - for (a = s->ins; a != NULL; a = a->inchain) { - if (a->to != s) - fprintf(f, "\tlink from %d to %d on %d's in-chain\n", - a->from->no, a->to->no, s->no); + struct arc *a; + + fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "", + (s->flag) ? 
s->flag : '.'); + if (s->prev != NULL && s->prev->next != s) { + fprintf(f, "\tstate chain bad\n"); + } + if (s->nouts == 0) { + fprintf(f, "\tno out arcs\n"); + } else { + dumparcs(s, f); + } + fflush(f); + for (a = s->ins; a != NULL; a = a->inchain) { + if (a->to != s) { + fprintf(f, "\tlink from %d to %d on %d's in-chain\n", + a->from->no, a->to->no, s->no); } + } } - + /* - dumparcs - dump out-arcs in human-readable form ^ static VOID dumparcs(struct state *, FILE *); */ -static VOID -dumparcs(s, f) -struct state *s; -FILE *f; +static void +dumparcs( + struct state *s, + FILE *f) { - int pos; + int pos; - assert(s->nouts > 0); - /* printing arcs in reverse order is usually clearer */ - pos = dumprarcs(s->outs, s, f, 1); - if (pos != 1) - fprintf(f, "\n"); + assert(s->nouts > 0); + /* printing arcs in reverse order is usually clearer */ + pos = dumprarcs(s->outs, s, f, 1); + if (pos != 1) { + fprintf(f, "\n"); + } } - + /* - dumprarcs - dump remaining outarcs, recursively, in reverse order ^ static int dumprarcs(struct arc *, struct state *, FILE *, int); */ static int /* resulting print position */ -dumprarcs(a, s, f, pos) -struct arc *a; -struct state *s; -FILE *f; -int pos; /* initial print position */ +dumprarcs( + struct arc *a, + struct state *s, + FILE *f, + int pos) /* initial print position */ { - if (a->outchain != NULL) - pos = dumprarcs(a->outchain, s, f, pos); - dumparc(a, s, f); - if (pos == 5) { - fprintf(f, "\n"); - pos = 1; - } else - pos++; - return pos; + if (a->outchain != NULL) { + pos = dumprarcs(a->outchain, s, f, pos); + } + dumparc(a, s, f); + if (pos == 5) { + fprintf(f, "\n"); + pos = 1; + } else { + pos++; + } + return pos; } - + /* - dumparc - dump one outarc in readable form, including prefixing tab ^ static VOID dumparc(struct arc *, struct state *, FILE *); */ -static VOID -dumparc(a, s, f) -struct arc *a; -struct state *s; -FILE *f; +static void +dumparc( + struct arc *a, + struct state *s, + FILE *f) { - struct arc *aa; - struct 
arcbatch *ab; - - fprintf(f, "\t"); - switch (a->type) { - case PLAIN: - fprintf(f, "[%ld]", (long)a->co); - break; - case AHEAD: - fprintf(f, ">%ld>", (long)a->co); - break; - case BEHIND: - fprintf(f, "<%ld<", (long)a->co); - break; - case LACON: - fprintf(f, ":%ld:", (long)a->co); - break; - case '^': - case '$': - fprintf(f, "%c%d", a->type, (int)a->co); - break; - case EMPTY: - break; - default: - fprintf(f, "0x%x/0%lo", a->type, (long)a->co); - break; + struct arc *aa; + struct arcbatch *ab; + + fprintf(f, "\t"); + switch (a->type) { + case PLAIN: + fprintf(f, "[%ld]", (long)a->co); + break; + case AHEAD: + fprintf(f, ">%ld>", (long)a->co); + break; + case BEHIND: + fprintf(f, "<%ld<", (long)a->co); + break; + case LACON: + fprintf(f, ":%ld:", (long)a->co); + break; + case '^': + case '$': + fprintf(f, "%c%d", a->type, (int)a->co); + break; + case EMPTY: + break; + default: + fprintf(f, "0x%x/0%lo", a->type, (long)a->co); + break; + } + if (a->from != s) { + fprintf(f, "?%d?", a->from->no); + } + for (ab = &a->from->oas; ab != NULL; ab = ab->next) { + for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++) { + if (aa == a) { + break; /* NOTE BREAK OUT */ + } } - if (a->from != s) - fprintf(f, "?%d?", a->from->no); - for (ab = &a->from->oas; ab != NULL; ab = ab->next) { - for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++) - if (aa == a) - break; /* NOTE BREAK OUT */ - if (aa < &ab->a[ABSIZE]) /* propagate break */ - break; /* NOTE BREAK OUT */ + if (aa < &ab->a[ABSIZE]) { /* propagate break */ + break; /* NOTE BREAK OUT */ } - if (ab == NULL) - fprintf(f, "?!?"); /* not in allocated space */ - fprintf(f, "->"); - if (a->to == NULL) { - fprintf(f, "NULL"); - return; + } + if (ab == NULL) { + fprintf(f, "?!?"); /* not in allocated space */ + } + fprintf(f, "->"); + if (a->to == NULL) { + fprintf(f, "NULL"); + return; + } + fprintf(f, "%d", a->to->no); + for (aa = a->to->ins; aa != NULL; aa = aa->inchain) { + if (aa == a) { + break; /* NOTE BREAK OUT */ } - fprintf(f, "%d", 
a->to->no); - for (aa = a->to->ins; aa != NULL; aa = aa->inchain) - if (aa == a) - break; /* NOTE BREAK OUT */ - if (aa == NULL) - fprintf(f, "?!?"); /* missing from in-chain */ + } + if (aa == NULL) { + fprintf(f, "?!?"); /* missing from in-chain */ + } } /* ^ #endif */ #endif /* ifdef REG_DEBUG */ - + /* - dumpcnfa - dump a compacted NFA in human-readable form ^ static VOID dumpcnfa(struct cnfa *, FILE *); */ -static VOID -dumpcnfa(cnfa, f) -struct cnfa *cnfa; -FILE *f; +static void +dumpcnfa( + struct cnfa *cnfa, + FILE *f) { #ifdef REG_DEBUG - int st; - - fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post); - if (cnfa->bos[0] != COLORLESS) - fprintf(f, ", bos [%ld]", (long)cnfa->bos[0]); - if (cnfa->bos[1] != COLORLESS) - fprintf(f, ", bol [%ld]", (long)cnfa->bos[1]); - if (cnfa->eos[0] != COLORLESS) - fprintf(f, ", eos [%ld]", (long)cnfa->eos[0]); - if (cnfa->eos[1] != COLORLESS) - fprintf(f, ", eol [%ld]", (long)cnfa->eos[1]); - if (cnfa->flags&HASLACONS) - fprintf(f, ", haslacons"); - fprintf(f, "\n"); - for (st = 0; st < cnfa->nstates; st++) - dumpcstate(st, cnfa->states[st], cnfa, f); - fflush(f); + int st; + + fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post); + if (cnfa->bos[0] != COLORLESS) { + fprintf(f, ", bos [%ld]", (long)cnfa->bos[0]); + } + if (cnfa->bos[1] != COLORLESS) { + fprintf(f, ", bol [%ld]", (long)cnfa->bos[1]); + } + if (cnfa->eos[0] != COLORLESS) { + fprintf(f, ", eos [%ld]", (long)cnfa->eos[0]); + } + if (cnfa->eos[1] != COLORLESS) { + fprintf(f, ", eol [%ld]", (long)cnfa->eos[1]); + } + if (cnfa->flags&HASLACONS) { + fprintf(f, ", haslacons"); + } + fprintf(f, "\n"); + for (st = 0; st < cnfa->nstates; st++) { + dumpcstate(st, cnfa->states[st], cnfa, f); + } + fflush(f); #endif } - + #ifdef REG_DEBUG /* subordinates of dumpcnfa */ /* ^ #ifdef REG_DEBUG @@ -1540,36 +1696,46 @@ FILE *f; - dumpcstate - dump a compacted-NFA state in human-readable form ^ static VOID dumpcstate(int, struct carc *, struct cnfa *, FILE *); */ -static VOID 
-dumpcstate(st, ca, cnfa, f) -int st; -struct carc *ca; -struct cnfa *cnfa; -FILE *f; +static void +dumpcstate( + int st, + struct carc *ca, + struct cnfa *cnfa, + FILE *f) { - int i; - int pos; - - fprintf(f, "%d%s", st, (ca[0].co) ? ":" : "."); - pos = 1; - for (i = 1; ca[i].co != COLORLESS; i++) { - if (ca[i].co < cnfa->ncolors) - fprintf(f, "\t[%ld]->%d", (long)ca[i].co, ca[i].to); - else - fprintf(f, "\t:%ld:->%d", (long)ca[i].co-cnfa->ncolors, - ca[i].to); - if (pos == 5) { - fprintf(f, "\n"); - pos = 1; - } else - pos++; + int i; + int pos; + + fprintf(f, "%d%s", st, (ca[0].co) ? ":" : "."); + pos = 1; + for (i = 1; ca[i].co != COLORLESS; i++) { + if (ca[i].co < cnfa->ncolors) { + fprintf(f, "\t[%ld]->%d", (long)ca[i].co, ca[i].to); + } else { + fprintf(f, "\t:%ld:->%d", (long)ca[i].co-cnfa->ncolors, ca[i].to); + } + if (pos == 5) { + fprintf(f, "\n"); + pos = 1; + } else { + pos++; } - if (i == 1 || pos != 1) - fprintf(f, "\n"); - fflush(f); + } + if (i == 1 || pos != 1) { + fprintf(f, "\n"); + } + fflush(f); } /* ^ #endif */ #endif /* ifdef REG_DEBUG */ + +/* + * Local Variables: + * mode: c + * c-basic-offset: 4 + * fill-column: 78 + * End: + */ diff --git a/generic/regcomp.c b/generic/regcomp.c index b94cb8f..c6c7342 100644 --- a/generic/regcomp.c +++ b/generic/regcomp.c @@ -3,20 +3,20 @@ * This file #includes several others (see the bottom). * * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. 
- * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * + * + * I'd appreciate being given credit for this package in the documentation of + * software which uses it, but that is not a requirement. + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -189,52 +189,50 @@ static int cmp(CONST chr *, CONST chr *, size_t); static int casecmp(CONST chr *, CONST chr *, size_t); /* automatically gathered by fwd; do not hand-edit */ /* =====^!^===== end forwards =====^!^===== */ - - - + /* internal variables, bundled for easy passing around */ struct vars { - regex_t *re; - chr *now; /* scan pointer into string */ - chr *stop; /* end of string */ - chr *savenow; /* saved now and stop for "subroutine call" */ - chr *savestop; - int err; /* error code (0 if none) */ - int cflags; /* copy of compile flags */ - int lasttype; /* type of previous token */ - int nexttype; /* type of next token */ - chr nextvalue; /* value (if any) of next token */ - int lexcon; /* lexical context type (see lex.c) */ - int nsubexp; /* subexpression count */ - struct subre **subs; /* subRE pointer vector */ - size_t nsubs; /* length of vector */ - struct subre *sub10[10]; /* initial vector, enough for most */ - struct nfa *nfa; /* the NFA */ - struct colormap *cm; /* character color map */ - color nlcolor; /* color of newline */ - struct state *wordchrs; /* state in nfa holding word-char outarcs */ - struct subre *tree; /* subexpression tree */ - struct subre *treechain; /* all tree nodes allocated */ - struct subre *treefree; /* any free tree nodes */ - int ntree; /* number of tree nodes */ - struct cvec *cv; /* interface cvec */ - struct cvec *cv2; /* utility cvec */ - struct cvec *mcces; /* collating-element information */ -# define 
ISCELEADER(v,c) (v->mcces != NULL && haschr(v->mcces, (c))) - struct state *mccepbegin; /* in nfa, start of MCCE prototypes */ - struct state *mccepend; /* in nfa, end of MCCE prototypes */ - struct subre *lacons; /* lookahead-constraint vector */ - int nlacons; /* size of lacons */ + regex_t *re; + chr *now; /* scan pointer into string */ + chr *stop; /* end of string */ + chr *savenow; /* saved now and stop for "subroutine call" */ + chr *savestop; + int err; /* error code (0 if none) */ + int cflags; /* copy of compile flags */ + int lasttype; /* type of previous token */ + int nexttype; /* type of next token */ + chr nextvalue; /* value (if any) of next token */ + int lexcon; /* lexical context type (see lex.c) */ + int nsubexp; /* subexpression count */ + struct subre **subs; /* subRE pointer vector */ + size_t nsubs; /* length of vector */ + struct subre *sub10[10]; /* initial vector, enough for most */ + struct nfa *nfa; /* the NFA */ + struct colormap *cm; /* character color map */ + color nlcolor; /* color of newline */ + struct state *wordchrs; /* state in nfa holding word-char outarcs */ + struct subre *tree; /* subexpression tree */ + struct subre *treechain; /* all tree nodes allocated */ + struct subre *treefree; /* any free tree nodes */ + int ntree; /* number of tree nodes */ + struct cvec *cv; /* interface cvec */ + struct cvec *cv2; /* utility cvec */ + struct cvec *mcces; /* collating-element information */ +#define ISCELEADER(v,c) (v->mcces != NULL && haschr(v->mcces, (c))) + struct state *mccepbegin; /* in nfa, start of MCCE prototypes */ + struct state *mccepend; /* in nfa, end of MCCE prototypes */ + struct subre *lacons; /* lookahead-constraint vector */ + int nlacons; /* size of lacons */ }; /* parsing macros; most know that `v' is the struct vars pointer */ #define NEXT() (next(v)) /* advance by one token */ #define SEE(t) (v->nexttype == (t)) /* is next token this? 
*/ #define EAT(t) (SEE(t) && next(v)) /* if next is this, swallow it */ -#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ +#define VISERR(vv) ((vv)->err != 0)/* have we seen an error yet? */ #define ISERR() VISERR(v) -#define VERR(vv,e) ((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err :\ - ((vv)->err = (e))) +#define VERR(vv,e) \ + ((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err : ((vv)->err = (e))) #define ERR(e) VERR(v, e) /* record an error */ #define NOERR() {if (ISERR()) return;} /* if error seen, return */ #define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */ @@ -264,211 +262,249 @@ struct vars { #define PREFER 'P' /* length preference */ /* is an arc colored, and hence on a color chain? */ -#define COLORED(a) ((a)->type == PLAIN || (a)->type == AHEAD || \ - (a)->type == BEHIND) - - +#define COLORED(a) \ + ((a)->type == PLAIN || (a)->type == AHEAD || (a)->type == BEHIND) /* static function list */ static struct fns functions = { - rfree, /* regfree insides */ + rfree, /* regfree insides */ }; - - - + /* - compile - compile regular expression ^ int compile(regex_t *, CONST chr *, size_t, int); */ int -compile(re, string, len, flags) -regex_t *re; -CONST chr *string; -size_t len; -int flags; +compile( + regex_t *re, + CONST chr *string, + size_t len, + int flags) { - struct vars var; - struct vars *v = &var; - struct guts *g; - int i; - size_t j; - FILE *debug = (flags&REG_PROGRESS) ? 
stdout : (FILE *)NULL; -# define CNOERR() { if (ISERR()) return freev(v, v->err); } - - /* sanity checks */ - - if (re == NULL || string == NULL) - return REG_INVARG; - if ((flags&REG_QUOTE) && - (flags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE))) - return REG_INVARG; - if (!(flags&REG_EXTENDED) && (flags&REG_ADVF)) - return REG_INVARG; - - /* initial setup (after which freev() is callable) */ - v->re = re; - v->now = (chr *)string; - v->stop = v->now + len; - v->savenow = v->savestop = NULL; - v->err = 0; - v->cflags = flags; - v->nsubexp = 0; - v->subs = v->sub10; - v->nsubs = 10; - for (j = 0; j < v->nsubs; j++) - v->subs[j] = NULL; - v->nfa = NULL; - v->cm = NULL; - v->nlcolor = COLORLESS; - v->wordchrs = NULL; - v->tree = NULL; - v->treechain = NULL; - v->treefree = NULL; - v->cv = NULL; - v->cv2 = NULL; - v->mcces = NULL; - v->lacons = NULL; - v->nlacons = 0; - re->re_magic = REMAGIC; - re->re_info = 0; /* bits get set during parse */ - re->re_csize = sizeof(chr); - re->re_guts = NULL; - re->re_fns = VS(&functions); - - /* more complex setup, malloced things */ - re->re_guts = VS(MALLOC(sizeof(struct guts))); - if (re->re_guts == NULL) - return freev(v, REG_ESPACE); - g = (struct guts *)re->re_guts; - g->tree = NULL; - initcm(v, &g->cmap); - v->cm = &g->cmap; - g->lacons = NULL; - g->nlacons = 0; - ZAPCNFA(g->search); - v->nfa = newnfa(v, v->cm, (struct nfa *)NULL); - CNOERR(); - v->cv = newcvec(100, 20, 10); - if (v->cv == NULL) - return freev(v, REG_ESPACE); - i = nmcces(v); - if (i > 0) { - v->mcces = newcvec(nleaders(v), 0, i); - CNOERR(); - v->mcces = allmcces(v, v->mcces); - leaders(v, v->mcces); - addmcce(v->mcces, (chr *)NULL, (chr *)NULL); /* dummy */ - } - CNOERR(); - - /* parsing */ - lexstart(v); /* also handles prefixes */ - if ((v->cflags&REG_NLSTOP) || (v->cflags&REG_NLANCH)) { - /* assign newline a unique color */ - v->nlcolor = subcolor(v->cm, newline()); - okcolors(v->nfa, v->cm); - } + struct vars var; + struct vars *v = &var; + struct guts *g; + int i; + 
size_t j; + FILE *debug = (flags&REG_PROGRESS) ? stdout : (FILE *)NULL; +#define CNOERR() { if (ISERR()) return freev(v, v->err); } + + /* + * Sanity checks. + */ + + if (re == NULL || string == NULL) { + return REG_INVARG; + } + if ((flags&REG_QUOTE) && (flags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE))) { + return REG_INVARG; + } + if (!(flags&REG_EXTENDED) && (flags&REG_ADVF)) { + return REG_INVARG; + } + + /* + * Initial setup (after which freev() is callable). + */ + + v->re = re; + v->now = (chr *)string; + v->stop = v->now + len; + v->savenow = v->savestop = NULL; + v->err = 0; + v->cflags = flags; + v->nsubexp = 0; + v->subs = v->sub10; + v->nsubs = 10; + for (j = 0; j < v->nsubs; j++) { + v->subs[j] = NULL; + } + v->nfa = NULL; + v->cm = NULL; + v->nlcolor = COLORLESS; + v->wordchrs = NULL; + v->tree = NULL; + v->treechain = NULL; + v->treefree = NULL; + v->cv = NULL; + v->cv2 = NULL; + v->mcces = NULL; + v->lacons = NULL; + v->nlacons = 0; + re->re_magic = REMAGIC; + re->re_info = 0; /* bits get set during parse */ + re->re_csize = sizeof(chr); + re->re_guts = NULL; + re->re_fns = VS(&functions); + + /* + * More complex setup, malloced things. + */ + + re->re_guts = VS(MALLOC(sizeof(struct guts))); + if (re->re_guts == NULL) { + return freev(v, REG_ESPACE); + } + g = (struct guts *)re->re_guts; + g->tree = NULL; + initcm(v, &g->cmap); + v->cm = &g->cmap; + g->lacons = NULL; + g->nlacons = 0; + ZAPCNFA(g->search); + v->nfa = newnfa(v, v->cm, NULL); + CNOERR(); + v->cv = newcvec(100, 20, 10); + if (v->cv == NULL) { + return freev(v, REG_ESPACE); + } + i = nmcces(v); + if (i > 0) { + v->mcces = newcvec(nleaders(v), 0, i); CNOERR(); - v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final); - assert(SEE(EOS)); /* even if error; ISERR() => SEE(EOS) */ - CNOERR(); - assert(v->tree != NULL); + v->mcces = allmcces(v, v->mcces); + leaders(v, v->mcces); + addmcce(v->mcces, (chr *)NULL, (chr *)NULL); /* dummy */ + } + CNOERR(); + + /* + * Parsing. 
+ */ + + lexstart(v); /* also handles prefixes */ + if ((v->cflags&REG_NLSTOP) || (v->cflags&REG_NLANCH)) { + /* + * Assign newline a unique color. + */ - /* finish setup of nfa and its subre tree */ - specialcolors(v->nfa); - CNOERR(); - if (debug != NULL) { - fprintf(debug, "\n\n\n========= RAW ==========\n"); - dumpnfa(v->nfa, debug); - dumpst(v->tree, debug, 1); - } - optst(v, v->tree); - v->ntree = numst(v->tree, 1); - markst(v->tree); - cleanst(v); + v->nlcolor = subcolor(v->cm, newline()); + okcolors(v->nfa, v->cm); + } + CNOERR(); + v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final); + assert(SEE(EOS)); /* even if error; ISERR() => SEE(EOS) */ + CNOERR(); + assert(v->tree != NULL); + + /* + * Finish setup of nfa and its subre tree. + */ + + specialcolors(v->nfa); + CNOERR(); + if (debug != NULL) { + fprintf(debug, "\n\n\n========= RAW ==========\n"); + dumpnfa(v->nfa, debug); + dumpst(v->tree, debug, 1); + } + optst(v, v->tree); + v->ntree = numst(v->tree, 1); + markst(v->tree); + cleanst(v); + if (debug != NULL) { + fprintf(debug, "\n\n\n========= TREE FIXED ==========\n"); + dumpst(v->tree, debug, 1); + } + + /* + * Build compacted NFAs for tree and lacons. 
+ */ + + re->re_info |= nfatree(v, v->tree, debug); + CNOERR(); + assert(v->nlacons == 0 || v->lacons != NULL); + for (i = 1; i < v->nlacons; i++) { if (debug != NULL) { - fprintf(debug, "\n\n\n========= TREE FIXED ==========\n"); - dumpst(v->tree, debug, 1); + fprintf(debug, "\n\n\n========= LA%d ==========\n", i); } - - /* build compacted NFAs for tree and lacons */ - re->re_info |= nfatree(v, v->tree, debug); - CNOERR(); - assert(v->nlacons == 0 || v->lacons != NULL); - for (i = 1; i < v->nlacons; i++) { - if (debug != NULL) - fprintf(debug, "\n\n\n========= LA%d ==========\n", i); - nfanode(v, &v->lacons[i], debug); - } - CNOERR(); - if (v->tree->flags&SHORTER) - NOTE(REG_USHORTEST); - - /* build compacted NFAs for tree, lacons, fast search */ - if (debug != NULL) - fprintf(debug, "\n\n\n========= SEARCH ==========\n"); - /* can sacrifice main NFA now, so use it as work area */ - (DISCARD)optimize(v->nfa, debug); - CNOERR(); - makesearch(v, v->nfa); - CNOERR(); - compact(v->nfa, &g->search); - CNOERR(); - - /* looks okay, package it up */ - re->re_nsub = v->nsubexp; - v->re = NULL; /* freev no longer frees re */ - g->magic = GUTSMAGIC; - g->cflags = v->cflags; - g->info = re->re_info; - g->nsub = re->re_nsub; - g->tree = v->tree; - v->tree = NULL; - g->ntree = v->ntree; - g->compare = (v->cflags&REG_ICASE) ? casecmp : cmp; - g->lacons = v->lacons; - v->lacons = NULL; - g->nlacons = v->nlacons; - - if (flags&REG_DUMP) - dump(re, stdout); - - assert(v->err == 0); - return freev(v, 0); + nfanode(v, &v->lacons[i], debug); + } + CNOERR(); + if (v->tree->flags&SHORTER) { + NOTE(REG_USHORTEST); + } + + /* + * Build compacted NFAs for tree, lacons, fast search. + */ + + if (debug != NULL) { + fprintf(debug, "\n\n\n========= SEARCH ==========\n"); + } + + /* + * Can sacrifice main NFA now, so use it as work area. 
+ */ + + (DISCARD)optimize(v->nfa, debug); + CNOERR(); + makesearch(v, v->nfa); + CNOERR(); + compact(v->nfa, &g->search); + CNOERR(); + + /* + * Looks okay, package it up. + */ + + re->re_nsub = v->nsubexp; + v->re = NULL; /* freev no longer frees re */ + g->magic = GUTSMAGIC; + g->cflags = v->cflags; + g->info = re->re_info; + g->nsub = re->re_nsub; + g->tree = v->tree; + v->tree = NULL; + g->ntree = v->ntree; + g->compare = (v->cflags&REG_ICASE) ? casecmp : cmp; + g->lacons = v->lacons; + v->lacons = NULL; + g->nlacons = v->nlacons; + + if (flags&REG_DUMP) { + dump(re, stdout); + } + + assert(v->err == 0); + return freev(v, 0); } - + /* - moresubs - enlarge subRE vector ^ static VOID moresubs(struct vars *, int); */ -static VOID -moresubs(v, wanted) -struct vars *v; -int wanted; /* want enough room for this one */ +static void +moresubs( + struct vars *v, + int wanted) /* want enough room for this one */ { - struct subre **p; - size_t n; - - assert(wanted > 0 && (size_t)wanted >= v->nsubs); - n = (size_t)wanted * 3 / 2 + 1; - if (v->subs == v->sub10) { - p = (struct subre **)MALLOC(n * sizeof(struct subre *)); - if (p != NULL) - memcpy(VS(p), VS(v->subs), - v->nsubs * sizeof(struct subre *)); - } else - p = (struct subre **)REALLOC(v->subs, n*sizeof(struct subre *)); - if (p == NULL) { - ERR(REG_ESPACE); - return; + struct subre **p; + size_t n; + + assert(wanted > 0 && (size_t)wanted >= v->nsubs); + n = (size_t)wanted * 3 / 2 + 1; + if (v->subs == v->sub10) { + p = (struct subre **)MALLOC(n * sizeof(struct subre *)); + if (p != NULL) { + memcpy(VS(p), VS(v->subs), v->nsubs * sizeof(struct subre *)); } - v->subs = p; - for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++) - *p = NULL; - assert(v->nsubs == n); - assert((size_t)wanted < v->nsubs); + } else { + p = (struct subre **)REALLOC(v->subs, n*sizeof(struct subre *)); + } + if (p == NULL) { + ERR(REG_ESPACE); + return; + } + + v->subs = p; + for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++) { + *p 
= NULL; + } + assert(v->nsubs == n); + assert((size_t)wanted < v->nsubs); } - + /* - freev - free vars struct's substructures where necessary * Optionally does error-number setting, and always returns error code @@ -476,108 +512,135 @@ int wanted; /* want enough room for this one */ ^ static int freev(struct vars *, int); */ static int -freev(v, err) -struct vars *v; -int err; +freev( + struct vars *v, + int err) { - if (v->re != NULL) - rfree(v->re); - if (v->subs != v->sub10) - FREE(v->subs); - if (v->nfa != NULL) - freenfa(v->nfa); - if (v->tree != NULL) - freesubre(v, v->tree); - if (v->treechain != NULL) - cleanst(v); - if (v->cv != NULL) - freecvec(v->cv); - if (v->cv2 != NULL) - freecvec(v->cv2); - if (v->mcces != NULL) - freecvec(v->mcces); - if (v->lacons != NULL) - freelacons(v->lacons, v->nlacons); - ERR(err); /* nop if err==0 */ - - return v->err; + if (v->re != NULL) { + rfree(v->re); + } + if (v->subs != v->sub10) { + FREE(v->subs); + } + if (v->nfa != NULL) { + freenfa(v->nfa); + } + if (v->tree != NULL) { + freesubre(v, v->tree); + } + if (v->treechain != NULL) { + cleanst(v); + } + if (v->cv != NULL) { + freecvec(v->cv); + } + if (v->cv2 != NULL) { + freecvec(v->cv2); + } + if (v->mcces != NULL) { + freecvec(v->mcces); + } + if (v->lacons != NULL) { + freelacons(v->lacons, v->nlacons); + } + ERR(err); /* nop if err==0 */ + + return v->err; } - + /* - makesearch - turn an NFA into a search NFA (implicit prepend of .*?) * NFA must have been optimize()d already. 
^ static VOID makesearch(struct vars *, struct nfa *); */ -static VOID -makesearch(v, nfa) -struct vars *v; -struct nfa *nfa; +static void +makesearch( + struct vars *v, + struct nfa *nfa) { - struct arc *a; - struct arc *b; - struct state *pre = nfa->pre; - struct state *s; - struct state *s2; - struct state *slist; - - /* no loops are needed if it's anchored */ - for (a = pre->outs; a != NULL; a = a->outchain) { - assert(a->type == PLAIN); - if (a->co != nfa->bos[0] && a->co != nfa->bos[1]) - break; + struct arc *a; + struct arc *b; + struct state *pre = nfa->pre; + struct state *s; + struct state *s2; + struct state *slist; + + /* + * No loops are needed if it's anchored. + */ + + for (a = pre->outs; a != NULL; a = a->outchain) { + assert(a->type == PLAIN); + if (a->co != nfa->bos[0] && a->co != nfa->bos[1]) { + break; } - if (a != NULL) { - /* add implicit .* in front */ - rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre); + } + if (a != NULL) { + /* + * Add implicit .* in front. + */ - /* and ^* and \A* too -- not always necessary, but harmless */ - newarc(nfa, PLAIN, nfa->bos[0], pre, pre); - newarc(nfa, PLAIN, nfa->bos[1], pre, pre); - } + rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre); /* - * Now here's the subtle part. Because many REs have no lookback - * constraints, often knowing when you were in the pre state tells - * you little; it's the next state(s) that are informative. But - * some of them may have other inarcs, i.e. it may be possible to - * make actual progress and then return to one of them. We must - * de-optimize such cases, splitting each such state into progress - * and no-progress states. + * And ^* and \A* too -- not always necessary, but harmless. 
*/ - /* first, make a list of the states */ - slist = NULL; - for (a = pre->outs; a != NULL; a = a->outchain) { - s = a->to; - for (b = s->ins; b != NULL; b = b->inchain) - if (b->from != pre) - break; - if (b != NULL) { /* must be split */ - if (s->tmp == NULL) { /* if not already in the list */ - /* (fixes bugs 505048, 230589, */ - /* 840258, 504785) */ - s->tmp = slist; - slist = s; - } - } + newarc(nfa, PLAIN, nfa->bos[0], pre, pre); + newarc(nfa, PLAIN, nfa->bos[1], pre, pre); + } + + /* + * Now here's the subtle part. Because many REs have no lookback + * constraints, often knowing when you were in the pre state tells you + * little; it's the next state(s) that are informative. But some of them + * may have other inarcs, i.e. it may be possible to make actual progress + * and then return to one of them. We must de-optimize such cases, + * splitting each such state into progress and no-progress states. + */ + + /* + * First, make a list of the states. + */ + + slist = NULL; + for (a = pre->outs; a != NULL; a = a->outchain) { + s = a->to; + for (b = s->ins; b != NULL; b = b->inchain) { + if (b->from != pre) { + break; + } } - - /* do the splits */ - for (s = slist; s != NULL; s = s2) { - s2 = newstate(nfa); - copyouts(nfa, s, s2); - for (a = s->ins; a != NULL; a = b) { - b = a->inchain; - if (a->from != pre) { - cparc(nfa, a, a->from, s2); - freearc(nfa, a); - } - } - s2 = s->tmp; - s->tmp = NULL; /* clean up while we're at it */ + if (b != NULL && s->tmp == NULL) { + /* + * Must be split if not already in the list (fixes bugs 505048, + * 230589, 840258, 504785). + */ + + s->tmp = slist; + slist = s; + } + } + + /* + * Do the splits. 
+ */ + + for (s = slist; s != NULL; s = s2) { + s2 = newstate(nfa); + copyouts(nfa, s, s2); + for (a = s->ins; a != NULL; a = b) { + b = a->inchain; + if (a->from != pre) { + cparc(nfa, a, a->from, s2); + freearc(nfa, a); + } } + s2 = s->tmp; + s->tmp = NULL; /* clean up while we're at it */ + } } - + /* - parse - parse an RE * This is actually just the top level, which parses a bunch of branches @@ -587,72 +650,80 @@ struct nfa *nfa; ^ struct state *); */ static struct subre * -parse(v, stopper, type, init, final) -struct vars *v; -int stopper; /* EOS or ')' */ -int type; /* LACON (lookahead subRE) or PLAIN */ -struct state *init; /* initial state */ -struct state *final; /* final state */ +parse( + struct vars *v, + int stopper, /* EOS or ')' */ + int type, /* LACON (lookahead subRE) or PLAIN */ + struct state *init, /* initial state */ + struct state *final) /* final state */ { - struct state *left; /* scaffolding for branch */ - struct state *right; - struct subre *branches; /* top level */ - struct subre *branch; /* current branch */ - struct subre *t; /* temporary */ - int firstbranch; /* is this the first branch? 
*/ - - assert(stopper == ')' || stopper == EOS); - - branches = subre(v, '|', LONGER, init, final); - NOERRN(); - branch = branches; - firstbranch = 1; - do { /* a branch */ - if (!firstbranch) { - /* need a place to hang it */ - branch->right = subre(v, '|', LONGER, init, final); - NOERRN(); - branch = branch->right; - } - firstbranch = 0; - left = newstate(v->nfa); - right = newstate(v->nfa); - NOERRN(); - EMPTYARC(init, left); - EMPTYARC(right, final); - NOERRN(); - branch->left = parsebranch(v, stopper, type, left, right, 0); - NOERRN(); - branch->flags |= UP(branch->flags | branch->left->flags); - if ((branch->flags &~ branches->flags) != 0) /* new flags */ - for (t = branches; t != branch; t = t->right) - t->flags |= branch->flags; - } while (EAT('|')); - assert(SEE(stopper) || SEE(EOS)); - - if (!SEE(stopper)) { - assert(stopper == ')' && SEE(EOS)); - ERR(REG_EPAREN); + struct state *left; /* scaffolding for branch */ + struct state *right; + struct subre *branches; /* top level */ + struct subre *branch; /* current branch */ + struct subre *t; /* temporary */ + int firstbranch; /* is this the first branch? */ + + assert(stopper == ')' || stopper == EOS); + + branches = subre(v, '|', LONGER, init, final); + NOERRN(); + branch = branches; + firstbranch = 1; + do { /* a branch */ + if (!firstbranch) { + /* + * Need a place to hang the branch. 
+ */ + + branch->right = subre(v, '|', LONGER, init, final); + NOERRN(); + branch = branch->right; } - - /* optimize out simple cases */ - if (branch == branches) { /* only one branch */ - assert(branch->right == NULL); - t = branch->left; - branch->left = NULL; - freesubre(v, branches); - branches = t; - } else if (!MESSY(branches->flags)) { /* no interesting innards */ - freesubre(v, branches->left); - branches->left = NULL; - freesubre(v, branches->right); - branches->right = NULL; - branches->op = '='; + firstbranch = 0; + left = newstate(v->nfa); + right = newstate(v->nfa); + NOERRN(); + EMPTYARC(init, left); + EMPTYARC(right, final); + NOERRN(); + branch->left = parsebranch(v, stopper, type, left, right, 0); + NOERRN(); + branch->flags |= UP(branch->flags | branch->left->flags); + if ((branch->flags &~ branches->flags) != 0) { /* new flags */ + for (t = branches; t != branch; t = t->right) { + t->flags |= branch->flags; + } } - - return branches; + } while (EAT('|')); + assert(SEE(stopper) || SEE(EOS)); + + if (!SEE(stopper)) { + assert(stopper == ')' && SEE(EOS)); + ERR(REG_EPAREN); + } + + /* + * Optimize out simple cases. + */ + + if (branch == branches) { /* only one branch */ + assert(branch->right == NULL); + t = branch->left; + branch->left = NULL; + freesubre(v, branches); + branches = t; + } else if (!MESSY(branches->flags)) { /* no interesting innards */ + freesubre(v, branches->left); + branches->left = NULL; + freesubre(v, branches->right); + branches->right = NULL; + branches->op = '='; + } + + return branches; } - + /* - parsebranch - parse one branch of an RE * This mostly manages concatenation, working closely with parseqatom(). 
@@ -662,503 +733,601 @@ struct state *final; /* final state */ ^ struct state *, int); */ static struct subre * -parsebranch(v, stopper, type, left, right, partial) -struct vars *v; -int stopper; /* EOS or ')' */ -int type; /* LACON (lookahead subRE) or PLAIN */ -struct state *left; /* leftmost state */ -struct state *right; /* rightmost state */ -int partial; /* is this only part of a branch? */ +parsebranch( + struct vars *v, + int stopper, /* EOS or ')' */ + int type, /* LACON (lookahead subRE) or PLAIN */ + struct state *left, /* leftmost state */ + struct state *right, /* rightmost state */ + int partial) /* is this only part of a branch? */ { - struct state *lp; /* left end of current construct */ - int seencontent; /* is there anything in this branch yet? */ - struct subre *t; - - lp = left; - seencontent = 0; - t = subre(v, '=', 0, left, right); /* op '=' is tentative */ - NOERRN(); - while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) { - if (seencontent) { /* implicit concat operator */ - lp = newstate(v->nfa); - NOERRN(); - moveins(v->nfa, right, lp); - } - seencontent = 1; - - /* NB, recursion in parseqatom() may swallow rest of branch */ - parseqatom(v, stopper, type, lp, right, t); + struct state *lp; /* left end of current construct */ + int seencontent; /* is there anything in this branch yet? 
*/ + struct subre *t; + + lp = left; + seencontent = 0; + t = subre(v, '=', 0, left, right); /* op '=' is tentative */ + NOERRN(); + while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) { + if (seencontent) { /* implicit concat operator */ + lp = newstate(v->nfa); + NOERRN(); + moveins(v->nfa, right, lp); } + seencontent = 1; - if (!seencontent) { /* empty branch */ - if (!partial) - NOTE(REG_UUNSPEC); - assert(lp == left); - EMPTYARC(left, right); + /* NB, recursion in parseqatom() may swallow rest of branch */ + parseqatom(v, stopper, type, lp, right, t); + } + + if (!seencontent) { /* empty branch */ + if (!partial) { + NOTE(REG_UUNSPEC); } + assert(lp == left); + EMPTYARC(left, right); + } - return t; + return t; } - + /* - parseqatom - parse one quantified atom or constraint of an RE - * The bookkeeping near the end cooperates very closely with parsebranch(); - * in particular, it contains a recursion that can involve parsing the rest - * of the branch, making this function's name somewhat inaccurate. + * The bookkeeping near the end cooperates very closely with parsebranch(); in + * particular, it contains a recursion that can involve parsing the rest of + * the branch, making this function's name somewhat inaccurate. 
^ static VOID parseqatom(struct vars *, int, int, struct state *, ^ struct state *, struct subre *); */ -static VOID -parseqatom(v, stopper, type, lp, rp, top) -struct vars *v; -int stopper; /* EOS or ')' */ -int type; /* LACON (lookahead subRE) or PLAIN */ -struct state *lp; /* left state to hang it on */ -struct state *rp; /* right state to hang it on */ -struct subre *top; /* subtree top */ +static void +parseqatom( + struct vars *v, + int stopper, /* EOS or ')' */ + int type, /* LACON (lookahead subRE) or PLAIN */ + struct state *lp, /* left state to hang it on */ + struct state *rp, /* right state to hang it on */ + struct subre *top) /* subtree top */ { - struct state *s; /* temporaries for new states */ - struct state *s2; -# define ARCV(t, val) newarc(v->nfa, t, val, lp, rp) - int m, n; - struct subre *atom; /* atom's subtree */ - struct subre *t; - int cap; /* capturing parens? */ - int pos; /* positive lookahead? */ - int subno; /* capturing-parens or backref number */ - int atomtype; - int qprefer; /* quantifier short/long preference */ - int f; - struct subre **atomp; /* where the pointer to atom is */ - - /* initial bookkeeping */ - atom = NULL; - assert(lp->nouts == 0); /* must string new code */ - assert(rp->nins == 0); /* between lp and rp */ - subno = 0; /* just to shut lint up */ - - /* an atom or constraint... */ - atomtype = v->nexttype; - switch (atomtype) { + struct state *s; /* temporaries for new states */ + struct state *s2; +#define ARCV(t, val) newarc(v->nfa, t, val, lp, rp) + int m, n; + struct subre *atom; /* atom's subtree */ + struct subre *t; + int cap; /* capturing parens? */ + int pos; /* positive lookahead? */ + int subno; /* capturing-parens or backref number */ + int atomtype; + int qprefer; /* quantifier short/long preference */ + int f; + struct subre **atomp; /* where the pointer to atom is */ + + /* + * Initial bookkeeping. 
+ */ + + atom = NULL; + assert(lp->nouts == 0); /* must string new code */ + assert(rp->nins == 0); /* between lp and rp */ + subno = 0; /* just to shut lint up */ + + /* + * An atom or constraint... + */ + + atomtype = v->nexttype; + switch (atomtype) { /* first, constraints, which end by returning */ - case '^': - ARCV('^', 1); - if (v->cflags&REG_NLANCH) - ARCV(BEHIND, v->nlcolor); - NEXT(); - return; - break; - case '$': - ARCV('$', 1); - if (v->cflags&REG_NLANCH) - ARCV(AHEAD, v->nlcolor); - NEXT(); - return; - break; - case SBEGIN: - ARCV('^', 1); /* BOL */ - ARCV('^', 0); /* or BOS */ - NEXT(); - return; - break; - case SEND: - ARCV('$', 1); /* EOL */ - ARCV('$', 0); /* or EOS */ - NEXT(); - return; - break; - case '<': - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERR(); - nonword(v, BEHIND, lp, s); - word(v, AHEAD, s, rp); - return; - break; - case '>': - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERR(); - word(v, BEHIND, lp, s); - nonword(v, AHEAD, s, rp); - return; - break; - case WBDRY: - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERR(); - nonword(v, BEHIND, lp, s); - word(v, AHEAD, s, rp); - s = newstate(v->nfa); - NOERR(); - word(v, BEHIND, lp, s); - nonword(v, AHEAD, s, rp); - return; - break; - case NWBDRY: - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERR(); - word(v, BEHIND, lp, s); - word(v, AHEAD, s, rp); - s = newstate(v->nfa); - NOERR(); - nonword(v, BEHIND, lp, s); - nonword(v, AHEAD, s, rp); - return; - break; - case LACON: /* lookahead constraint */ - pos = v->nextvalue; - NEXT(); - s = newstate(v->nfa); - s2 = newstate(v->nfa); - NOERR(); - t = parse(v, ')', LACON, s, s2); - freesubre(v, t); /* internal structure irrelevant */ - assert(SEE(')') || ISERR()); - NEXT(); - n = newlacon(v, s, s2, pos); - NOERR(); - ARCV(LACON, n); - return; - break; - /* then errors, to get them out of the way */ - case '*': - case '+': - case '?': - case '{': - ERR(REG_BADRPT); - return; - break; -
default: - ERR(REG_ASSERT); - return; - break; - /* then plain characters, and minor variants on that theme */ - case ')': /* unbalanced paren */ - if ((v->cflags&REG_ADVANCED) != REG_EXTENDED) { - ERR(REG_EPAREN); - return; - } - /* legal in EREs due to specification botch */ - NOTE(REG_UPBOTCH); - /* fallthrough into case PLAIN */ - case PLAIN: - onechr(v, v->nextvalue, lp, rp); - okcolors(v->nfa, v->cm); - NOERR(); - NEXT(); - break; - case '[': - if (v->nextvalue == 1) - bracket(v, lp, rp); - else - cbracket(v, lp, rp); - assert(SEE(']') || ISERR()); - NEXT(); - break; - case '.': - rainbow(v->nfa, v->cm, PLAIN, - (v->cflags&REG_NLSTOP) ? v->nlcolor : COLORLESS, - lp, rp); - NEXT(); - break; - /* and finally the ugly stuff */ - case '(': /* value flags as capturing or non */ - cap = (type == LACON) ? 0 : v->nextvalue; - if (cap) { - v->nsubexp++; - subno = v->nsubexp; - if ((size_t)subno >= v->nsubs) - moresubs(v, subno); - assert((size_t)subno < v->nsubs); - } else - atomtype = PLAIN; /* something that's not '(' */ - NEXT(); - /* need new endpoints because tree will contain pointers */ - s = newstate(v->nfa); - s2 = newstate(v->nfa); - NOERR(); - EMPTYARC(lp, s); - EMPTYARC(s2, rp); - NOERR(); - atom = parse(v, ')', PLAIN, s, s2); - assert(SEE(')') || ISERR()); - NEXT(); - NOERR(); - if (cap) { - v->subs[subno] = atom; - t = subre(v, '(', atom->flags|CAP, lp, rp); - NOERR(); - t->subno = subno; - t->left = atom; - atom = t; - } - /* postpone everything else pending possible {0} */ - break; - case BACKREF: /* the Feature From The Black Lagoon */ - INSIST(type != LACON, REG_ESUBREG); - INSIST(v->nextvalue < v->nsubs, REG_ESUBREG); - INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG); - NOERR(); - assert(v->nextvalue > 0); - atom = subre(v, 'b', BACKR, lp, rp); - subno = v->nextvalue; - atom->subno = subno; - EMPTYARC(lp, rp); /* temporarily, so there's something */ - NEXT(); - break; + case '^': + ARCV('^', 1); + if (v->cflags&REG_NLANCH) { + ARCV(BEHIND, v->nlcolor); }
- - /* ...and an atom may be followed by a quantifier */ - switch (v->nexttype) { - case '*': - m = 0; - n = INFINITY; - qprefer = (v->nextvalue) ? LONGER : SHORTER; - NEXT(); - break; - case '+': - m = 1; - n = INFINITY; - qprefer = (v->nextvalue) ? LONGER : SHORTER; - NEXT(); - break; - case '?': - m = 0; - n = 1; - qprefer = (v->nextvalue) ? LONGER : SHORTER; - NEXT(); - break; - case '{': - NEXT(); - m = scannum(v); - if (EAT(',')) { - if (SEE(DIGIT)) - n = scannum(v); - else - n = INFINITY; - if (m > n) { - ERR(REG_BADBR); - return; - } - /* {m,n} exercises preference, even if it's {m,m} */ - qprefer = (v->nextvalue) ? LONGER : SHORTER; - } else { - n = m; - /* {m} passes operand's preference through */ - qprefer = 0; - } - if (!SEE('}')) { /* catches errors too */ - ERR(REG_BADBR); - return; - } - NEXT(); - break; - default: /* no quantifier */ - m = n = 1; - qprefer = 0; - break; + NEXT(); + return; + break; + case '$': + ARCV('$', 1); + if (v->cflags&REG_NLANCH) { + ARCV(AHEAD, v->nlcolor); } + NEXT(); + return; + break; + case SBEGIN: + ARCV('^', 1); /* BOL */ + ARCV('^', 0); /* or BOS */ + NEXT(); + return; + break; + case SEND: + ARCV('$', 1); /* EOL */ + ARCV('$', 0); /* or EOS */ + NEXT(); + return; + break; + case '<': + wordchrs(v); /* does NEXT() */ + s = newstate(v->nfa); + NOERR(); + nonword(v, BEHIND, lp, s); + word(v, AHEAD, s, rp); + return; + break; + case '>': + wordchrs(v); /* does NEXT() */ + s = newstate(v->nfa); + NOERR(); + word(v, BEHIND, lp, s); + nonword(v, AHEAD, s, rp); + return; + break; + case WBDRY: + wordchrs(v); /* does NEXT() */ + s = newstate(v->nfa); + NOERR(); + nonword(v, BEHIND, lp, s); + word(v, AHEAD, s, rp); + s = newstate(v->nfa); + NOERR(); + word(v, BEHIND, lp, s); + nonword(v, AHEAD, s, rp); + return; + break; + case NWBDRY: + wordchrs(v); /* does NEXT() */ + s = newstate(v->nfa); + NOERR(); + word(v, BEHIND, lp, s); + word(v, AHEAD, s, rp); + s = newstate(v->nfa); + NOERR(); + nonword(v, BEHIND, lp, s); + nonword(v,
AHEAD, s, rp); + return; + break; + case LACON: /* lookahead constraint */ + pos = v->nextvalue; + NEXT(); + s = newstate(v->nfa); + s2 = newstate(v->nfa); + NOERR(); + t = parse(v, ')', LACON, s, s2); + freesubre(v, t); /* internal structure irrelevant */ + assert(SEE(')') || ISERR()); + NEXT(); + n = newlacon(v, s, s2, pos); + NOERR(); + ARCV(LACON, n); + return; + break; - /* annoying special case: {0} or {0,0} cancels everything */ - if (m == 0 && n == 0) { - if (atom != NULL) - freesubre(v, atom); - if (atomtype == '(') - v->subs[subno] = NULL; - delsub(v->nfa, lp, rp); - EMPTYARC(lp, rp); - return; + /* + * Then errors, to get them out of the way. + */ + + case '*': + case '+': + case '?': + case '{': + ERR(REG_BADRPT); + return; + break; + default: + ERR(REG_ASSERT); + return; + break; + + /* + * Then plain characters, and minor variants on that theme. + */ + + case ')': /* unbalanced paren */ + if ((v->cflags&REG_ADVANCED) != REG_EXTENDED) { + ERR(REG_EPAREN); + return; } - /* if not a messy case, avoid hard part */ - assert(!MESSY(top->flags)); - f = top->flags | qprefer | ((atom != NULL) ? atom->flags : 0); - if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f))) { - if (!(m == 1 && n == 1)) - repeat(v, lp, rp, m, n); - if (atom != NULL) - freesubre(v, atom); - top->flags = f; - return; + /* + * Legal in EREs due to specification botch. + */ + + NOTE(REG_UPBOTCH); + /* fallthrough into case PLAIN */ + case PLAIN: + onechr(v, v->nextvalue, lp, rp); + okcolors(v->nfa, v->cm); + NOERR(); + NEXT(); + break; + case '[': + if (v->nextvalue == 1) { + bracket(v, lp, rp); + } else { + cbracket(v, lp, rp); } + assert(SEE(']') || ISERR()); + NEXT(); + break; + case '.': + rainbow(v->nfa, v->cm, PLAIN, + (v->cflags&REG_NLSTOP) ? v->nlcolor : COLORLESS, lp, rp); + NEXT(); + break; /* - * hard part: something messy - * That is, capturing parens, back reference, short/long clash, or - * an atom with substructure containing one of those. + * And finally the ugly stuff.
*/ - /* now we'll need a subre for the contents even if they're boring */ - if (atom == NULL) { - atom = subre(v, '=', 0, lp, rp); - NOERR(); + case '(': /* value flags as capturing or non */ + cap = (type == LACON) ? 0 : v->nextvalue; + if (cap) { + v->nsubexp++; + subno = v->nsubexp; + if ((size_t)subno >= v->nsubs) { + moresubs(v, subno); + } + assert((size_t)subno < v->nsubs); + } else { + atomtype = PLAIN; /* something that's not '(' */ } + NEXT(); /* - * prepare a general-purpose state skeleton - * - * ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp] - * / / - * [lp] ----> [s2] ----bypass--------------------- - * - * where bypass is an empty, and prefix is some repetitions of atom + * Need new endpoints because tree will contain pointers. */ - s = newstate(v->nfa); /* first, new endpoints for the atom */ + + s = newstate(v->nfa); s2 = newstate(v->nfa); NOERR(); - moveouts(v->nfa, lp, s); - moveins(v->nfa, rp, s2); + EMPTYARC(lp, s); + EMPTYARC(s2, rp); NOERR(); - atom->begin = s; - atom->end = s2; - s = newstate(v->nfa); /* and spots for prefix and bypass */ - s2 = newstate(v->nfa); + atom = parse(v, ')', PLAIN, s, s2); + assert(SEE(')') || ISERR()); + NEXT(); NOERR(); - EMPTYARC(lp, s); - EMPTYARC(lp, s2); + if (cap) { + v->subs[subno] = atom; + t = subre(v, '(', atom->flags|CAP, lp, rp); + NOERR(); + t->subno = subno; + t->left = atom; + atom = t; + } + + /* + * Postpone everything else pending possible {0}. + */ + + break; + case BACKREF: /* the Feature From The Black Lagoon */ + INSIST(type != LACON, REG_ESUBREG); + INSIST(v->nextvalue < v->nsubs, REG_ESUBREG); + INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG); + NOERR(); + assert(v->nextvalue > 0); + atom = subre(v, 'b', BACKR, lp, rp); + subno = v->nextvalue; + atom->subno = subno; + EMPTYARC(lp, rp); /* temporarily, so there's something */ + NEXT(); + break; + } + + /* + * ...and an atom may be followed by a quantifier. 
+ */ + + switch (v->nexttype) { + case '*': + m = 0; + n = INFINITY; + qprefer = (v->nextvalue) ? LONGER : SHORTER; + NEXT(); + break; + case '+': + m = 1; + n = INFINITY; + qprefer = (v->nextvalue) ? LONGER : SHORTER; + NEXT(); + break; + case '?': + m = 0; + n = 1; + qprefer = (v->nextvalue) ? LONGER : SHORTER; + NEXT(); + break; + case '{': + NEXT(); + m = scannum(v); + if (EAT(',')) { + if (SEE(DIGIT)) { + n = scannum(v); + } else { + n = INFINITY; + } + if (m > n) { + ERR(REG_BADBR); + return; + } + + /* + * {m,n} exercises preference, even if it's {m,m} + */ + + qprefer = (v->nextvalue) ? LONGER : SHORTER; + } else { + n = m; + /* + * {m} passes operand's preference through. + */ + + qprefer = 0; + } + if (!SEE('}')) { /* catches errors too */ + ERR(REG_BADBR); + return; + } + NEXT(); + break; + default: /* no quantifier */ + m = n = 1; + qprefer = 0; + break; + } + + /* + * Annoying special case: {0} or {0,0} cancels everything. + */ + + if (m == 0 && n == 0) { + if (atom != NULL) { + freesubre(v, atom); + } + if (atomtype == '(') { + v->subs[subno] = NULL; + } + delsub(v->nfa, lp, rp); + EMPTYARC(lp, rp); + return; + } + + /* + * If not a messy case, avoid hard part. + */ + + assert(!MESSY(top->flags)); + f = top->flags | qprefer | ((atom != NULL) ? atom->flags : 0); + if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f))) { + if (!(m == 1 && n == 1)) { + repeat(v, lp, rp, m, n); + } + if (atom != NULL) { + freesubre(v, atom); + } + top->flags = f; + return; + } + + /* + * hard part: something messy + * That is, capturing parens, back reference, short/long clash, or an atom + * with substructure containing one of those. + */ + + /* + * Now we'll need a subre for the contents even if they're boring. 
+ */ + + if (atom == NULL) { + atom = subre(v, '=', 0, lp, rp); + NOERR(); + } + + /* + * prepare a general-purpose state skeleton + * + * ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp] + * / / + * [lp] ----> [s2] ----bypass--------------------- + * + * where bypass is an empty, and prefix is some repetitions of atom + */ + + s = newstate(v->nfa); /* first, new endpoints for the atom */ + s2 = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + moveins(v->nfa, rp, s2); + NOERR(); + atom->begin = s; + atom->end = s2; + s = newstate(v->nfa); /* and spots for prefix and bypass */ + s2 = newstate(v->nfa); + NOERR(); + EMPTYARC(lp, s); + EMPTYARC(lp, s2); + NOERR(); + + /* + * Break remaining subRE into x{...} and what follows. + */ + + t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp); + t->left = atom; + atomp = &t->left; + + /* + * Here we should recurse... but we must postpone that to the end. + */ + + /* + * Split top into prefix and remaining. + */ + + assert(top->op == '=' && top->left == NULL && top->right == NULL); + top->left = subre(v, '=', top->flags, top->begin, lp); + top->op = '.'; + top->right = t; + + /* + * If it's a backref, now is the time to replicate the subNFA. + */ + + if (atomtype == BACKREF) { + assert(atom->begin->nouts == 1); /* just the EMPTY */ + delsub(v->nfa, atom->begin, atom->end); + assert(v->subs[subno] != NULL); + + /* + * And here's why the recursion got postponed: it must wait until the + * skeleton is filled in, because it may hit a backref that wants to + * copy the filled-in skeleton. 
+ */ + + dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end, + atom->begin, atom->end); NOERR(); + } + + /* + * It's quantifier time; first, turn x{0,...} into x{1,...}|empty + */ - /* break remaining subRE into x{...} and what follows */ - t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp); + if (m == 0) { + EMPTYARC(s2, atom->end);/* the bypass */ + assert(PREF(qprefer) != 0); + f = COMBINE(qprefer, atom->flags); + t = subre(v, '|', f, lp, atom->end); + NOERR(); t->left = atom; + t->right = subre(v, '|', PREF(f), s2, atom->end); + NOERR(); + t->right->left = subre(v, '=', 0, s2, atom->end); + NOERR(); + *atomp = t; atomp = &t->left; - /* here we should recurse... but we must postpone that to the end */ - - /* split top into prefix and remaining */ - assert(top->op == '=' && top->left == NULL && top->right == NULL); - top->left = subre(v, '=', top->flags, top->begin, lp); - top->op = '.'; - top->right = t; - - /* if it's a backref, now is the time to replicate the subNFA */ - if (atomtype == BACKREF) { - assert(atom->begin->nouts == 1); /* just the EMPTY */ - delsub(v->nfa, atom->begin, atom->end); - assert(v->subs[subno] != NULL); - /* and here's why the recursion got postponed: it must */ - /* wait until the skeleton is filled in, because it may */ - /* hit a backref that wants to copy the filled-in skeleton */ - dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end, - atom->begin, atom->end); - NOERR(); - } + m = 1; + } - /* it's quantifier time; first, turn x{0,...} into x{1,...}|empty */ - if (m == 0) { - EMPTYARC(s2, atom->end); /* the bypass */ - assert(PREF(qprefer) != 0); - f = COMBINE(qprefer, atom->flags); - t = subre(v, '|', f, lp, atom->end); - NOERR(); - t->left = atom; - t->right = subre(v, '|', PREF(f), s2, atom->end); - NOERR(); - t->right->left = subre(v, '=', 0, s2, atom->end); - NOERR(); - *atomp = t; - atomp = &t->left; - m = 1; - } + /* + * Deal with the rest of the quantifier. 
+ */ - /* deal with the rest of the quantifier */ - if (atomtype == BACKREF) { - /* special case: backrefs have internal quantifiers */ - EMPTYARC(s, atom->begin); /* empty prefix */ - /* just stuff everything into atom */ - repeat(v, atom->begin, atom->end, m, n); - atom->min = (short)m; - atom->max = (short)n; - atom->flags |= COMBINE(qprefer, atom->flags); - } else if (m == 1 && n == 1) { - /* no/vacuous quantifier: done */ - EMPTYARC(s, atom->begin); /* empty prefix */ - } else { - /* turn x{m,n} into x{m-1,n-1}x, with capturing */ - /* parens in only second x */ - dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin); - assert(m >= 1 && m != INFINITY && n >= 1); - repeat(v, s, atom->begin, m-1, (n == INFINITY) ? n : n-1); - f = COMBINE(qprefer, atom->flags); - t = subre(v, '.', f, s, atom->end); /* prefix and atom */ - NOERR(); - t->left = subre(v, '=', PREF(f), s, atom->begin); - NOERR(); - t->right = atom; - *atomp = t; - } + if (atomtype == BACKREF) { + /* + * Special case: backrefs have internal quantifiers. + */ - /* and finally, look after that postponed recursion */ - t = top->right; - if (!(SEE('|') || SEE(stopper) || SEE(EOS))) - t->right = parsebranch(v, stopper, type, atom->end, rp, 1); - else { - EMPTYARC(atom->end, rp); - t->right = subre(v, '=', 0, atom->end, rp); - } - assert(SEE('|') || SEE(stopper) || SEE(EOS)); - t->flags |= COMBINE(t->flags, t->right->flags); - top->flags |= COMBINE(top->flags, t->flags); -} + EMPTYARC(s, atom->begin); /* empty prefix */ + + /* + * Just stuff everything into atom. + */ + + repeat(v, atom->begin, atom->end, m, n); + atom->min = (short)m; + atom->max = (short)n; + atom->flags |= COMBINE(qprefer, atom->flags); + } else if (m == 1 && n == 1) { + /* + * No/vacuous quantifier: done. 
+ */ + + EMPTYARC(s, atom->begin); /* empty prefix */ + } else { + /* + * Turn x{m,n} into x{m-1,n-1}x, with capturing parens in only second + * x + */ + dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin); + assert(m >= 1 && m != INFINITY && n >= 1); + repeat(v, s, atom->begin, m-1, (n == INFINITY) ? n : n-1); + f = COMBINE(qprefer, atom->flags); + t = subre(v, '.', f, s, atom->end); /* prefix and atom */ + NOERR(); + t->left = subre(v, '=', PREF(f), s, atom->begin); + NOERR(); + t->right = atom; + *atomp = t; + } + + /* + * And finally, look after that postponed recursion. + */ + + t = top->right; + if (!(SEE('|') || SEE(stopper) || SEE(EOS))) { + t->right = parsebranch(v, stopper, type, atom->end, rp, 1); + } else { + EMPTYARC(atom->end, rp); + t->right = subre(v, '=', 0, atom->end, rp); + } + assert(SEE('|') || SEE(stopper) || SEE(EOS)); + t->flags |= COMBINE(t->flags, t->right->flags); + top->flags |= COMBINE(top->flags, t->flags); +} + /* - nonword - generate arcs for non-word-character ahead or behind ^ static VOID nonword(struct vars *, int, struct state *, struct state *); */ -static VOID -nonword(v, dir, lp, rp) -struct vars *v; -int dir; /* AHEAD or BEHIND */ -struct state *lp; -struct state *rp; +static void +nonword( + struct vars *v, + int dir, /* AHEAD or BEHIND */ + struct state *lp, + struct state *rp) { - int anchor = (dir == AHEAD) ? '$' : '^'; + int anchor = (dir == AHEAD) ? 
'$' : '^'; - assert(dir == AHEAD || dir == BEHIND); - newarc(v->nfa, anchor, 1, lp, rp); - newarc(v->nfa, anchor, 0, lp, rp); - colorcomplement(v->nfa, v->cm, dir, v->wordchrs, lp, rp); - /* (no need for special attention to \n) */ + assert(dir == AHEAD || dir == BEHIND); + newarc(v->nfa, anchor, 1, lp, rp); + newarc(v->nfa, anchor, 0, lp, rp); + colorcomplement(v->nfa, v->cm, dir, v->wordchrs, lp, rp); + /* (no need for special attention to \n) */ } - + /* - word - generate arcs for word character ahead or behind ^ static VOID word(struct vars *, int, struct state *, struct state *); */ -static VOID -word(v, dir, lp, rp) -struct vars *v; -int dir; /* AHEAD or BEHIND */ -struct state *lp; -struct state *rp; +static void +word( + struct vars *v, + int dir, /* AHEAD or BEHIND */ + struct state *lp, + struct state *rp) { - assert(dir == AHEAD || dir == BEHIND); - cloneouts(v->nfa, v->wordchrs, lp, rp, dir); - /* (no need for special attention to \n) */ + assert(dir == AHEAD || dir == BEHIND); + cloneouts(v->nfa, v->wordchrs, lp, rp, dir); + /* (no need for special attention to \n) */ } - + /* - scannum - scan a number ^ static int scannum(struct vars *); */ static int /* value, <= DUPMAX */ -scannum(v) -struct vars *v; +scannum( + struct vars *v) { - int n = 0; + int n = 0; - while (SEE(DIGIT) && n < DUPMAX) { - n = n*10 + v->nextvalue; - NEXT(); - } - if (SEE(DIGIT) || n > DUPMAX) { - ERR(REG_BADBR); - return 0; - } - return n; + while (SEE(DIGIT) && n < DUPMAX) { + n = n*10 + v->nextvalue; + NEXT(); + } + if (SEE(DIGIT) || n > DUPMAX) { + ERR(REG_BADBR); + return 0; + } + return n; } - + /* - repeat - replicate subNFA for quantifiers * The duplication sequences used here are chosen carefully so that any @@ -1169,1007 +1338,1102 @@ struct vars *v; * code in parse(), and when this is called, it doesn't matter any more. 
^ static VOID repeat(struct vars *, struct state *, struct state *, int, int); */ -static VOID -repeat(v, lp, rp, m, n) -struct vars *v; -struct state *lp; -struct state *rp; -int m; -int n; +static void +repeat( + struct vars *v, + struct state *lp, + struct state *rp, + int m, + int n) { -# define SOME 2 -# define INF 3 -# define PAIR(x, y) ((x)*4 + (y)) -# define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) ) - CONST int rm = REDUCE(m); - CONST int rn = REDUCE(n); - struct state *s; - struct state *s2; - - switch (PAIR(rm, rn)) { - case PAIR(0, 0): /* empty string */ - delsub(v->nfa, lp, rp); - EMPTYARC(lp, rp); - break; - case PAIR(0, 1): /* do as x| */ - EMPTYARC(lp, rp); - break; - case PAIR(0, SOME): /* do as x{1,n}| */ - repeat(v, lp, rp, 1, n); - NOERR(); - EMPTYARC(lp, rp); - break; - case PAIR(0, INF): /* loop x around */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - moveins(v->nfa, rp, s); - EMPTYARC(lp, s); - EMPTYARC(s, rp); - break; - case PAIR(1, 1): /* no action required */ - break; - case PAIR(1, SOME): /* do as x{0,n-1}x = (x{1,n-1}|)x */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - dupnfa(v->nfa, s, rp, lp, s); - NOERR(); - repeat(v, lp, s, 1, n-1); - NOERR(); - EMPTYARC(lp, s); - break; - case PAIR(1, INF): /* add loopback arc */ - s = newstate(v->nfa); - s2 = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - moveins(v->nfa, rp, s2); - EMPTYARC(lp, s); - EMPTYARC(s2, rp); - EMPTYARC(s2, s); - break; - case PAIR(SOME, SOME): /* do as x{m-1,n-1}x */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - dupnfa(v->nfa, s, rp, lp, s); - NOERR(); - repeat(v, lp, s, m-1, n-1); - break; - case PAIR(SOME, INF): /* do as x{m-1,}x */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - dupnfa(v->nfa, s, rp, lp, s); - NOERR(); - repeat(v, lp, s, m-1, n); - break; - default: - ERR(REG_ASSERT); - break; - } +#define SOME 2 +#define INF 3 +#define PAIR(x, y) ((x)*4 + (y)) 
+#define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) ) + CONST int rm = REDUCE(m); + CONST int rn = REDUCE(n); + struct state *s; + struct state *s2; + + switch (PAIR(rm, rn)) { + case PAIR(0, 0): /* empty string */ + delsub(v->nfa, lp, rp); + EMPTYARC(lp, rp); + break; + case PAIR(0, 1): /* do as x| */ + EMPTYARC(lp, rp); + break; + case PAIR(0, SOME): /* do as x{1,n}| */ + repeat(v, lp, rp, 1, n); + NOERR(); + EMPTYARC(lp, rp); + break; + case PAIR(0, INF): /* loop x around */ + s = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + moveins(v->nfa, rp, s); + EMPTYARC(lp, s); + EMPTYARC(s, rp); + break; + case PAIR(1, 1): /* no action required */ + break; + case PAIR(1, SOME): /* do as x{0,n-1}x = (x{1,n-1}|)x */ + s = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + dupnfa(v->nfa, s, rp, lp, s); + NOERR(); + repeat(v, lp, s, 1, n-1); + NOERR(); + EMPTYARC(lp, s); + break; + case PAIR(1, INF): /* add loopback arc */ + s = newstate(v->nfa); + s2 = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + moveins(v->nfa, rp, s2); + EMPTYARC(lp, s); + EMPTYARC(s2, rp); + EMPTYARC(s2, s); + break; + case PAIR(SOME, SOME): /* do as x{m-1,n-1}x */ + s = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + dupnfa(v->nfa, s, rp, lp, s); + NOERR(); + repeat(v, lp, s, m-1, n-1); + break; + case PAIR(SOME, INF): /* do as x{m-1,}x */ + s = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + dupnfa(v->nfa, s, rp, lp, s); + NOERR(); + repeat(v, lp, s, m-1, n); + break; + default: + ERR(REG_ASSERT); + break; + } } - + /* - bracket - handle non-complemented bracket expression * Also called from cbracket for complemented bracket expressions. 
^ static VOID bracket(struct vars *, struct state *, struct state *); */ -static VOID -bracket(v, lp, rp) -struct vars *v; -struct state *lp; -struct state *rp; +static void +bracket( + struct vars *v, + struct state *lp, + struct state *rp) { - assert(SEE('[')); - NEXT(); - while (!SEE(']') && !SEE(EOS)) - brackpart(v, lp, rp); - assert(SEE(']') || ISERR()); - okcolors(v->nfa, v->cm); + assert(SEE('[')); + NEXT(); + while (!SEE(']') && !SEE(EOS)) { + brackpart(v, lp, rp); + } + assert(SEE(']') || ISERR()); + okcolors(v->nfa, v->cm); } - + /* - cbracket - handle complemented bracket expression * We do it by calling bracket() with dummy endpoints, and then complementing - * the result. The alternative would be to invoke rainbow(), and then delete + * the result. The alternative would be to invoke rainbow(), and then delete * arcs as the b.e. is seen... but that gets messy. ^ static VOID cbracket(struct vars *, struct state *, struct state *); */ -static VOID -cbracket(v, lp, rp) -struct vars *v; -struct state *lp; -struct state *rp; +static void +cbracket( + struct vars *v, + struct state *lp, + struct state *rp) { - struct state *left = newstate(v->nfa); - struct state *right = newstate(v->nfa); - struct state *s; - struct arc *a; /* arc from lp */ - struct arc *ba; /* arc from left, from bracket() */ - struct arc *pa; /* MCCE-prototype arc */ - color co; - chr *p; - int i; - - NOERR(); - bracket(v, left, right); - if (v->cflags&REG_NLSTOP) - newarc(v->nfa, PLAIN, v->nlcolor, left, right); + struct state *left = newstate(v->nfa); + struct state *right = newstate(v->nfa); + struct state *s; + struct arc *a; /* arc from lp */ + struct arc *ba; /* arc from left, from bracket() */ + struct arc *pa; /* MCCE-prototype arc */ + color co; + chr *p; + int i; + + NOERR(); + bracket(v, left, right); + if (v->cflags&REG_NLSTOP) { + newarc(v->nfa, PLAIN, v->nlcolor, left, right); + } + NOERR(); + + assert(lp->nouts == 0); /* all outarcs will be ours */ + + /* + * Easy part of
complementing + */ + + colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp); + NOERR(); + if (v->mcces == NULL) { /* no MCCEs -- we're done */ + dropstate(v->nfa, left); + assert(right->nins == 0); + freestate(v->nfa, right); + return; + } + + /* + * But complementing gets messy in the presence of MCCEs... + */ + + NOTE(REG_ULOCALE); + for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) { + co = GETCOLOR(v->cm, *p); + a = findarc(lp, PLAIN, co); + ba = findarc(left, PLAIN, co); + if (ba == NULL) { + assert(a != NULL); + freearc(v->nfa, a); + } else { + assert(a == NULL); + } + s = newstate(v->nfa); NOERR(); - - assert(lp->nouts == 0); /* all outarcs will be ours */ - - /* easy part of complementing */ - colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp); + newarc(v->nfa, PLAIN, co, lp, s); NOERR(); - if (v->mcces == NULL) { /* no MCCEs -- we're done */ - dropstate(v->nfa, left); - assert(right->nins == 0); - freestate(v->nfa, right); - return; - } - - /* but complementing gets messy in the presence of MCCEs... 
*/ - NOTE(REG_ULOCALE); - for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) { - co = GETCOLOR(v->cm, *p); - a = findarc(lp, PLAIN, co); - ba = findarc(left, PLAIN, co); - if (ba == NULL) { - assert(a != NULL); - freearc(v->nfa, a); - } else { - assert(a == NULL); - } - s = newstate(v->nfa); - NOERR(); - newarc(v->nfa, PLAIN, co, lp, s); - NOERR(); - pa = findarc(v->mccepbegin, PLAIN, co); - assert(pa != NULL); - if (ba == NULL) { /* easy case, need all of them */ - cloneouts(v->nfa, pa->to, s, rp, PLAIN); - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); - } else { /* must be selective */ - if (findarc(ba->to, '$', 1) == NULL) { - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, pa->to, - s, rp); - } - for (pa = pa->to->outs; pa != NULL; pa = pa->outchain) - if (findarc(ba->to, PLAIN, pa->co) == NULL) - newarc(v->nfa, PLAIN, pa->co, s, rp); - if (s->nouts == 0) /* limit of selectivity: none */ - dropstate(v->nfa, s); /* frees arc too */ + pa = findarc(v->mccepbegin, PLAIN, co); + assert(pa != NULL); + if (ba == NULL) { /* easy case, need all of them */ + cloneouts(v->nfa, pa->to, s, rp, PLAIN); + newarc(v->nfa, '$', 1, s, rp); + newarc(v->nfa, '$', 0, s, rp); + colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); + } else { /* must be selective */ + if (findarc(ba->to, '$', 1) == NULL) { + newarc(v->nfa, '$', 1, s, rp); + newarc(v->nfa, '$', 0, s, rp); + colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); + } + for (pa = pa->to->outs; pa != NULL; pa = pa->outchain) { + if (findarc(ba->to, PLAIN, pa->co) == NULL) { + newarc(v->nfa, PLAIN, pa->co, s, rp); } - NOERR(); + } + if (s->nouts == 0) { /* limit of selectivity: none */ + dropstate(v->nfa, s); /* frees arc too */ + } } + NOERR(); + } - delsub(v->nfa, left, right); - assert(left->nouts == 0); - freestate(v->nfa, left); - assert(right->nins == 0); - freestate(v->nfa, 
right); + delsub(v->nfa, left, right); + assert(left->nouts == 0); + freestate(v->nfa, left); + assert(right->nins == 0); + freestate(v->nfa, right); } - + /* - brackpart - handle one item (or range) within a bracket expression ^ static VOID brackpart(struct vars *, struct state *, struct state *); */ -static VOID -brackpart(v, lp, rp) -struct vars *v; -struct state *lp; -struct state *rp; +static void +brackpart( + struct vars *v, + struct state *lp, + struct state *rp) { - celt startc; - celt endc; - struct cvec *cv; - chr *startp; - chr *endp; - chr c[1]; - - /* parse something, get rid of special cases, take shortcuts */ - switch (v->nexttype) { - case RANGE: /* a-b-c or other botch */ - ERR(REG_ERANGE); - return; - break; - case PLAIN: - c[0] = v->nextvalue; - NEXT(); - /* shortcut for ordinary chr (not range, not MCCE leader) */ - if (!SEE(RANGE) && !ISCELEADER(v, c[0])) { - onechr(v, c[0], lp, rp); - return; - } - startc = element(v, c, c+1); - NOERR(); - break; - case COLLEL: - startp = v->now; - endp = scanplain(v); - INSIST(startp < endp, REG_ECOLLATE); - NOERR(); - startc = element(v, startp, endp); - NOERR(); - break; - case ECLASS: - startp = v->now; - endp = scanplain(v); - INSIST(startp < endp, REG_ECOLLATE); - NOERR(); - startc = element(v, startp, endp); - NOERR(); - cv = eclass(v, startc, (v->cflags&REG_ICASE)); - NOERR(); - dovec(v, cv, lp, rp); - return; - break; - case CCLASS: - startp = v->now; - endp = scanplain(v); - INSIST(startp < endp, REG_ECTYPE); - NOERR(); - cv = cclass(v, startp, endp, (v->cflags&REG_ICASE)); - NOERR(); - dovec(v, cv, lp, rp); - return; - break; - default: - ERR(REG_ASSERT); - return; - break; - } - - if (SEE(RANGE)) { - NEXT(); - switch (v->nexttype) { - case PLAIN: - case RANGE: - c[0] = v->nextvalue; - NEXT(); - endc = element(v, c, c+1); - NOERR(); - break; - case COLLEL: - startp = v->now; - endp = scanplain(v); - INSIST(startp < endp, REG_ECOLLATE); - NOERR(); - endc = element(v, startp, endp); - NOERR(); - break; - 
default: - ERR(REG_ERANGE); - return; - break; - } - } else - endc = startc; + celt startc; + celt endc; + struct cvec *cv; + chr *startp; + chr *endp; + chr c[1]; + + /* + * Parse something, get rid of special cases, take shortcuts. + */ + + switch (v->nexttype) { + case RANGE: /* a-b-c or other botch */ + ERR(REG_ERANGE); + return; + break; + case PLAIN: + c[0] = v->nextvalue; + NEXT(); /* - * Ranges are unportable. Actually, standard C does - * guarantee that digits are contiguous, but making - * that an exception is just too complicated. + * Shortcut for ordinary chr (not range, not MCCE leader). */ - if (startc != endc) - NOTE(REG_UUNPORT); - cv = range(v, startc, endc, (v->cflags&REG_ICASE)); + + if (!SEE(RANGE) && !ISCELEADER(v, c[0])) { + onechr(v, c[0], lp, rp); + return; + } + startc = element(v, c, c+1); + NOERR(); + break; + case COLLEL: + startp = v->now; + endp = scanplain(v); + INSIST(startp < endp, REG_ECOLLATE); + NOERR(); + startc = element(v, startp, endp); + NOERR(); + break; + case ECLASS: + startp = v->now; + endp = scanplain(v); + INSIST(startp < endp, REG_ECOLLATE); + NOERR(); + startc = element(v, startp, endp); + NOERR(); + cv = eclass(v, startc, (v->cflags&REG_ICASE)); NOERR(); dovec(v, cv, lp, rp); + return; + break; + case CCLASS: + startp = v->now; + endp = scanplain(v); + INSIST(startp < endp, REG_ECTYPE); + NOERR(); + cv = cclass(v, startp, endp, (v->cflags&REG_ICASE)); + NOERR(); + dovec(v, cv, lp, rp); + return; + break; + default: + ERR(REG_ASSERT); + return; + break; + } + + if (SEE(RANGE)) { + NEXT(); + switch (v->nexttype) { + case PLAIN: + case RANGE: + c[0] = v->nextvalue; + NEXT(); + endc = element(v, c, c+1); + NOERR(); + break; + case COLLEL: + startp = v->now; + endp = scanplain(v); + INSIST(startp < endp, REG_ECOLLATE); + NOERR(); + endc = element(v, startp, endp); + NOERR(); + break; + default: + ERR(REG_ERANGE); + return; + break; + } + } else { + endc = startc; + } + + /* + * Ranges are unportable. 
Actually, standard C does guarantee that digits + * are contiguous, but making that an exception is just too complicated. + */ + + if (startc != endc) { + NOTE(REG_UUNPORT); + } + cv = range(v, startc, endc, (v->cflags&REG_ICASE)); + NOERR(); + dovec(v, cv, lp, rp); } - + /* - scanplain - scan PLAIN contents of [. etc. - * Certain bits of trickery in lex.c know that this code does not try - * to look past the final bracket of the [. etc. + * Certain bits of trickery in lex.c know that this code does not try to look + * past the final bracket of the [. etc. ^ static chr *scanplain(struct vars *); */ static chr * /* just after end of sequence */ -scanplain(v) -struct vars *v; +scanplain( + struct vars *v) { - chr *endp; + chr *endp; - assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS)); - NEXT(); + assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS)); + NEXT(); + endp = v->now; + while (SEE(PLAIN)) { endp = v->now; - while (SEE(PLAIN)) { - endp = v->now; - NEXT(); - } - - assert(SEE(END) || ISERR()); NEXT(); + } - return endp; -} + assert(SEE(END) || ISERR()); + NEXT(); + return endp; +} + /* - leaders - process a cvec of collating elements to also include leaders * Also gives all characters involved their own colors, which is almost * certainly necessary, and sets up little disconnected subNFA. 
^ static VOID leaders(struct vars *, struct cvec *); */ -static VOID -leaders(v, cv) -struct vars *v; -struct cvec *cv; +static void +leaders( + struct vars *v, + struct cvec *cv) { - int mcce; - chr *p; - chr leader; - struct state *s; - struct arc *a; - - v->mccepbegin = newstate(v->nfa); - v->mccepend = newstate(v->nfa); - NOERR(); - - for (mcce = 0; mcce < cv->nmcces; mcce++) { - p = cv->mcces[mcce]; - leader = *p; - if (!haschr(cv, leader)) { - addchr(cv, leader); - s = newstate(v->nfa); - newarc(v->nfa, PLAIN, subcolor(v->cm, leader), - v->mccepbegin, s); - okcolors(v->nfa, v->cm); - } else { - a = findarc(v->mccepbegin, PLAIN, - GETCOLOR(v->cm, leader)); - assert(a != NULL); - s = a->to; - assert(s != v->mccepend); - } - p++; - assert(*p != 0 && *(p+1) == 0); /* only 2-char MCCEs for now */ - newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend); - okcolors(v->nfa, v->cm); + int mcce; + chr *p; + chr leader; + struct state *s; + struct arc *a; + + v->mccepbegin = newstate(v->nfa); + v->mccepend = newstate(v->nfa); + NOERR(); + + for (mcce = 0; mcce < cv->nmcces; mcce++) { + p = cv->mcces[mcce]; + leader = *p; + if (!haschr(cv, leader)) { + addchr(cv, leader); + s = newstate(v->nfa); + newarc(v->nfa, PLAIN, subcolor(v->cm, leader), v->mccepbegin, s); + okcolors(v->nfa, v->cm); + } else { + a = findarc(v->mccepbegin, PLAIN, GETCOLOR(v->cm, leader)); + assert(a != NULL); + s = a->to; + assert(s != v->mccepend); } + p++; + assert(*p != 0 && *(p+1) == 0); /* only 2-char MCCEs for now */ + newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend); + okcolors(v->nfa, v->cm); + } } - + /* - onechr - fill in arcs for a plain character, and possible case complements * This is mostly a shortcut for efficient handling of the common case. 
^ static VOID onechr(struct vars *, pchr, struct state *, struct state *); */ -static VOID -onechr(v, c, lp, rp) -struct vars *v; -pchr c; -struct state *lp; -struct state *rp; +static void +onechr( + struct vars *v, + pchr c, + struct state *lp, + struct state *rp) { - if (!(v->cflags&REG_ICASE)) { - newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp); - return; - } + if (!(v->cflags&REG_ICASE)) { + newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp); + return; + } - /* rats, need general case anyway... */ - dovec(v, allcases(v, c), lp, rp); + /* rats, need general case anyway... */ + dovec(v, allcases(v, c), lp, rp); } - + /* - dovec - fill in arcs for each element of a cvec * This one has to handle the messy cases, like MCCEs and MCCE leaders. ^ static VOID dovec(struct vars *, struct cvec *, struct state *, ^ struct state *); */ -static VOID -dovec(v, cv, lp, rp) -struct vars *v; -struct cvec *cv; -struct state *lp; -struct state *rp; +static void +dovec( + struct vars *v, + struct cvec *cv, + struct state *lp, + struct state *rp) { - chr ch, from, to; - celt ce; - chr *p; - int i; - color co; - struct cvec *leads; - struct arc *a; - struct arc *pa; /* arc in prototype */ - struct state *s; - struct state *ps; /* state in prototype */ - - /* need a place to store leaders, if any */ - if (nmcces(v) > 0) { - assert(v->mcces != NULL); - if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) { - if (v->cv2 != NULL) - free(v->cv2); - v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces); - NOERR(); - leads = v->cv2; - } else - leads = clearcvec(v->cv2); - } else - leads = NULL; - - /* first, get the ordinary characters out of the way */ - for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) { - ch = *p; - if (!ISCELEADER(v, ch)) - newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp); - else { - assert(singleton(v->cm, ch)); - assert(leads != NULL); - if (!haschr(leads, ch)) - addchr(leads, ch); - } + chr ch, from, to; + celt ce; + chr *p; + int i; + color co; + struct cvec 
*leads; + struct arc *a; + struct arc *pa; /* arc in prototype */ + struct state *s; + struct state *ps; /* state in prototype */ + + /* + * Need a place to store leaders, if any. + */ + + if (nmcces(v) > 0) { + assert(v->mcces != NULL); + if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) { + if (v->cv2 != NULL) { + free(v->cv2); + } + v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces); + NOERR(); + leads = v->cv2; + } else { + leads = clearcvec(v->cv2); } - - /* and the ranges */ - for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) { - from = *p; - to = *(p+1); - while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) { - if (from < ce) - subrange(v, from, ce - 1, lp, rp); - assert(singleton(v->cm, ce)); - assert(leads != NULL); - if (!haschr(leads, ce)) - addchr(leads, ce); - from = ce + 1; - } - if (from <= to) - subrange(v, from, to, lp, rp); + } else { + leads = NULL; + } + + /* + * First, get the ordinary characters out of the way. + */ + + for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) { + ch = *p; + if (!ISCELEADER(v, ch)) { + newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp); + } else { + assert(singleton(v->cm, ch)); + assert(leads != NULL); + if (!haschr(leads, ch)) { + addchr(leads, ch); + } } + } + + /* + * And the ranges. 
+ */ + + for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) { + from = *p; + to = *(p+1); + while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) { + if (from < ce) { + subrange(v, from, ce - 1, lp, rp); + } + assert(singleton(v->cm, ce)); + assert(leads != NULL); + if (!haschr(leads, ce)) { + addchr(leads, ce); + } + from = ce + 1; + } + if (from <= to) { + subrange(v, from, to, lp, rp); + } + } - if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0) - return; + if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0) { + return; + } - /* deal with the MCCE leaders */ - NOTE(REG_ULOCALE); - for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) { - co = GETCOLOR(v->cm, *p); - a = findarc(lp, PLAIN, co); - if (a != NULL) - s = a->to; - else { - s = newstate(v->nfa); - NOERR(); - newarc(v->nfa, PLAIN, co, lp, s); - NOERR(); - } - pa = findarc(v->mccepbegin, PLAIN, co); - assert(pa != NULL); - ps = pa->to; - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp); - NOERR(); - } + /* + * Deal with the MCCE leaders. 
+ */ - /* and the MCCEs */ - for (i = 0; i < cv->nmcces; i++) { - p = cv->mcces[i]; - assert(singleton(v->cm, *p)); - if (!singleton(v->cm, *p)) { - ERR(REG_ASSERT); - return; - } - ch = *p++; - co = GETCOLOR(v->cm, ch); - a = findarc(lp, PLAIN, co); - if (a != NULL) - s = a->to; - else { - s = newstate(v->nfa); - NOERR(); - newarc(v->nfa, PLAIN, co, lp, s); - NOERR(); - } - assert(*p != 0); /* at least two chars */ - assert(singleton(v->cm, *p)); - ch = *p++; - co = GETCOLOR(v->cm, ch); - assert(*p == 0); /* and only two, for now */ - newarc(v->nfa, PLAIN, co, s, rp); - NOERR(); + NOTE(REG_ULOCALE); + for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) { + co = GETCOLOR(v->cm, *p); + a = findarc(lp, PLAIN, co); + if (a != NULL) { + s = a->to; + } else { + s = newstate(v->nfa); + NOERR(); + newarc(v->nfa, PLAIN, co, lp, s); + NOERR(); + } + pa = findarc(v->mccepbegin, PLAIN, co); + assert(pa != NULL); + ps = pa->to; + newarc(v->nfa, '$', 1, s, rp); + newarc(v->nfa, '$', 0, s, rp); + colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp); + NOERR(); + } + + /* + * And the MCCEs. 
+ */ + + for (i = 0; i < cv->nmcces; i++) { + p = cv->mcces[i]; + assert(singleton(v->cm, *p)); + if (!singleton(v->cm, *p)) { + ERR(REG_ASSERT); + return; + } + ch = *p++; + co = GETCOLOR(v->cm, ch); + a = findarc(lp, PLAIN, co); + if (a != NULL) { + s = a->to; + } else { + s = newstate(v->nfa); + NOERR(); + newarc(v->nfa, PLAIN, co, lp, s); + NOERR(); } + assert(*p != 0); /* at least two chars */ + assert(singleton(v->cm, *p)); + ch = *p++; + co = GETCOLOR(v->cm, ch); + assert(*p == 0); /* and only two, for now */ + newarc(v->nfa, PLAIN, co, s, rp); + NOERR(); + } } - + /* - nextleader - find next MCCE leader within range ^ static celt nextleader(struct vars *, pchr, pchr); */ static celt /* NOCELT means none */ -nextleader(v, from, to) -struct vars *v; -pchr from; -pchr to; +nextleader( + struct vars *v, + pchr from, + pchr to) { - int i; - chr *p; - chr ch; - celt it = NOCELT; - - if (v->mcces == NULL) - return it; - - for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) { - ch = *p; - if (from <= ch && ch <= to) - if (it == NOCELT || ch < it) - it = ch; - } + int i; + chr *p; + chr ch; + celt it = NOCELT; + + if (v->mcces == NULL) { return it; + } + + for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) { + ch = *p; + if (from <= ch && ch <= to) { + if (it == NOCELT || ch < it) { + it = ch; + } + } + } + return it; } - + /* - wordchrs - set up word-chr list for word-boundary stuff, if needed - * The list is kept as a bunch of arcs between two dummy states; it's - * disposed of by the unreachable-states sweep in NFA optimization. - * Does NEXT(). Must not be called from any unusual lexical context. - * This should be reconciled with the \w etc. handling in lex.c, and - * should be cleaned up to reduce dependencies on input scanning. + * The list is kept as a bunch of arcs between two dummy states; it's disposed + * of by the unreachable-states sweep in NFA optimization. Does NEXT(). Must + * not be called from any unusual lexical context. 
This should be reconciled + * with the \w etc. handling in lex.c, and should be cleaned up to reduce + * dependencies on input scanning. ^ static VOID wordchrs(struct vars *); */ -static VOID -wordchrs(v) -struct vars *v; +static void +wordchrs( + struct vars *v) { - struct state *left; - struct state *right; - - if (v->wordchrs != NULL) { - NEXT(); /* for consistency */ - return; - } - - left = newstate(v->nfa); - right = newstate(v->nfa); - NOERR(); - /* fine point: implemented with [::], and lexer will set REG_ULOCALE */ - lexword(v); - NEXT(); - assert(v->savenow != NULL && SEE('[')); - bracket(v, left, right); - assert((v->savenow != NULL && SEE(']')) || ISERR()); - NEXT(); - NOERR(); - v->wordchrs = left; + struct state *left; + struct state *right; + + if (v->wordchrs != NULL) { + NEXT(); /* for consistency */ + return; + } + + left = newstate(v->nfa); + right = newstate(v->nfa); + NOERR(); + + /* + * Fine point: implemented with [::], and lexer will set REG_ULOCALE. + */ + + lexword(v); + NEXT(); + assert(v->savenow != NULL && SEE('[')); + bracket(v, left, right); + assert((v->savenow != NULL && SEE(']')) || ISERR()); + NEXT(); + NOERR(); + v->wordchrs = left; } - + /* - subre - allocate a subre ^ static struct subre *subre(struct vars *, int, int, struct state *, ^ struct state *); */ static struct subre * -subre(v, op, flags, begin, end) -struct vars *v; -int op; -int flags; -struct state *begin; -struct state *end; +subre( + struct vars *v, + int op, + int flags, + struct state *begin, + struct state *end) { - struct subre *ret; - - ret = v->treefree; - if (ret != NULL) - v->treefree = ret->left; - else { - ret = (struct subre *)MALLOC(sizeof(struct subre)); - if (ret == NULL) { - ERR(REG_ESPACE); - return NULL; - } - ret->chain = v->treechain; - v->treechain = ret; + struct subre *ret; + + ret = v->treefree; + if (ret != NULL) { + v->treefree = ret->left; + } else { + ret = (struct subre *)MALLOC(sizeof(struct subre)); + if (ret == NULL) { + 
ERR(REG_ESPACE); + return NULL; } - - assert(strchr("|.b(=", op) != NULL); - - ret->op = op; - ret->flags = flags; - ret->retry = 0; - ret->subno = 0; - ret->min = ret->max = 1; - ret->left = NULL; - ret->right = NULL; - ret->begin = begin; - ret->end = end; - ZAPCNFA(ret->cnfa); - - return ret; + ret->chain = v->treechain; + v->treechain = ret; + } + + assert(strchr("|.b(=", op) != NULL); + + ret->op = op; + ret->flags = flags; + ret->retry = 0; + ret->subno = 0; + ret->min = ret->max = 1; + ret->left = NULL; + ret->right = NULL; + ret->begin = begin; + ret->end = end; + ZAPCNFA(ret->cnfa); + + return ret; } - + /* - freesubre - free a subRE subtree ^ static VOID freesubre(struct vars *, struct subre *); */ -static VOID -freesubre(v, sr) -struct vars *v; /* might be NULL */ -struct subre *sr; +static void +freesubre( + struct vars *v, /* might be NULL */ + struct subre *sr) { - if (sr == NULL) - return; - - if (sr->left != NULL) - freesubre(v, sr->left); - if (sr->right != NULL) - freesubre(v, sr->right); - - freesrnode(v, sr); + if (sr == NULL) { + return; + } + + if (sr->left != NULL) { + freesubre(v, sr->left); + } + if (sr->right != NULL) { + freesubre(v, sr->right); + } + + freesrnode(v, sr); } - + /* - freesrnode - free one node in a subRE subtree ^ static VOID freesrnode(struct vars *, struct subre *); */ -static VOID -freesrnode(v, sr) -struct vars *v; /* might be NULL */ -struct subre *sr; +static void +freesrnode( + struct vars *v, /* might be NULL */ + struct subre *sr) { - if (sr == NULL) - return; - - if (!NULLCNFA(sr->cnfa)) - freecnfa(&sr->cnfa); - sr->flags = 0; - - if (v != NULL) { - sr->left = v->treefree; - v->treefree = sr; - } else - FREE(sr); + if (sr == NULL) { + return; + } + + if (!NULLCNFA(sr->cnfa)) { + freecnfa(&sr->cnfa); + } + sr->flags = 0; + + if (v != NULL) { + sr->left = v->treefree; + v->treefree = sr; + } else { + FREE(sr); + } } - + /* - optst - optimize a subRE subtree ^ static VOID optst(struct vars *, struct subre *); */ 
-static VOID -optst(v, t) -struct vars *v; -struct subre *t; +static void +optst( + struct vars *v, + struct subre *t) { - if (t == NULL) - return; - - /* recurse through children */ - if (t->left != NULL) - optst(v, t->left); - if (t->right != NULL) - optst(v, t->right); + if (t == NULL) { + return; + } + + /* + * Recurse through children. + */ + + if (t->left != NULL) { + optst(v, t->left); + } + if (t->right != NULL) { + optst(v, t->right); + } } - + /* - numst - number tree nodes (assigning retry indexes) ^ static int numst(struct subre *, int); */ static int /* next number */ -numst(t, start) -struct subre *t; -int start; /* starting point for subtree numbers */ +numst( + struct subre *t, + int start) /* starting point for subtree numbers */ { - int i; - - assert(t != NULL); - - i = start; - t->retry = (short)i++; - if (t->left != NULL) - i = numst(t->left, i); - if (t->right != NULL) - i = numst(t->right, i); - return i; + int i; + + assert(t != NULL); + + i = start; + t->retry = (short)i++; + if (t->left != NULL) { + i = numst(t->left, i); + } + if (t->right != NULL) { + i = numst(t->right, i); + } + return i; } - + /* - markst - mark tree nodes as INUSE ^ static VOID markst(struct subre *); */ -static VOID -markst(t) -struct subre *t; +static void +markst( + struct subre *t) { - assert(t != NULL); - - t->flags |= INUSE; - if (t->left != NULL) - markst(t->left); - if (t->right != NULL) - markst(t->right); + assert(t != NULL); + + t->flags |= INUSE; + if (t->left != NULL) { + markst(t->left); + } + if (t->right != NULL) { + markst(t->right); + } } - + /* - cleanst - free any tree nodes not marked INUSE ^ static VOID cleanst(struct vars *); */ -static VOID -cleanst(v) -struct vars *v; +static void +cleanst( + struct vars *v) { - struct subre *t; - struct subre *next; + struct subre *t; + struct subre *next; - for (t = v->treechain; t != NULL; t = next) { - next = t->chain; - if (!(t->flags&INUSE)) - FREE(t); + for (t = v->treechain; t != NULL; t = next) { + 
next = t->chain; + if (!(t->flags&INUSE)) { + FREE(t); } - v->treechain = NULL; - v->treefree = NULL; /* just on general principles */ + } + v->treechain = NULL; + v->treefree = NULL; /* just on general principles */ } - + /* - nfatree - turn a subRE subtree into a tree of compacted NFAs ^ static long nfatree(struct vars *, struct subre *, FILE *); */ static long /* optimize results from top node */ -nfatree(v, t, f) -struct vars *v; -struct subre *t; -FILE *f; /* for debug output */ +nfatree( + struct vars *v, + struct subre *t, + FILE *f) /* for debug output */ { - assert(t != NULL && t->begin != NULL); + assert(t != NULL && t->begin != NULL); - if (t->left != NULL) - (DISCARD)nfatree(v, t->left, f); - if (t->right != NULL) - (DISCARD)nfatree(v, t->right, f); + if (t->left != NULL) { + (DISCARD)nfatree(v, t->left, f); + } + if (t->right != NULL) { + (DISCARD)nfatree(v, t->right, f); + } - return nfanode(v, t, f); + return nfanode(v, t, f); } - + /* - nfanode - do one NFA for nfatree ^ static long nfanode(struct vars *, struct subre *, FILE *); */ static long /* optimize results */ -nfanode(v, t, f) -struct vars *v; -struct subre *t; -FILE *f; /* for debug output */ +nfanode( + struct vars *v, + struct subre *t, + FILE *f) /* for debug output */ { - struct nfa *nfa; - long ret = 0; - char idbuf[50]; - - assert(t->begin != NULL); - - if (f != NULL) - fprintf(f, "\n\n\n========= TREE NODE %s ==========\n", - stid(t, idbuf, sizeof(idbuf))); - nfa = newnfa(v, v->cm, v->nfa); - NOERRZ(); - dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final); - if (!ISERR()) { - specialcolors(nfa); - ret = optimize(nfa, f); - } - if (!ISERR()) - compact(nfa, &t->cnfa); - - freenfa(nfa); - return ret; + struct nfa *nfa; + long ret = 0; + char idbuf[50]; + + assert(t->begin != NULL); + + if (f != NULL) { + fprintf(f, "\n\n\n========= TREE NODE %s ==========\n", + stid(t, idbuf, sizeof(idbuf))); + } + nfa = newnfa(v, v->cm, v->nfa); + NOERRZ(); + dupnfa(nfa, t->begin, t->end, nfa->init, 
nfa->final); + if (!ISERR()) { + specialcolors(nfa); + ret = optimize(nfa, f); + } + if (!ISERR()) { + compact(nfa, &t->cnfa); + } + + freenfa(nfa); + return ret; } - + /* - newlacon - allocate a lookahead-constraint subRE ^ static int newlacon(struct vars *, struct state *, struct state *, int); */ static int /* lacon number */ -newlacon(v, begin, end, pos) -struct vars *v; -struct state *begin; -struct state *end; -int pos; +newlacon( + struct vars *v, + struct state *begin, + struct state *end, + int pos) { - int n; - struct subre *sub; - - if (v->nlacons == 0) { - v->lacons = (struct subre *)MALLOC(2 * sizeof(struct subre)); - n = 1; /* skip 0th */ - v->nlacons = 2; - } else { - v->lacons = (struct subre *)REALLOC(v->lacons, - (v->nlacons+1)*sizeof(struct subre)); - n = v->nlacons++; - } - if (v->lacons == NULL) { - ERR(REG_ESPACE); - return 0; - } - sub = &v->lacons[n]; - sub->begin = begin; - sub->end = end; - sub->subno = pos; - ZAPCNFA(sub->cnfa); - return n; + int n; + struct subre *sub; + + if (v->nlacons == 0) { + v->lacons = (struct subre *)MALLOC(2 * sizeof(struct subre)); + n = 1; /* skip 0th */ + v->nlacons = 2; + } else { + v->lacons = (struct subre *)REALLOC(v->lacons, + (v->nlacons+1)*sizeof(struct subre)); + n = v->nlacons++; + } + if (v->lacons == NULL) { + ERR(REG_ESPACE); + return 0; + } + sub = &v->lacons[n]; + sub->begin = begin; + sub->end = end; + sub->subno = pos; + ZAPCNFA(sub->cnfa); + return n; } - + /* - freelacons - free lookahead-constraint subRE vector ^ static VOID freelacons(struct subre *, int); */ -static VOID -freelacons(subs, n) -struct subre *subs; -int n; +static void +freelacons( + struct subre *subs, + int n) { - struct subre *sub; - int i; - - assert(n > 0); - for (sub = subs + 1, i = n - 1; i > 0; sub++, i--) /* no 0th */ - if (!NULLCNFA(sub->cnfa)) - freecnfa(&sub->cnfa); - FREE(subs); -} + struct subre *sub; + int i; + assert(n > 0); + for (sub=subs+1, i=n-1; i>0; sub++, i--) { /* no 0th */ + if (!NULLCNFA(sub->cnfa)) 
{ + freecnfa(&sub->cnfa); + } + } + FREE(subs); +} + /* - rfree - free a whole RE (insides of regfree) ^ static VOID rfree(regex_t *); */ -static VOID -rfree(re) -regex_t *re; +static void +rfree( + regex_t *re) { - struct guts *g; - - if (re == NULL || re->re_magic != REMAGIC) - return; - - re->re_magic = 0; /* invalidate RE */ - g = (struct guts *)re->re_guts; - re->re_guts = NULL; - re->re_fns = NULL; - g->magic = 0; - freecm(&g->cmap); - if (g->tree != NULL) - freesubre((struct vars *)NULL, g->tree); - if (g->lacons != NULL) - freelacons(g->lacons, g->nlacons); - if (!NULLCNFA(g->search)) - freecnfa(&g->search); - FREE(g); + struct guts *g; + + if (re == NULL || re->re_magic != REMAGIC) { + return; + } + + re->re_magic = 0; /* invalidate RE */ + g = (struct guts *) re->re_guts; + re->re_guts = NULL; + re->re_fns = NULL; + g->magic = 0; + freecm(&g->cmap); + if (g->tree != NULL) { + freesubre(NULL, g->tree); + } + if (g->lacons != NULL) { + freelacons(g->lacons, g->nlacons); + } + if (!NULLCNFA(g->search)) { + freecnfa(&g->search); + } + FREE(g); } - + /* - dump - dump an RE in human-readable form ^ static VOID dump(regex_t *, FILE *); */ -static VOID -dump(re, f) -regex_t *re; -FILE *f; +static void +dump( + regex_t *re, + FILE *f) { #ifdef REG_DEBUG - struct guts *g; - int i; - - if (re->re_magic != REMAGIC) - fprintf(f, "bad magic number (0x%x not 0x%x)\n", re->re_magic, - REMAGIC); - if (re->re_guts == NULL) { - fprintf(f, "NULL guts!!!\n"); - return; - } - g = (struct guts *)re->re_guts; - if (g->magic != GUTSMAGIC) - fprintf(f, "bad guts magic number (0x%x not 0x%x)\n", g->magic, - GUTSMAGIC); - - fprintf(f, "\n\n\n========= DUMP ==========\n"); - fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n", - re->re_nsub, re->re_info, re->re_csize, g->ntree); - - dumpcolors(&g->cmap, f); - if (!NULLCNFA(g->search)) { - printf("\nsearch:\n"); - dumpcnfa(&g->search, f); - } - for (i = 1; i < g->nlacons; i++) { - fprintf(f, "\nla%d (%s):\n", i, - 
(g->lacons[i].subno) ? "positive" : "negative"); - dumpcnfa(&g->lacons[i].cnfa, f); - } - fprintf(f, "\n"); - dumpst(g->tree, f, 0); + struct guts *g; + int i; + + if (re->re_magic != REMAGIC) { + fprintf(f, "bad magic number (0x%x not 0x%x)\n", + re->re_magic, REMAGIC); + } + if (re->re_guts == NULL) { + fprintf(f, "NULL guts!!!\n"); + return; + } + g = (struct guts *)re->re_guts; + if (g->magic != GUTSMAGIC) { + fprintf(f, "bad guts magic number (0x%x not 0x%x)\n", + g->magic, GUTSMAGIC); + } + + fprintf(f, "\n\n\n========= DUMP ==========\n"); + fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n", + re->re_nsub, re->re_info, re->re_csize, g->ntree); + + dumpcolors(&g->cmap, f); + if (!NULLCNFA(g->search)) { + printf("\nsearch:\n"); + dumpcnfa(&g->search, f); + } + for (i = 1; i < g->nlacons; i++) { + fprintf(f, "\nla%d (%s):\n", i, + (g->lacons[i].subno) ? "positive" : "negative"); + dumpcnfa(&g->lacons[i].cnfa, f); + } + fprintf(f, "\n"); + dumpst(g->tree, f, 0); #endif } - + /* - dumpst - dump a subRE tree ^ static VOID dumpst(struct subre *, FILE *, int); */ -static VOID -dumpst(t, f, nfapresent) -struct subre *t; -FILE *f; -int nfapresent; /* is the original NFA still around? */ +static void +dumpst( + struct subre *t, + FILE *f, + int nfapresent) /* is the original NFA still around? */ { - if (t == NULL) - fprintf(f, "null tree\n"); - else - stdump(t, f, nfapresent); - fflush(f); + if (t == NULL) { + fprintf(f, "null tree\n"); + } else { + stdump(t, f, nfapresent); + } + fflush(f); } - + /* - stdump - recursive guts of dumpst ^ static VOID stdump(struct subre *, FILE *, int); */ -static VOID -stdump(t, f, nfapresent) -struct subre *t; -FILE *f; -int nfapresent; /* is the original NFA still around? */ +static void +stdump( + struct subre *t, + FILE *f, + int nfapresent) /* is the original NFA still around? */ { - char idbuf[50]; - - fprintf(f, "%s. 
`%c'", stid(t, idbuf, sizeof(idbuf)), t->op); - if (t->flags&LONGER) - fprintf(f, " longest"); - if (t->flags&SHORTER) - fprintf(f, " shortest"); - if (t->flags&MIXED) - fprintf(f, " hasmixed"); - if (t->flags&CAP) - fprintf(f, " hascapture"); - if (t->flags&BACKR) - fprintf(f, " hasbackref"); - if (!(t->flags&INUSE)) - fprintf(f, " UNUSED"); - if (t->subno != 0) - fprintf(f, " (#%d)", t->subno); - if (t->min != 1 || t->max != 1) { - fprintf(f, " {%d,", t->min); - if (t->max != INFINITY) - fprintf(f, "%d", t->max); - fprintf(f, "}"); + char idbuf[50]; + + fprintf(f, "%s. `%c'", stid(t, idbuf, sizeof(idbuf)), t->op); + if (t->flags&LONGER) { + fprintf(f, " longest"); + } + if (t->flags&SHORTER) { + fprintf(f, " shortest"); + } + if (t->flags&MIXED) { + fprintf(f, " hasmixed"); + } + if (t->flags&CAP) { + fprintf(f, " hascapture"); + } + if (t->flags&BACKR) { + fprintf(f, " hasbackref"); + } + if (!(t->flags&INUSE)) { + fprintf(f, " UNUSED"); + } + if (t->subno != 0) { + fprintf(f, " (#%d)", t->subno); + } + if (t->min != 1 || t->max != 1) { + fprintf(f, " {%d,", t->min); + if (t->max != INFINITY) { + fprintf(f, "%d", t->max); } - if (nfapresent) - fprintf(f, " %ld-%ld", (long)t->begin->no, (long)t->end->no); - if (t->left != NULL) - fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf))); - if (t->right != NULL) - fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf))); - if (!NULLCNFA(t->cnfa)) { - fprintf(f, "\n"); - dumpcnfa(&t->cnfa, f); - fprintf(f, "\n"); - } - if (t->left != NULL) - stdump(t->left, f, nfapresent); - if (t->right != NULL) - stdump(t->right, f, nfapresent); + fprintf(f, "}"); + } + if (nfapresent) { + fprintf(f, " %ld-%ld", (long)t->begin->no, (long)t->end->no); + } + if (t->left != NULL) { + fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf))); + } + if (t->right != NULL) { + fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf))); + } + if (!NULLCNFA(t->cnfa)) { + fprintf(f, "\n"); + dumpcnfa(&t->cnfa, f); + fprintf(f, "\n"); + } + 
if (t->left != NULL) { + stdump(t->left, f, nfapresent); + } + if (t->right != NULL) { + stdump(t->right, f, nfapresent); + } } - + /* - stid - identify a subtree node for dumping ^ static char *stid(struct subre *, char *, size_t); */ static char * /* points to buf or constant string */ -stid(t, buf, bufsize) -struct subre *t; -char *buf; -size_t bufsize; +stid( + struct subre *t, + char *buf, + size_t bufsize) { - /* big enough for hex int or decimal t->retry? */ - if (bufsize < sizeof(void*)*2 + 3 || bufsize < sizeof(t->retry)*3 + 1) - return "unable"; - if (t->retry != 0) - sprintf(buf, "%d", t->retry); - else - sprintf(buf, "%p", t); - return buf; + /* + * Big enough for hex int or decimal t->retry? + */ + + if (bufsize < sizeof(void*)*2 + 3 || bufsize < sizeof(t->retry)*3 + 1) { + return "unable"; + } + if (t->retry != 0) { + sprintf(buf, "%d", t->retry); + } else { + sprintf(buf, "%p", t); + } + return buf; } #include "regc_lex.c" @@ -2177,3 +2441,11 @@ size_t bufsize; #include "regc_nfa.c" #include "regc_cvec.c" #include "regc_locale.c" + +/* + * Local Variables: + * mode: c + * c-basic-offset: 4 + * fill-column: 78 + * End: + */ diff --git a/generic/regex.h b/generic/regex.h index bb1080a..dfd11ec 100644 --- a/generic/regex.h +++ b/generic/regex.h @@ -4,20 +4,20 @@ * regular expressions * * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. 
+ * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * + * I'd appreciate being given credit for this package in the documentation of + * software which uses it, but that is not a requirement. + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -30,38 +30,35 @@ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * - * * Prototypes etc. marked with "^" within comments get gathered up (and - * possibly edited) by the regfwd program and inserted near the bottom of - * this file. + * possibly edited) by the regfwd program and inserted near the bottom of this + * file. * - * We offer the option of declaring one wide-character version of the - * RE functions as well as the char versions. To do that, define - * __REG_WIDE_T to the type of wide characters (unfortunately, there - * is no consensus that wchar_t is suitable) and __REG_WIDE_COMPILE and - * __REG_WIDE_EXEC to the names to be used for the compile and execute - * functions (suggestion: re_Xcomp and re_Xexec, where X is a letter - * suggestive of the wide type, e.g. re_ucomp and re_uexec for Unicode). - * For cranky old compilers, it may be necessary to do something like: + * We offer the option of declaring one wide-character version of the RE + * functions as well as the char versions. 
To do that, define __REG_WIDE_T to + * the type of wide characters (unfortunately, there is no consensus that + * wchar_t is suitable) and __REG_WIDE_COMPILE and __REG_WIDE_EXEC to the + * names to be used for the compile and execute functions (suggestion: + * re_Xcomp and re_Xexec, where X is a letter suggestive of the wide type, + * e.g. re_ucomp and re_uexec for Unicode). For cranky old compilers, it may + * be necessary to do something like: * #define __REG_WIDE_COMPILE(a,b,c,d) re_Xcomp(a,b,c,d) * #define __REG_WIDE_EXEC(a,b,c,d,e,f,g) re_Xexec(a,b,c,d,e,f,g) * rather than just #defining the names as parameterless macros. * * For some specialized purposes, it may be desirable to suppress the - * declarations of the "front end" functions, regcomp() and regexec(), - * or of the char versions of the compile and execute functions. To - * suppress the front-end functions, define __REG_NOFRONT. To suppress - * the char versions, define __REG_NOCHAR. + * declarations of the "front end" functions, regcomp() and regexec(), or of + * the char versions of the compile and execute functions. To suppress the + * front-end functions, define __REG_NOFRONT. To suppress the char versions, + * define __REG_NOCHAR. * * The right place to do those defines (and some others you may want, see - * below) would be <sys/types.h>. If you don't have control of that file, - * the right place to add your own defines to this file is marked below. - * This is normally done automatically, by the makefile and regmkhdr, based - * on the contents of regcustom.h. + * below) would be <sys/types.h>. If you don't have control of that file, the + * right place to add your own defines to this file is marked below. This is + * normally done automatically, by the makefile and regmkhdr, based on the + * contents of regcustom.h. */ - - /* * voodoo for C++ */ @@ -69,18 +66,15 @@ extern "C" { #endif - - /* * Add your own defines, if needed, here. 
*/ - - /* - * Location where a chunk of regcustom.h is automatically spliced into - * this file (working from its prototype, regproto.h). + * Location where a chunk of regcustom.h is automatically spliced into this + * file (working from its prototype, regproto.h). */ + /* --- begin --- */ /* ensure certain things don't sneak in from system headers */ #ifdef __REG_WIDE_T @@ -121,15 +115,14 @@ extern "C" { #define regerror TclReError /* --- end --- */ - /* * interface types etc. */ /* - * regoff_t has to be large enough to hold either off_t or ssize_t, - * and must be signed; it's only a guess that long is suitable, so we - * offer <sys/types.h> an override. + * regoff_t has to be large enough to hold either off_t or ssize_t, and must + * be signed; it's only a guess that long is suitable, so we offer + * <sys/types.h> an override. */ #ifdef __REG_REGOFF_T typedef __REG_REGOFF_T regoff_t; @@ -148,8 +141,8 @@ typedef void re_void; #endif /* - * Also for benefit of old compilers, <sys/types.h> can supply a macro - * which expands to a substitute for `const'. + * Also for benefit of old compilers, <sys/types.h> can supply a macro which + * expands to a substitute for `const'. 
*/ #ifndef __REG_CONST #define __REG_CONST const @@ -163,43 +156,41 @@ typedef void re_void; /* the biggie, a compiled RE (or rather, a front end to same) */ typedef struct { - int re_magic; /* magic number */ - size_t re_nsub; /* number of subexpressions */ - long re_info; /* information about RE */ -# define REG_UBACKREF 000001 -# define REG_ULOOKAHEAD 000002 -# define REG_UBOUNDS 000004 -# define REG_UBRACES 000010 -# define REG_UBSALNUM 000020 -# define REG_UPBOTCH 000040 -# define REG_UBBS 000100 -# define REG_UNONPOSIX 000200 -# define REG_UUNSPEC 000400 -# define REG_UUNPORT 001000 -# define REG_ULOCALE 002000 -# define REG_UEMPTYMATCH 004000 -# define REG_UIMPOSSIBLE 010000 -# define REG_USHORTEST 020000 - int re_csize; /* sizeof(character) */ - char *re_endp; /* backward compatibility kludge */ - /* the rest is opaque pointers to hidden innards */ - char *re_guts; /* `char *' is more portable than `void *' */ - char *re_fns; + int re_magic; /* magic number */ + size_t re_nsub; /* number of subexpressions */ + long re_info; /* information about RE */ +#define REG_UBACKREF 000001 +#define REG_ULOOKAHEAD 000002 +#define REG_UBOUNDS 000004 +#define REG_UBRACES 000010 +#define REG_UBSALNUM 000020 +#define REG_UPBOTCH 000040 +#define REG_UBBS 000100 +#define REG_UNONPOSIX 000200 +#define REG_UUNSPEC 000400 +#define REG_UUNPORT 001000 +#define REG_ULOCALE 002000 +#define REG_UEMPTYMATCH 004000 +#define REG_UIMPOSSIBLE 010000 +#define REG_USHORTEST 020000 + int re_csize; /* sizeof(character) */ + char *re_endp; /* backward compatibility kludge */ + /* the rest is opaque pointers to hidden innards */ + char *re_guts; /* `char *' is more portable than `void *' */ + char *re_fns; } regex_t; /* result reporting (may acquire more fields later) */ typedef struct { - regoff_t rm_so; /* start of substring */ - regoff_t rm_eo; /* end of substring */ + regoff_t rm_so; /* start of substring */ + regoff_t rm_eo; /* end of substring */ } regmatch_t; /* supplementary control 
and reporting */ typedef struct { - regmatch_t rm_extend; /* see REG_EXPECT */ + regmatch_t rm_extend; /* see REG_EXPECT */ } rm_detail_t; - - /* * compilation ^ #ifndef __REG_NOCHAR @@ -231,8 +222,6 @@ typedef struct { #define REG_FAKE 010000 /* none of your business :-) */ #define REG_PROGRESS 020000 /* none of your business :-) */ - - /* * execution ^ #ifndef __REG_NOCHAR @@ -254,23 +243,19 @@ typedef struct { #define REG_MTRACE 0020 /* none of your business */ #define REG_SMALL 0040 /* none of your business */ - - /* * misc generics (may be more functions here eventually) ^ re_void regfree(regex_t *); */ - - /* * error reporting * Be careful if modifying the list of error codes -- the table used by * regerror() is generated automatically from this file! * - * Note that there is no wide-char variant of regerror at this time; what - * kind of character is used for error reports is independent of what kind - * is used in matching. + * Note that there is no wide-char variant of regerror at this time; what kind + * of character is used for error reports is independent of what kind is used + * in matching. * ^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t); */ @@ -296,8 +281,6 @@ typedef struct { #define REG_ATOI 101 /* convert error-code name to number */ #define REG_ITOA 102 /* convert error-code number to name */ - - /* * the prototypes, as possibly munched by regfwd */ @@ -327,8 +310,6 @@ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t); /* automatically gathered by fwd; do not hand-edit */ /* =====^!^===== end forwards =====^!^===== */ - - /* * more C++ voodoo */ @@ -336,6 +317,4 @@ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t); } #endif - - #endif diff --git a/generic/regguts.h b/generic/regguts.h index 36e5092..728d5eb 100644 --- a/generic/regguts.h +++ b/generic/regguts.h @@ -2,20 +2,20 @@ * Internal interface definitions, etc., for the reg package * * Copyright (c) 1998, 1999 Henry Spencer. 
All rights reserved. - * + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * + * + * I'd appreciate being given credit for this package in the documentation of + * software which uses it, but that is not a requirement. + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -28,17 +28,13 @@ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - - /* - * Environmental customization. It should not (I hope) be necessary to - * alter the file you are now reading -- regcustom.h should handle it all, - * given care here and elsewhere. + * Environmental customization. It should not (I hope) be necessary to alter + * the file you are now reading -- regcustom.h should handle it all, given + * care here and elsewhere. */ #include "regcustom.h" - - /* * Things that regcustom.h might override. 
*/ @@ -54,9 +50,9 @@ /* assertions */ #ifndef assert -# ifndef REG_DEBUG -# define NDEBUG /* no assertions */ -# endif +#ifndef REG_DEBUG +#define NDEBUG /* no assertions */ +#endif /* !REG_DEBUG */ #include <assert.h> #endif @@ -65,16 +61,16 @@ #define VOID void /* for function return values */ #endif #ifndef DISCARD -#define DISCARD VOID /* for throwing values away */ +#define DISCARD void /* for throwing values away */ #endif #ifndef PVOID -#define PVOID VOID * /* generic pointer */ +#define PVOID void * /* generic pointer */ #endif #ifndef VS -#define VS(x) ((PVOID)(x)) /* cast something to generic ptr */ +#define VS(x) ((void*)(x)) /* cast something to generic ptr */ #endif #ifndef NOPARMS -#define NOPARMS VOID /* for empty parm lists */ +#define NOPARMS void /* for empty parm lists */ #endif /* const */ @@ -110,8 +106,6 @@ #define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */ #endif - - /* * misc */ @@ -124,8 +118,6 @@ #define REMAGIC 0xfed7 /* magic number for main struct */ - - /* * debugging facilities */ @@ -139,8 +131,6 @@ #define MDEBUG(arglist) {} #endif - - /* * bitmap manipulation */ @@ -148,14 +138,13 @@ #define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS)) #define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS))) - - /* - * We dissect a chr into byts for colormap table indexing. Here we define - * a byt, which will be the same as a byte on most machines... The exact - * size of a byt is not critical, but about 8 bits is good, and extraction - * of 8-bit chunks is sometimes especially fast. + * We dissect a chr into byts for colormap table indexing. Here we define a + * byt, which will be the same as a byte on most machines... The exact size of + * a byt is not critical, but about 8 bits is good, and extraction of 8-bit + * chunks is sometimes especially fast. 
*/ + #ifndef BYTBITS #define BYTBITS 8 /* bits in a byt */ #endif @@ -164,69 +153,66 @@ #define NBYTS ((CHRBITS+BYTBITS-1)/BYTBITS) /* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */ - - /* * As soon as possible, we map chrs into equivalence classes -- "colors" -- * which are of much more manageable number. */ + typedef short color; /* colors of characters */ typedef int pcolor; /* what color promotes to */ #define COLORLESS (-1) /* impossible color */ #define WHITE 0 /* default color, parent of all others */ - - /* - * A colormap is a tree -- more precisely, a DAG -- indexed at each level - * by a byt of the chr, to map the chr to a color efficiently. Because - * lower sections of the tree can be shared, it can exploit the usual - * sparseness of such a mapping table. The tree is always NBYTS levels - * deep (in the past it was shallower during construction but was "filled" - * to full depth at the end of that); areas that are unaltered as yet point - * to "fill blocks" which are entirely WHITE in color. + * A colormap is a tree -- more precisely, a DAG -- indexed at each level by a + * byt of the chr, to map the chr to a color efficiently. Because lower + * sections of the tree can be shared, it can exploit the usual sparseness of + * such a mapping table. The tree is always NBYTS levels deep (in the past it + * was shallower during construction but was "filled" to full depth at the end + * of that); areas that are unaltered as yet point to "fill blocks" which are + * entirely WHITE in color. 
*/ /* the tree itself */ struct colors { - color ccolor[BYTTAB]; + color ccolor[BYTTAB]; }; struct ptrs { - union tree *pptr[BYTTAB]; + union tree *pptr[BYTTAB]; }; union tree { - struct colors colors; - struct ptrs ptrs; + struct colors colors; + struct ptrs ptrs; }; #define tcolor colors.ccolor #define tptr ptrs.pptr /* internal per-color structure for the color machinery */ struct colordesc { - uchr nchrs; /* number of chars of this color */ - color sub; /* open subcolor (if any); free chain ptr */ -# define NOSUB COLORLESS - struct arc *arcs; /* color chain */ - int flags; -# define FREECOL 01 /* currently free */ -# define PSEUDO 02 /* pseudocolor, no real chars */ -# define UNUSEDCOLOR(cd) ((cd)->flags&FREECOL) - union tree *block; /* block of solid color, if any */ + uchr nchrs; /* number of chars of this color */ + color sub; /* open subcolor (if any); free chain ptr */ +#define NOSUB COLORLESS + struct arc *arcs; /* color chain */ + int flags; +#define FREECOL 01 /* currently free */ +#define PSEUDO 02 /* pseudocolor, no real chars */ +#define UNUSEDCOLOR(cd) ((cd)->flags&FREECOL) + union tree *block; /* block of solid color, if any */ }; /* the color map itself */ struct colormap { - int magic; -# define CMMAGIC 0x876 - struct vars *v; /* for compile error reporting */ - size_t ncds; /* number of colordescs */ - size_t max; /* highest in use */ - color free; /* beginning of free chain (if non-0) */ - struct colordesc *cd; -# define CDEND(cm) (&(cm)->cd[(cm)->max + 1]) -# define NINLINECDS ((size_t)10) - struct colordesc cdspace[NINLINECDS]; - union tree tree[NBYTS]; /* tree top, plus fill blocks */ + int magic; +#define CMMAGIC 0x876 + struct vars *v; /* for compile error reporting */ + size_t ncds; /* number of colordescs */ + size_t max; /* highest in use */ + color free; /* beginning of free chain (if non-0) */ + struct colordesc *cd; +#define CDEND(cm) (&(cm)->cd[(cm)->max + 1]) +#define NINLINECDS ((size_t)10) + struct colordesc cdspace[NINLINECDS]; 
+ union tree tree[NBYTS]; /* tree top, plus fill blocks */ }; /* optimization magic to do fast chr->color mapping */ @@ -245,174 +231,177 @@ struct colormap { #define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)]) #endif - - /* * Interface definitions for locale-interface functions in locale.c. * Multi-character collating elements (MCCEs) cause most of the trouble. */ + struct cvec { - int nchrs; /* number of chrs */ - int chrspace; /* number of chrs possible */ - chr *chrs; /* pointer to vector of chrs */ - int nranges; /* number of ranges (chr pairs) */ - int rangespace; /* number of chrs possible */ - chr *ranges; /* pointer to vector of chr pairs */ - int nmcces; /* number of MCCEs */ - int mccespace; /* number of MCCEs possible */ - int nmccechrs; /* number of chrs used for MCCEs */ - chr *mcces[1]; /* pointers to 0-terminated MCCEs */ + int nchrs; /* number of chrs */ + int chrspace; /* number of chrs possible */ + chr *chrs; /* pointer to vector of chrs */ + int nranges; /* number of ranges (chr pairs) */ + int rangespace; /* number of chrs possible */ + chr *ranges; /* pointer to vector of chr pairs */ + int nmcces; /* number of MCCEs */ + int mccespace; /* number of MCCEs possible */ + int nmccechrs; /* number of chrs used for MCCEs */ + chr *mcces[1]; /* pointers to 0-terminated MCCEs */ /* and both batches of chrs are on the end */ }; /* caution: this value cannot be changed easily */ #define MAXMCCE 2 /* length of longest MCCE */ - - /* * definitions for NFA internal representation * - * Having a "from" pointer within each arc may seem redundant, but it - * saves a lot of hassle. + * Having a "from" pointer within each arc may seem redundant, but it saves a + * lot of hassle. 
*/ + struct state; struct arc { - int type; -# define ARCFREE '\0' - color co; - struct state *from; /* where it's from (and contained within) */ - struct state *to; /* where it's to */ - struct arc *outchain; /* *from's outs chain or free chain */ -# define freechain outchain - struct arc *inchain; /* *to's ins chain */ - struct arc *colorchain; /* color's arc chain */ + int type; +#define ARCFREE '\0' + color co; + struct state *from; /* where it's from (and contained within) */ + struct state *to; /* where it's to */ + struct arc *outchain; /* *from's outs chain or free chain */ +#define freechain outchain + struct arc *inchain; /* *to's ins chain */ + struct arc *colorchain; /* color's arc chain */ }; struct arcbatch { /* for bulk allocation of arcs */ - struct arcbatch *next; -# define ABSIZE 10 - struct arc a[ABSIZE]; + struct arcbatch *next; +#define ABSIZE 10 + struct arc a[ABSIZE]; }; struct state { - int no; + int no; # define FREESTATE (-1) - char flag; /* marks special states */ - int nins; /* number of inarcs */ - struct arc *ins; /* chain of inarcs */ - int nouts; /* number of outarcs */ - struct arc *outs; /* chain of outarcs */ - struct arc *free; /* chain of free arcs */ - struct state *tmp; /* temporary for traversal algorithms */ - struct state *next; /* chain for traversing all */ - struct state *prev; /* back chain */ - struct arcbatch oas; /* first arcbatch, avoid malloc in easy case */ - int noas; /* number of arcs used in first arcbatch */ + char flag; /* marks special states */ + int nins; /* number of inarcs */ + struct arc *ins; /* chain of inarcs */ + int nouts; /* number of outarcs */ + struct arc *outs; /* chain of outarcs */ + struct arc *free; /* chain of free arcs */ + struct state *tmp; /* temporary for traversal algorithms */ + struct state *next; /* chain for traversing all */ + struct state *prev; /* back chain */ + struct arcbatch oas; /* first arcbatch, avoid malloc in easy case */ + int noas; /* number of arcs used in first 
arcbatch */ }; struct nfa { - struct state *pre; /* pre-initial state */ - struct state *init; /* initial state */ - struct state *final; /* final state */ - struct state *post; /* post-final state */ - int nstates; /* for numbering states */ - struct state *states; /* state-chain header */ - struct state *slast; /* tail of the chain */ - struct state *free; /* free list */ - struct colormap *cm; /* the color map */ - color bos[2]; /* colors, if any, assigned to BOS and BOL */ - color eos[2]; /* colors, if any, assigned to EOS and EOL */ - struct vars *v; /* simplifies compile error reporting */ - struct nfa *parent; /* parent NFA, if any */ + struct state *pre; /* pre-initial state */ + struct state *init; /* initial state */ + struct state *final; /* final state */ + struct state *post; /* post-final state */ + int nstates; /* for numbering states */ + struct state *states; /* state-chain header */ + struct state *slast; /* tail of the chain */ + struct state *free; /* free list */ + struct colormap *cm; /* the color map */ + color bos[2]; /* colors, if any, assigned to BOS and BOL */ + color eos[2]; /* colors, if any, assigned to EOS and EOL */ + struct vars *v; /* simplifies compile error reporting */ + struct nfa *parent; /* parent NFA, if any */ }; - - /* * definitions for compacted NFA */ + struct carc { - color co; /* COLORLESS is list terminator */ - int to; /* state number */ + color co; /* COLORLESS is list terminator */ + int to; /* state number */ }; struct cnfa { - int nstates; /* number of states */ - int ncolors; /* number of colors */ - int flags; -# define HASLACONS 01 /* uses lookahead constraints */ - int pre; /* setup state number */ - int post; /* teardown state number */ - color bos[2]; /* colors, if any, assigned to BOS and BOL */ - color eos[2]; /* colors, if any, assigned to EOS and EOL */ - struct carc **states; /* vector of pointers to outarc lists */ - struct carc *arcs; /* the area for the lists */ + int nstates; /* number of states */ 
+ int ncolors; /* number of colors */ + int flags; +#define HASLACONS 01 /* uses lookahead constraints */ + int pre; /* setup state number */ + int post; /* teardown state number */ + color bos[2]; /* colors, if any, assigned to BOS and BOL */ + color eos[2]; /* colors, if any, assigned to EOS and EOL */ + struct carc **states; /* vector of pointers to outarc lists */ + struct carc *arcs; /* the area for the lists */ }; #define ZAPCNFA(cnfa) ((cnfa).nstates = 0) #define NULLCNFA(cnfa) ((cnfa).nstates == 0) - - /* * subexpression tree */ + struct subre { - char op; /* '|', '.' (concat), 'b' (backref), '(', '=' */ - char flags; -# define LONGER 01 /* prefers longer match */ -# define SHORTER 02 /* prefers shorter match */ -# define MIXED 04 /* mixed preference below */ -# define CAP 010 /* capturing parens below */ -# define BACKR 020 /* back reference below */ -# define INUSE 0100 /* in use in final tree */ -# define LOCAL 03 /* bits which may not propagate up */ -# define LMIX(f) ((f)<<2) /* LONGER -> MIXED */ -# define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */ -# define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED)) -# define MESSY(f) ((f)&(MIXED|CAP|BACKR)) -# define PREF(f) ((f)&LOCAL) -# define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2)) -# define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) - short retry; /* index into retry memory */ - int subno; /* subexpression number (for 'b' and '(') */ - short min; /* min repetitions, for backref only */ - short max; /* max repetitions, for backref only */ - struct subre *left; /* left child, if any (also freelist chain) */ - struct subre *right; /* right child, if any */ - struct state *begin; /* outarcs from here... */ - struct state *end; /* ...ending in inarcs here */ - struct cnfa cnfa; /* compacted NFA, if any */ - struct subre *chain; /* for bookkeeping and error cleanup */ + char op; /* '|', '.' 
(concat), 'b' (backref), '(', + * '=' */ + char flags; +#define LONGER 01 /* prefers longer match */ +#define SHORTER 02 /* prefers shorter match */ +#define MIXED 04 /* mixed preference below */ +#define CAP 010 /* capturing parens below */ +#define BACKR 020 /* back reference below */ +#define INUSE 0100 /* in use in final tree */ +#define LOCAL 03 /* bits which may not propagate up */ +#define LMIX(f) ((f)<<2) /* LONGER -> MIXED */ +#define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */ +#define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED)) +#define MESSY(f) ((f)&(MIXED|CAP|BACKR)) +#define PREF(f) ((f)&LOCAL) +#define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2)) +#define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) + short retry; /* index into retry memory */ + int subno; /* subexpression number (for 'b' and '(') */ + short min; /* min repetitions, for backref only */ + short max; /* max repetitions, for backref only */ + struct subre *left; /* left child, if any (also freelist chain) */ + struct subre *right; /* right child, if any */ + struct state *begin; /* outarcs from here... */ + struct state *end; /* ...ending in inarcs here */ + struct cnfa cnfa; /* compacted NFA, if any */ + struct subre *chain; /* for bookkeeping and error cleanup */ }; - - /* - * table of function pointers for generic manipulation functions - * A regex_t's re_fns points to one of these. + * table of function pointers for generic manipulation functions. A regex_t's + * re_fns points to one of these. 
*/ + struct fns { - VOID FUNCPTR(free, (regex_t *)); + VOID FUNCPTR(free, (regex_t *)); }; - - /* * the insides of a regex_t, hidden behind a void * */ + struct guts { - int magic; -# define GUTSMAGIC 0xfed9 - int cflags; /* copy of compile flags */ - long info; /* copy of re_info */ - size_t nsub; /* copy of re_nsub */ - struct subre *tree; - struct cnfa search; /* for fast preliminary search */ - int ntree; - struct colormap cmap; - int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t)); - struct subre *lacons; /* lookahead-constraint vector */ - int nlacons; /* size of lacons */ + int magic; +#define GUTSMAGIC 0xfed9 + int cflags; /* copy of compile flags */ + long info; /* copy of re_info */ + size_t nsub; /* copy of re_nsub */ + struct subre *tree; + struct cnfa search; /* for fast preliminary search */ + int ntree; + struct colormap cmap; + int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t)); + struct subre *lacons; /* lookahead-constraint vector */ + int nlacons; /* size of lacons */ }; + +/* + * Local Variables: + * mode: c + * c-basic-offset: 4 + * fill-column: 78 + * End: + */ |