diff options
Diffstat (limited to 'generic/regcomp.c')
| -rw-r--r-- | generic/regcomp.c | 924 | 
1 files changed, 349 insertions, 575 deletions
| diff --git a/generic/regcomp.c b/generic/regcomp.c index c6c7342..58d55fb 100644 --- a/generic/regcomp.c +++ b/generic/regcomp.c @@ -2,11 +2,11 @@   * re_*comp and friends - compile REs   * This file #includes several others (see the bottom).   * - * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved. + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.   *   * Development of this software was funded, in part, by Cray Research Inc.,   * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results.  The author + * Corporation, none of whom are responsible for the results. The author   * thanks all of them.   *   * Redistribution and use in source and binary forms -- with or without @@ -19,7 +19,7 @@   *   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL   * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,   * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,   * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; @@ -38,165 +38,170 @@  /* =====^!^===== begin forwards =====^!^===== */  /* automatically gathered by fwd; do not hand-edit */  /* === regcomp.c === */ -int compile(regex_t *, CONST chr *, size_t, int); -static VOID moresubs(struct vars *, int); +int compile(regex_t *, const chr *, size_t, int); +static void moresubs(struct vars *, int);  static int freev(struct vars *, int); -static VOID makesearch(struct vars *, struct nfa *); +static void makesearch(struct vars *, struct nfa *);  static struct subre *parse(struct vars *, int, int, struct state *, struct state *);  static struct subre *parsebranch(struct vars *, int, int, struct state *, struct state *, int); -static VOID parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *); -static VOID nonword(struct vars *, int, struct state *, struct state *); -static VOID word(struct vars *, int, struct state *, struct state *); +static void parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *); +static void nonword(struct vars *, int, struct state *, struct state *); +static void word(struct vars *, int, struct state *, struct state *);  static int scannum(struct vars *); -static VOID repeat(struct vars *, struct state *, struct state *, int, int); -static VOID bracket(struct vars *, struct state *, struct state *); -static VOID cbracket(struct vars *, struct state *, struct state *); -static VOID brackpart(struct vars *, struct state *, struct state *); -static chr *scanplain(struct vars *); -static VOID leaders(struct vars *, struct cvec *); -static VOID onechr(struct vars *, pchr, struct state *, struct state *); -static VOID dovec(struct vars *, struct cvec *, struct state *, struct state *); -static celt nextleader(struct vars *, pchr, pchr); -static VOID wordchrs(struct vars *); +static void repeat(struct vars *, struct state *, struct state *, int, int); +static void bracket(struct vars *, struct state *, struct state *); +static void cbracket(struct vars *, struct state *, struct state *); +static void brackpart(struct vars *, struct state *, struct state *); +static const chr *scanplain(struct vars *); +static void onechr(struct vars *, pchr, struct state *, struct state *); +static void dovec(struct vars *, struct cvec *, struct state *, struct state *); +static void wordchrs(struct vars *);  static struct subre *subre(struct vars *, int, int, struct state *, struct state *); -static VOID freesubre(struct vars *, struct subre *); -static VOID freesrnode(struct vars *, struct subre *); -static VOID optst(struct vars *, struct subre *); +static void freesubre(struct vars *, struct subre *); +static void freesrnode(struct vars *, struct subre *); +static void optst(struct vars *, struct subre *);  static int numst(struct subre *, int); -static VOID markst(struct subre *); -static VOID cleanst(struct vars *); +static void markst(struct subre *); +static void cleanst(struct vars *);  static long nfatree(struct vars *, struct subre *, FILE *);  static long nfanode(struct vars *, struct subre *, FILE *);  static int newlacon(struct vars *, struct state *, struct state *, int); -static VOID freelacons(struct subre *, int); -static VOID rfree(regex_t *); -static VOID dump(regex_t *, FILE *); -static VOID dumpst(struct subre *, FILE *, int); -static VOID stdump(struct subre *, FILE *, int); -static char *stid(struct subre *, char *, size_t); +static void freelacons(struct subre *, int); +static void rfree(regex_t *); +static void dump(regex_t *, FILE *); +static void dumpst(struct subre *, FILE *, int); +static void stdump(struct subre *, FILE *, int); +static const char *stid(struct subre *, char *, size_t);  /* === regc_lex.c === */ -static VOID lexstart(struct vars *); -static VOID prefixes(struct vars *); -static VOID lexnest(struct vars *, chr *, chr *); -static VOID lexword(struct vars *); +static void lexstart(struct vars *); +static void prefixes(struct vars *); +static void lexnest(struct vars *, const chr *, const chr *); +static void lexword(struct vars *);  static int next(struct vars *);  static int lexescape(struct vars *); -static chr lexdigits(struct vars *, int, int, int); +static int lexdigits(struct vars *, int, int, int);  static int brenext(struct vars *, pchr); -static VOID skip(struct vars *); -static chr newline(NOPARMS); -#ifdef REG_DEBUG -static chr *ch(NOPARMS); -#endif -static chr chrnamed(struct vars *, chr *, chr *, pchr); +static void skip(struct vars *); +static chr newline(void); +static chr chrnamed(struct vars *, const chr *, const chr *, pchr);  /* === regc_color.c === */ -static VOID initcm(struct vars *, struct colormap *); -static VOID freecm(struct colormap *); -static VOID cmtreefree(struct colormap *, union tree *, int); +static void initcm(struct vars *, struct colormap *); +static void freecm(struct colormap *); +static void cmtreefree(struct colormap *, union tree *, int);  static color setcolor(struct colormap *, pchr, pcolor);  static color maxcolor(struct colormap *);  static color newcolor(struct colormap *); -static VOID freecolor(struct colormap *, pcolor); +static void freecolor(struct colormap *, pcolor);  static color pseudocolor(struct colormap *);  static color subcolor(struct colormap *, pchr c);  static color newsub(struct colormap *, pcolor); -static VOID subrange(struct vars *, pchr, pchr, struct state *, struct state *); -static VOID subblock(struct vars *, pchr, struct state *, struct state *); -static VOID okcolors(struct nfa *, struct colormap *); -static VOID colorchain(struct colormap *, struct arc *); -static VOID uncolorchain(struct colormap *, struct arc *); -static int singleton(struct colormap *, pchr c); -static VOID rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *); -static VOID colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *); +static void subrange(struct vars *, pchr, pchr, struct state *, struct state *); +static void subblock(struct vars *, pchr, struct state *, struct state *); +static void okcolors(struct nfa *, struct colormap *); +static void colorchain(struct colormap *, struct arc *); +static void uncolorchain(struct colormap *, struct arc *); +static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *); +static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *);  #ifdef REG_DEBUG -static VOID dumpcolors(struct colormap *, FILE *); -static VOID fillcheck(struct colormap *, union tree *, int, FILE *); -static VOID dumpchr(pchr, FILE *); +static void dumpcolors(struct colormap *, FILE *); +static void fillcheck(struct colormap *, union tree *, int, FILE *); +static void dumpchr(pchr, FILE *);  #endif  /* === regc_nfa.c === */  static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *); -static VOID freenfa(struct nfa *); +static void freenfa(struct nfa *);  static struct state *newstate(struct nfa *);  static struct state *newfstate(struct nfa *, int flag); -static VOID dropstate(struct nfa *, struct state *); -static VOID freestate(struct nfa *, struct state *); -static VOID destroystate(struct nfa *, struct state *); -static VOID newarc(struct nfa *, int, pcolor, struct state *, struct state *); +static void dropstate(struct nfa *, struct state *); +static void freestate(struct nfa *, struct state *); +static void destroystate(struct nfa *, struct state *); +static void newarc(struct nfa *, int, pcolor, struct state *, struct state *); +static void createarc(struct nfa *, int, pcolor, struct state *, struct state *);  static struct arc *allocarc(struct nfa *, struct state *); -static VOID freearc(struct nfa *, struct arc *); +static void freearc(struct nfa *, struct arc *); +static void changearctarget(struct arc *, struct state *); +static int hasnonemptyout(struct state *);  static struct arc *findarc(struct state *, int, pcolor); -static VOID cparc(struct nfa *, struct arc *, struct state *, struct state *); -static VOID moveins(struct nfa *, struct state *, struct state *); -static VOID copyins(struct nfa *, struct state *, struct state *); -static VOID moveouts(struct nfa *, struct state *, struct state *); -static VOID copyouts(struct nfa *, struct state *, struct state *); -static VOID cloneouts(struct nfa *, struct state *, struct state *, struct state *, int); -static VOID delsub(struct nfa *, struct state *, struct state *); -static VOID deltraverse(struct nfa *, struct state *, struct state *); -static VOID dupnfa(struct nfa *, struct state *, struct state *, struct state *, struct state *); -static VOID duptraverse(struct nfa *, struct state *, struct state *); -static VOID cleartraverse(struct nfa *, struct state *); -static VOID specialcolors(struct nfa *); +static void cparc(struct nfa *, struct arc *, struct state *, struct state *); +static void sortins(struct nfa *, struct state *); +static int sortins_cmp(const void *, const void *); +static void sortouts(struct nfa *, struct state *); +static int sortouts_cmp(const void *, const void *); +static void moveins(struct nfa *, struct state *, struct state *); +static void copyins(struct nfa *, struct state *, struct state *); +static void mergeins(struct nfa *, struct state *, struct arc **, int); +static void moveouts(struct nfa *, struct state *, struct state *); +static void copyouts(struct nfa *, struct state *, struct state *); +static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int); +static void delsub(struct nfa *, struct state *, struct state *); +static void deltraverse(struct nfa *, struct state *, struct state *); +static void dupnfa(struct nfa *, struct state *, struct state *, struct state *, struct state *); +static void duptraverse(struct nfa *, struct state *, struct state *, int); +static void cleartraverse(struct nfa *, struct state *); +static void specialcolors(struct nfa *);  static long optimize(struct nfa *, FILE *); -static VOID pullback(struct nfa *, FILE *); -static int pull(struct nfa *, struct arc *); -static VOID pushfwd(struct nfa *, FILE *); -static int push(struct nfa *, struct arc *); +static void pullback(struct nfa *, FILE *); +static int pull(struct nfa *, struct arc *, struct state **); +static void pushfwd(struct nfa *, FILE *); +static int push(struct nfa *, struct arc *, struct state **);  #define	INCOMPATIBLE	1	/* destroys arc */  #define	SATISFIED	2	/* constraint satisfied */  #define	COMPATIBLE	3	/* compatible but not satisfied yet */  static int combine(struct arc *, struct arc *); -static VOID fixempties(struct nfa *, FILE *); -static int unempty(struct nfa *, struct arc *); -static VOID cleanup(struct nfa *); -static VOID markreachable(struct nfa *, struct state *, struct state *, struct state *); -static VOID markcanreach(struct nfa *, struct state *, struct state *, struct state *); +static void fixempties(struct nfa *, FILE *); +static struct state *emptyreachable(struct nfa *, struct state *, +			struct state *, struct arc **); +static int	isconstraintarc(struct arc *); +static int	hasconstraintout(struct state *); +static void fixconstraintloops(struct nfa *, FILE *); +static int	findconstraintloop(struct nfa *, struct state *); +static void breakconstraintloop(struct nfa *, struct state *); +static void clonesuccessorstates(struct nfa *, struct state *, struct state *, +		 struct state *, struct arc *, char *, char *, int); +static void cleanup(struct nfa *); +static void markreachable(struct nfa *, struct state *, struct state *, struct state *); +static void markcanreach(struct nfa *, struct state *, struct state *, struct state *);  static long analyze(struct nfa *); -static VOID compact(struct nfa *, struct cnfa *); -static VOID carcsort(struct carc *, struct carc *); -static VOID freecnfa(struct cnfa *); -static VOID dumpnfa(struct nfa *, FILE *); +static void compact(struct nfa *, struct cnfa *); +static void carcsort(struct carc *, size_t); +static int carc_cmp(const void *, const void *); +static void freecnfa(struct cnfa *); +static void dumpnfa(struct nfa *, FILE *);  #ifdef REG_DEBUG -static VOID dumpstate(struct state *, FILE *); -static VOID dumparcs(struct state *, FILE *); -static int dumprarcs(struct arc *, struct state *, FILE *, int); -static VOID dumparc(struct arc *, struct state *, FILE *); +static void dumpstate(struct state *, FILE *); +static void dumparcs(struct state *, FILE *); +static void dumparc(struct arc *, struct state *, FILE *);  #endif -static VOID dumpcnfa(struct cnfa *, FILE *); +static void dumpcnfa(struct cnfa *, FILE *);  #ifdef REG_DEBUG -static VOID dumpcstate(int, struct carc *, struct cnfa *, FILE *); +static void dumpcstate(int, struct cnfa *, FILE *);  #endif  /* === regc_cvec.c === */ -static struct cvec *newcvec(int, int, int);  static struct cvec *clearcvec(struct cvec *); -static VOID addchr(struct cvec *, pchr); -static VOID addrange(struct cvec *, pchr, pchr); -static VOID addmcce(struct cvec *, chr *, chr *); -static int haschr(struct cvec *, pchr); -static struct cvec *getcvec(struct vars *, int, int, int); -static VOID freecvec(struct cvec *); +static void addchr(struct cvec *, pchr); +static void addrange(struct cvec *, pchr, pchr); +static struct cvec *newcvec(int, int); +static struct cvec *getcvec(struct vars *, int, int); +static void freecvec(struct cvec *);  /* === regc_locale.c === */ -static int nmcces(struct vars *); -static int nleaders(struct vars *); -static struct cvec *allmcces(struct vars *, struct cvec *); -static celt element(struct vars *, chr *, chr *); +static celt element(struct vars *, const chr *, const chr *);  static struct cvec *range(struct vars *, celt, celt, int);  static int before(celt, celt);  static struct cvec *eclass(struct vars *, celt, int); -static struct cvec *cclass(struct vars *, chr *, chr *, int); +static struct cvec *cclass(struct vars *, const chr *, const chr *, int);  static struct cvec *allcases(struct vars *, pchr); -static int cmp(CONST chr *, CONST chr *, size_t); -static int casecmp(CONST chr *, CONST chr *, size_t); +static int cmp(const chr *, const chr *, size_t); +static int casecmp(const chr *, const chr *, size_t);  /* automatically gathered by fwd; do not hand-edit */  /* =====^!^===== end forwards =====^!^===== */  /* internal variables, bundled for easy passing around */  struct vars {      regex_t *re; -    chr *now;			/* scan pointer into string */ -    chr *stop;			/* end of string */ -    chr *savenow;		/* saved now and stop for "subroutine call" */ -    chr *savestop; +    const chr *now;		/* scan pointer into string */ +    const chr *stop;		/* end of string */ +    const chr *savenow;		/* saved now and stop for "subroutine call" */ +    const chr *savestop;      int err;			/* error code (0 if none) */      int cflags;			/* copy of compile flags */      int lasttype;		/* type of previous token */ @@ -214,15 +219,12 @@ struct vars {      struct subre *tree;		/* subexpression tree */      struct subre *treechain;	/* all tree nodes allocated */      struct subre *treefree;	/* any free tree nodes */ -    int ntree;			/* number of tree nodes */ +    int ntree;			/* number of tree nodes, plus one */      struct cvec *cv;		/* interface cvec */      struct cvec *cv2;		/* utility cvec */ -    struct cvec *mcces;		/* collating-element information */ -#define	ISCELEADER(v,c)	(v->mcces != NULL && haschr(v->mcces, (c))) -    struct state *mccepbegin;	/* in nfa, start of MCCE prototypes */ -    struct state *mccepend;	/* in nfa, end of MCCE prototypes */      struct subre *lacons;	/* lookahead-constraint vector */      int nlacons;		/* size of lacons */ +    size_t spaceused;		/* approx. space used for compilation */  };  /* parsing macros; most know that `v' is the struct vars pointer */ @@ -231,13 +233,13 @@ struct vars {  #define	EAT(t)	(SEE(t) && next(v))	/* if next is this, swallow it */  #define	VISERR(vv)	((vv)->err != 0)/* have we seen an error yet? */  #define	ISERR()	VISERR(v) -#define	VERR(vv,e) \ -	((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err : ((vv)->err = (e))) +#define VERR(vv,e)	((vv)->nexttype = EOS, \ +			 (vv)->err = ((vv)->err ? (vv)->err : (e)))  #define	ERR(e)	VERR(v, e)		/* record an error */  #define	NOERR()	{if (ISERR()) return;}	/* if error seen, return */  #define	NOERRN()	{if (ISERR()) return NULL;}	/* NOERR with retval */  #define	NOERRZ()	{if (ISERR()) return 0;}	/* NOERR with retval */ -#define	INSIST(c, e)	((c) ? 0 : ERR(e))	/* if condition false, error */ +#define INSIST(c, e) do { if (!(c)) ERR(e); } while (0)	/* error if c false */  #define	NOTE(b)	(v->re->re_info |= (b))		/* note visible condition */  #define	EMPTYARC(x, y)	newarc(v->nfa, EMPTY, 0, x, y) @@ -266,27 +268,28 @@ struct vars {  	((a)->type == PLAIN || (a)->type == AHEAD || (a)->type == BEHIND)  /* static function list */ -static struct fns functions = { +static const struct fns functions = {      rfree,			/* regfree insides */  };  /*   - compile - compile regular expression - ^ int compile(regex_t *, CONST chr *, size_t, int); + * Note: on failure, no resources remain allocated, so regfree() + * need not be applied to re. + ^ int compile(regex_t *, const chr *, size_t, int);   */  int  compile(      regex_t *re, -    CONST chr *string, +    const chr *string,      size_t len,      int flags)  { -    struct vars var; -    struct vars *v = &var; +    AllocVars(v);      struct guts *g;      int i;      size_t j; -    FILE *debug = (flags®_PROGRESS) ? stdout : (FILE *)NULL; +    FILE *debug = (flags®_PROGRESS) ? stdout : NULL;  #define	CNOERR()	{ if (ISERR()) return freev(v, v->err); }      /* @@ -294,12 +297,15 @@ compile(       */      if (re == NULL || string == NULL) { +	FreeVars(v);  	return REG_INVARG;      }      if ((flags®_QUOTE) && (flags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE))) { +	FreeVars(v);  	return REG_INVARG;      }      if (!(flags®_EXTENDED) && (flags®_ADVF)) { +	FreeVars(v);  	return REG_INVARG;      } @@ -308,7 +314,7 @@ compile(       */      v->re = re; -    v->now = (chr *)string; +    v->now = string;      v->stop = v->now + len;      v->savenow = v->savestop = NULL;      v->err = 0; @@ -328,24 +334,24 @@ compile(      v->treefree = NULL;      v->cv = NULL;      v->cv2 = NULL; -    v->mcces = NULL;      v->lacons = NULL;      v->nlacons = 0; +    v->spaceused = 0;      re->re_magic = REMAGIC;      re->re_info = 0;		/* bits get set during parse */      re->re_csize = sizeof(chr);      re->re_guts = NULL; -    re->re_fns = VS(&functions); +    re->re_fns = (void*)(&functions);      /*       * More complex setup, malloced things.       */ -    re->re_guts = VS(MALLOC(sizeof(struct guts))); +    re->re_guts = (void*)(MALLOC(sizeof(struct guts)));      if (re->re_guts == NULL) {  	return freev(v, REG_ESPACE);      } -    g = (struct guts *)re->re_guts; +    g = (struct guts *) re->re_guts;      g->tree = NULL;      initcm(v, &g->cmap);      v->cm = &g->cmap; @@ -354,19 +360,10 @@ compile(      ZAPCNFA(g->search);      v->nfa = newnfa(v, v->cm, NULL);      CNOERR(); -    v->cv = newcvec(100, 20, 10); +    v->cv = newcvec(100, 20);      if (v->cv == NULL) {  	return freev(v, REG_ESPACE);      } -    i = nmcces(v); -    if (i > 0) { -	v->mcces = newcvec(nleaders(v), 0, i); -	CNOERR(); -	v->mcces = allmcces(v, v->mcces); -	leaders(v, v->mcces); -	addmcce(v->mcces, (chr *)NULL, (chr *)NULL);	/* dummy */ -    } -    CNOERR();      /*       * Parsing. @@ -437,7 +434,7 @@ compile(       * Can sacrifice main NFA now, so use it as work area.       */ -    (DISCARD)optimize(v->nfa, debug); +    (void) optimize(v->nfa, debug);      CNOERR();      makesearch(v, v->nfa);      CNOERR(); @@ -449,7 +446,7 @@ compile(       */      re->re_nsub = v->nsubexp; -    v->re = NULL;			/* freev no longer frees re */ +    v->re = NULL;		/* freev no longer frees re */      g->magic = GUTSMAGIC;      g->cflags = v->cflags;      g->info = re->re_info; @@ -472,7 +469,7 @@ compile(  /*   - moresubs - enlarge subRE vector - ^ static VOID moresubs(struct vars *, int); + ^ static void moresubs(struct vars *, int);   */  static void  moresubs( @@ -485,12 +482,12 @@ moresubs(      assert(wanted > 0 && (size_t)wanted >= v->nsubs);      n = (size_t)wanted * 3 / 2 + 1;      if (v->subs == v->sub10) { -	p = (struct subre **)MALLOC(n * sizeof(struct subre *)); +	p = (struct subre **) MALLOC(n * sizeof(struct subre *));  	if (p != NULL) { -	    memcpy(VS(p), VS(v->subs), v->nsubs * sizeof(struct subre *)); +	    memcpy(p, v->subs, v->nsubs * sizeof(struct subre *));  	}      } else { -	p = (struct subre **)REALLOC(v->subs, n*sizeof(struct subre *)); +	p = (struct subre **) REALLOC(v->subs, n*sizeof(struct subre *));      }      if (p == NULL) {  	ERR(REG_ESPACE); @@ -507,8 +504,8 @@ moresubs(  /*   - freev - free vars struct's substructures where necessary - * Optionally does error-number setting, and always returns error code - * (if any), to make error-handling code terser. + * Optionally does error-number setting, and always returns error code (if + * any), to make error-handling code terser.   ^ static int freev(struct vars *, int);   */  static int @@ -516,6 +513,8 @@ freev(      struct vars *v,      int err)  { +    register int ret; +      if (v->re != NULL) {  	rfree(v->re);      } @@ -537,33 +536,29 @@ freev(      if (v->cv2 != NULL) {  	freecvec(v->cv2);      } -    if (v->mcces != NULL) { -	freecvec(v->mcces); -    }      if (v->lacons != NULL) {  	freelacons(v->lacons, v->nlacons);      }      ERR(err);			/* nop if err==0 */ -    return v->err; +    ret = v->err; +    FreeVars(v); +    return ret;  }  /*   - makesearch - turn an NFA into a search NFA (implicit prepend of .*?)   * NFA must have been optimize()d already. - ^ static VOID makesearch(struct vars *, struct nfa *); + ^ static void makesearch(struct vars *, struct nfa *);   */  static void  makesearch(      struct vars *v,      struct nfa *nfa)  { -    struct arc *a; -    struct arc *b; +    struct arc *a, *b;      struct state *pre = nfa->pre; -    struct state *s; -    struct state *s2; -    struct state *slist; +    struct state *s, *s2, *slist;      /*       * No loops are needed if it's anchored. @@ -604,20 +599,22 @@ makesearch(       */      slist = NULL; -    for (a = pre->outs; a != NULL; a = a->outchain) { +    for (a=pre->outs ; a!=NULL ; a=a->outchain) {  	s = a->to; -	for (b = s->ins; b != NULL; b = b->inchain) { +	for (b=s->ins ; b!=NULL ; b=b->inchain) {  	    if (b->from != pre) {  		break;  	    }  	} -	if (b != NULL && s->tmp == NULL) { -	    /* -	     * Must be split if not already in the list (fixes bugs 505048, -	     * 230589, 840258, 504785). -	     */ -	    s->tmp = slist; +	/* +	 * We want to mark states as being in the list already by having non +	 * NULL tmp fields, but we can't just store the old slist value in tmp +	 * because that doesn't work for the first such state.  Instead, the +	 * first list entry gets its own address in tmp. +	 */ +	if (b != NULL && s->tmp == NULL) { +	    s->tmp = (slist != NULL) ? slist : s;  	    slist = s;  	}      } @@ -626,26 +623,29 @@ makesearch(       * Do the splits.       */ -    for (s = slist; s != NULL; s = s2) { +    for (s=slist ; s!=NULL ; s=s2) {  	s2 = newstate(nfa); +	NOERR();  	copyouts(nfa, s, s2); -	for (a = s->ins; a != NULL; a = b) { +	NOERR(); +	for (a=s->ins ; a!=NULL ; a=b) {  	    b = a->inchain; +  	    if (a->from != pre) {  		cparc(nfa, a, a->from, s2);  		freearc(nfa, a);  	    }  	} -	s2 = s->tmp; +	s2 = (s->tmp != s) ? s->tmp : NULL;  	s->tmp = NULL;		/* clean up while we're at it */      }  }  /*   - parse - parse an RE - * This is actually just the top level, which parses a bunch of branches - * tied together with '|'.  They appear in the tree as the left children - * of a chain of '|' subres. + * This is actually just the top level, which parses a bunch of branches tied + * together with '|'. They appear in the tree as the left children of a chain + * of '|' subres.   ^ static struct subre *parse(struct vars *, int, int, struct state *,   ^ 	struct state *);   */ @@ -657,8 +657,7 @@ parse(      struct state *init,		/* initial state */      struct state *final)	/* final state */  { -    struct state *left;		/* scaffolding for branch */ -    struct state *right; +    struct state *left, *right;	/* scaffolding for branch */      struct subre *branches;	/* top level */      struct subre *branch;	/* current branch */      struct subre *t;		/* temporary */ @@ -759,6 +758,7 @@ parsebranch(  	/* NB, recursion in parseqatom() may swallow rest of branch */  	parseqatom(v, stopper, type, lp, right, t); +	NOERRN();      }      if (!seencontent) {		/* empty branch */ @@ -777,7 +777,7 @@ parsebranch(   * The bookkeeping near the end cooperates very closely with parsebranch(); in   * particular, it contains a recursion that can involve parsing the rest of   * the branch, making this function's name somewhat inaccurate. - ^ static VOID parseqatom(struct vars *, int, int, struct state *, + ^ static void parseqatom(struct vars *, int, int, struct state *,   ^ 	struct state *, struct subre *);   */  static void @@ -809,7 +809,7 @@ parseqatom(      atom = NULL;      assert(lp->nouts == 0);	/* must string new code */ -    assert(rp->nins == 0);	/*  between lp and rp */ +    assert(rp->nins == 0);	/* between lp and rp */      subno = 0;			/* just to shut lint up */      /* @@ -826,7 +826,6 @@ parseqatom(  	}  	NEXT();  	return; -	break;      case '$':  	ARCV('$', 1);  	if (v->cflags®_NLANCH) { @@ -834,19 +833,16 @@ parseqatom(  	}  	NEXT();  	return; -	break;      case SBEGIN:  	ARCV('^', 1);		/* BOL */  	ARCV('^', 0);		/* or BOS */  	NEXT();  	return; -	break;      case SEND:  	ARCV('$', 1);		/* EOL */  	ARCV('$', 0);		/* or EOS */  	NEXT();  	return; -	break;      case '<':  	wordchrs(v);		/* does NEXT() */  	s = newstate(v->nfa); @@ -854,7 +850,6 @@ parseqatom(  	nonword(v, BEHIND, lp, s);  	word(v, AHEAD, s, rp);  	return; -	break;      case '>':  	wordchrs(v);		/* does NEXT() */  	s = newstate(v->nfa); @@ -862,7 +857,6 @@ parseqatom(  	word(v, BEHIND, lp, s);  	nonword(v, AHEAD, s, rp);  	return; -	break;      case WBDRY:  	wordchrs(v);		/* does NEXT() */  	s = newstate(v->nfa); @@ -874,7 +868,6 @@ parseqatom(  	word(v, BEHIND, lp, s);  	nonword(v, AHEAD, s, rp);  	return; -	break;      case NWBDRY:  	wordchrs(v);		/* does NEXT() */  	s = newstate(v->nfa); @@ -886,7 +879,6 @@ parseqatom(  	nonword(v, BEHIND, lp, s);  	nonword(v, AHEAD, s, rp);  	return; -	break;      case LACON:			/* lookahead constraint */  	pos = v->nextvalue;  	NEXT(); @@ -901,7 +893,6 @@ parseqatom(  	NOERR();  	ARCV(LACON, n);  	return; -	break;  	/*  	 * Then errors, to get them out of the way. @@ -913,11 +904,9 @@ parseqatom(      case '{':  	ERR(REG_BADRPT);  	return; -	break;      default:  	ERR(REG_ASSERT);  	return; -	break;  	/*  	 * Then plain characters, and minor variants on that theme. @@ -1009,6 +998,7 @@ parseqatom(  	NOERR();  	assert(v->nextvalue > 0);  	atom = subre(v, 'b', BACKR, lp, rp); +	NOERR();  	subno = v->nextvalue;  	atom->subno = subno;  	EMPTYARC(lp, rp);	/* temporarily, so there's something */ @@ -1023,13 +1013,13 @@ parseqatom(      switch (v->nexttype) {      case '*':  	m = 0; -	n = INFINITY; +	n = DUPINF;  	qprefer = (v->nextvalue) ? LONGER : SHORTER;  	NEXT();  	break;      case '+':  	m = 1; -	n = INFINITY; +	n = DUPINF;  	qprefer = (v->nextvalue) ? LONGER : SHORTER;  	NEXT();  	break; @@ -1046,7 +1036,7 @@ parseqatom(  	    if (SEE(DIGIT)) {  		n = scannum(v);  	    } else { -		n = INFINITY; +		n = DUPINF;  	    }  	    if (m > n) {  		ERR(REG_BADBR); @@ -1127,13 +1117,19 @@ parseqatom(      }      /* -     * prepare a general-purpose state skeleton +     * Prepare a general-purpose state skeleton. +     * +     * In the no-backrefs case, we want this: +     * +     * [lp] ---> [s] ---prefix---> [begin] ---atom---> [end] ---rest---> [rp]       * -     *    ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp] -     *   /                                            / -     * [lp] ----> [s2] ----bypass--------------------- +     * where prefix is some repetitions of atom.  In the general case we need       * -     * where bypass is an empty, and prefix is some repetitions of atom +     * [lp] ---> [s] ---iterator---> [s2] ---rest---> [rp] +     * +     * where the iterator wraps around [begin] ---atom---> [end] +     * +     * We make the s state here for both cases; s2 is made below if needed       */      s = newstate(v->nfa);	/* first, new endpoints for the atom */ @@ -1144,11 +1140,9 @@ parseqatom(      NOERR();      atom->begin = s;      atom->end = s2; -    s = newstate(v->nfa);	/* and spots for prefix and bypass */ -    s2 = newstate(v->nfa); +    s = newstate(v->nfa);	/* set up starting state */      NOERR();      EMPTYARC(lp, s); -    EMPTYARC(lp, s2);      NOERR();      /* @@ -1156,6 +1150,7 @@ parseqatom(       */      t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp); +    NOERR();      t->left = atom;      atomp = &t->left; @@ -1169,6 +1164,7 @@ parseqatom(      assert(top->op == '=' && top->left == NULL && top->right == NULL);      top->left = subre(v, '=', top->flags, top->begin, lp); +    NOERR();      top->op = '.';      top->right = t; @@ -1193,27 +1189,8 @@ parseqatom(      }      /* -     * It's quantifier time; first, turn x{0,...} into x{1,...}|empty -     */ - -    if (m == 0) { -	EMPTYARC(s2, atom->end);/* the bypass */ -	assert(PREF(qprefer) != 0); -	f = COMBINE(qprefer, atom->flags); -	t = subre(v, '|', f, lp, atom->end); -	NOERR(); -	t->left = atom; -	t->right = subre(v, '|', PREF(f), s2, atom->end); -	NOERR(); -	t->right->left = subre(v, '=', 0, s2, atom->end); -	NOERR(); -	*atomp = t; -	atomp = &t->left; -	m = 1; -    } - -    /* -     * Deal with the rest of the quantifier. +     * It's quantifier time.  If the atom is just a backref, we'll let it deal +     * with quantifiers internally.       */      if (atomtype == BACKREF) { @@ -1228,24 +1205,32 @@ parseqatom(  	 */  	repeat(v, atom->begin, atom->end, m, n); -	atom->min = (short)m; -	atom->max = (short)n; +	atom->min = (short) m; +	atom->max = (short) n;  	atom->flags |= COMBINE(qprefer, atom->flags); +	/* rest of branch can be strung starting from atom->end */ +	s2 = atom->end;      } else if (m == 1 && n == 1) {  	/*  	 * No/vacuous quantifier: done.  	 */  	EMPTYARC(s, atom->begin);	/* empty prefix */ -    } else { +	/* rest of branch can be strung starting from atom->end */ +	s2 = atom->end; +    } else if (m > 0 && !(atom->flags & BACKR)) {  	/* -	 * Turn x{m,n} into x{m-1,n-1}x, with capturing parens in only second -	 * x +	 * If there's no backrefs involved, we can turn x{m,n} into +	 * x{m-1,n-1}x, with capturing parens in only the second x.  This +	 * is valid because we only care about capturing matches from the +	 * final iteration of the quantifier.  It's a win because we can +	 * implement the backref-free left side as a plain DFA node, since +	 * we don't really care where its submatches are.  	 */  	dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin); -	assert(m >= 1 && m != INFINITY && n >= 1); -	repeat(v, s, atom->begin, m-1, (n == INFINITY) ? n : n-1); +	assert(m >= 1 && m != DUPINF && n >= 1); +	repeat(v, s, atom->begin, m-1, (n == DUPINF) ? n : n-1);  	f = COMBINE(qprefer, atom->flags);  	t = subre(v, '.', f, s, atom->end);	/* prefix and atom */  	NOERR(); @@ -1253,6 +1238,24 @@ parseqatom(  	NOERR();  	t->right = atom;  	*atomp = t; +	/* rest of branch can be strung starting from atom->end */ +	s2 = atom->end; +    } else { +	/* general case: need an iteration node */ +	s2 = newstate(v->nfa); +	NOERR(); +	moveouts(v->nfa, atom->end, s2); +	NOERR(); +	dupnfa(v->nfa, atom->begin, atom->end, s, s2); +	repeat(v, s, s2, m, n); +	f = COMBINE(qprefer, atom->flags); +	t = subre(v, '*', f, s, s2); +	NOERR(); +	t->min = (short) m; +	t->max = (short) n; +	t->left = atom; +	*atomp = t; +	/* rest of branch is to be strung from iteration's end state */      }      /* @@ -1261,11 +1264,12 @@ parseqatom(      t = top->right;      if (!(SEE('|') || SEE(stopper) || SEE(EOS))) { -	t->right = parsebranch(v, stopper, type, atom->end, rp, 1); +	t->right = parsebranch(v, stopper, type, s2, rp, 1);      } else { -	EMPTYARC(atom->end, rp); -	t->right = subre(v, '=', 0, atom->end, rp); +	EMPTYARC(s2, rp); +	t->right = subre(v, '=', 0, s2, rp);      } +    NOERR();      assert(SEE('|') || SEE(stopper) || SEE(EOS));      t->flags |= COMBINE(t->flags, t->right->flags);      top->flags |= COMBINE(top->flags, t->flags); @@ -1273,7 +1277,7 @@ parseqatom(  /*   - nonword - generate arcs for non-word-character ahead or behind - ^ static VOID nonword(struct vars *, int, struct state *, struct state *); + ^ static void nonword(struct vars *, int, struct state *, struct state *);   */  static void  nonword( @@ -1293,7 +1297,7 @@ nonword(  /*   - word - generate arcs for word character ahead or behind - ^ static VOID word(struct vars *, int, struct state *, struct state *); + ^ static void word(struct vars *, int, struct state *, struct state *);   */  static void  word( @@ -1330,13 +1334,15 @@ scannum(  /*   - repeat - replicate subNFA for quantifiers + * The sub-NFA strung from lp to rp is modified to represent m to n + * repetitions of its initial contents.   * The duplication sequences used here are chosen carefully so that any   * pointers starting out pointing into the subexpression end up pointing into - * the last occurrence.  (Note that it may not be strung between the same - * left and right end states, however!)  This used to be important for the - * subRE tree, although the important bits are now handled by the in-line - * code in parse(), and when this is called, it doesn't matter any more. - ^ static VOID repeat(struct vars *, struct state *, struct state *, int, int); + * the last occurrence. (Note that it may not be strung between the same left + * and right end states, however!) This used to be important for the subRE + * tree, although the important bits are now handled by the in-line code in + * parse(), and when this is called, it doesn't matter any more. + ^ static void repeat(struct vars *, struct state *, struct state *, int, int);   */  static void  repeat( @@ -1349,11 +1355,10 @@ repeat(  #define	SOME		2  #define	INF		3  #define	PAIR(x, y)	((x)*4 + (y)) -#define	REDUCE(x)	( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) ) -    CONST int rm = REDUCE(m); -    CONST int rn = REDUCE(n); -    struct state *s; -    struct state *s2; +#define	REDUCE(x)	( ((x) == DUPINF) ? INF : (((x) > 1) ? SOME : (x)) ) +    const int rm = REDUCE(m); +    const int rn = REDUCE(n); +    struct state *s, *s2;      switch (PAIR(rm, rn)) {      case PAIR(0, 0):		/* empty string */ @@ -1423,7 +1428,7 @@ repeat(  /*   - bracket - handle non-complemented bracket expression   * Also called from cbracket for complemented bracket expressions. - ^ static VOID bracket(struct vars *, struct state *, struct state *); + ^ static void bracket(struct vars *, struct state *, struct state *);   */  static void  bracket( @@ -1445,7 +1450,7 @@ bracket(   * We do it by calling bracket() with dummy endpoints, and then complementing   * the result. The alternative would be to invoke rainbow(), and then delete   * arcs as the b.e. is seen... but that gets messy. - ^ static VOID cbracket(struct vars *, struct state *, struct state *); + ^ static void cbracket(struct vars *, struct state *, struct state *);   */  static void  cbracket( @@ -1455,13 +1460,6 @@ cbracket(  {      struct state *left = newstate(v->nfa);      struct state *right = newstate(v->nfa); -    struct state *s; -    struct arc *a;			/* arc from lp */ -    struct arc *ba;			/* arc from left, from bracket() */ -    struct arc *pa;			/* MCCE-prototype arc */ -    color co; -    chr *p; -    int i;      NOERR();      bracket(v, left, right); @@ -1470,75 +1468,24 @@ cbracket(      }      NOERR(); -    assert(lp->nouts == 0);		/* all outarcs will be ours */ +    assert(lp->nouts == 0);	/* all outarcs will be ours */      /* -     * Easy part of complementing +     * Easy part of complementing, and all there is to do since the MCCE code +     * was removed.       */      colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);      NOERR(); -    if (v->mcces == NULL) {		/* no MCCEs -- we're done */ -	dropstate(v->nfa, left); -	assert(right->nins == 0); -	freestate(v->nfa, right); -	return; -    } - -    /* -     * But complementing gets messy in the presence of MCCEs... -     */ - -    NOTE(REG_ULOCALE); -    for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) { -	co = GETCOLOR(v->cm, *p); -	a = findarc(lp, PLAIN, co); -	ba = findarc(left, PLAIN, co); -	if (ba == NULL) { -	    assert(a != NULL); -	    freearc(v->nfa, a); -	} else { -	    assert(a == NULL); -	} -	s = newstate(v->nfa); -	NOERR(); -	newarc(v->nfa, PLAIN, co, lp, s); -	NOERR(); -	pa = findarc(v->mccepbegin, PLAIN, co); -	assert(pa != NULL); -	if (ba == NULL) {	/* easy case, need all of them */ -	    cloneouts(v->nfa, pa->to, s, rp, PLAIN); -	    newarc(v->nfa, '$', 1, s, rp); -	    newarc(v->nfa, '$', 0, s, rp); -	    colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); -	} else {		/* must be selective */ -	    if (findarc(ba->to, '$', 1) == NULL) { -		newarc(v->nfa, '$', 1, s, rp); -		newarc(v->nfa, '$', 0, s, rp); -		colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); -	    } -	    for (pa = pa->to->outs; pa != NULL; pa = pa->outchain) { -		if (findarc(ba->to, PLAIN, pa->co) == NULL) { -		    newarc(v->nfa, PLAIN, pa->co, s, rp); -		} -	    } -	    if (s->nouts == 0) {	/* limit of selectivity: none */ -		dropstate(v->nfa, s);	/* frees arc too */ -	    } -	} -	NOERR(); -    } - -    delsub(v->nfa, left, right); -    assert(left->nouts == 0); -    freestate(v->nfa, left); +    dropstate(v->nfa, left);      assert(right->nins == 0);      freestate(v->nfa, right); +    return;  }  /*   - brackpart - handle one item (or range) within a bracket expression - ^ static VOID brackpart(struct vars *, struct state *, struct state *); + ^ static void brackpart(struct vars *, struct state *, struct state *);   */  static void  brackpart( @@ -1546,12 +1493,10 @@ brackpart(      struct state *lp,      struct state *rp)  { -    celt startc; -    celt endc; +    celt startc, endc;      struct cvec *cv; -    chr *startp; -    chr *endp; -    chr c[1]; +    const chr *startp, *endp; +    chr c;      /*       * Parse something, get rid of special cases, take shortcuts. @@ -1563,18 +1508,18 @@ brackpart(  	return;  	break;      case PLAIN: -	c[0] = v->nextvalue; +	c = v->nextvalue;  	NEXT();  	/* -	 * Shortcut for ordinary chr (not range, not MCCE leader). +	 * Shortcut for ordinary chr (not range).  	 */ -	if (!SEE(RANGE) && !ISCELEADER(v, c[0])) { -	    onechr(v, c[0], lp, rp); +	if (!SEE(RANGE)) { +	    onechr(v, c, lp, rp);  	    return;  	} -	startc = element(v, c, c+1); +	startc = element(v, &c, &c+1);  	NOERR();  	break;      case COLLEL: @@ -1618,9 +1563,9 @@ brackpart(  	switch (v->nexttype) {  	case PLAIN:  	case RANGE: -	    c[0] = v->nextvalue; +	    c = v->nextvalue;  	    NEXT(); -	    endc = element(v, c, c+1); +	    endc = element(v, &c, &c+1);  	    NOERR();  	    break;  	case COLLEL: @@ -1657,13 +1602,13 @@ brackpart(   - scanplain - scan PLAIN contents of [. etc.   * Certain bits of trickery in lex.c know that this code does not try to look   * past the final bracket of the [. etc. - ^ static chr *scanplain(struct vars *); + ^ static const chr *scanplain(struct vars *);   */ -static chr *			/* just after end of sequence */ +static const chr *		/* just after end of sequence */  scanplain(      struct vars *v)  { -    chr *endp; +    const chr *endp;      assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS));      NEXT(); @@ -1681,51 +1626,9 @@ scanplain(  }  /* - - leaders - process a cvec of collating elements to also include leaders - * Also gives all characters involved their own colors, which is almost - * certainly necessary, and sets up little disconnected subNFA. - ^ static VOID leaders(struct vars *, struct cvec *); - */ -static void -leaders( -    struct vars *v, -    struct cvec *cv) -{ -    int mcce; -    chr *p; -    chr leader; -    struct state *s; -    struct arc *a; - -    v->mccepbegin = newstate(v->nfa); -    v->mccepend = newstate(v->nfa); -    NOERR(); - -    for (mcce = 0; mcce < cv->nmcces; mcce++) { -	p = cv->mcces[mcce]; -	leader = *p; -	if (!haschr(cv, leader)) { -	    addchr(cv, leader); -	    s = newstate(v->nfa); -	    newarc(v->nfa, PLAIN, subcolor(v->cm, leader), v->mccepbegin, s); -	    okcolors(v->nfa, v->cm); -	} else { -	    a = findarc(v->mccepbegin, PLAIN, GETCOLOR(v->cm, leader)); -	    assert(a != NULL); -	    s = a->to; -	    assert(s != v->mccepend); -	} -	p++; -	assert(*p != 0 && *(p+1) == 0);	/* only 2-char MCCEs for now */ -	newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend); -	okcolors(v->nfa, v->cm); -    } -} - -/*   - onechr - fill in arcs for a plain character, and possible case complements   * This is mostly a shortcut for efficient handling of the common case. - ^ static VOID onechr(struct vars *, pchr, struct state *, struct state *); + ^ static void onechr(struct vars *, pchr, struct state *, struct state *);   */  static void  onechr( @@ -1739,14 +1642,16 @@ onechr(  	return;      } -    /* rats, need general case anyway... */ +    /* +     * Rats, need general case anyway... +     */ +      dovec(v, allcases(v, c), lp, rp);  }  /*   - dovec - fill in arcs for each element of a cvec - * This one has to handle the messy cases, like MCCEs and MCCE leaders. - ^ static VOID dovec(struct vars *, struct cvec *, struct state *, + ^ static void dovec(struct vars *, struct cvec *, struct state *,   ^ 	struct state *);   */  static void @@ -1757,165 +1662,22 @@ dovec(      struct state *rp)  {      chr ch, from, to; -    celt ce; -    chr *p; +    const chr *p;      int i; -    color co; -    struct cvec *leads; -    struct arc *a; -    struct arc *pa;		/* arc in prototype */ -    struct state *s; -    struct state *ps;		/* state in prototype */ - -    /* -     * Need a place to store leaders, if any. -     */ - -    if (nmcces(v) > 0) { -	assert(v->mcces != NULL); -	if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) { -	    if (v->cv2 != NULL) { -		free(v->cv2); -	    } -	    v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces); -	    NOERR(); -	    leads = v->cv2; -	} else { -	    leads = clearcvec(v->cv2); -	} -    } else { -	leads = NULL; -    } - -    /* -     * First, get the ordinary characters out of the way. -     */      for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) {  	ch = *p; -	if (!ISCELEADER(v, ch)) { -	    newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp); -	} else { -	    assert(singleton(v->cm, ch)); -	    assert(leads != NULL); -	    if (!haschr(leads, ch)) { -		addchr(leads, ch); -	    } -	} +	newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);      } -    /* -     * And the ranges. -     */ -      for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) {  	from = *p;  	to = *(p+1); -	while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) { -	    if (from < ce) { -		subrange(v, from, ce - 1, lp, rp); -	    } -	    assert(singleton(v->cm, ce)); -	    assert(leads != NULL); -	    if (!haschr(leads, ce)) { -		addchr(leads, ce); -	    } -	    from = ce + 1; -	}  	if (from <= to) {  	    subrange(v, from, to, lp, rp);  	}      } -    if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0) { -	return; -    } - -    /* -     * Deal with the MCCE leaders. -     */ - -    NOTE(REG_ULOCALE); -    for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) { -	co = GETCOLOR(v->cm, *p); -	a = findarc(lp, PLAIN, co); -	if (a != NULL) { -	    s = a->to; -	} else { -	    s = newstate(v->nfa); -	    NOERR(); -	    newarc(v->nfa, PLAIN, co, lp, s); -	    NOERR(); -	} -	pa = findarc(v->mccepbegin, PLAIN, co); -	assert(pa != NULL); -	ps = pa->to; -	newarc(v->nfa, '$', 1, s, rp); -	newarc(v->nfa, '$', 0, s, rp); -	colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp); -	NOERR(); -    } - -    /* -     * And the MCCEs. -     */ - -    for (i = 0; i < cv->nmcces; i++) { -	p = cv->mcces[i]; -	assert(singleton(v->cm, *p)); -	if (!singleton(v->cm, *p)) { -	    ERR(REG_ASSERT); -	    return; -	} -	ch = *p++; -	co = GETCOLOR(v->cm, ch); -	a = findarc(lp, PLAIN, co); -	if (a != NULL) { -	    s = a->to; -	} else { -	    s = newstate(v->nfa); -	    NOERR(); -	    newarc(v->nfa, PLAIN, co, lp, s); -	    NOERR(); -	} -	assert(*p != 0);	/* at least two chars */ -	assert(singleton(v->cm, *p)); -	ch = *p++; -	co = GETCOLOR(v->cm, ch); -	assert(*p == 0);	/* and only two, for now */ -	newarc(v->nfa, PLAIN, co, s, rp); -	NOERR(); -    } -} - -/* - - nextleader - find next MCCE leader within range - ^ static celt nextleader(struct vars *, pchr, pchr); - */ -static celt			/* NOCELT means none */ -nextleader( -    struct vars *v, -    pchr from, -    pchr to) -{ -    int i; -    chr *p; -    chr ch; -    celt it = NOCELT; - -    if (v->mcces == NULL) { -	return it; -    } - -    for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) { -	ch = *p; -	if (from <= ch && ch <= to) { -	    if (it == NOCELT || ch < it) { -		it = ch; -	    } -	} -    } -    return it;  }  /* @@ -1925,14 +1687,13 @@ nextleader(   * not be called from any unusual lexical context. This should be reconciled   * with the \w etc. handling in lex.c, and should be cleaned up to reduce   * dependencies on input scanning. - ^ static VOID wordchrs(struct vars *); + ^ static void wordchrs(struct vars *);   */  static void  wordchrs(      struct vars *v)  { -    struct state *left; -    struct state *right; +    struct state *left, *right;      if (v->wordchrs != NULL) {  	NEXT();		/* for consistency */ @@ -1970,13 +1731,12 @@ subre(      struct state *begin,      struct state *end)  { -    struct subre *ret; +    struct subre *ret = v->treefree; -    ret = v->treefree;      if (ret != NULL) {  	v->treefree = ret->left;      } else { -	ret = (struct subre *)MALLOC(sizeof(struct subre)); +	ret = (struct subre *) MALLOC(sizeof(struct subre));  	if (ret == NULL) {  	    ERR(REG_ESPACE);  	    return NULL; @@ -1985,11 +1745,11 @@ subre(  	v->treechain = ret;      } -    assert(strchr("|.b(=", op) != NULL); +    assert(strchr("=b|.*(", op) != NULL);      ret->op = op;      ret->flags = flags; -    ret->retry = 0; +    ret->id = 0;		/* will be assigned later */      ret->subno = 0;      ret->min = ret->max = 1;      ret->left = NULL; @@ -2003,7 +1763,7 @@ subre(  /*   - freesubre - free a subRE subtree - ^ static VOID freesubre(struct vars *, struct subre *); + ^ static void freesubre(struct vars *, struct subre *);   */  static void  freesubre( @@ -2026,7 +1786,7 @@ freesubre(  /*   - freesrnode - free one node in a subRE subtree - ^ static VOID freesrnode(struct vars *, struct subre *); + ^ static void freesrnode(struct vars *, struct subre *);   */  static void  freesrnode( @@ -2042,7 +1802,8 @@ freesrnode(      }      sr->flags = 0; -    if (v != NULL) { +    if (v != NULL && v->treechain != NULL) { +	/* we're still parsing, maybe we can reuse the subre */  	sr->left = v->treefree;  	v->treefree = sr;      } else { @@ -2052,31 +1813,25 @@ freesrnode(  /*   - optst - optimize a subRE subtree - ^ static VOID optst(struct vars *, struct subre *); + ^ static void optst(struct vars *, struct subre *);   */  static void  optst(      struct vars *v,      struct subre *t)  { -    if (t == NULL) { -	return; -    } -      /* -     * Recurse through children. +     * DGP (2007-11-13): I assume it was the programmer's intent to eventually +     * come back and add code to optimize subRE trees, but the routine coded +     * just spends effort traversing the tree and doing nothing. We can do +     * nothing with less effort.       */ -    if (t->left != NULL) { -	optst(v, t->left); -    } -    if (t->right != NULL) { -	optst(v, t->right); -    } +    return;  }  /* - - numst - number tree nodes (assigning retry indexes) + - numst - number tree nodes (assigning "id" indexes)   ^ static int numst(struct subre *, int);   */  static int			/* next number */ @@ -2089,7 +1844,7 @@ numst(      assert(t != NULL);      i = start; -    t->retry = (short)i++; +    t->id = (short) i++;      if (t->left != NULL) {  	i = numst(t->left, i);      } @@ -2101,7 +1856,20 @@ numst(  /*   - markst - mark tree nodes as INUSE - ^ static VOID markst(struct subre *); + * Note: this is a great deal more subtle than it looks.  During initial + * parsing of a regex, all subres are linked into the treechain list; + * discarded ones are also linked into the treefree list for possible reuse. + * After we are done creating all subres required for a regex, we run markst() + * then cleanst(), which results in discarding all subres not reachable from + * v->tree.  We then clear v->treechain, indicating that subres must be found + * by descending from v->tree.  This changes the behavior of freesubre(): it + * will henceforth FREE() unwanted subres rather than sticking them into the + * treefree list.  (Doing that any earlier would result in dangling links in + * the treechain list.)  This all means that freev() will clean up correctly + * if invoked before or after markst()+cleanst(); but it would not work if + * called partway through this state conversion, so we mustn't error out + * in or between these two functions. + ^ static void markst(struct subre *);   */  static void  markst( @@ -2120,7 +1888,7 @@ markst(  /*   - cleanst - free any tree nodes not marked INUSE - ^ static VOID cleanst(struct vars *); + ^ static void cleanst(struct vars *);   */  static void  cleanst( @@ -2152,10 +1920,10 @@ nfatree(      assert(t != NULL && t->begin != NULL);      if (t->left != NULL) { -	(DISCARD)nfatree(v, t->left, f); +	(void) nfatree(v, t->left, f);      }      if (t->right != NULL) { -	(DISCARD)nfatree(v, t->right, f); +	(void) nfatree(v, t->right, f);      }      return nfanode(v, t, f); @@ -2208,21 +1976,25 @@ newlacon(      int pos)  {      int n; +    struct subre *newlacons;      struct subre *sub;      if (v->nlacons == 0) { -	v->lacons = (struct subre *)MALLOC(2 * sizeof(struct subre));  	n = 1;		/* skip 0th */ -	v->nlacons = 2; +	newlacons = (struct subre *) MALLOC(2 * sizeof(struct subre));      } else { -	v->lacons = (struct subre *)REALLOC(v->lacons, -		(v->nlacons+1)*sizeof(struct subre)); -	n = v->nlacons++; +	n = v->nlacons; +	newlacons = (struct subre *) REALLOC(v->lacons, +					     (n + 1) * sizeof(struct subre));      } -    if (v->lacons == NULL) { + +    if (newlacons == NULL) {  	ERR(REG_ESPACE);  	return 0;      } + +    v->lacons = newlacons; +    v->nlacons = n + 1;      sub = &v->lacons[n];      sub->begin = begin;      sub->end = end; @@ -2233,7 +2005,7 @@ newlacon(  /*   - freelacons - free lookahead-constraint subRE vector - ^ static VOID freelacons(struct subre *, int); + ^ static void freelacons(struct subre *, int);   */  static void  freelacons( @@ -2254,7 +2026,7 @@ freelacons(  /*   - rfree - free a whole RE (insides of regfree) - ^ static VOID rfree(regex_t *); + ^ static void rfree(regex_t *);   */  static void  rfree( @@ -2270,23 +2042,25 @@ rfree(      g = (struct guts *) re->re_guts;      re->re_guts = NULL;      re->re_fns = NULL; -    g->magic = 0; -    freecm(&g->cmap); -    if (g->tree != NULL) { -	freesubre(NULL, g->tree); -    } -    if (g->lacons != NULL) { -	freelacons(g->lacons, g->nlacons); -    } -    if (!NULLCNFA(g->search)) { -	freecnfa(&g->search); +    if (g != NULL) { +	g->magic = 0; +	freecm(&g->cmap); +	if (g->tree != NULL) { +	    freesubre(NULL, g->tree); +	} +	if (g->lacons != NULL) { +	    freelacons(g->lacons, g->nlacons); +	} +	if (!NULLCNFA(g->search)) { +	    freecnfa(&g->search); +	} +	FREE(g);      } -    FREE(g);  }  /*   - dump - dump an RE in human-readable form - ^ static VOID dump(regex_t *, FILE *); + ^ static void dump(regex_t *, FILE *);   */  static void  dump( @@ -2305,7 +2079,7 @@ dump(  	fprintf(f, "NULL guts!!!\n");  	return;      } -    g = (struct guts *)re->re_guts; +    g = (struct guts *) re->re_guts;      if (g->magic != GUTSMAGIC) {  	fprintf(f, "bad guts magic number (0x%x not 0x%x)\n",  		g->magic, GUTSMAGIC); @@ -2313,11 +2087,11 @@ dump(      fprintf(f, "\n\n\n========= DUMP ==========\n");      fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n", -	    re->re_nsub, re->re_info, re->re_csize, g->ntree); +	    (int) re->re_nsub, re->re_info, re->re_csize, g->ntree);      dumpcolors(&g->cmap, f);      if (!NULLCNFA(g->search)) { -	printf("\nsearch:\n"); +	fprintf(f, "\nsearch:\n");  	dumpcnfa(&g->search, f);      }      for (i = 1; i < g->nlacons; i++) { @@ -2332,7 +2106,7 @@ dump(  /*   - dumpst - dump a subRE tree - ^ static VOID dumpst(struct subre *, FILE *, int); + ^ static void dumpst(struct subre *, FILE *, int);   */  static void  dumpst( @@ -2350,7 +2124,7 @@ dumpst(  /*   - stdump - recursive guts of dumpst - ^ static VOID stdump(struct subre *, FILE *, int); + ^ static void stdump(struct subre *, FILE *, int);   */  static void  stdump( @@ -2384,7 +2158,7 @@ stdump(      }      if (t->min != 1 || t->max != 1) {  	fprintf(f, " {%d,", t->min); -	if (t->max != INFINITY) { +	if (t->max != DUPINF) {  	    fprintf(f, "%d", t->max);  	}  	fprintf(f, "}"); @@ -2401,8 +2175,8 @@ stdump(      if (!NULLCNFA(t->cnfa)) {  	fprintf(f, "\n");  	dumpcnfa(&t->cnfa, f); -	fprintf(f, "\n");      } +    fprintf(f, "\n");      if (t->left != NULL) {  	stdump(t->left, f, nfapresent);      } @@ -2413,23 +2187,23 @@ stdump(  /*   - stid - identify a subtree node for dumping - ^ static char *stid(struct subre *, char *, size_t); + ^ static const char *stid(struct subre *, char *, size_t);   */ -static char *			/* points to buf or constant string */ +static const char *			/* points to buf or constant string */  stid(      struct subre *t,      char *buf,      size_t bufsize)  {      /* -     * Big enough for hex int or decimal t->retry? +     * Big enough for hex int or decimal t->id?       */ -    if (bufsize < sizeof(void*)*2 + 3 || bufsize < sizeof(t->retry)*3 + 1) { +    if (bufsize < sizeof(void*)*2 + 3 || bufsize < sizeof(t->id)*3 + 1) {  	return "unable";      } -    if (t->retry != 0) { -	sprintf(buf, "%d", t->retry); +    if (t->id != 0) { +	sprintf(buf, "%d", t->id);      } else {  	sprintf(buf, "%p", t);      } | 
