Fixes for regexp issues raised in [Bug 578363].

Many thanks to pvgoran@users.sf.net for tracking them down! Also made the RE files that were touched conform a bit better to the Tcl Engineering Guidelines (they still have a long way to go, but this is a start...)
author: dkf <donal.k.fellows@manchester.ac.uk> 2002-07-29 10:56:08 (GMT)
committer: dkf <donal.k.fellows@manchester.ac.uk> 2002-07-29 10:56:08 (GMT)
commit: 392e6300e2782ea71b0e7e57f97e7b74ced0530b (patch)
tree: e6c38e79aa2217b6f74c03b96dea9f14f3e29f97
parent: 8aac5314070c34799ffa1a70feb28b35584bc49a (diff)
download: tcl-392e6300e2782ea71b0e7e57f97e7b74ced0530b.zip
tcl-392e6300e2782ea71b0e7e57f97e7b74ced0530b.tar.gz
tcl-392e6300e2782ea71b0e7e57f97e7b74ced0530b.tar.bz2
4 files changed, 485 insertions, 443 deletions
diff --git a/ChangeLog b/ChangeLog
index fbbc6bd..96a4453 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2002-07-29  Donal K. Fellows  <fellowsd@cs.man.ac.uk>
+
+	* generic/regc_locale.c (cclass): [[:xdigit:]] is only a defined
+	concept on western characters, so should not allow any Unicode
+	digit, and hence the number of ranges in [[:xdigit:]] is fixed.
+	* tests/reg.test: Added test to detect the bug.
+	* generic/regc_cvec.c (newcvec): Corrected initial size value in
+	character vector structure.  [Bug 578363]  Many thanks to
+	pvgoran@users.sf.net for tracking this down.
+
 2002-07-28  Miguel Sofer  <msofer@users.sourceforge.net>
 
 	* generic/tcl.h: 
diff --git a/generic/regc_cvec.c b/generic/regc_cvec.c
index 86765ea..d2d56fc 100644
--- a/generic/regc_cvec.c
+++ b/generic/regc_cvec.c
@@ -36,26 +36,27 @@
  */
 static struct cvec *
 newcvec(nchrs, nranges, nmcces)
-int nchrs;			/* to hold this many chrs... */
-int nranges;			/* ... and this many ranges... */
-int nmcces;			/* ... and this many MCCEs */
+    int nchrs;				/* to hold this many chrs... */
+    int nranges;			/* ... and this many ranges... */
+    int nmcces;				/* ... and this many MCCEs */
 {
-	size_t n;
-	size_t nc;
-	struct cvec *cv;
+    size_t n;
+    size_t nc;
+    struct cvec *cv;
 
-	nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1) + (size_t)nranges*2;
-	n = sizeof(struct cvec) + (size_t)(nmcces-1)*sizeof(chr *) +
-								nc*sizeof(chr);
-	cv = (struct cvec *)MALLOC(n);
-	if (cv == NULL)
-		return NULL;
-	cv->chrspace = nc;
-	cv->chrs = (chr *)&cv->mcces[nmcces];	/* chrs just after MCCE ptrs */
-	cv->mccespace = nmcces;
-	cv->ranges = cv->chrs + nchrs + nmcces*(MAXMCCE+1);
-	cv->rangespace = nranges;
-	return clearcvec(cv);
+    nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1) + (size_t)nranges*2;
+    n = sizeof(struct cvec) + (size_t)(nmcces-1)*sizeof(chr *)
+	    + nc*sizeof(chr);
+    cv = (struct cvec *)MALLOC(n);
+    if (cv == NULL) {
+	return NULL;
+    }
+    cv->chrspace = nchrs;
+    cv->chrs = (chr *)&cv->mcces[nmcces];	/* chrs just after MCCE ptrs */
+    cv->mccespace = nmcces;
+    cv->ranges = cv->chrs + nchrs + nmcces*(MAXMCCE+1);
+    cv->rangespace = nranges;
+    return clearcvec(cv);
 }
 
 /*
@@ -65,20 +66,21 @@ int nmcces;			/* ... and this many MCCEs */
  */
 static struct cvec *
 clearcvec(cv)
-struct cvec *cv;
+    struct cvec *cv;			/* character vector */
 {
-	int i;
+    int i;
 
-	assert(cv != NULL);
-	cv->nchrs = 0;
-	assert(cv->chrs == (chr *)&cv->mcces[cv->mccespace]);
-	cv->nmcces = 0;
-	cv->nmccechrs = 0;
-	cv->nranges = 0;
-	for (i = 0; i < cv->mccespace; i++)
-		cv->mcces[i] = NULL;
+    assert(cv != NULL);
+    cv->nchrs = 0;
+    assert(cv->chrs == (chr *)&cv->mcces[cv->mccespace]);
+    cv->nmcces = 0;
+    cv->nmccechrs = 0;
+    cv->nranges = 0;
+    for (i = 0; i < cv->mccespace; i++) {
+	cv->mcces[i] = NULL;
+    }
 
-	return cv;
+    return cv;
 }
 
 /*
@@ -87,11 +89,11 @@ struct cvec *cv;
  */
 static VOID
 addchr(cv, c)
-struct cvec *cv;
-pchr c;
+    struct cvec *cv;			/* character vector */
+    pchr c;				/* character to add */
 {
-	assert(cv->nchrs < cv->chrspace - cv->nmccechrs);
-	cv->chrs[cv->nchrs++] = (chr)c;
+    assert(cv->nchrs < cv->chrspace - cv->nmccechrs);
+    cv->chrs[cv->nchrs++] = (chr)c;
 }
 
 /*
@@ -100,14 +102,14 @@ pchr c;
  */
 static VOID
 addrange(cv, from, to)
-struct cvec *cv;
-pchr from;
-pchr to;
+    struct cvec *cv;			/* character vector */
+    pchr from;				/* first character of range */
+    pchr to;				/* last character of range */
 {
-	assert(cv->nranges < cv->rangespace);
-	cv->ranges[cv->nranges*2] = (chr)from;
-	cv->ranges[cv->nranges*2 + 1] = (chr)to;
-	cv->nranges++;
+    assert(cv->nranges < cv->rangespace);
+    cv->ranges[cv->nranges*2] = (chr)from;
+    cv->ranges[cv->nranges*2 + 1] = (chr)to;
+    cv->nranges++;
 }
 
 /*
@@ -116,49 +118,55 @@ pchr to;
  */
 static VOID
 addmcce(cv, startp, endp)
-struct cvec *cv;
-chr *startp;			/* beginning of text */
-chr *endp;			/* just past end of text */
+    struct cvec *cv;			/* character vector */
+    chr *startp;			/* beginning of text */
+    chr *endp;				/* just past end of text */
 {
-	int len;
-	int i;
-	chr *s;
-	chr *d;
+    int len;
+    int i;
+    chr *s;
+    chr *d;
 
-	if (startp == NULL && endp == NULL)
-		return;
-	len = endp - startp;
-	assert(len > 0);
-	assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs);
-	assert(cv->nmcces < cv->mccespace);
-	d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1];
-	cv->mcces[cv->nmcces++] = d;
-	for (s = startp, i = len; i > 0; s++, i--)
-		*d++ = *s;
-	*d++ = 0;		/* endmarker */
-	assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]);
-	cv->nmccechrs += len + 1;
+    if (startp == NULL && endp == NULL) {
+	return;
+    }
+    len = endp - startp;
+    assert(len > 0);
+    assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs);
+    assert(cv->nmcces < cv->mccespace);
+    d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1];
+    cv->mcces[cv->nmcces++] = d;
+    for (s = startp, i = len; i > 0; s++, i--) {
+	*d++ = *s;
+    }
+    *d++ = 0;				/* endmarker */
+    assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]);
+    cv->nmccechrs += len + 1;
 }
 
 /*
  - haschr - does a cvec contain this chr?
  ^ static int haschr(struct cvec *, pchr);
  */
-static int			/* predicate */
+static int				/* predicate */
 haschr(cv, c)
-struct cvec *cv;
-pchr c;
+    struct cvec *cv;			/* character vector */
+    pchr c;				/* character to test for */
 {
-	int i;
-	chr *p;
+    int i;
+    chr *p;
 
-	for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
-		if (*p == c)
-			return 1;
-	for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--)
-		if (*p <= c && c <= *(p+1))
-			return 1;
-	return 0;
+    for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) {
+	if (*p == c) {
+	    return 1;
+	}
+    }
+    for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) {
+	if ((*p <= c) && (c <= *(p+1))) {
+	    return 1;
+	}
+    }
+    return 0;
 }
 
 /*
@@ -167,23 +175,25 @@ pchr c;
  */
 static struct cvec *
 getcvec(v, nchrs, nranges, nmcces)
-struct vars *v;
-int nchrs;			/* to hold this many chrs... */
-int nranges;			/* ... and this many ranges... */
-int nmcces;			/* ... and this many MCCEs */
+    struct vars *v;			/* context */
+    int nchrs;				/* to hold this many chrs... */
+    int nranges;			/* ... and this many ranges... */
+    int nmcces;				/* ... and this many MCCEs */
 {
-	if (v->cv != NULL && nchrs <= v->cv->chrspace &&
-					nranges <= v->cv->rangespace &&
-					nmcces <= v->cv->mccespace)
-		return clearcvec(v->cv);
+    if (v->cv != NULL && nchrs <= v->cv->chrspace &&
+	    nranges <= v->cv->rangespace && nmcces <= v->cv->mccespace) {
+	return clearcvec(v->cv);
+    }
 
-	if (v->cv != NULL)
-		freecvec(v->cv);
-	v->cv = newcvec(nchrs, nranges, nmcces);
-	if (v->cv == NULL)
-		ERR(REG_ESPACE);
+    if (v->cv != NULL) {
+	freecvec(v->cv);
+    }
+    v->cv = newcvec(nchrs, nranges, nmcces);
+    if (v->cv == NULL) {
+	ERR(REG_ESPACE);
+    }
 
-	return v->cv;
+    return v->cv;
 }
 
 /*
@@ -192,7 +202,7 @@ int nmcces;			/* ... and this many MCCEs */
  */
 static VOID
 freecvec(cv)
-struct cvec *cv;
+    struct cvec *cv;			/* character vector */
 {
-	FREE(cv);
+    FREE(cv);
 }
diff --git a/generic/regc_locale.c b/generic/regc_locale.c
index bc5ef3d..3374007 100644
--- a/generic/regc_locale.c
+++ b/generic/regc_locale.c
@@ -9,111 +9,111 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: regc_locale.c,v 1.8 2002/02/07 00:48:03 hobbs Exp $
+ * RCS: @(#) $Id: regc_locale.c,v 1.9 2002/07/29 10:56:08 dkf Exp $
  */
 
 /* ASCII character-name table */
 
 static struct cname {
-	char *name;
-	char code;
+    char *name;
+    char code;
 } cnames[] = {
-	{"NUL",	'\0'},
-	{"SOH",	'\001'},
-	{"STX",	'\002'},
-	{"ETX",	'\003'},
-	{"EOT",	'\004'},
-	{"ENQ",	'\005'},
-	{"ACK",	'\006'},
-	{"BEL",	'\007'},
-	{"alert",	'\007'},
-	{"BS",		'\010'},
-	{"backspace",	'\b'},
-	{"HT",		'\011'},
-	{"tab",		'\t'},
-	{"LF",		'\012'},
-	{"newline",	'\n'},
-	{"VT",		'\013'},
-	{"vertical-tab",	'\v'},
-	{"FF",		'\014'},
-	{"form-feed",	'\f'},
-	{"CR",		'\015'},
-	{"carriage-return",	'\r'},
-	{"SO",	'\016'},
-	{"SI",	'\017'},
-	{"DLE",	'\020'},
-	{"DC1",	'\021'},
-	{"DC2",	'\022'},
-	{"DC3",	'\023'},
-	{"DC4",	'\024'},
-	{"NAK",	'\025'},
-	{"SYN",	'\026'},
-	{"ETB",	'\027'},
-	{"CAN",	'\030'},
-	{"EM",	'\031'},
-	{"SUB",	'\032'},
-	{"ESC",	'\033'},
-	{"IS4",	'\034'},
-	{"FS",	'\034'},
-	{"IS3",	'\035'},
-	{"GS",	'\035'},
-	{"IS2",	'\036'},
-	{"RS",	'\036'},
-	{"IS1",	'\037'},
-	{"US",	'\037'},
-	{"space",		' '},
-	{"exclamation-mark",	'!'},
-	{"quotation-mark",	'"'},
-	{"number-sign",		'#'},
-	{"dollar-sign",		'$'},
-	{"percent-sign",		'%'},
-	{"ampersand",		'&'},
-	{"apostrophe",		'\''},
-	{"left-parenthesis",	'('},
-	{"right-parenthesis",	')'},
-	{"asterisk",	'*'},
-	{"plus-sign",	'+'},
-	{"comma",	','},
-	{"hyphen",	'-'},
-	{"hyphen-minus",	'-'},
-	{"period",	'.'},
-	{"full-stop",	'.'},
-	{"slash",	'/'},
-	{"solidus",	'/'},
-	{"zero",		'0'},
-	{"one",		'1'},
-	{"two",		'2'},
-	{"three",	'3'},
-	{"four",		'4'},
-	{"five",		'5'},
-	{"six",		'6'},
-	{"seven",	'7'},
-	{"eight",	'8'},
-	{"nine",		'9'},
-	{"colon",	':'},
-	{"semicolon",	';'},
-	{"less-than-sign",	'<'},
-	{"equals-sign",		'='},
-	{"greater-than-sign",	'>'},
-	{"question-mark",	'?'},
-	{"commercial-at",	'@'},
-	{"left-square-bracket",	'['},
-	{"backslash",		'\\'},
-	{"reverse-solidus",	'\\'},
-	{"right-square-bracket",	']'},
-	{"circumflex",		'^'},
-	{"circumflex-accent",	'^'},
-	{"underscore",		'_'},
-	{"low-line",		'_'},
-	{"grave-accent",		'`'},
-	{"left-brace",		'{'},
-	{"left-curly-bracket",	'{'},
-	{"vertical-line",	'|'},
-	{"right-brace",		'}'},
-	{"right-curly-bracket",	'}'},
-	{"tilde",		'~'},
-	{"DEL",	'\177'},
-	{NULL,	0}
+    {"NUL",		'\0'},
+    {"SOH",		'\001'},
+    {"STX",		'\002'},
+    {"ETX",		'\003'},
+    {"EOT",		'\004'},
+    {"ENQ",		'\005'},
+    {"ACK",		'\006'},
+    {"BEL",		'\007'},
+    {"alert",		'\007'},
+    {"BS",		'\010'},
+    {"backspace",	'\b'},
+    {"HT",		'\011'},
+    {"tab",		'\t'},
+    {"LF",		'\012'},
+    {"newline",		'\n'},
+    {"VT",		'\013'},
+    {"vertical-tab",	'\v'},
+    {"FF",		'\014'},
+    {"form-feed",	'\f'},
+    {"CR",		'\015'},
+    {"carriage-return",	'\r'},
+    {"SO",		'\016'},
+    {"SI",		'\017'},
+    {"DLE",		'\020'},
+    {"DC1",		'\021'},
+    {"DC2",		'\022'},
+    {"DC3",		'\023'},
+    {"DC4",		'\024'},
+    {"NAK",		'\025'},
+    {"SYN",		'\026'},
+    {"ETB",		'\027'},
+    {"CAN",		'\030'},
+    {"EM",		'\031'},
+    {"SUB",		'\032'},
+    {"ESC",		'\033'},
+    {"IS4",		'\034'},
+    {"FS",		'\034'},
+    {"IS3",		'\035'},
+    {"GS",		'\035'},
+    {"IS2",		'\036'},
+    {"RS",		'\036'},
+    {"IS1",		'\037'},
+    {"US",		'\037'},
+    {"space",		' '},
+    {"exclamation-mark",'!'},
+    {"quotation-mark",	'"'},
+    {"number-sign",	'#'},
+    {"dollar-sign",	'$'},
+    {"percent-sign",	'%'},
+    {"ampersand",	'&'},
+    {"apostrophe",	'\''},
+    {"left-parenthesis",'('},
+    {"right-parenthesis", ')'},
+    {"asterisk",	'*'},
+    {"plus-sign",	'+'},
+    {"comma",		','},
+    {"hyphen",		'-'},
+    {"hyphen-minus",	'-'},
+    {"period",		'.'},
+    {"full-stop",	'.'},
+    {"slash",		'/'},
+    {"solidus",		'/'},
+    {"zero",		'0'},
+    {"one",		'1'},
+    {"two",		'2'},
+    {"three",		'3'},
+    {"four",		'4'},
+    {"five",		'5'},
+    {"six",		'6'},
+    {"seven",		'7'},
+    {"eight",		'8'},
+    {"nine",		'9'},
+    {"colon",		':'},
+    {"semicolon",	';'},
+    {"less-than-sign",	'<'},
+    {"equals-sign",	'='},
+    {"greater-than-sign", '>'},
+    {"question-mark",	'?'},
+    {"commercial-at",	'@'},
+    {"left-square-bracket", '['},
+    {"backslash",	'\\'},
+    {"reverse-solidus",	'\\'},
+    {"right-square-bracket", ']'},
+    {"circumflex",	'^'},
+    {"circumflex-accent", '^'},
+    {"underscore",	'_'},
+    {"low-line",	'_'},
+    {"grave-accent",	'`'},
+    {"left-brace",	'{'},
+    {"left-curly-bracket", '{'},
+    {"vertical-line",	'|'},
+    {"right-brace",	'}'},
+    {"right-curly-bracket", '}'},
+    {"tilde",		'~'},
+    {"DEL",		'\177'},
+    {NULL,		0}
 };
 
 /* Unicode character-class tables */
@@ -526,9 +526,9 @@ static chr graphCharTable[] = {
  */
 static int
 nmcces(v)
-struct vars *v;
+    struct vars *v;
 {
-	return 0;
+    return 0;
 }
 
 /*
@@ -537,9 +537,9 @@ struct vars *v;
  */
 static int
 nleaders(v)
-struct vars *v;
+    struct vars *v;
 {
-	return 0;
+    return 0;
 }
 
 /*
@@ -548,10 +548,10 @@ struct vars *v;
  */
 static struct cvec *
 allmcces(v, cv)
-struct vars *v;
-struct cvec *cv;		/* this is supposed to have enough room */
+    struct vars *v;
+    struct cvec *cv;		/* this is supposed to have enough room */
 {
-	return clearcvec(cv);
+    return clearcvec(cv);
 }
 
 /*
@@ -560,36 +560,40 @@ struct cvec *cv;		/* this is supposed to have enough room */
  */
 static celt
 element(v, startp, endp)
-struct vars *v;
-chr *startp;			/* points to start of name */
-chr *endp;			/* points just past end of name */
+    struct vars *v;
+    chr *startp;		/* points to start of name */
+    chr *endp;			/* points just past end of name */
 {
-	struct cname *cn;
-	size_t len;
-	Tcl_DString ds;
-	CONST char *np;
-
-	/* generic:  one-chr names stand for themselves */
-	assert(startp < endp);
-	len = endp - startp;
-	if (len == 1)
-		return *startp;
-
-	NOTE(REG_ULOCALE);
-
-	/* search table */
-	Tcl_DStringInit(&ds);
-	np = Tcl_UniCharToUtfDString(startp, (int)len, &ds);
-	for (cn = cnames; cn->name != NULL; cn++)
-		if (strlen(cn->name) == len && strncmp(cn->name, np, len) == 0)
-			break;		/* NOTE BREAK OUT */
-	Tcl_DStringFree(&ds);
-	if (cn->name != NULL)
-		return CHR(cn->code);
-
-	/* couldn't find it */
-	ERR(REG_ECOLLATE);
-	return 0;
+    struct cname *cn;
+    size_t len;
+    Tcl_DString ds;
+    CONST char *np;
+
+    /* generic:  one-chr names stand for themselves */
+    assert(startp < endp);
+    len = endp - startp;
+    if (len == 1) {
+	return *startp;
+    }
+
+    NOTE(REG_ULOCALE);
+
+    /* search table */
+    Tcl_DStringInit(&ds);
+    np = Tcl_UniCharToUtfDString(startp, (int)len, &ds);
+    for (cn=cnames; cn->name!=NULL; cn++) {
+	if (strlen(cn->name)==len && strncmp(cn->name, np, len)==0) {
+	    break;			/* NOTE BREAK OUT */
+	}
+    }
+    Tcl_DStringFree(&ds);
+    if (cn->name != NULL) {
+	return CHR(cn->code);
+    }
+
+    /* couldn't find it */
+    ERR(REG_ECOLLATE);
+    return 0;
 }
 
 /*
@@ -598,56 +602,56 @@ chr *endp;			/* points just past end of name */
  */
 static struct cvec *
 range(v, a, b, cases)
-struct vars *v;
-celt a;
-celt b;				/* might equal a */
-int cases;			/* case-independent? */
+    struct vars *v;
+    celt a;
+    celt b;				/* might equal a */
+    int cases;				/* case-independent? */
 {
-	int nchrs;
-	struct cvec *cv;
-	celt c, lc, uc, tc;
+    int nchrs;
+    struct cvec *cv;
+    celt c, lc, uc, tc;
 
-	if (a != b && !before(a, b)) {
-		ERR(REG_ERANGE);
-		return NULL;
-	}
+    if (a != b && !before(a, b)) {
+	ERR(REG_ERANGE);
+	return NULL;
+    }
 
-	if (!cases) {		/* easy version */
-		cv = getcvec(v, 0, 1, 0);
-		NOERRN();
-		addrange(cv, a, b);
-		return cv;
-	}
+    if (!cases) {			/* easy version */
+	cv = getcvec(v, 0, 1, 0);
+	NOERRN();
+	addrange(cv, a, b);
+	return cv;
+    }
 
-	/*
-	 * When case-independent, it's hard to decide when cvec ranges are
-	 * usable, so for now at least, we won't try.  We allocate enough
-	 * space for two case variants plus a little extra for the two
-	 * title case variants.
-	 */
+    /*
+     * When case-independent, it's hard to decide when cvec ranges are
+     * usable, so for now at least, we won't try.  We allocate enough
+     * space for two case variants plus a little extra for the two
+     * title case variants.
+     */
 
-	nchrs = (b - a + 1)*2 + 4;
+    nchrs = (b - a + 1)*2 + 4;
 
-	cv = getcvec(v, nchrs, 0, 0);
-	NOERRN();
+    cv = getcvec(v, nchrs, 0, 0);
+    NOERRN();
 
-	for (c = a; c <= b; c++) {
-		addchr(cv, c);
-		lc = Tcl_UniCharToLower((chr)c);
-		uc = Tcl_UniCharToUpper((chr)c);
-		tc = Tcl_UniCharToTitle((chr)c);
-		if (c != lc) {
-			addchr(cv, lc);
-		}
-		if (c != uc) {
-			addchr(cv, uc);
-		}
-		if (c != tc && tc != uc) {
-			addchr(cv, tc);
-		}
+    for (c=a; c<=b; c++) {
+	addchr(cv, c);
+	lc = Tcl_UniCharToLower((chr)c);
+	uc = Tcl_UniCharToUpper((chr)c);
+	tc = Tcl_UniCharToTitle((chr)c);
+	if (c != lc) {
+	    addchr(cv, lc);
 	}
+	if (c != uc) {
+	    addchr(cv, uc);
+	}
+	if (c != tc && tc != uc) {
+	    addchr(cv, tc);
+	}
+    }
 
-	return cv;
+    return cv;
 }
 
 /*
@@ -656,13 +660,14 @@ int cases;			/* case-independent? */
  */
 static int			/* predicate */
 before(x, y)
-celt x;
-celt y;
+    celt x;
+    celt y;
 {
-	/* trivial because no MCCEs */
-	if (x < y)
-		return 1;
-	return 0;
+    /* trivial because no MCCEs */
+    if (x < y) {
+	return 1;
+    }
+    return 0;
 }
 
 /*
@@ -672,31 +677,32 @@ celt y;
  */
 static struct cvec *
 eclass(v, c, cases)
-struct vars *v;
-celt c;
-int cases;			/* all cases? */
+    struct vars *v;
+    celt c;
+    int cases;			/* all cases? */
 {
-	struct cvec *cv;
-
-	/* crude fake equivalence class for testing */
-	if ((v->cflags&REG_FAKE) && c == 'x') {
-		cv = getcvec(v, 4, 0, 0);
-		addchr(cv, (chr)'x');
-		addchr(cv, (chr)'y');
-		if (cases) {
-			addchr(cv, (chr)'X');
-			addchr(cv, (chr)'Y');
-		}
-		return cv;
+    struct cvec *cv;
+
+    /* crude fake equivalence class for testing */
+    if ((v->cflags&REG_FAKE) && c == 'x') {
+	cv = getcvec(v, 4, 0, 0);
+	addchr(cv, (chr)'x');
+	addchr(cv, (chr)'y');
+	if (cases) {
+	    addchr(cv, (chr)'X');
+	    addchr(cv, (chr)'Y');
 	}
-
-	/* otherwise, none */
-	if (cases)
-		return allcases(v, c);
-	cv = getcvec(v, 1, 0, 0);
-	assert(cv != NULL);
-	addchr(cv, (chr)c);
 	return cv;
+    }
+
+    /* otherwise, none */
+    if (cases) {
+	return allcases(v, c);
+    }
+    cv = getcvec(v, 1, 0, 0);
+    assert(cv != NULL);
+    addchr(cv, (chr)c);
+    return cv;
 }
 
 /*
@@ -706,10 +712,10 @@ int cases;			/* all cases? */
  */
 static struct cvec *
 cclass(v, startp, endp, cases)
-struct vars *v;
-chr *startp;			/* where the name starts */
-chr *endp;			/* just past the end of the name */
-int cases;			/* case-independent? */
+    struct vars *v;
+    chr *startp;			/* where the name starts */
+    chr *endp;				/* just past the end of the name */
+    int cases;				/* case-independent? */
 {
     size_t len;
     struct cvec *cv = NULL;
@@ -755,7 +761,7 @@ int cases;			/* case-independent? */
      */
 
     index = -1;
-    for (namePtr = classNames, i = 0; *namePtr != NULL; namePtr++, i++) {
+    for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) {
 	if ((strlen(*namePtr) == len) && (strncmp(*namePtr, np, len) == 0)) {
 	    index = i;
 	    break;
@@ -772,129 +778,137 @@ int cases;			/* case-independent? */
      */
 
     switch((enum classes) index) {
-	case CC_PRINT:
-	case CC_ALNUM:
-	    cv = getcvec(v, NUM_ALPHA_CHAR,
-		    NUM_DIGIT_RANGE + NUM_ALPHA_RANGE, 0);
-	    if (cv) {
-		for (i = 0; i < NUM_ALPHA_CHAR; i++) {
-		    addchr(cv, alphaCharTable[i]);
-		}
-		for (i = 0; i < NUM_ALPHA_RANGE; i++) {
-		    addrange(cv, alphaRangeTable[i].start,
-			     alphaRangeTable[i].end);
-		}
-		for (i = 0; i < NUM_DIGIT_RANGE; i++) {
-		    addrange(cv, digitRangeTable[i].start,
-			    digitRangeTable[i].end);
-		}
+    case CC_PRINT:
+    case CC_ALNUM:
+	cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE, 0);
+	if (cv) {
+	    for (i=0 ; i<NUM_ALPHA_CHAR ; i++) {
+		addchr(cv, alphaCharTable[i]);
 	    }
-	    break;
-	case CC_ALPHA:
-	    cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE, 0);
-	    if (cv) {
-		for (i = 0; i < NUM_ALPHA_RANGE; i++) {
-		    addrange(cv, alphaRangeTable[i].start,
-			     alphaRangeTable[i].end);
-		}
-		for (i = 0; i < NUM_ALPHA_CHAR; i++) {
-		    addchr(cv, alphaCharTable[i]);
-		}
+	    for (i=0 ; i<NUM_ALPHA_RANGE ; i++) {
+		addrange(cv, alphaRangeTable[i].start,
+			alphaRangeTable[i].end);
 	    }
-	    break;
-	case CC_ASCII:
-	    cv = getcvec(v, 0, 1, 0);
-	    if (cv) {
-		addrange(cv, 0, 0x7f);
+	    for (i=0 ; i<NUM_DIGIT_RANGE ; i++) {
+		addrange(cv, digitRangeTable[i].start,
+			digitRangeTable[i].end);
 	    }
-	    break;
-	case CC_BLANK:
-	    cv = getcvec(v, 2, 0, 0);
-	    addchr(cv, '\t');
-	    addchr(cv, ' ');
-	    break;
-	case CC_CNTRL:
-	    cv = getcvec(v, 0, 2, 0);
-	    addrange(cv, 0x0, 0x1f);
-	    addrange(cv, 0x7f, 0x9f);
-	    break;
-	case CC_DIGIT:
-	    cv = getcvec(v, 0, NUM_DIGIT_RANGE, 0);
-	    if (cv) {	
-		for (i = 0; i < NUM_DIGIT_RANGE; i++) {
-		    addrange(cv, digitRangeTable[i].start,
-			    digitRangeTable[i].end);
-		}
+	}
+	break;
+    case CC_ALPHA:
+	cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE, 0);
+	if (cv) {
+	    for (i=0 ; i<NUM_ALPHA_RANGE ; i++) {
+		addrange(cv, alphaRangeTable[i].start,
+			alphaRangeTable[i].end);
 	    }
-	    break;
-	case CC_PUNCT:
-	    cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE, 0);
-	    if (cv) {
-		for (i = 0; i < NUM_PUNCT_RANGE; i++) {
-		    addrange(cv, punctRangeTable[i].start,
-			     punctRangeTable[i].end);
-		}
-		for (i = 0; i < NUM_PUNCT_CHAR; i++) {
-		    addchr(cv, punctCharTable[i]);
-		}
+	    for (i=0 ; i<NUM_ALPHA_CHAR ; i++) {
+		addchr(cv, alphaCharTable[i]);
 	    }
-	    break;
-	case CC_XDIGIT:
-	    cv = getcvec(v, 0, NUM_DIGIT_RANGE+2, 0);
-	    if (cv) {	
-		addrange(cv, '0', '9');
-		addrange(cv, 'a', 'f');
-		addrange(cv, 'A', 'F');
+	}
+	break;
+    case CC_ASCII:
+	cv = getcvec(v, 0, 1, 0);
+	if (cv) {
+	    addrange(cv, 0, 0x7f);
+	}
+	break;
+    case CC_BLANK:
+	cv = getcvec(v, 2, 0, 0);
+	addchr(cv, '\t');
+	addchr(cv, ' ');
+	break;
+    case CC_CNTRL:
+	cv = getcvec(v, 0, 2, 0);
+	addrange(cv, 0x0, 0x1f);
+	addrange(cv, 0x7f, 0x9f);
+	break;
+    case CC_DIGIT:
+	cv = getcvec(v, 0, NUM_DIGIT_RANGE, 0);
+	if (cv) {	
+	    for (i=0 ; i<NUM_DIGIT_RANGE ; i++) {
+		addrange(cv, digitRangeTable[i].start,
+			digitRangeTable[i].end);
 	    }
-	    break;
-	case CC_SPACE:
-	    cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE, 0);
-	    if (cv) {
-		for (i = 0; i < NUM_SPACE_RANGE; i++) {
-		    addrange(cv, spaceRangeTable[i].start,
-			     spaceRangeTable[i].end);
-		}
-		for (i = 0; i < NUM_SPACE_CHAR; i++) {
-		    addchr(cv, spaceCharTable[i]);
-		}
+	}
+	break;
+    case CC_PUNCT:
+	cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE, 0);
+	if (cv) {
+	    for (i=0 ; i<NUM_PUNCT_RANGE ; i++) {
+		addrange(cv, punctRangeTable[i].start,
+			punctRangeTable[i].end);
 	    }
-	    break;
-	case CC_LOWER:
-	    cv  = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE, 0);
-	    if (cv) {
-		for (i = 0; i < NUM_LOWER_RANGE; i++) {
-		    addrange(cv, lowerRangeTable[i].start,
-			     lowerRangeTable[i].end);
-		}
-		for (i = 0; i < NUM_LOWER_CHAR; i++) {
-		    addchr(cv, lowerCharTable[i]);
-		}
+	    for (i=0 ; i<NUM_PUNCT_CHAR ; i++) {
+		addchr(cv, punctCharTable[i]);
 	    }
-	    break;
-	case CC_UPPER:
-	    cv  = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE, 0);
-	    if (cv) {
-		for (i = 0; i < NUM_UPPER_RANGE; i++) {
-		    addrange(cv, upperRangeTable[i].start,
-			     upperRangeTable[i].end);
-		}
-		for (i = 0; i < NUM_UPPER_CHAR; i++) {
-		    addchr(cv, upperCharTable[i]);
-		}
+	}
+	break;
+    case CC_XDIGIT:
+	/*
+	 * This is a 3 instead of (NUM_DIGIT_RANGE+2) because I've no
+	 * idea how to define the digits 'a' through 'f' in
+	 * non-western locales.  The concept is quite possibly
+	 * non-portable, or only used in contexts where the characters
+	 * used would be the western ones anyway!  Whatever is
+	 * actually the case, the number of ranges is fixed (until
+	 * someone comes up with a better arrangement!)
+	 */
+	cv = getcvec(v, 0, 3, 0);
+	if (cv) {	
+	    addrange(cv, '0', '9');
+	    addrange(cv, 'a', 'f');
+	    addrange(cv, 'A', 'F');
+	}
+	break;
+    case CC_SPACE:
+	cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE, 0);
+	if (cv) {
+	    for (i=0 ; i<NUM_SPACE_RANGE ; i++) {
+		addrange(cv, spaceRangeTable[i].start,
+			spaceRangeTable[i].end);
 	    }
-	    break;
-	case CC_GRAPH:
-	    cv  = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE, 0);
-	    if (cv) {
-		for (i = 0; i < NUM_GRAPH_RANGE; i++) {
-		    addrange(cv, graphRangeTable[i].start,
-			     graphRangeTable[i].end);
-		}
-		for (i = 0; i < NUM_GRAPH_CHAR; i++) {
-		    addchr(cv, graphCharTable[i]);
-		}
+	    for (i=0 ; i<NUM_SPACE_CHAR ; i++) {
+		addchr(cv, spaceCharTable[i]);
 	    }
-	    break;
+	}
+	break;
+    case CC_LOWER:
+	cv  = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE, 0);
+	if (cv) {
+	    for (i=0 ; i<NUM_LOWER_RANGE ; i++) {
+		addrange(cv, lowerRangeTable[i].start,
+			lowerRangeTable[i].end);
+	    }
+	    for (i=0 ; i<NUM_LOWER_CHAR ; i++) {
+		addchr(cv, lowerCharTable[i]);
+	    }
+	}
+	break;
+    case CC_UPPER:
+	cv  = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE, 0);
+	if (cv) {
+	    for (i=0 ; i<NUM_UPPER_RANGE ; i++) {
+		addrange(cv, upperRangeTable[i].start,
+			upperRangeTable[i].end);
+	    }
+	    for (i=0 ; i<NUM_UPPER_CHAR ; i++) {
+		addchr(cv, upperCharTable[i]);
+	    }
+	}
+	break;
+    case CC_GRAPH:
+	cv  = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE, 0);
+	if (cv) {
+	    for (i=0 ; i<NUM_GRAPH_RANGE ; i++) {
+		addrange(cv, graphRangeTable[i].start,
+			graphRangeTable[i].end);
+	    }
+	    for (i=0 ; i<NUM_GRAPH_CHAR ; i++) {
+		addchr(cv, graphCharTable[i]);
+	    }
+	}
+	break;
     }
     if (cv == NULL) {
 	ERR(REG_ESPACE);
@@ -910,28 +924,28 @@ int cases;			/* case-independent? */
  */
 static struct cvec *
 allcases(v, pc)
-struct vars *v;
-pchr pc;
+    struct vars *v;
+    pchr pc;
 {
-	struct cvec *cv;
-	chr c = (chr)pc;
-	chr lc, uc, tc;
-
-	lc = Tcl_UniCharToLower((chr)c);
-	uc = Tcl_UniCharToUpper((chr)c);
-	tc = Tcl_UniCharToTitle((chr)c);
-
-	if (tc != uc) {
-	    cv = getcvec(v, 3, 0, 0);
-	    addchr(cv, tc);
-	} else {
-	    cv = getcvec(v, 2, 0, 0);
-	}
-	addchr(cv, lc);
-	if (lc != uc) {
-	    addchr(cv, uc);
-	}
-	return cv;
+    struct cvec *cv;
+    chr c = (chr)pc;
+    chr lc, uc, tc;
+
+    lc = Tcl_UniCharToLower((chr)c);
+    uc = Tcl_UniCharToUpper((chr)c);
+    tc = Tcl_UniCharToTitle((chr)c);
+
+    if (tc != uc) {
+	cv = getcvec(v, 3, 0, 0);
+	addchr(cv, tc);
+    } else {
+	cv = getcvec(v, 2, 0, 0);
+    }
+    addchr(cv, lc);
+    if (lc != uc) {
+	addchr(cv, uc);
+    }
+    return cv;
 }
 
 /*
@@ -944,11 +958,11 @@ pchr pc;
  */
 static int			/* 0 for equal, nonzero for unequal */
 cmp(x, y, len)
-CONST chr *x;
-CONST chr *y;
-size_t len;			/* exact length of comparison */
+    CONST chr *x;
+    CONST chr *y;
+    size_t len;			/* exact length of comparison */
 {
-	return memcmp(VS(x), VS(y), len*sizeof(chr));
+    return memcmp(VS(x), VS(y), len*sizeof(chr));
 }
 
 /*
@@ -961,12 +975,14 @@ size_t len;			/* exact length of comparison */
  */
 static int			/* 0 for equal, nonzero for unequal */
 casecmp(x, y, len)
-CONST chr *x;
-CONST chr *y;
-size_t len;			/* exact length of comparison */
+    CONST chr *x;
+    CONST chr *y;
+    size_t len;			/* exact length of comparison */
 {
-    for ( ; len > 0; len--, x++, y++)
-	if ((*x != *y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y)))
+    for (; len > 0; len--, x++, y++) {
+	if ((*x!=*y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y))) {
 	    return 1;
+	}
+    }
     return 0;
 }
diff --git a/tests/reg.test b/tests/reg.test
index f35050b..58ebc55 100644
--- a/tests/reg.test
+++ b/tests/reg.test
@@ -9,7 +9,7 @@
 #
 # Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
 #
-# RCS: @(#) $Id: reg.test,v 1.14 2002/06/22 04:19:47 dgp Exp $
+# RCS: @(#) $Id: reg.test,v 1.15 2002/07/29 10:56:08 dkf Exp $
 
 if {[lsearch [namespace children] ::tcltest] == -1} {
     package require tcltest 2
@@ -987,8 +987,14 @@ m  9	HLP	{(?n)^(?![t#])\S+}	"tk\n\n#\n#\nit0"	it0
 # flush any leftover complaints
 doing 0 "flush"
 
+# Tests resulting from bugs reported by users
+test reg-31.1 {[[:xdigit:]] behaves correctly when followed by [[:space:]]} {
+    set str {2:::DebugWin32} 
+    set re {([[:xdigit:]])([[:space:]]*)} 
+    list [regexp $re $str match xdigit spaces] $match $xdigit $spaces
+    # Code used to produce {1 2:::DebugWin32 2 :::DebugWin32} !!!
+} {1 2 2 {}}
+
 # cleanup
 ::tcltest::cleanupTests
 return
-
-
author	dkf <donal.k.fellows@manchester.ac.uk>	2002-07-29 10:56:08 (GMT)
committer	dkf <donal.k.fellows@manchester.ac.uk>	2002-07-29 10:56:08 (GMT)
commit	392e6300e2782ea71b0e7e57f97e7b74ced0530b (patch)
tree	e6c38e79aa2217b6f74c03b96dea9f14f3e29f97
parent	8aac5314070c34799ffa1a70feb28b35584bc49a (diff)
download	tcl-392e6300e2782ea71b0e7e57f97e7b74ced0530b.zip tcl-392e6300e2782ea71b0e7e57f97e7b74ced0530b.tar.gz tcl-392e6300e2782ea71b0e7e57f97e7b74ced0530b.tar.bz2