summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author dkf <donal.k.fellows@manchester.ac.uk> 2002-07-29 10:56:08 (GMT)
committer dkf <donal.k.fellows@manchester.ac.uk> 2002-07-29 10:56:08 (GMT)
commit 392e6300e2782ea71b0e7e57f97e7b74ced0530b (patch)
tree e6c38e79aa2217b6f74c03b96dea9f14f3e29f97
parent 8aac5314070c34799ffa1a70feb28b35584bc49a (diff)
download tcl-392e6300e2782ea71b0e7e57f97e7b74ced0530b.zip
tcl-392e6300e2782ea71b0e7e57f97e7b74ced0530b.tar.gz
tcl-392e6300e2782ea71b0e7e57f97e7b74ced0530b.tar.bz2
Fixes for regexp issues raised in [Bug 578363].
Many thanks to pvgoran@users.sf.net for tracking them down! Also brought the RE files that were touched closer to the Tcl Engineering Guidelines (they still have a long way to go, but this is a start...)
-rw-r--r-- ChangeLog 10
-rw-r--r-- generic/regc_cvec.c 186
-rw-r--r-- generic/regc_locale.c 720
-rw-r--r-- tests/reg.test 12
4 files changed, 485 insertions, 443 deletions
diff --git a/ChangeLog b/ChangeLog
index fbbc6bd..96a4453 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2002-07-29 Donal K. Fellows <fellowsd@cs.man.ac.uk>
+
+ * generic/regc_locale.c (cclass): [[:xdigit:]] is only a defined
+ concept on western characters, so should not allow any unicode
+ digit, and hence number of ranges in [[:xdigit:]] is fixed.
+ * tests/reg.test: Added test to detect the bug.
+ * generic/regc_cvec.c (newcvec): Corrected initial size value in
+ character vector structure. [Bug 578363] Many thanks to
+ pvgoran@users.sf.net for tracking this down.
+
2002-07-28 Miguel Sofer <msofer@users.sourceforge.net>
* generic/tcl.h:
diff --git a/generic/regc_cvec.c b/generic/regc_cvec.c
index 86765ea..d2d56fc 100644
--- a/generic/regc_cvec.c
+++ b/generic/regc_cvec.c
@@ -36,26 +36,27 @@
*/
static struct cvec *
newcvec(nchrs, nranges, nmcces)
-int nchrs; /* to hold this many chrs... */
-int nranges; /* ... and this many ranges... */
-int nmcces; /* ... and this many MCCEs */
+ int nchrs; /* to hold this many chrs... */
+ int nranges; /* ... and this many ranges... */
+ int nmcces; /* ... and this many MCCEs */
{
- size_t n;
- size_t nc;
- struct cvec *cv;
+ size_t n;
+ size_t nc;
+ struct cvec *cv;
- nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1) + (size_t)nranges*2;
- n = sizeof(struct cvec) + (size_t)(nmcces-1)*sizeof(chr *) +
- nc*sizeof(chr);
- cv = (struct cvec *)MALLOC(n);
- if (cv == NULL)
- return NULL;
- cv->chrspace = nc;
- cv->chrs = (chr *)&cv->mcces[nmcces]; /* chrs just after MCCE ptrs */
- cv->mccespace = nmcces;
- cv->ranges = cv->chrs + nchrs + nmcces*(MAXMCCE+1);
- cv->rangespace = nranges;
- return clearcvec(cv);
+ nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1) + (size_t)nranges*2;
+ n = sizeof(struct cvec) + (size_t)(nmcces-1)*sizeof(chr *)
+ + nc*sizeof(chr);
+ cv = (struct cvec *)MALLOC(n);
+ if (cv == NULL) {
+ return NULL;
+ }
+ cv->chrspace = nchrs;
+ cv->chrs = (chr *)&cv->mcces[nmcces]; /* chrs just after MCCE ptrs */
+ cv->mccespace = nmcces;
+ cv->ranges = cv->chrs + nchrs + nmcces*(MAXMCCE+1);
+ cv->rangespace = nranges;
+ return clearcvec(cv);
}
/*
@@ -65,20 +66,21 @@ int nmcces; /* ... and this many MCCEs */
*/
static struct cvec *
clearcvec(cv)
-struct cvec *cv;
+ struct cvec *cv; /* character vector */
{
- int i;
+ int i;
- assert(cv != NULL);
- cv->nchrs = 0;
- assert(cv->chrs == (chr *)&cv->mcces[cv->mccespace]);
- cv->nmcces = 0;
- cv->nmccechrs = 0;
- cv->nranges = 0;
- for (i = 0; i < cv->mccespace; i++)
- cv->mcces[i] = NULL;
+ assert(cv != NULL);
+ cv->nchrs = 0;
+ assert(cv->chrs == (chr *)&cv->mcces[cv->mccespace]);
+ cv->nmcces = 0;
+ cv->nmccechrs = 0;
+ cv->nranges = 0;
+ for (i = 0; i < cv->mccespace; i++) {
+ cv->mcces[i] = NULL;
+ }
- return cv;
+ return cv;
}
/*
@@ -87,11 +89,11 @@ struct cvec *cv;
*/
static VOID
addchr(cv, c)
-struct cvec *cv;
-pchr c;
+ struct cvec *cv; /* character vector */
+ pchr c; /* character to add */
{
- assert(cv->nchrs < cv->chrspace - cv->nmccechrs);
- cv->chrs[cv->nchrs++] = (chr)c;
+ assert(cv->nchrs < cv->chrspace - cv->nmccechrs);
+ cv->chrs[cv->nchrs++] = (chr)c;
}
/*
@@ -100,14 +102,14 @@ pchr c;
*/
static VOID
addrange(cv, from, to)
-struct cvec *cv;
-pchr from;
-pchr to;
+ struct cvec *cv; /* character vector */
+ pchr from; /* first character of range */
+ pchr to; /* last character of range */
{
- assert(cv->nranges < cv->rangespace);
- cv->ranges[cv->nranges*2] = (chr)from;
- cv->ranges[cv->nranges*2 + 1] = (chr)to;
- cv->nranges++;
+ assert(cv->nranges < cv->rangespace);
+ cv->ranges[cv->nranges*2] = (chr)from;
+ cv->ranges[cv->nranges*2 + 1] = (chr)to;
+ cv->nranges++;
}
/*
@@ -116,49 +118,55 @@ pchr to;
*/
static VOID
addmcce(cv, startp, endp)
-struct cvec *cv;
-chr *startp; /* beginning of text */
-chr *endp; /* just past end of text */
+ struct cvec *cv; /* character vector */
+ chr *startp; /* beginning of text */
+ chr *endp; /* just past end of text */
{
- int len;
- int i;
- chr *s;
- chr *d;
+ int len;
+ int i;
+ chr *s;
+ chr *d;
- if (startp == NULL && endp == NULL)
- return;
- len = endp - startp;
- assert(len > 0);
- assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs);
- assert(cv->nmcces < cv->mccespace);
- d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1];
- cv->mcces[cv->nmcces++] = d;
- for (s = startp, i = len; i > 0; s++, i--)
- *d++ = *s;
- *d++ = 0; /* endmarker */
- assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]);
- cv->nmccechrs += len + 1;
+ if (startp == NULL && endp == NULL) {
+ return;
+ }
+ len = endp - startp;
+ assert(len > 0);
+ assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs);
+ assert(cv->nmcces < cv->mccespace);
+ d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1];
+ cv->mcces[cv->nmcces++] = d;
+ for (s = startp, i = len; i > 0; s++, i--) {
+ *d++ = *s;
+ }
+ *d++ = 0; /* endmarker */
+ assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]);
+ cv->nmccechrs += len + 1;
}
/*
- haschr - does a cvec contain this chr?
^ static int haschr(struct cvec *, pchr);
*/
-static int /* predicate */
+static int /* predicate */
haschr(cv, c)
-struct cvec *cv;
-pchr c;
+ struct cvec *cv; /* character vector */
+ pchr c; /* character to test for */
{
- int i;
- chr *p;
+ int i;
+ chr *p;
- for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
- if (*p == c)
- return 1;
- for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--)
- if (*p <= c && c <= *(p+1))
- return 1;
- return 0;
+ for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) {
+ if (*p == c) {
+ return 1;
+ }
+ }
+ for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) {
+ if ((*p <= c) && (c <= *(p+1))) {
+ return 1;
+ }
+ }
+ return 0;
}
/*
@@ -167,23 +175,25 @@ pchr c;
*/
static struct cvec *
getcvec(v, nchrs, nranges, nmcces)
-struct vars *v;
-int nchrs; /* to hold this many chrs... */
-int nranges; /* ... and this many ranges... */
-int nmcces; /* ... and this many MCCEs */
+ struct vars *v; /* context */
+ int nchrs; /* to hold this many chrs... */
+ int nranges; /* ... and this many ranges... */
+ int nmcces; /* ... and this many MCCEs */
{
- if (v->cv != NULL && nchrs <= v->cv->chrspace &&
- nranges <= v->cv->rangespace &&
- nmcces <= v->cv->mccespace)
- return clearcvec(v->cv);
+ if (v->cv != NULL && nchrs <= v->cv->chrspace &&
+ nranges <= v->cv->rangespace && nmcces <= v->cv->mccespace) {
+ return clearcvec(v->cv);
+ }
- if (v->cv != NULL)
- freecvec(v->cv);
- v->cv = newcvec(nchrs, nranges, nmcces);
- if (v->cv == NULL)
- ERR(REG_ESPACE);
+ if (v->cv != NULL) {
+ freecvec(v->cv);
+ }
+ v->cv = newcvec(nchrs, nranges, nmcces);
+ if (v->cv == NULL) {
+ ERR(REG_ESPACE);
+ }
- return v->cv;
+ return v->cv;
}
/*
@@ -192,7 +202,7 @@ int nmcces; /* ... and this many MCCEs */
*/
static VOID
freecvec(cv)
-struct cvec *cv;
+ struct cvec *cv; /* character vector */
{
- FREE(cv);
+ FREE(cv);
}
diff --git a/generic/regc_locale.c b/generic/regc_locale.c
index bc5ef3d..3374007 100644
--- a/generic/regc_locale.c
+++ b/generic/regc_locale.c
@@ -9,111 +9,111 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: regc_locale.c,v 1.8 2002/02/07 00:48:03 hobbs Exp $
+ * RCS: @(#) $Id: regc_locale.c,v 1.9 2002/07/29 10:56:08 dkf Exp $
*/
/* ASCII character-name table */
static struct cname {
- char *name;
- char code;
+ char *name;
+ char code;
} cnames[] = {
- {"NUL", '\0'},
- {"SOH", '\001'},
- {"STX", '\002'},
- {"ETX", '\003'},
- {"EOT", '\004'},
- {"ENQ", '\005'},
- {"ACK", '\006'},
- {"BEL", '\007'},
- {"alert", '\007'},
- {"BS", '\010'},
- {"backspace", '\b'},
- {"HT", '\011'},
- {"tab", '\t'},
- {"LF", '\012'},
- {"newline", '\n'},
- {"VT", '\013'},
- {"vertical-tab", '\v'},
- {"FF", '\014'},
- {"form-feed", '\f'},
- {"CR", '\015'},
- {"carriage-return", '\r'},
- {"SO", '\016'},
- {"SI", '\017'},
- {"DLE", '\020'},
- {"DC1", '\021'},
- {"DC2", '\022'},
- {"DC3", '\023'},
- {"DC4", '\024'},
- {"NAK", '\025'},
- {"SYN", '\026'},
- {"ETB", '\027'},
- {"CAN", '\030'},
- {"EM", '\031'},
- {"SUB", '\032'},
- {"ESC", '\033'},
- {"IS4", '\034'},
- {"FS", '\034'},
- {"IS3", '\035'},
- {"GS", '\035'},
- {"IS2", '\036'},
- {"RS", '\036'},
- {"IS1", '\037'},
- {"US", '\037'},
- {"space", ' '},
- {"exclamation-mark", '!'},
- {"quotation-mark", '"'},
- {"number-sign", '#'},
- {"dollar-sign", '$'},
- {"percent-sign", '%'},
- {"ampersand", '&'},
- {"apostrophe", '\''},
- {"left-parenthesis", '('},
- {"right-parenthesis", ')'},
- {"asterisk", '*'},
- {"plus-sign", '+'},
- {"comma", ','},
- {"hyphen", '-'},
- {"hyphen-minus", '-'},
- {"period", '.'},
- {"full-stop", '.'},
- {"slash", '/'},
- {"solidus", '/'},
- {"zero", '0'},
- {"one", '1'},
- {"two", '2'},
- {"three", '3'},
- {"four", '4'},
- {"five", '5'},
- {"six", '6'},
- {"seven", '7'},
- {"eight", '8'},
- {"nine", '9'},
- {"colon", ':'},
- {"semicolon", ';'},
- {"less-than-sign", '<'},
- {"equals-sign", '='},
- {"greater-than-sign", '>'},
- {"question-mark", '?'},
- {"commercial-at", '@'},
- {"left-square-bracket", '['},
- {"backslash", '\\'},
- {"reverse-solidus", '\\'},
- {"right-square-bracket", ']'},
- {"circumflex", '^'},
- {"circumflex-accent", '^'},
- {"underscore", '_'},
- {"low-line", '_'},
- {"grave-accent", '`'},
- {"left-brace", '{'},
- {"left-curly-bracket", '{'},
- {"vertical-line", '|'},
- {"right-brace", '}'},
- {"right-curly-bracket", '}'},
- {"tilde", '~'},
- {"DEL", '\177'},
- {NULL, 0}
+ {"NUL", '\0'},
+ {"SOH", '\001'},
+ {"STX", '\002'},
+ {"ETX", '\003'},
+ {"EOT", '\004'},
+ {"ENQ", '\005'},
+ {"ACK", '\006'},
+ {"BEL", '\007'},
+ {"alert", '\007'},
+ {"BS", '\010'},
+ {"backspace", '\b'},
+ {"HT", '\011'},
+ {"tab", '\t'},
+ {"LF", '\012'},
+ {"newline", '\n'},
+ {"VT", '\013'},
+ {"vertical-tab", '\v'},
+ {"FF", '\014'},
+ {"form-feed", '\f'},
+ {"CR", '\015'},
+ {"carriage-return", '\r'},
+ {"SO", '\016'},
+ {"SI", '\017'},
+ {"DLE", '\020'},
+ {"DC1", '\021'},
+ {"DC2", '\022'},
+ {"DC3", '\023'},
+ {"DC4", '\024'},
+ {"NAK", '\025'},
+ {"SYN", '\026'},
+ {"ETB", '\027'},
+ {"CAN", '\030'},
+ {"EM", '\031'},
+ {"SUB", '\032'},
+ {"ESC", '\033'},
+ {"IS4", '\034'},
+ {"FS", '\034'},
+ {"IS3", '\035'},
+ {"GS", '\035'},
+ {"IS2", '\036'},
+ {"RS", '\036'},
+ {"IS1", '\037'},
+ {"US", '\037'},
+ {"space", ' '},
+ {"exclamation-mark",'!'},
+ {"quotation-mark", '"'},
+ {"number-sign", '#'},
+ {"dollar-sign", '$'},
+ {"percent-sign", '%'},
+ {"ampersand", '&'},
+ {"apostrophe", '\''},
+ {"left-parenthesis",'('},
+ {"right-parenthesis", ')'},
+ {"asterisk", '*'},
+ {"plus-sign", '+'},
+ {"comma", ','},
+ {"hyphen", '-'},
+ {"hyphen-minus", '-'},
+ {"period", '.'},
+ {"full-stop", '.'},
+ {"slash", '/'},
+ {"solidus", '/'},
+ {"zero", '0'},
+ {"one", '1'},
+ {"two", '2'},
+ {"three", '3'},
+ {"four", '4'},
+ {"five", '5'},
+ {"six", '6'},
+ {"seven", '7'},
+ {"eight", '8'},
+ {"nine", '9'},
+ {"colon", ':'},
+ {"semicolon", ';'},
+ {"less-than-sign", '<'},
+ {"equals-sign", '='},
+ {"greater-than-sign", '>'},
+ {"question-mark", '?'},
+ {"commercial-at", '@'},
+ {"left-square-bracket", '['},
+ {"backslash", '\\'},
+ {"reverse-solidus", '\\'},
+ {"right-square-bracket", ']'},
+ {"circumflex", '^'},
+ {"circumflex-accent", '^'},
+ {"underscore", '_'},
+ {"low-line", '_'},
+ {"grave-accent", '`'},
+ {"left-brace", '{'},
+ {"left-curly-bracket", '{'},
+ {"vertical-line", '|'},
+ {"right-brace", '}'},
+ {"right-curly-bracket", '}'},
+ {"tilde", '~'},
+ {"DEL", '\177'},
+ {NULL, 0}
};
/* Unicode character-class tables */
@@ -526,9 +526,9 @@ static chr graphCharTable[] = {
*/
static int
nmcces(v)
-struct vars *v;
+ struct vars *v;
{
- return 0;
+ return 0;
}
/*
@@ -537,9 +537,9 @@ struct vars *v;
*/
static int
nleaders(v)
-struct vars *v;
+ struct vars *v;
{
- return 0;
+ return 0;
}
/*
@@ -548,10 +548,10 @@ struct vars *v;
*/
static struct cvec *
allmcces(v, cv)
-struct vars *v;
-struct cvec *cv; /* this is supposed to have enough room */
+ struct vars *v;
+ struct cvec *cv; /* this is supposed to have enough room */
{
- return clearcvec(cv);
+ return clearcvec(cv);
}
/*
@@ -560,36 +560,40 @@ struct cvec *cv; /* this is supposed to have enough room */
*/
static celt
element(v, startp, endp)
-struct vars *v;
-chr *startp; /* points to start of name */
-chr *endp; /* points just past end of name */
+ struct vars *v;
+ chr *startp; /* points to start of name */
+ chr *endp; /* points just past end of name */
{
- struct cname *cn;
- size_t len;
- Tcl_DString ds;
- CONST char *np;
-
- /* generic: one-chr names stand for themselves */
- assert(startp < endp);
- len = endp - startp;
- if (len == 1)
- return *startp;
-
- NOTE(REG_ULOCALE);
-
- /* search table */
- Tcl_DStringInit(&ds);
- np = Tcl_UniCharToUtfDString(startp, (int)len, &ds);
- for (cn = cnames; cn->name != NULL; cn++)
- if (strlen(cn->name) == len && strncmp(cn->name, np, len) == 0)
- break; /* NOTE BREAK OUT */
- Tcl_DStringFree(&ds);
- if (cn->name != NULL)
- return CHR(cn->code);
-
- /* couldn't find it */
- ERR(REG_ECOLLATE);
- return 0;
+ struct cname *cn;
+ size_t len;
+ Tcl_DString ds;
+ CONST char *np;
+
+ /* generic: one-chr names stand for themselves */
+ assert(startp < endp);
+ len = endp - startp;
+ if (len == 1) {
+ return *startp;
+ }
+
+ NOTE(REG_ULOCALE);
+
+ /* search table */
+ Tcl_DStringInit(&ds);
+ np = Tcl_UniCharToUtfDString(startp, (int)len, &ds);
+ for (cn=cnames; cn->name!=NULL; cn++) {
+ if (strlen(cn->name)==len && strncmp(cn->name, np, len)==0) {
+ break; /* NOTE BREAK OUT */
+ }
+ }
+ Tcl_DStringFree(&ds);
+ if (cn->name != NULL) {
+ return CHR(cn->code);
+ }
+
+ /* couldn't find it */
+ ERR(REG_ECOLLATE);
+ return 0;
}
/*
@@ -598,56 +602,56 @@ chr *endp; /* points just past end of name */
*/
static struct cvec *
range(v, a, b, cases)
-struct vars *v;
-celt a;
-celt b; /* might equal a */
-int cases; /* case-independent? */
+ struct vars *v;
+ celt a;
+ celt b; /* might equal a */
+ int cases; /* case-independent? */
{
- int nchrs;
- struct cvec *cv;
- celt c, lc, uc, tc;
+ int nchrs;
+ struct cvec *cv;
+ celt c, lc, uc, tc;
- if (a != b && !before(a, b)) {
- ERR(REG_ERANGE);
- return NULL;
- }
+ if (a != b && !before(a, b)) {
+ ERR(REG_ERANGE);
+ return NULL;
+ }
- if (!cases) { /* easy version */
- cv = getcvec(v, 0, 1, 0);
- NOERRN();
- addrange(cv, a, b);
- return cv;
- }
+ if (!cases) { /* easy version */
+ cv = getcvec(v, 0, 1, 0);
+ NOERRN();
+ addrange(cv, a, b);
+ return cv;
+ }
- /*
- * When case-independent, it's hard to decide when cvec ranges are
- * usable, so for now at least, we won't try. We allocate enough
- * space for two case variants plus a little extra for the two
- * title case variants.
- */
+ /*
+ * When case-independent, it's hard to decide when cvec ranges are
+ * usable, so for now at least, we won't try. We allocate enough
+ * space for two case variants plus a little extra for the two
+ * title case variants.
+ */
- nchrs = (b - a + 1)*2 + 4;
+ nchrs = (b - a + 1)*2 + 4;
- cv = getcvec(v, nchrs, 0, 0);
- NOERRN();
+ cv = getcvec(v, nchrs, 0, 0);
+ NOERRN();
- for (c = a; c <= b; c++) {
- addchr(cv, c);
- lc = Tcl_UniCharToLower((chr)c);
- uc = Tcl_UniCharToUpper((chr)c);
- tc = Tcl_UniCharToTitle((chr)c);
- if (c != lc) {
- addchr(cv, lc);
- }
- if (c != uc) {
- addchr(cv, uc);
- }
- if (c != tc && tc != uc) {
- addchr(cv, tc);
- }
+ for (c=a; c<=b; c++) {
+ addchr(cv, c);
+ lc = Tcl_UniCharToLower((chr)c);
+ uc = Tcl_UniCharToUpper((chr)c);
+ tc = Tcl_UniCharToTitle((chr)c);
+ if (c != lc) {
+ addchr(cv, lc);
}
+ if (c != uc) {
+ addchr(cv, uc);
+ }
+ if (c != tc && tc != uc) {
+ addchr(cv, tc);
+ }
+ }
- return cv;
+ return cv;
}
/*
@@ -656,13 +660,14 @@ int cases; /* case-independent? */
*/
static int /* predicate */
before(x, y)
-celt x;
-celt y;
+ celt x;
+ celt y;
{
- /* trivial because no MCCEs */
- if (x < y)
- return 1;
- return 0;
+ /* trivial because no MCCEs */
+ if (x < y) {
+ return 1;
+ }
+ return 0;
}
/*
@@ -672,31 +677,32 @@ celt y;
*/
static struct cvec *
eclass(v, c, cases)
-struct vars *v;
-celt c;
-int cases; /* all cases? */
+ struct vars *v;
+ celt c;
+ int cases; /* all cases? */
{
- struct cvec *cv;
-
- /* crude fake equivalence class for testing */
- if ((v->cflags&REG_FAKE) && c == 'x') {
- cv = getcvec(v, 4, 0, 0);
- addchr(cv, (chr)'x');
- addchr(cv, (chr)'y');
- if (cases) {
- addchr(cv, (chr)'X');
- addchr(cv, (chr)'Y');
- }
- return cv;
+ struct cvec *cv;
+
+ /* crude fake equivalence class for testing */
+ if ((v->cflags&REG_FAKE) && c == 'x') {
+ cv = getcvec(v, 4, 0, 0);
+ addchr(cv, (chr)'x');
+ addchr(cv, (chr)'y');
+ if (cases) {
+ addchr(cv, (chr)'X');
+ addchr(cv, (chr)'Y');
}
-
- /* otherwise, none */
- if (cases)
- return allcases(v, c);
- cv = getcvec(v, 1, 0, 0);
- assert(cv != NULL);
- addchr(cv, (chr)c);
return cv;
+ }
+
+ /* otherwise, none */
+ if (cases) {
+ return allcases(v, c);
+ }
+ cv = getcvec(v, 1, 0, 0);
+ assert(cv != NULL);
+ addchr(cv, (chr)c);
+ return cv;
}
/*
@@ -706,10 +712,10 @@ int cases; /* all cases? */
*/
static struct cvec *
cclass(v, startp, endp, cases)
-struct vars *v;
-chr *startp; /* where the name starts */
-chr *endp; /* just past the end of the name */
-int cases; /* case-independent? */
+ struct vars *v;
+ chr *startp; /* where the name starts */
+ chr *endp; /* just past the end of the name */
+ int cases; /* case-independent? */
{
size_t len;
struct cvec *cv = NULL;
@@ -755,7 +761,7 @@ int cases; /* case-independent? */
*/
index = -1;
- for (namePtr = classNames, i = 0; *namePtr != NULL; namePtr++, i++) {
+ for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) {
if ((strlen(*namePtr) == len) && (strncmp(*namePtr, np, len) == 0)) {
index = i;
break;
@@ -772,129 +778,137 @@ int cases; /* case-independent? */
*/
switch((enum classes) index) {
- case CC_PRINT:
- case CC_ALNUM:
- cv = getcvec(v, NUM_ALPHA_CHAR,
- NUM_DIGIT_RANGE + NUM_ALPHA_RANGE, 0);
- if (cv) {
- for (i = 0; i < NUM_ALPHA_CHAR; i++) {
- addchr(cv, alphaCharTable[i]);
- }
- for (i = 0; i < NUM_ALPHA_RANGE; i++) {
- addrange(cv, alphaRangeTable[i].start,
- alphaRangeTable[i].end);
- }
- for (i = 0; i < NUM_DIGIT_RANGE; i++) {
- addrange(cv, digitRangeTable[i].start,
- digitRangeTable[i].end);
- }
+ case CC_PRINT:
+ case CC_ALNUM:
+ cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE, 0);
+ if (cv) {
+ for (i=0 ; i<NUM_ALPHA_CHAR ; i++) {
+ addchr(cv, alphaCharTable[i]);
}
- break;
- case CC_ALPHA:
- cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE, 0);
- if (cv) {
- for (i = 0; i < NUM_ALPHA_RANGE; i++) {
- addrange(cv, alphaRangeTable[i].start,
- alphaRangeTable[i].end);
- }
- for (i = 0; i < NUM_ALPHA_CHAR; i++) {
- addchr(cv, alphaCharTable[i]);
- }
+ for (i=0 ; i<NUM_ALPHA_RANGE ; i++) {
+ addrange(cv, alphaRangeTable[i].start,
+ alphaRangeTable[i].end);
}
- break;
- case CC_ASCII:
- cv = getcvec(v, 0, 1, 0);
- if (cv) {
- addrange(cv, 0, 0x7f);
+ for (i=0 ; i<NUM_DIGIT_RANGE ; i++) {
+ addrange(cv, digitRangeTable[i].start,
+ digitRangeTable[i].end);
}
- break;
- case CC_BLANK:
- cv = getcvec(v, 2, 0, 0);
- addchr(cv, '\t');
- addchr(cv, ' ');
- break;
- case CC_CNTRL:
- cv = getcvec(v, 0, 2, 0);
- addrange(cv, 0x0, 0x1f);
- addrange(cv, 0x7f, 0x9f);
- break;
- case CC_DIGIT:
- cv = getcvec(v, 0, NUM_DIGIT_RANGE, 0);
- if (cv) {
- for (i = 0; i < NUM_DIGIT_RANGE; i++) {
- addrange(cv, digitRangeTable[i].start,
- digitRangeTable[i].end);
- }
+ }
+ break;
+ case CC_ALPHA:
+ cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE, 0);
+ if (cv) {
+ for (i=0 ; i<NUM_ALPHA_RANGE ; i++) {
+ addrange(cv, alphaRangeTable[i].start,
+ alphaRangeTable[i].end);
}
- break;
- case CC_PUNCT:
- cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE, 0);
- if (cv) {
- for (i = 0; i < NUM_PUNCT_RANGE; i++) {
- addrange(cv, punctRangeTable[i].start,
- punctRangeTable[i].end);
- }
- for (i = 0; i < NUM_PUNCT_CHAR; i++) {
- addchr(cv, punctCharTable[i]);
- }
+ for (i=0 ; i<NUM_ALPHA_CHAR ; i++) {
+ addchr(cv, alphaCharTable[i]);
}
- break;
- case CC_XDIGIT:
- cv = getcvec(v, 0, NUM_DIGIT_RANGE+2, 0);
- if (cv) {
- addrange(cv, '0', '9');
- addrange(cv, 'a', 'f');
- addrange(cv, 'A', 'F');
+ }
+ break;
+ case CC_ASCII:
+ cv = getcvec(v, 0, 1, 0);
+ if (cv) {
+ addrange(cv, 0, 0x7f);
+ }
+ break;
+ case CC_BLANK:
+ cv = getcvec(v, 2, 0, 0);
+ addchr(cv, '\t');
+ addchr(cv, ' ');
+ break;
+ case CC_CNTRL:
+ cv = getcvec(v, 0, 2, 0);
+ addrange(cv, 0x0, 0x1f);
+ addrange(cv, 0x7f, 0x9f);
+ break;
+ case CC_DIGIT:
+ cv = getcvec(v, 0, NUM_DIGIT_RANGE, 0);
+ if (cv) {
+ for (i=0 ; i<NUM_DIGIT_RANGE ; i++) {
+ addrange(cv, digitRangeTable[i].start,
+ digitRangeTable[i].end);
}
- break;
- case CC_SPACE:
- cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE, 0);
- if (cv) {
- for (i = 0; i < NUM_SPACE_RANGE; i++) {
- addrange(cv, spaceRangeTable[i].start,
- spaceRangeTable[i].end);
- }
- for (i = 0; i < NUM_SPACE_CHAR; i++) {
- addchr(cv, spaceCharTable[i]);
- }
+ }
+ break;
+ case CC_PUNCT:
+ cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE, 0);
+ if (cv) {
+ for (i=0 ; i<NUM_PUNCT_RANGE ; i++) {
+ addrange(cv, punctRangeTable[i].start,
+ punctRangeTable[i].end);
}
- break;
- case CC_LOWER:
- cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE, 0);
- if (cv) {
- for (i = 0; i < NUM_LOWER_RANGE; i++) {
- addrange(cv, lowerRangeTable[i].start,
- lowerRangeTable[i].end);
- }
- for (i = 0; i < NUM_LOWER_CHAR; i++) {
- addchr(cv, lowerCharTable[i]);
- }
+ for (i=0 ; i<NUM_PUNCT_CHAR ; i++) {
+ addchr(cv, punctCharTable[i]);
}
- break;
- case CC_UPPER:
- cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE, 0);
- if (cv) {
- for (i = 0; i < NUM_UPPER_RANGE; i++) {
- addrange(cv, upperRangeTable[i].start,
- upperRangeTable[i].end);
- }
- for (i = 0; i < NUM_UPPER_CHAR; i++) {
- addchr(cv, upperCharTable[i]);
- }
+ }
+ break;
+ case CC_XDIGIT:
+ /*
+ * This is a 3 instead of (NUM_DIGIT_RANGE+2) because I've no
+ * idea how to define the digits 'a' through 'f' in
+ * non-western locales. The concept is quite possibly non
+ * portable, or only used in contexts where the characters
+ * used would be the western ones anyway! Whatever is
+ * actually the case, the number of ranges is fixed (until
+ * someone comes up with a better arrangement!)
+ */
+ cv = getcvec(v, 0, 3, 0);
+ if (cv) {
+ addrange(cv, '0', '9');
+ addrange(cv, 'a', 'f');
+ addrange(cv, 'A', 'F');
+ }
+ break;
+ case CC_SPACE:
+ cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE, 0);
+ if (cv) {
+ for (i=0 ; i<NUM_SPACE_RANGE ; i++) {
+ addrange(cv, spaceRangeTable[i].start,
+ spaceRangeTable[i].end);
}
- break;
- case CC_GRAPH:
- cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE, 0);
- if (cv) {
- for (i = 0; i < NUM_GRAPH_RANGE; i++) {
- addrange(cv, graphRangeTable[i].start,
- graphRangeTable[i].end);
- }
- for (i = 0; i < NUM_GRAPH_CHAR; i++) {
- addchr(cv, graphCharTable[i]);
- }
+ for (i=0 ; i<NUM_SPACE_CHAR ; i++) {
+ addchr(cv, spaceCharTable[i]);
}
- break;
+ }
+ break;
+ case CC_LOWER:
+ cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE, 0);
+ if (cv) {
+ for (i=0 ; i<NUM_LOWER_RANGE ; i++) {
+ addrange(cv, lowerRangeTable[i].start,
+ lowerRangeTable[i].end);
+ }
+ for (i=0 ; i<NUM_LOWER_CHAR ; i++) {
+ addchr(cv, lowerCharTable[i]);
+ }
+ }
+ break;
+ case CC_UPPER:
+ cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE, 0);
+ if (cv) {
+ for (i=0 ; i<NUM_UPPER_RANGE ; i++) {
+ addrange(cv, upperRangeTable[i].start,
+ upperRangeTable[i].end);
+ }
+ for (i=0 ; i<NUM_UPPER_CHAR ; i++) {
+ addchr(cv, upperCharTable[i]);
+ }
+ }
+ break;
+ case CC_GRAPH:
+ cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE, 0);
+ if (cv) {
+ for (i=0 ; i<NUM_GRAPH_RANGE ; i++) {
+ addrange(cv, graphRangeTable[i].start,
+ graphRangeTable[i].end);
+ }
+ for (i=0 ; i<NUM_GRAPH_CHAR ; i++) {
+ addchr(cv, graphCharTable[i]);
+ }
+ }
+ break;
}
if (cv == NULL) {
ERR(REG_ESPACE);
@@ -910,28 +924,28 @@ int cases; /* case-independent? */
*/
static struct cvec *
allcases(v, pc)
-struct vars *v;
-pchr pc;
+ struct vars *v;
+ pchr pc;
{
- struct cvec *cv;
- chr c = (chr)pc;
- chr lc, uc, tc;
-
- lc = Tcl_UniCharToLower((chr)c);
- uc = Tcl_UniCharToUpper((chr)c);
- tc = Tcl_UniCharToTitle((chr)c);
-
- if (tc != uc) {
- cv = getcvec(v, 3, 0, 0);
- addchr(cv, tc);
- } else {
- cv = getcvec(v, 2, 0, 0);
- }
- addchr(cv, lc);
- if (lc != uc) {
- addchr(cv, uc);
- }
- return cv;
+ struct cvec *cv;
+ chr c = (chr)pc;
+ chr lc, uc, tc;
+
+ lc = Tcl_UniCharToLower((chr)c);
+ uc = Tcl_UniCharToUpper((chr)c);
+ tc = Tcl_UniCharToTitle((chr)c);
+
+ if (tc != uc) {
+ cv = getcvec(v, 3, 0, 0);
+ addchr(cv, tc);
+ } else {
+ cv = getcvec(v, 2, 0, 0);
+ }
+ addchr(cv, lc);
+ if (lc != uc) {
+ addchr(cv, uc);
+ }
+ return cv;
}
/*
@@ -944,11 +958,11 @@ pchr pc;
*/
static int /* 0 for equal, nonzero for unequal */
cmp(x, y, len)
-CONST chr *x;
-CONST chr *y;
-size_t len; /* exact length of comparison */
+ CONST chr *x;
+ CONST chr *y;
+ size_t len; /* exact length of comparison */
{
- return memcmp(VS(x), VS(y), len*sizeof(chr));
+ return memcmp(VS(x), VS(y), len*sizeof(chr));
}
/*
@@ -961,12 +975,14 @@ size_t len; /* exact length of comparison */
*/
static int /* 0 for equal, nonzero for unequal */
casecmp(x, y, len)
-CONST chr *x;
-CONST chr *y;
-size_t len; /* exact length of comparison */
+ CONST chr *x;
+ CONST chr *y;
+ size_t len; /* exact length of comparison */
{
- for ( ; len > 0; len--, x++, y++)
- if ((*x != *y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y)))
+ for (; len > 0; len--, x++, y++) {
+ if ((*x!=*y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y))) {
return 1;
+ }
+ }
return 0;
}
diff --git a/tests/reg.test b/tests/reg.test
index f35050b..58ebc55 100644
--- a/tests/reg.test
+++ b/tests/reg.test
@@ -9,7 +9,7 @@
#
# Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
#
-# RCS: @(#) $Id: reg.test,v 1.14 2002/06/22 04:19:47 dgp Exp $
+# RCS: @(#) $Id: reg.test,v 1.15 2002/07/29 10:56:08 dkf Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest 2
@@ -987,8 +987,14 @@ m 9 HLP {(?n)^(?![t#])\S+} "tk\n\n#\n#\nit0" it0
# flush any leftover complaints
doing 0 "flush"
+# Tests resulting from bugs reported by users
+test reg-31.1 {[[:xdigit:]] behaves correctly when followed by [[:space:]]} {
+ set str {2:::DebugWin32}
+ set re {([[:xdigit:]])([[:space:]]*)}
+ list [regexp $re $str match xdigit spaces] $match $xdigit $spaces
+ # Code used to produce {1 2:::DebugWin32 2 :::DebugWin32} !!!
+} {1 2 2 {}}
+
# cleanup
::tcltest::cleanupTests
return
-
-