diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2002-07-29 10:56:08 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2002-07-29 10:56:08 (GMT) |
commit | 392e6300e2782ea71b0e7e57f97e7b74ced0530b (patch) | |
tree | e6c38e79aa2217b6f74c03b96dea9f14f3e29f97 | |
parent | 8aac5314070c34799ffa1a70feb28b35584bc49a (diff) | |
download | tcl-392e6300e2782ea71b0e7e57f97e7b74ced0530b.zip tcl-392e6300e2782ea71b0e7e57f97e7b74ced0530b.tar.gz tcl-392e6300e2782ea71b0e7e57f97e7b74ced0530b.tar.bz2 |
Fixes for regexp issues raised in [Bug 578363].
Lots of thanks to pvgoran@users.sf.net for tracking them down!
Also made the RE files touched meet the Tcl Engineering Guidelines a bit better (they've a long way to go, but this is a start...)
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | generic/regc_cvec.c | 186 | ||||
-rw-r--r-- | generic/regc_locale.c | 720 | ||||
-rw-r--r-- | tests/reg.test | 12 |
4 files changed, 485 insertions, 443 deletions
@@ -1,3 +1,13 @@ +2002-07-29 Donal K. Fellows <fellowsd@cs.man.ac.uk> + + * generic/regc_locale.c (cclass): [[:xdigit:]] is only a defined + concept on western characters, so should not allow any unicode + digit, and hence number of ranges in [[:xdigit:]] is fixed. + * tests/reg.test: Added test to detect the bug. + * generic/regc_cvec.c (newcvec): Corrected initial size value in + character vector structure. [Bug 578363] Many thanks to + pvgoran@users.sf.net for tracking this down. + 2002-07-28 Miguel Sofer <msofer@users.sourceforge.net> * generic/tcl.h: diff --git a/generic/regc_cvec.c b/generic/regc_cvec.c index 86765ea..d2d56fc 100644 --- a/generic/regc_cvec.c +++ b/generic/regc_cvec.c @@ -36,26 +36,27 @@ */ static struct cvec * newcvec(nchrs, nranges, nmcces) -int nchrs; /* to hold this many chrs... */ -int nranges; /* ... and this many ranges... */ -int nmcces; /* ... and this many MCCEs */ + int nchrs; /* to hold this many chrs... */ + int nranges; /* ... and this many ranges... */ + int nmcces; /* ... 
and this many MCCEs */ { - size_t n; - size_t nc; - struct cvec *cv; + size_t n; + size_t nc; + struct cvec *cv; - nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1) + (size_t)nranges*2; - n = sizeof(struct cvec) + (size_t)(nmcces-1)*sizeof(chr *) + - nc*sizeof(chr); - cv = (struct cvec *)MALLOC(n); - if (cv == NULL) - return NULL; - cv->chrspace = nc; - cv->chrs = (chr *)&cv->mcces[nmcces]; /* chrs just after MCCE ptrs */ - cv->mccespace = nmcces; - cv->ranges = cv->chrs + nchrs + nmcces*(MAXMCCE+1); - cv->rangespace = nranges; - return clearcvec(cv); + nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1) + (size_t)nranges*2; + n = sizeof(struct cvec) + (size_t)(nmcces-1)*sizeof(chr *) + + nc*sizeof(chr); + cv = (struct cvec *)MALLOC(n); + if (cv == NULL) { + return NULL; + } + cv->chrspace = nchrs; + cv->chrs = (chr *)&cv->mcces[nmcces]; /* chrs just after MCCE ptrs */ + cv->mccespace = nmcces; + cv->ranges = cv->chrs + nchrs + nmcces*(MAXMCCE+1); + cv->rangespace = nranges; + return clearcvec(cv); } /* @@ -65,20 +66,21 @@ int nmcces; /* ... 
and this many MCCEs */ */ static struct cvec * clearcvec(cv) -struct cvec *cv; + struct cvec *cv; /* character vector */ { - int i; + int i; - assert(cv != NULL); - cv->nchrs = 0; - assert(cv->chrs == (chr *)&cv->mcces[cv->mccespace]); - cv->nmcces = 0; - cv->nmccechrs = 0; - cv->nranges = 0; - for (i = 0; i < cv->mccespace; i++) - cv->mcces[i] = NULL; + assert(cv != NULL); + cv->nchrs = 0; + assert(cv->chrs == (chr *)&cv->mcces[cv->mccespace]); + cv->nmcces = 0; + cv->nmccechrs = 0; + cv->nranges = 0; + for (i = 0; i < cv->mccespace; i++) { + cv->mcces[i] = NULL; + } - return cv; + return cv; } /* @@ -87,11 +89,11 @@ struct cvec *cv; */ static VOID addchr(cv, c) -struct cvec *cv; -pchr c; + struct cvec *cv; /* character vector */ + pchr c; /* character to add */ { - assert(cv->nchrs < cv->chrspace - cv->nmccechrs); - cv->chrs[cv->nchrs++] = (chr)c; + assert(cv->nchrs < cv->chrspace - cv->nmccechrs); + cv->chrs[cv->nchrs++] = (chr)c; } /* @@ -100,14 +102,14 @@ pchr c; */ static VOID addrange(cv, from, to) -struct cvec *cv; -pchr from; -pchr to; + struct cvec *cv; /* character vector */ + pchr from; /* first character of range */ + pchr to; /* last character of range */ { - assert(cv->nranges < cv->rangespace); - cv->ranges[cv->nranges*2] = (chr)from; - cv->ranges[cv->nranges*2 + 1] = (chr)to; - cv->nranges++; + assert(cv->nranges < cv->rangespace); + cv->ranges[cv->nranges*2] = (chr)from; + cv->ranges[cv->nranges*2 + 1] = (chr)to; + cv->nranges++; } /* @@ -116,49 +118,55 @@ pchr to; */ static VOID addmcce(cv, startp, endp) -struct cvec *cv; -chr *startp; /* beginning of text */ -chr *endp; /* just past end of text */ + struct cvec *cv; /* character vector */ + chr *startp; /* beginning of text */ + chr *endp; /* just past end of text */ { - int len; - int i; - chr *s; - chr *d; + int len; + int i; + chr *s; + chr *d; - if (startp == NULL && endp == NULL) - return; - len = endp - startp; - assert(len > 0); - assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs); - 
assert(cv->nmcces < cv->mccespace); - d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1]; - cv->mcces[cv->nmcces++] = d; - for (s = startp, i = len; i > 0; s++, i--) - *d++ = *s; - *d++ = 0; /* endmarker */ - assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]); - cv->nmccechrs += len + 1; + if (startp == NULL && endp == NULL) { + return; + } + len = endp - startp; + assert(len > 0); + assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs); + assert(cv->nmcces < cv->mccespace); + d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1]; + cv->mcces[cv->nmcces++] = d; + for (s = startp, i = len; i > 0; s++, i--) { + *d++ = *s; + } + *d++ = 0; /* endmarker */ + assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]); + cv->nmccechrs += len + 1; } /* - haschr - does a cvec contain this chr? ^ static int haschr(struct cvec *, pchr); */ -static int /* predicate */ +static int /* predicate */ haschr(cv, c) -struct cvec *cv; -pchr c; + struct cvec *cv; /* character vector */ + pchr c; /* character to test for */ { - int i; - chr *p; + int i; + chr *p; - for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) - if (*p == c) - return 1; - for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) - if (*p <= c && c <= *(p+1)) - return 1; - return 0; + for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) { + if (*p == c) { + return 1; + } + } + for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) { + if ((*p <= c) && (c <= *(p+1))) { + return 1; + } + } + return 0; } /* @@ -167,23 +175,25 @@ pchr c; */ static struct cvec * getcvec(v, nchrs, nranges, nmcces) -struct vars *v; -int nchrs; /* to hold this many chrs... */ -int nranges; /* ... and this many ranges... */ -int nmcces; /* ... and this many MCCEs */ + struct vars *v; /* context */ + int nchrs; /* to hold this many chrs... */ + int nranges; /* ... and this many ranges... */ + int nmcces; /* ... 
and this many MCCEs */ { - if (v->cv != NULL && nchrs <= v->cv->chrspace && - nranges <= v->cv->rangespace && - nmcces <= v->cv->mccespace) - return clearcvec(v->cv); + if (v->cv != NULL && nchrs <= v->cv->chrspace && + nranges <= v->cv->rangespace && nmcces <= v->cv->mccespace) { + return clearcvec(v->cv); + } - if (v->cv != NULL) - freecvec(v->cv); - v->cv = newcvec(nchrs, nranges, nmcces); - if (v->cv == NULL) - ERR(REG_ESPACE); + if (v->cv != NULL) { + freecvec(v->cv); + } + v->cv = newcvec(nchrs, nranges, nmcces); + if (v->cv == NULL) { + ERR(REG_ESPACE); + } - return v->cv; + return v->cv; } /* @@ -192,7 +202,7 @@ int nmcces; /* ... and this many MCCEs */ */ static VOID freecvec(cv) -struct cvec *cv; + struct cvec *cv; /* character vector */ { - FREE(cv); + FREE(cv); } diff --git a/generic/regc_locale.c b/generic/regc_locale.c index bc5ef3d..3374007 100644 --- a/generic/regc_locale.c +++ b/generic/regc_locale.c @@ -9,111 +9,111 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. 
* - * RCS: @(#) $Id: regc_locale.c,v 1.8 2002/02/07 00:48:03 hobbs Exp $ + * RCS: @(#) $Id: regc_locale.c,v 1.9 2002/07/29 10:56:08 dkf Exp $ */ /* ASCII character-name table */ static struct cname { - char *name; - char code; + char *name; + char code; } cnames[] = { - {"NUL", '\0'}, - {"SOH", '\001'}, - {"STX", '\002'}, - {"ETX", '\003'}, - {"EOT", '\004'}, - {"ENQ", '\005'}, - {"ACK", '\006'}, - {"BEL", '\007'}, - {"alert", '\007'}, - {"BS", '\010'}, - {"backspace", '\b'}, - {"HT", '\011'}, - {"tab", '\t'}, - {"LF", '\012'}, - {"newline", '\n'}, - {"VT", '\013'}, - {"vertical-tab", '\v'}, - {"FF", '\014'}, - {"form-feed", '\f'}, - {"CR", '\015'}, - {"carriage-return", '\r'}, - {"SO", '\016'}, - {"SI", '\017'}, - {"DLE", '\020'}, - {"DC1", '\021'}, - {"DC2", '\022'}, - {"DC3", '\023'}, - {"DC4", '\024'}, - {"NAK", '\025'}, - {"SYN", '\026'}, - {"ETB", '\027'}, - {"CAN", '\030'}, - {"EM", '\031'}, - {"SUB", '\032'}, - {"ESC", '\033'}, - {"IS4", '\034'}, - {"FS", '\034'}, - {"IS3", '\035'}, - {"GS", '\035'}, - {"IS2", '\036'}, - {"RS", '\036'}, - {"IS1", '\037'}, - {"US", '\037'}, - {"space", ' '}, - {"exclamation-mark", '!'}, - {"quotation-mark", '"'}, - {"number-sign", '#'}, - {"dollar-sign", '$'}, - {"percent-sign", '%'}, - {"ampersand", '&'}, - {"apostrophe", '\''}, - {"left-parenthesis", '('}, - {"right-parenthesis", ')'}, - {"asterisk", '*'}, - {"plus-sign", '+'}, - {"comma", ','}, - {"hyphen", '-'}, - {"hyphen-minus", '-'}, - {"period", '.'}, - {"full-stop", '.'}, - {"slash", '/'}, - {"solidus", '/'}, - {"zero", '0'}, - {"one", '1'}, - {"two", '2'}, - {"three", '3'}, - {"four", '4'}, - {"five", '5'}, - {"six", '6'}, - {"seven", '7'}, - {"eight", '8'}, - {"nine", '9'}, - {"colon", ':'}, - {"semicolon", ';'}, - {"less-than-sign", '<'}, - {"equals-sign", '='}, - {"greater-than-sign", '>'}, - {"question-mark", '?'}, - {"commercial-at", '@'}, - {"left-square-bracket", '['}, - {"backslash", '\\'}, - {"reverse-solidus", '\\'}, - {"right-square-bracket", ']'}, - 
{"circumflex", '^'}, - {"circumflex-accent", '^'}, - {"underscore", '_'}, - {"low-line", '_'}, - {"grave-accent", '`'}, - {"left-brace", '{'}, - {"left-curly-bracket", '{'}, - {"vertical-line", '|'}, - {"right-brace", '}'}, - {"right-curly-bracket", '}'}, - {"tilde", '~'}, - {"DEL", '\177'}, - {NULL, 0} + {"NUL", '\0'}, + {"SOH", '\001'}, + {"STX", '\002'}, + {"ETX", '\003'}, + {"EOT", '\004'}, + {"ENQ", '\005'}, + {"ACK", '\006'}, + {"BEL", '\007'}, + {"alert", '\007'}, + {"BS", '\010'}, + {"backspace", '\b'}, + {"HT", '\011'}, + {"tab", '\t'}, + {"LF", '\012'}, + {"newline", '\n'}, + {"VT", '\013'}, + {"vertical-tab", '\v'}, + {"FF", '\014'}, + {"form-feed", '\f'}, + {"CR", '\015'}, + {"carriage-return", '\r'}, + {"SO", '\016'}, + {"SI", '\017'}, + {"DLE", '\020'}, + {"DC1", '\021'}, + {"DC2", '\022'}, + {"DC3", '\023'}, + {"DC4", '\024'}, + {"NAK", '\025'}, + {"SYN", '\026'}, + {"ETB", '\027'}, + {"CAN", '\030'}, + {"EM", '\031'}, + {"SUB", '\032'}, + {"ESC", '\033'}, + {"IS4", '\034'}, + {"FS", '\034'}, + {"IS3", '\035'}, + {"GS", '\035'}, + {"IS2", '\036'}, + {"RS", '\036'}, + {"IS1", '\037'}, + {"US", '\037'}, + {"space", ' '}, + {"exclamation-mark",'!'}, + {"quotation-mark", '"'}, + {"number-sign", '#'}, + {"dollar-sign", '$'}, + {"percent-sign", '%'}, + {"ampersand", '&'}, + {"apostrophe", '\''}, + {"left-parenthesis",'('}, + {"right-parenthesis", ')'}, + {"asterisk", '*'}, + {"plus-sign", '+'}, + {"comma", ','}, + {"hyphen", '-'}, + {"hyphen-minus", '-'}, + {"period", '.'}, + {"full-stop", '.'}, + {"slash", '/'}, + {"solidus", '/'}, + {"zero", '0'}, + {"one", '1'}, + {"two", '2'}, + {"three", '3'}, + {"four", '4'}, + {"five", '5'}, + {"six", '6'}, + {"seven", '7'}, + {"eight", '8'}, + {"nine", '9'}, + {"colon", ':'}, + {"semicolon", ';'}, + {"less-than-sign", '<'}, + {"equals-sign", '='}, + {"greater-than-sign", '>'}, + {"question-mark", '?'}, + {"commercial-at", '@'}, + {"left-square-bracket", '['}, + {"backslash", '\\'}, + {"reverse-solidus", '\\'}, + 
{"right-square-bracket", ']'}, + {"circumflex", '^'}, + {"circumflex-accent", '^'}, + {"underscore", '_'}, + {"low-line", '_'}, + {"grave-accent", '`'}, + {"left-brace", '{'}, + {"left-curly-bracket", '{'}, + {"vertical-line", '|'}, + {"right-brace", '}'}, + {"right-curly-bracket", '}'}, + {"tilde", '~'}, + {"DEL", '\177'}, + {NULL, 0} }; /* Unicode character-class tables */ @@ -526,9 +526,9 @@ static chr graphCharTable[] = { */ static int nmcces(v) -struct vars *v; + struct vars *v; { - return 0; + return 0; } /* @@ -537,9 +537,9 @@ struct vars *v; */ static int nleaders(v) -struct vars *v; + struct vars *v; { - return 0; + return 0; } /* @@ -548,10 +548,10 @@ struct vars *v; */ static struct cvec * allmcces(v, cv) -struct vars *v; -struct cvec *cv; /* this is supposed to have enough room */ + struct vars *v; + struct cvec *cv; /* this is supposed to have enough room */ { - return clearcvec(cv); + return clearcvec(cv); } /* @@ -560,36 +560,40 @@ struct cvec *cv; /* this is supposed to have enough room */ */ static celt element(v, startp, endp) -struct vars *v; -chr *startp; /* points to start of name */ -chr *endp; /* points just past end of name */ + struct vars *v; + chr *startp; /* points to start of name */ + chr *endp; /* points just past end of name */ { - struct cname *cn; - size_t len; - Tcl_DString ds; - CONST char *np; - - /* generic: one-chr names stand for themselves */ - assert(startp < endp); - len = endp - startp; - if (len == 1) - return *startp; - - NOTE(REG_ULOCALE); - - /* search table */ - Tcl_DStringInit(&ds); - np = Tcl_UniCharToUtfDString(startp, (int)len, &ds); - for (cn = cnames; cn->name != NULL; cn++) - if (strlen(cn->name) == len && strncmp(cn->name, np, len) == 0) - break; /* NOTE BREAK OUT */ - Tcl_DStringFree(&ds); - if (cn->name != NULL) - return CHR(cn->code); - - /* couldn't find it */ - ERR(REG_ECOLLATE); - return 0; + struct cname *cn; + size_t len; + Tcl_DString ds; + CONST char *np; + + /* generic: one-chr names stand for 
themselves */ + assert(startp < endp); + len = endp - startp; + if (len == 1) { + return *startp; + } + + NOTE(REG_ULOCALE); + + /* search table */ + Tcl_DStringInit(&ds); + np = Tcl_UniCharToUtfDString(startp, (int)len, &ds); + for (cn=cnames; cn->name!=NULL; cn++) { + if (strlen(cn->name)==len && strncmp(cn->name, np, len)==0) { + break; /* NOTE BREAK OUT */ + } + } + Tcl_DStringFree(&ds); + if (cn->name != NULL) { + return CHR(cn->code); + } + + /* couldn't find it */ + ERR(REG_ECOLLATE); + return 0; } /* @@ -598,56 +602,56 @@ chr *endp; /* points just past end of name */ */ static struct cvec * range(v, a, b, cases) -struct vars *v; -celt a; -celt b; /* might equal a */ -int cases; /* case-independent? */ + struct vars *v; + celt a; + celt b; /* might equal a */ + int cases; /* case-independent? */ { - int nchrs; - struct cvec *cv; - celt c, lc, uc, tc; + int nchrs; + struct cvec *cv; + celt c, lc, uc, tc; - if (a != b && !before(a, b)) { - ERR(REG_ERANGE); - return NULL; - } + if (a != b && !before(a, b)) { + ERR(REG_ERANGE); + return NULL; + } - if (!cases) { /* easy version */ - cv = getcvec(v, 0, 1, 0); - NOERRN(); - addrange(cv, a, b); - return cv; - } + if (!cases) { /* easy version */ + cv = getcvec(v, 0, 1, 0); + NOERRN(); + addrange(cv, a, b); + return cv; + } - /* - * When case-independent, it's hard to decide when cvec ranges are - * usable, so for now at least, we won't try. We allocate enough - * space for two case variants plus a little extra for the two - * title case variants. - */ + /* + * When case-independent, it's hard to decide when cvec ranges are + * usable, so for now at least, we won't try. We allocate enough + * space for two case variants plus a little extra for the two + * title case variants. 
+ */ - nchrs = (b - a + 1)*2 + 4; + nchrs = (b - a + 1)*2 + 4; - cv = getcvec(v, nchrs, 0, 0); - NOERRN(); + cv = getcvec(v, nchrs, 0, 0); + NOERRN(); - for (c = a; c <= b; c++) { - addchr(cv, c); - lc = Tcl_UniCharToLower((chr)c); - uc = Tcl_UniCharToUpper((chr)c); - tc = Tcl_UniCharToTitle((chr)c); - if (c != lc) { - addchr(cv, lc); - } - if (c != uc) { - addchr(cv, uc); - } - if (c != tc && tc != uc) { - addchr(cv, tc); - } + for (c=a; c<=b; c++) { + addchr(cv, c); + lc = Tcl_UniCharToLower((chr)c); + uc = Tcl_UniCharToUpper((chr)c); + tc = Tcl_UniCharToTitle((chr)c); + if (c != lc) { + addchr(cv, lc); } + if (c != uc) { + addchr(cv, uc); + } + if (c != tc && tc != uc) { + addchr(cv, tc); + } + } - return cv; + return cv; } /* @@ -656,13 +660,14 @@ int cases; /* case-independent? */ */ static int /* predicate */ before(x, y) -celt x; -celt y; + celt x; + celt y; { - /* trivial because no MCCEs */ - if (x < y) - return 1; - return 0; + /* trivial because no MCCEs */ + if (x < y) { + return 1; + } + return 0; } /* @@ -672,31 +677,32 @@ celt y; */ static struct cvec * eclass(v, c, cases) -struct vars *v; -celt c; -int cases; /* all cases? */ + struct vars *v; + celt c; + int cases; /* all cases? 
*/ { - struct cvec *cv; - - /* crude fake equivalence class for testing */ - if ((v->cflags&REG_FAKE) && c == 'x') { - cv = getcvec(v, 4, 0, 0); - addchr(cv, (chr)'x'); - addchr(cv, (chr)'y'); - if (cases) { - addchr(cv, (chr)'X'); - addchr(cv, (chr)'Y'); - } - return cv; + struct cvec *cv; + + /* crude fake equivalence class for testing */ + if ((v->cflags&REG_FAKE) && c == 'x') { + cv = getcvec(v, 4, 0, 0); + addchr(cv, (chr)'x'); + addchr(cv, (chr)'y'); + if (cases) { + addchr(cv, (chr)'X'); + addchr(cv, (chr)'Y'); } - - /* otherwise, none */ - if (cases) - return allcases(v, c); - cv = getcvec(v, 1, 0, 0); - assert(cv != NULL); - addchr(cv, (chr)c); return cv; + } + + /* otherwise, none */ + if (cases) { + return allcases(v, c); + } + cv = getcvec(v, 1, 0, 0); + assert(cv != NULL); + addchr(cv, (chr)c); + return cv; } /* @@ -706,10 +712,10 @@ int cases; /* all cases? */ */ static struct cvec * cclass(v, startp, endp, cases) -struct vars *v; -chr *startp; /* where the name starts */ -chr *endp; /* just past the end of the name */ -int cases; /* case-independent? */ + struct vars *v; + chr *startp; /* where the name starts */ + chr *endp; /* just past the end of the name */ + int cases; /* case-independent? */ { size_t len; struct cvec *cv = NULL; @@ -755,7 +761,7 @@ int cases; /* case-independent? */ */ index = -1; - for (namePtr = classNames, i = 0; *namePtr != NULL; namePtr++, i++) { + for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) { if ((strlen(*namePtr) == len) && (strncmp(*namePtr, np, len) == 0)) { index = i; break; @@ -772,129 +778,137 @@ int cases; /* case-independent? 
*/ */ switch((enum classes) index) { - case CC_PRINT: - case CC_ALNUM: - cv = getcvec(v, NUM_ALPHA_CHAR, - NUM_DIGIT_RANGE + NUM_ALPHA_RANGE, 0); - if (cv) { - for (i = 0; i < NUM_ALPHA_CHAR; i++) { - addchr(cv, alphaCharTable[i]); - } - for (i = 0; i < NUM_ALPHA_RANGE; i++) { - addrange(cv, alphaRangeTable[i].start, - alphaRangeTable[i].end); - } - for (i = 0; i < NUM_DIGIT_RANGE; i++) { - addrange(cv, digitRangeTable[i].start, - digitRangeTable[i].end); - } + case CC_PRINT: + case CC_ALNUM: + cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE, 0); + if (cv) { + for (i=0 ; i<NUM_ALPHA_CHAR ; i++) { + addchr(cv, alphaCharTable[i]); } - break; - case CC_ALPHA: - cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE, 0); - if (cv) { - for (i = 0; i < NUM_ALPHA_RANGE; i++) { - addrange(cv, alphaRangeTable[i].start, - alphaRangeTable[i].end); - } - for (i = 0; i < NUM_ALPHA_CHAR; i++) { - addchr(cv, alphaCharTable[i]); - } + for (i=0 ; i<NUM_ALPHA_RANGE ; i++) { + addrange(cv, alphaRangeTable[i].start, + alphaRangeTable[i].end); } - break; - case CC_ASCII: - cv = getcvec(v, 0, 1, 0); - if (cv) { - addrange(cv, 0, 0x7f); + for (i=0 ; i<NUM_DIGIT_RANGE ; i++) { + addrange(cv, digitRangeTable[i].start, + digitRangeTable[i].end); } - break; - case CC_BLANK: - cv = getcvec(v, 2, 0, 0); - addchr(cv, '\t'); - addchr(cv, ' '); - break; - case CC_CNTRL: - cv = getcvec(v, 0, 2, 0); - addrange(cv, 0x0, 0x1f); - addrange(cv, 0x7f, 0x9f); - break; - case CC_DIGIT: - cv = getcvec(v, 0, NUM_DIGIT_RANGE, 0); - if (cv) { - for (i = 0; i < NUM_DIGIT_RANGE; i++) { - addrange(cv, digitRangeTable[i].start, - digitRangeTable[i].end); - } + } + break; + case CC_ALPHA: + cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE, 0); + if (cv) { + for (i=0 ; i<NUM_ALPHA_RANGE ; i++) { + addrange(cv, alphaRangeTable[i].start, + alphaRangeTable[i].end); } - break; - case CC_PUNCT: - cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE, 0); - if (cv) { - for (i = 0; i < NUM_PUNCT_RANGE; i++) { - 
addrange(cv, punctRangeTable[i].start, - punctRangeTable[i].end); - } - for (i = 0; i < NUM_PUNCT_CHAR; i++) { - addchr(cv, punctCharTable[i]); - } + for (i=0 ; i<NUM_ALPHA_CHAR ; i++) { + addchr(cv, alphaCharTable[i]); } - break; - case CC_XDIGIT: - cv = getcvec(v, 0, NUM_DIGIT_RANGE+2, 0); - if (cv) { - addrange(cv, '0', '9'); - addrange(cv, 'a', 'f'); - addrange(cv, 'A', 'F'); + } + break; + case CC_ASCII: + cv = getcvec(v, 0, 1, 0); + if (cv) { + addrange(cv, 0, 0x7f); + } + break; + case CC_BLANK: + cv = getcvec(v, 2, 0, 0); + addchr(cv, '\t'); + addchr(cv, ' '); + break; + case CC_CNTRL: + cv = getcvec(v, 0, 2, 0); + addrange(cv, 0x0, 0x1f); + addrange(cv, 0x7f, 0x9f); + break; + case CC_DIGIT: + cv = getcvec(v, 0, NUM_DIGIT_RANGE, 0); + if (cv) { + for (i=0 ; i<NUM_DIGIT_RANGE ; i++) { + addrange(cv, digitRangeTable[i].start, + digitRangeTable[i].end); } - break; - case CC_SPACE: - cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE, 0); - if (cv) { - for (i = 0; i < NUM_SPACE_RANGE; i++) { - addrange(cv, spaceRangeTable[i].start, - spaceRangeTable[i].end); - } - for (i = 0; i < NUM_SPACE_CHAR; i++) { - addchr(cv, spaceCharTable[i]); - } + } + break; + case CC_PUNCT: + cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE, 0); + if (cv) { + for (i=0 ; i<NUM_PUNCT_RANGE ; i++) { + addrange(cv, punctRangeTable[i].start, + punctRangeTable[i].end); } - break; - case CC_LOWER: - cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE, 0); - if (cv) { - for (i = 0; i < NUM_LOWER_RANGE; i++) { - addrange(cv, lowerRangeTable[i].start, - lowerRangeTable[i].end); - } - for (i = 0; i < NUM_LOWER_CHAR; i++) { - addchr(cv, lowerCharTable[i]); - } + for (i=0 ; i<NUM_PUNCT_CHAR ; i++) { + addchr(cv, punctCharTable[i]); } - break; - case CC_UPPER: - cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE, 0); - if (cv) { - for (i = 0; i < NUM_UPPER_RANGE; i++) { - addrange(cv, upperRangeTable[i].start, - upperRangeTable[i].end); - } - for (i = 0; i < NUM_UPPER_CHAR; i++) { - addchr(cv, 
upperCharTable[i]); - } + } + break; + case CC_XDIGIT: + /* + * This is a 3 instead of (NUM_DIGIT_RANGE+2) because I've no + * idea how to define the digits 'a' through 'f' in + * non-western locales. The concept is quite possibly non + * portable, or only used in contextx where the characters + * used would be the western ones anyway! Whatever is + * actually the case, the number of ranges is fixed (until + * someone comes up with a better arrangement!) + */ + cv = getcvec(v, 0, 3, 0); + if (cv) { + addrange(cv, '0', '9'); + addrange(cv, 'a', 'f'); + addrange(cv, 'A', 'F'); + } + break; + case CC_SPACE: + cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE, 0); + if (cv) { + for (i=0 ; i<NUM_SPACE_RANGE ; i++) { + addrange(cv, spaceRangeTable[i].start, + spaceRangeTable[i].end); } - break; - case CC_GRAPH: - cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE, 0); - if (cv) { - for (i = 0; i < NUM_GRAPH_RANGE; i++) { - addrange(cv, graphRangeTable[i].start, - graphRangeTable[i].end); - } - for (i = 0; i < NUM_GRAPH_CHAR; i++) { - addchr(cv, graphCharTable[i]); - } + for (i=0 ; i<NUM_SPACE_CHAR ; i++) { + addchr(cv, spaceCharTable[i]); } - break; + } + break; + case CC_LOWER: + cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE, 0); + if (cv) { + for (i=0 ; i<NUM_LOWER_RANGE ; i++) { + addrange(cv, lowerRangeTable[i].start, + lowerRangeTable[i].end); + } + for (i=0 ; i<NUM_LOWER_CHAR ; i++) { + addchr(cv, lowerCharTable[i]); + } + } + break; + case CC_UPPER: + cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE, 0); + if (cv) { + for (i=0 ; i<NUM_UPPER_RANGE ; i++) { + addrange(cv, upperRangeTable[i].start, + upperRangeTable[i].end); + } + for (i=0 ; i<NUM_UPPER_CHAR ; i++) { + addchr(cv, upperCharTable[i]); + } + } + break; + case CC_GRAPH: + cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE, 0); + if (cv) { + for (i=0 ; i<NUM_GRAPH_RANGE ; i++) { + addrange(cv, graphRangeTable[i].start, + graphRangeTable[i].end); + } + for (i=0 ; i<NUM_GRAPH_CHAR ; i++) { + addchr(cv, 
graphCharTable[i]); + } + } + break; } if (cv == NULL) { ERR(REG_ESPACE); @@ -910,28 +924,28 @@ int cases; /* case-independent? */ */ static struct cvec * allcases(v, pc) -struct vars *v; -pchr pc; + struct vars *v; + pchr pc; { - struct cvec *cv; - chr c = (chr)pc; - chr lc, uc, tc; - - lc = Tcl_UniCharToLower((chr)c); - uc = Tcl_UniCharToUpper((chr)c); - tc = Tcl_UniCharToTitle((chr)c); - - if (tc != uc) { - cv = getcvec(v, 3, 0, 0); - addchr(cv, tc); - } else { - cv = getcvec(v, 2, 0, 0); - } - addchr(cv, lc); - if (lc != uc) { - addchr(cv, uc); - } - return cv; + struct cvec *cv; + chr c = (chr)pc; + chr lc, uc, tc; + + lc = Tcl_UniCharToLower((chr)c); + uc = Tcl_UniCharToUpper((chr)c); + tc = Tcl_UniCharToTitle((chr)c); + + if (tc != uc) { + cv = getcvec(v, 3, 0, 0); + addchr(cv, tc); + } else { + cv = getcvec(v, 2, 0, 0); + } + addchr(cv, lc); + if (lc != uc) { + addchr(cv, uc); + } + return cv; } /* @@ -944,11 +958,11 @@ pchr pc; */ static int /* 0 for equal, nonzero for unequal */ cmp(x, y, len) -CONST chr *x; -CONST chr *y; -size_t len; /* exact length of comparison */ + CONST chr *x; + CONST chr *y; + size_t len; /* exact length of comparison */ { - return memcmp(VS(x), VS(y), len*sizeof(chr)); + return memcmp(VS(x), VS(y), len*sizeof(chr)); } /* @@ -961,12 +975,14 @@ size_t len; /* exact length of comparison */ */ static int /* 0 for equal, nonzero for unequal */ casecmp(x, y, len) -CONST chr *x; -CONST chr *y; -size_t len; /* exact length of comparison */ + CONST chr *x; + CONST chr *y; + size_t len; /* exact length of comparison */ { - for ( ; len > 0; len--, x++, y++) - if ((*x != *y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y))) + for (; len > 0; len--, x++, y++) { + if ((*x!=*y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y))) { return 1; + } + } return 0; } diff --git a/tests/reg.test b/tests/reg.test index f35050b..58ebc55 100644 --- a/tests/reg.test +++ b/tests/reg.test @@ -9,7 +9,7 @@ # # Copyright (c) 1998, 1999 Henry Spencer. 
All rights reserved. # -# RCS: @(#) $Id: reg.test,v 1.14 2002/06/22 04:19:47 dgp Exp $ +# RCS: @(#) $Id: reg.test,v 1.15 2002/07/29 10:56:08 dkf Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest 2 @@ -987,8 +987,14 @@ m 9 HLP {(?n)^(?![t#])\S+} "tk\n\n#\n#\nit0" it0 # flush any leftover complaints doing 0 "flush" +# Tests resulting from bugs reported by users +test reg-31.1 {[[:xdigit:]] behaves correctly when followed by [[:space:]]} { + set str {2:::DebugWin32} + set re {([[:xdigit:]])([[:space:]]*)} + list [regexp $re $str match xdigit spaces] $match $xdigit $spaces + # Code used to produce {1 2:::DebugWin32 2 :::DebugWin32} !!! +} {1 2 2 {}} + # cleanup ::tcltest::cleanupTests return - - |