diff options
author | stanton <stanton> | 1998-11-04 04:39:51 (GMT) |
---|---|---|
committer | stanton <stanton> | 1998-11-04 04:39:51 (GMT) |
commit | 89fbe663adddd77c676a1ab1139bb3c6e5c83e5a (patch) | |
tree | 5bea621521ff02de3b4373621910c8c6a3faf4c7 /generic/regc_cvec.c | |
parent | cb753ddd702424bc449165b39d0c9ddd46413c13 (diff) | |
download | tcl-89fbe663adddd77c676a1ab1139bb3c6e5c83e5a.zip tcl-89fbe663adddd77c676a1ab1139bb3c6e5c83e5a.tar.gz tcl-89fbe663adddd77c676a1ab1139bb3c6e5c83e5a.tar.bz2 |
Merged Henry's latest changes to add support for character ranges
in the cvec data type.
Added support for Unicode character classes in regular expressions.
We now support the following character classes:
alnum, alpha, blank, cntrl, digit, graph, lower,
print, punct, space, upper, xdigit.
These all follow the example set by the GNU regular expression package
for Java, except that "digit" only matches the ASCII '0'-'9'
characters.
Renamed tclUtf.h to tclUniData.c
Diffstat (limited to 'generic/regc_cvec.c')
-rw-r--r-- | generic/regc_cvec.c | 39 |
1 files changed, 32 insertions, 7 deletions
diff --git a/generic/regc_cvec.c b/generic/regc_cvec.c index 0650883..9e79df2 100644 --- a/generic/regc_cvec.c +++ b/generic/regc_cvec.c @@ -5,18 +5,19 @@ /* - newcvec - allocate a new cvec - ^ static struct cvec *newcvec(int, int); + ^ static struct cvec *newcvec(int, int, int); */ static struct cvec * -newcvec(nchrs, nmcces) +newcvec(nchrs, nranges, nmcces) int nchrs; /* to hold this many chrs... */ +int nranges; /* ... and this many ranges... */ int nmcces; /* ... and this many MCCEs */ { size_t n; size_t nc; struct cvec *cv; - nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1); + nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1) + (size_t)nranges*2; n = sizeof(struct cvec) + (size_t)(nmcces-1)*sizeof(chr *) + nc*sizeof(chr); cv = (struct cvec *)MALLOC(n); @@ -25,6 +26,8 @@ int nmcces; /* ... and this many MCCEs */ cv->chrspace = nc; cv->chrs = (chr *)&cv->mcces[nmcces]; /* chrs just after MCCE ptrs */ cv->mccespace = nmcces; + cv->ranges = cv->chrs + nchrs + nmcces*(MAXMCCE+1); + cv->rangespace = nranges; return clearcvec(cv); } @@ -44,6 +47,7 @@ struct cvec *cv; assert(cv->chrs == (chr *)&cv->mcces[cv->mccespace]); cv->nmcces = 0; cv->nmccechrs = 0; + cv->nranges = 0; for (i = 0; i < cv->mccespace; i++) cv->mcces[i] = NULL; @@ -64,6 +68,22 @@ pchr c; } /* + - addrange - add a range to a cvec + ^ static VOID addrange(struct cvec *, pchr, pchr); + */ +static VOID +addrange(cv, from, to) +struct cvec *cv; +pchr from; +pchr to; +{ + assert(cv->nranges < cv->rangespace); + cv->ranges[cv->nranges*2] = (chr)from; + cv->ranges[cv->nranges*2 + 1] = (chr)to; + cv->nranges++; +} + +/* - addmcce - add an MCCE to a cvec ^ static VOID addmcce(struct cvec *, chr *, chr *); */ @@ -105,26 +125,31 @@ pchr c; for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) if (*p == c) return 1; + for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) + if (*p <= c && c <= *(p+1)) + return 1; return 0; } /* - getcvec - get a cvec, remembering it as v->cv - ^ static struct cvec *getcvec(struct 
vars *, int, int); + ^ static struct cvec *getcvec(struct vars *, int, int, int); */ static struct cvec * -getcvec(v, nchrs, nmcces) +getcvec(v, nchrs, nranges, nmcces) struct vars *v; int nchrs; /* to hold this many chrs... */ +int nranges; /* ... and this many ranges... */ int nmcces; /* ... and this many MCCEs */ { if (v->cv != NULL && nchrs <= v->cv->chrspace && - nmcces <= v->cv->mccespace) + nranges <= v->cv->rangespace && + nmcces <= v->cv->mccespace) return clearcvec(v->cv); if (v->cv != NULL) freecvec(v->cv); - v->cv = newcvec(nchrs, nmcces); + v->cv = newcvec(nchrs, nranges, nmcces); if (v->cv == NULL) ERR(REG_ESPACE); |