summaryrefslogtreecommitdiffstats
path: root/generic/regc_cvec.c
diff options
context:
space:
mode:
author: stanton <stanton> 1998-11-04 04:39:51 (GMT)
committer: stanton <stanton> 1998-11-04 04:39:51 (GMT)
commit: 89fbe663adddd77c676a1ab1139bb3c6e5c83e5a (patch)
tree: 5bea621521ff02de3b4373621910c8c6a3faf4c7 /generic/regc_cvec.c
parent: cb753ddd702424bc449165b39d0c9ddd46413c13 (diff)
download: tcl-89fbe663adddd77c676a1ab1139bb3c6e5c83e5a.zip
tcl-89fbe663adddd77c676a1ab1139bb3c6e5c83e5a.tar.gz
tcl-89fbe663adddd77c676a1ab1139bb3c6e5c83e5a.tar.bz2
Merged Henry's latest changes to add support for character ranges
in the cvec data type. Added support for Unicode character classes in regular expressions. We now support the following character classes: alnum, alpha, blank, cntrl, digit, graph, lower, print, punct, space, upper, xdigit. These all follow the example set by the GNU regular expression package for Java, except that "digit" only matches the ASCII '0'-'9' characters. Renamed tclUtf.h to tclUniData.c.
Diffstat (limited to 'generic/regc_cvec.c')
-rw-r--r--generic/regc_cvec.c39
1 files changed, 32 insertions, 7 deletions
diff --git a/generic/regc_cvec.c b/generic/regc_cvec.c
index 0650883..9e79df2 100644
--- a/generic/regc_cvec.c
+++ b/generic/regc_cvec.c
@@ -5,18 +5,19 @@
/*
- newcvec - allocate a new cvec
- ^ static struct cvec *newcvec(int, int);
+ ^ static struct cvec *newcvec(int, int, int);
*/
static struct cvec *
-newcvec(nchrs, nmcces)
+newcvec(nchrs, nranges, nmcces)
int nchrs; /* to hold this many chrs... */
+int nranges; /* ... and this many ranges... */
int nmcces; /* ... and this many MCCEs */
{
size_t n;
size_t nc;
struct cvec *cv;
- nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1);
+ nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1) + (size_t)nranges*2;
n = sizeof(struct cvec) + (size_t)(nmcces-1)*sizeof(chr *) +
nc*sizeof(chr);
cv = (struct cvec *)MALLOC(n);
@@ -25,6 +26,8 @@ int nmcces; /* ... and this many MCCEs */
cv->chrspace = nc;
cv->chrs = (chr *)&cv->mcces[nmcces]; /* chrs just after MCCE ptrs */
cv->mccespace = nmcces;
+ cv->ranges = cv->chrs + nchrs + nmcces*(MAXMCCE+1);
+ cv->rangespace = nranges;
return clearcvec(cv);
}
@@ -44,6 +47,7 @@ struct cvec *cv;
assert(cv->chrs == (chr *)&cv->mcces[cv->mccespace]);
cv->nmcces = 0;
cv->nmccechrs = 0;
+ cv->nranges = 0;
for (i = 0; i < cv->mccespace; i++)
cv->mcces[i] = NULL;
@@ -64,6 +68,22 @@ pchr c;
}
/*
+ - addrange - add a range to a cvec
+ ^ static VOID addrange(struct cvec *, pchr, pchr);
+ */
+static VOID
+addrange(cv, from, to)
+struct cvec *cv;
+pchr from;
+pchr to;
+{
+ assert(cv->nranges < cv->rangespace);
+ cv->ranges[cv->nranges*2] = (chr)from;
+ cv->ranges[cv->nranges*2 + 1] = (chr)to;
+ cv->nranges++;
+}
+
+/*
- addmcce - add an MCCE to a cvec
^ static VOID addmcce(struct cvec *, chr *, chr *);
*/
@@ -105,26 +125,31 @@ pchr c;
for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
if (*p == c)
return 1;
+ for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--)
+ if (*p <= c && c <= *(p+1))
+ return 1;
return 0;
}
/*
- getcvec - get a cvec, remembering it as v->cv
- ^ static struct cvec *getcvec(struct vars *, int, int);
+ ^ static struct cvec *getcvec(struct vars *, int, int, int);
*/
static struct cvec *
-getcvec(v, nchrs, nmcces)
+getcvec(v, nchrs, nranges, nmcces)
struct vars *v;
int nchrs; /* to hold this many chrs... */
+int nranges; /* ... and this many ranges... */
int nmcces; /* ... and this many MCCEs */
{
if (v->cv != NULL && nchrs <= v->cv->chrspace &&
- nmcces <= v->cv->mccespace)
+ nranges <= v->cv->rangespace &&
+ nmcces <= v->cv->mccespace)
return clearcvec(v->cv);
if (v->cv != NULL)
freecvec(v->cv);
- v->cv = newcvec(nchrs, nmcces);
+ v->cv = newcvec(nchrs, nranges, nmcces);
if (v->cv == NULL)
ERR(REG_ESPACE);