diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2012-01-09 19:59:47 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2012-01-09 19:59:47 (GMT) |
commit | 05a1c1c8d017993fac4875d1baed7e62ece0bd93 (patch) | |
tree | e9c70fecb67f1316d832759eafcec6f8df633692 /generic/regc_locale.c | |
parent | e1074be4bf98ca2d9b91651693f0da2ee6c1042d (diff) | |
download | tcl-05a1c1c8d017993fac4875d1baed7e62ece0bd93.zip tcl-05a1c1c8d017993fac4875d1baed7e62ece0bd93.tar.gz tcl-05a1c1c8d017993fac4875d1baed7e62ece0bd93.tar.bz2 |
[Bug 3464428] string is graph \u0120 is wrong
Diffstat (limited to 'generic/regc_locale.c')
-rw-r--r-- | generic/regc_locale.c | 175 |
1 files changed, 90 insertions, 85 deletions
diff --git a/generic/regc_locale.c b/generic/regc_locale.c index 77cfd8e..ef8ecdc 100644 --- a/generic/regc_locale.c +++ b/generic/regc_locale.c @@ -12,7 +12,7 @@ /* ASCII character-name table */ -static struct cname { +static CONST struct cname { CONST char *name; CONST char code; } cnames[] = { @@ -224,6 +224,23 @@ static CONST chr alphaCharTable[] = { #define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr)) /* + * Unicode: control characters. + */ + +static CONST crange controlRangeTable[] = { + {0x007f, 0x009f}, {0x0600, 0x0603}, {0x200b, 0x200f}, {0x202a, 0x202e}, + {0x2060, 0x2064}, {0x206a, 0x206f}, {0xe000, 0xf8ff}, {0xfff9, 0xfffb} +}; + +#define NUM_CONTROL_RANGE (sizeof(controlRangeTable)/sizeof(crange)) + +static CONST chr controlCharTable[] = { + 0x00ad, 0x06dd, 0x070f, 0x17b4, 0x17b5, 0xfeff +}; + +#define NUM_CONTROL_CHAR (sizeof(controlCharTable)/sizeof(chr)) + +/* * Unicode: decimal digit characters. */ @@ -478,7 +495,7 @@ static CONST chr upperCharTable[] = { */ static CONST crange graphRangeTable[] = { - {0x0021, 0x007e}, {0x00a0, 0x00ac}, {0x00ae, 0x0377}, {0x037a, 0x037e}, + {0x0021, 0x007e}, {0x00a1, 0x00ac}, {0x00ae, 0x0377}, {0x037a, 0x037e}, {0x0384, 0x038a}, {0x038e, 0x03a1}, {0x03a3, 0x0527}, {0x0531, 0x0556}, {0x0559, 0x055f}, {0x0561, 0x0587}, {0x0591, 0x05c7}, {0x05d0, 0x05ea}, {0x05f0, 0x05f4}, {0x0606, 0x061b}, {0x061e, 0x06dc}, {0x06de, 0x070d}, @@ -513,21 +530,21 @@ static CONST crange graphRangeTable[] = { {0x1250, 0x1256}, {0x125a, 0x125d}, {0x1260, 0x1288}, {0x128a, 0x128d}, {0x1290, 0x12b0}, {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12d6}, {0x12d8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135a}, - {0x135d, 0x137c}, {0x1380, 0x1399}, {0x13a0, 0x13f4}, {0x1400, 0x169c}, - {0x16a0, 0x16f0}, {0x1700, 0x170c}, {0x170e, 0x1714}, {0x1720, 0x1736}, - {0x1740, 0x1753}, {0x1760, 0x176c}, {0x176e, 0x1770}, {0x1780, 0x17b3}, - {0x17b6, 0x17dd}, {0x17e0, 0x17e9}, {0x17f0, 0x17f9}, {0x1800, 0x180e}, - {0x1810, 0x1819}, {0x1820, 0x1877}, {0x1880, 0x18aa}, {0x18b0, 0x18f5}, - {0x1900, 0x191c}, {0x1920, 0x192b}, {0x1930, 0x193b}, {0x1944, 0x196d}, - {0x1970, 0x1974}, {0x1980, 0x19ab}, {0x19b0, 0x19c9}, {0x19d0, 0x19da}, - {0x19de, 0x1a1b}, {0x1a1e, 0x1a5e}, {0x1a60, 0x1a7c}, {0x1a7f, 0x1a89}, - {0x1a90, 0x1a99}, {0x1aa0, 0x1aad}, {0x1b00, 0x1b4b}, {0x1b50, 0x1b7c}, - {0x1b80, 0x1baa}, {0x1bae, 0x1bb9}, {0x1bc0, 0x1bf3}, {0x1bfc, 0x1c37}, - {0x1c3b, 0x1c49}, {0x1c4d, 0x1c7f}, {0x1cd0, 0x1cf2}, {0x1d00, 0x1de6}, - {0x1dfc, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, {0x1f48, 0x1f4d}, - {0x1f50, 0x1f57}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fc4}, - {0x1fc6, 0x1fd3}, {0x1fd6, 0x1fdb}, {0x1fdd, 0x1fef}, {0x1ff2, 0x1ff4}, - {0x1ff6, 0x1ffe}, {0x2000, 0x200a}, {0x2010, 0x2029}, {0x202f, 0x205f}, + {0x135d, 0x137c}, {0x1380, 0x1399}, {0x13a0, 0x13f4}, {0x1400, 0x167f}, + {0x1681, 0x169c}, {0x16a0, 0x16f0}, {0x1700, 0x170c}, {0x170e, 0x1714}, + {0x1720, 0x1736}, {0x1740, 0x1753}, {0x1760, 0x176c}, {0x176e, 0x1770}, + {0x1780, 0x17b3}, {0x17b6, 0x17dd}, {0x17e0, 0x17e9}, {0x17f0, 0x17f9}, + {0x1800, 0x180d}, {0x1810, 0x1819}, {0x1820, 0x1877}, {0x1880, 0x18aa}, + {0x18b0, 0x18f5}, {0x1900, 0x191c}, {0x1920, 0x192b}, {0x1930, 0x193b}, + {0x1944, 0x196d}, {0x1970, 0x1974}, {0x1980, 0x19ab}, {0x19b0, 0x19c9}, + {0x19d0, 0x19da}, {0x19de, 0x1a1b}, {0x1a1e, 0x1a5e}, {0x1a60, 0x1a7c}, + {0x1a7f, 0x1a89}, {0x1a90, 0x1a99}, {0x1aa0, 0x1aad}, {0x1b00, 0x1b4b}, + {0x1b50, 0x1b7c}, {0x1b80, 0x1baa}, {0x1bae, 0x1bb9}, {0x1bc0, 0x1bf3}, + {0x1bfc, 0x1c37}, {0x1c3b, 0x1c49}, {0x1c4d, 0x1c7f}, {0x1cd0, 0x1cf2}, + {0x1d00, 0x1de6}, {0x1dfc, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, + {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, + {0x1fb6, 0x1fc4}, {0x1fc6, 0x1fd3}, {0x1fd6, 0x1fdb}, {0x1fdd, 0x1fef}, + {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffe}, {0x2010, 0x2027}, {0x2030, 0x205e}, {0x2074, 0x208e}, {0x2090, 0x209c}, {0x20a0, 0x20b9}, {0x20d0, 0x20f0}, {0x2100, 0x2189}, {0x2190, 0x23f3}, {0x2400, 0x2426}, {0x2440, 0x244a}, {0x2460, 0x26ff}, {0x2701, 0x27ca}, {0x27ce, 0x2b4c}, {0x2b50, 0x2b59}, @@ -535,7 +552,7 @@ static CONST crange graphRangeTable[] = { {0x2d30, 0x2d65}, {0x2d7f, 0x2d96}, {0x2da0, 0x2da6}, {0x2da8, 0x2dae}, {0x2db0, 0x2db6}, {0x2db8, 0x2dbe}, {0x2dc0, 0x2dc6}, {0x2dc8, 0x2dce}, {0x2dd0, 0x2dd6}, {0x2dd8, 0x2dde}, {0x2de0, 0x2e31}, {0x2e80, 0x2e99}, - {0x2e9b, 0x2ef3}, {0x2f00, 0x2fd5}, {0x2ff0, 0x2ffb}, {0x3000, 0x303f}, + {0x2e9b, 0x2ef3}, {0x2f00, 0x2fd5}, {0x2ff0, 0x2ffb}, {0x3001, 0x303f}, {0x3041, 0x3096}, {0x3099, 0x30ff}, {0x3105, 0x312d}, {0x3131, 0x318e}, {0x3190, 0x31ba}, {0x31c0, 0x31e3}, {0x31f0, 0x321e}, {0x3220, 0x32fe}, {0x3300, 0x4db5}, {0x4dc0, 0x9fcb}, {0xa000, 0xa48c}, {0xa490, 0xa4c6}, @@ -582,43 +599,6 @@ static CONST chr graphCharTable[] = { #define CH NOCELT /* - - nmcces - how many distinct MCCEs are there? - ^ static int nmcces(struct vars *); - */ -static int -nmcces(v) - struct vars *v; /* context */ -{ - /* - * No multi-character collating elements defined at the moment. - */ - return 0; -} - -/* - - nleaders - how many chrs can be first chrs of MCCEs? - ^ static int nleaders(struct vars *); - */ -static int -nleaders(v) - struct vars *v; /* context */ -{ - return 0; -} - -/* - - allmcces - return a cvec with all the MCCEs of the locale - ^ static struct cvec *allmcces(struct vars *, struct cvec *); - */ -static struct cvec * -allmcces(v, cv) - struct vars *v; /* context */ - struct cvec *cv; /* this is supposed to have enough room */ -{ - return clearcvec(cv); -} - -/* - element - map collating-element name to celt ^ static celt element(struct vars *, CONST chr *, CONST chr *); */ @@ -690,7 +670,7 @@ range(v, a, b, cases) } if (!cases) { /* easy version */ - cv = getcvec(v, 0, 1, 0); + cv = getcvec(v, 0, 1); NOERRN(); addrange(cv, a, b); return cv; @@ -704,7 +684,7 @@ range(v, a, b, cases) nchrs = (b - a + 1)*2 + 4; - cv = getcvec(v, nchrs, 0, 0); + cv = getcvec(v, nchrs, 0); NOERRN(); for (c=a; c<=b; c++) { @@ -759,7 +739,7 @@ eclass(v, c, cases) */ if ((v->cflags®_FAKE) && c == 'x') { - cv = getcvec(v, 4, 0, 0); + cv = getcvec(v, 4, 0); addchr(cv, (chr)'x'); addchr(cv, (chr)'y'); if (cases) { @@ -776,7 +756,7 @@ eclass(v, c, cases) if (cases) { return allcases(v, c); } - cv = getcvec(v, 1, 0, 0); + cv = getcvec(v, 1, 0); assert(cv != NULL); addchr(cv, (chr)c); return cv; @@ -825,15 +805,6 @@ cclass(v, startp, endp, cases) np = Tcl_UniCharToUtfDString(startp, (int)len, &ds); /* - * Remap lower and upper to alpha if the match is case insensitive. - */ - - if (cases && len == 5 && (strncmp("lower", np, 5) == 0 - || strncmp("upper", np, 5) == 0)) { - np = "alpha"; - } - - /* * Map the name to the corresponding enumerated value. */ @@ -851,13 +822,20 @@ cclass(v, startp, endp, cases) } /* + * Remap lower and upper to alpha if the match is case insensitive. + */ + + if (cases && ((index == CC_LOWER) || (index == CC_UPPER))) { + index = CC_ALNUM; + } + + /* * Now compute the character class contents. */ switch((enum classes) index) { - case CC_PRINT: case CC_ALNUM: - cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE, 0); + cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE); if (cv) { for (i=0 ; (size_t)i<NUM_ALPHA_CHAR ; i++) { addchr(cv, alphaCharTable[i]); @@ -873,7 +851,7 @@ cclass(v, startp, endp, cases) } break; case CC_ALPHA: - cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE, 0); + cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE); if (cv) { for (i=0 ; (size_t)i<NUM_ALPHA_RANGE ; i++) { addrange(cv, alphaRangeTable[i].start, @@ -885,23 +863,30 @@ cclass(v, startp, endp, cases) } break; case CC_ASCII: - cv = getcvec(v, 0, 1, 0); + cv = getcvec(v, 0, 1); if (cv) { addrange(cv, 0, 0x7f); } break; case CC_BLANK: - cv = getcvec(v, 2, 0, 0); + cv = getcvec(v, 2, 0); addchr(cv, '\t'); addchr(cv, ' '); break; case CC_CNTRL: - cv = getcvec(v, 0, 2, 0); - addrange(cv, 0x0, 0x1f); - addrange(cv, 0x7f, 0x9f); + cv = getcvec(v, NUM_CONTROL_CHAR, NUM_CONTROL_RANGE); + if (cv) { + for (i=0 ; (size_t)i<NUM_CONTROL_RANGE ; i++) { + addrange(cv, controlRangeTable[i].start, + controlRangeTable[i].end); + } + for (i=0 ; (size_t)i<NUM_CONTROL_CHAR ; i++) { + addchr(cv, controlCharTable[i]); + } + } break; case CC_DIGIT: - cv = getcvec(v, 0, NUM_DIGIT_RANGE, 0); + cv = getcvec(v, 0, NUM_DIGIT_RANGE); if (cv) { for (i=0 ; (size_t)i<NUM_DIGIT_RANGE ; i++) { addrange(cv, digitRangeTable[i].start, @@ -910,7 +895,7 @@ cclass(v, startp, endp, cases) } break; case CC_PUNCT: - cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE, 0); + cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE); if (cv) { for (i=0 ; (size_t)i<NUM_PUNCT_RANGE ; i++) { addrange(cv, punctRangeTable[i].start, @@ -930,7 +915,8 @@ cclass(v, startp, endp, cases) * Whatever is actually the case, the number of ranges is fixed (until * someone comes up with a better arrangement!) */ - cv = getcvec(v, 0, 3, 0); + + cv = getcvec(v, 0, 3); if (cv) { addrange(cv, '0', '9'); addrange(cv, 'a', 'f'); @@ -938,7 +924,7 @@ cclass(v, startp, endp, cases) } break; case CC_SPACE: - cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE, 0); + cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE); if (cv) { for (i=0 ; (size_t)i<NUM_SPACE_RANGE ; i++) { addrange(cv, spaceRangeTable[i].start, @@ -950,7 +936,7 @@ cclass(v, startp, endp, cases) } break; case CC_LOWER: - cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE, 0); + cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE); if (cv) { for (i=0 ; (size_t)i<NUM_LOWER_RANGE ; i++) { addrange(cv, lowerRangeTable[i].start, @@ -962,7 +948,7 @@ cclass(v, startp, endp, cases) } break; case CC_UPPER: - cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE, 0); + cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE); if (cv) { for (i=0 ; (size_t)i<NUM_UPPER_RANGE ; i++) { addrange(cv, upperRangeTable[i].start, @@ -973,8 +959,27 @@ cclass(v, startp, endp, cases) } } break; + case CC_PRINT: + cv = getcvec(v, NUM_SPACE_CHAR + NUM_GRAPH_CHAR, NUM_SPACE_RANGE + NUM_GRAPH_RANGE - 1); + if (cv) { + for (i=1 ; (size_t)i<NUM_SPACE_RANGE ; i++) { + addrange(cv, spaceRangeTable[i].start, + spaceRangeTable[i].end); + } + for (i=0 ; (size_t)i<NUM_SPACE_CHAR ; i++) { + addchr(cv, spaceCharTable[i]); + } + for (i=0 ; (size_t)i<NUM_GRAPH_RANGE ; i++) { + addrange(cv, graphRangeTable[i].start, + graphRangeTable[i].end); + } + for (i=0 ; (size_t)i<NUM_GRAPH_CHAR ; i++) { + addchr(cv, graphCharTable[i]); + } + } + break; case CC_GRAPH: - cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE, 0); + cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE); if (cv) { for (i=0 ; (size_t)i<NUM_GRAPH_RANGE ; i++) { addrange(cv, graphRangeTable[i].start, @@ -1012,10 +1017,10 @@ allcases(v, pc) tc = Tcl_UniCharToTitle((chr)c); if (tc != uc) { - cv = getcvec(v, 3, 0, 0); + cv = getcvec(v, 3, 0); addchr(cv, tc); } else { - cv = getcvec(v, 2, 0, 0); + cv = getcvec(v, 2, 0); } addchr(cv, lc); if (lc != uc) { |