diff options
| author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2024-01-31 12:42:32 (GMT) |
|---|---|---|
| committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2024-01-31 12:42:32 (GMT) |
| commit | 1ceaeddc08dcec61c7cf00678b693be43ca61535 (patch) | |
| tree | 0f098f0ff2901bafc72a21fdc996c6b7fc327943 | |
| parent | cbd4f2d22bacb8fe295c4c38eb67188335be2dab (diff) | |
| download | tcl-1ceaeddc08dcec61c7cf00678b693be43ca61535.zip tcl-1ceaeddc08dcec61c7cf00678b693be43ca61535.tar.gz tcl-1ceaeddc08dcec61c7cf00678b693be43ca61535.tar.bz2 | |
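Remove the private-use characters (U+E000..U+F8FF) from the regexp control table, but add them back explicitly in the [:cntrl:] class, so there is no change in regexp handling. Eliminate the (size_t) type-casts in the table loops by casting the NUM_* table-size macros to int instead.
This makes implementing more character classes easier.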
| -rw-r--r-- | generic/regc_locale.c | 80 | ||||
| -rw-r--r-- | tools/uniClass.tcl | 10 |
2 files changed, 47 insertions, 43 deletions
diff --git a/generic/regc_locale.c b/generic/regc_locale.c index 9a984f5..e8f9381 100644 --- a/generic/regc_locale.c +++ b/generic/regc_locale.c @@ -254,7 +254,7 @@ static const crange alphaRangeTable[] = { #endif }; -#define NUM_ALPHA_RANGE (sizeof(alphaRangeTable)/sizeof(crange)) +#define NUM_ALPHA_RANGE ((int)(sizeof(alphaRangeTable)/sizeof(crange))) static const chr alphaCharTable[] = { 0xAA, 0xB5, 0xBA, 0x2EC, 0x2EE, 0x376, 0x377, 0x37F, 0x386, @@ -291,7 +291,7 @@ static const chr alphaCharTable[] = { #endif }; -#define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr)) +#define NUM_ALPHA_CHAR ((int)(sizeof(alphaCharTable)/sizeof(chr))) /* * Unicode: control characters. @@ -299,14 +299,13 @@ static const chr alphaCharTable[] = { static const crange controlRangeTable[] = { {0x0, 0x1F}, {0x7F, 0x9F}, {0x600, 0x605}, {0x200B, 0x200F}, - {0x202A, 0x202E}, {0x2060, 0x2064}, {0x2066, 0x206F}, {0xE000, 0xF8FF}, - {0xFFF9, 0xFFFB} + {0x202A, 0x202E}, {0x2060, 0x2064}, {0x2066, 0x206F}, {0xFFF9, 0xFFFB} #if CHRBITS > 16 ,{0x13430, 0x1343F}, {0x1BCA0, 0x1BCA3}, {0x1D173, 0x1D17A}, {0xE0020, 0xE007F} #endif }; -#define NUM_CONTROL_RANGE (sizeof(controlRangeTable)/sizeof(crange)) +#define NUM_CONTROL_RANGE ((int)(sizeof(controlRangeTable)/sizeof(crange))) static const chr controlCharTable[] = { 0xAD, 0x61C, 0x6DD, 0x70F, 0x890, 0x891, 0x8E2, 0x180E, 0xFEFF @@ -315,7 +314,7 @@ static const chr controlCharTable[] = { #endif }; -#define NUM_CONTROL_CHAR (sizeof(controlCharTable)/sizeof(chr)) +#define NUM_CONTROL_CHAR ((int)(sizeof(controlCharTable)/sizeof(chr))) /* * Unicode: decimal digit characters. @@ -343,7 +342,7 @@ static const crange digitRangeTable[] = { #endif }; -#define NUM_DIGIT_RANGE (sizeof(digitRangeTable)/sizeof(crange)) +#define NUM_DIGIT_RANGE ((int)(sizeof(digitRangeTable)/sizeof(crange))) /* * no singletons of digit characters. 
@@ -380,7 +379,7 @@ static const crange punctRangeTable[] = { #endif }; -#define NUM_PUNCT_RANGE (sizeof(punctRangeTable)/sizeof(crange)) +#define NUM_PUNCT_RANGE ((int)(sizeof(punctRangeTable)/sizeof(crange))) static const chr punctCharTable[] = { 0x3A, 0x3B, 0x3F, 0x40, 0x5F, 0x7B, 0x7D, 0xA1, 0xA7, @@ -405,7 +404,7 @@ static const chr punctCharTable[] = { #endif }; -#define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(chr)) +#define NUM_PUNCT_CHAR ((int)(sizeof(punctCharTable)/sizeof(chr))) /* * Unicode: white space characters. @@ -415,14 +414,14 @@ static const crange spaceRangeTable[] = { {0x9, 0xD}, {0x2000, 0x200B} }; -#define NUM_SPACE_RANGE (sizeof(spaceRangeTable)/sizeof(crange)) +#define NUM_SPACE_RANGE ((int)(sizeof(spaceRangeTable)/sizeof(crange))) static const chr spaceCharTable[] = { 0x20, 0x85, 0xA0, 0x1680, 0x180E, 0x2028, 0x2029, 0x202F, 0x205F, 0x2060, 0x3000, 0xFEFF }; -#define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(chr)) +#define NUM_SPACE_CHAR ((int)(sizeof(spaceCharTable)/sizeof(chr))) /* * Unicode: lowercase characters. @@ -456,7 +455,7 @@ static const crange lowerRangeTable[] = { #endif }; -#define NUM_LOWER_RANGE (sizeof(lowerRangeTable)/sizeof(crange)) +#define NUM_LOWER_RANGE ((int)(sizeof(lowerRangeTable)/sizeof(crange))) static const chr lowerCharTable[] = { 0xB5, 0x101, 0x103, 0x105, 0x107, 0x109, 0x10B, 0x10D, 0x10F, @@ -529,7 +528,7 @@ static const chr lowerCharTable[] = { #endif }; -#define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(chr)) +#define NUM_LOWER_CHAR ((int)(sizeof(lowerCharTable)/sizeof(chr))) /* * Unicode: uppercase characters. 
@@ -559,7 +558,7 @@ static const crange upperRangeTable[] = { #endif }; -#define NUM_UPPER_RANGE (sizeof(upperRangeTable)/sizeof(crange)) +#define NUM_UPPER_RANGE ((int)(sizeof(upperRangeTable)/sizeof(crange))) static const chr upperCharTable[] = { 0x100, 0x102, 0x104, 0x106, 0x108, 0x10A, 0x10C, 0x10E, 0x110, @@ -633,7 +632,7 @@ static const chr upperCharTable[] = { #endif }; -#define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr)) +#define NUM_UPPER_CHAR ((int)(sizeof(upperCharTable)/sizeof(chr))) /* * Unicode: unicode print characters excluding space. @@ -792,7 +791,7 @@ static const crange graphRangeTable[] = { #endif }; -#define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange)) +#define NUM_GRAPH_RANGE ((int)(sizeof(graphRangeTable)/sizeof(crange))) static const chr graphCharTable[] = { 0x38C, 0x85E, 0x98F, 0x990, 0x9B2, 0x9C7, 0x9C8, 0x9D7, 0x9DC, @@ -820,7 +819,7 @@ static const chr graphCharTable[] = { #endif }; -#define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr)) +#define NUM_GRAPH_CHAR ((int)(sizeof(graphCharTable)/sizeof(chr))) /* * End of auto-generated Unicode character ranges declarations. 
@@ -1067,14 +1066,14 @@ cclass( case CC_ALNUM: cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE); if (cv) { - for (i=0 ; (size_t)i<NUM_ALPHA_CHAR ; i++) { + for (i=0 ; i<NUM_ALPHA_CHAR ; i++) { addchr(cv, alphaCharTable[i]); } - for (i=0 ; (size_t)i<NUM_ALPHA_RANGE ; i++) { + for (i=0 ; i<NUM_ALPHA_RANGE ; i++) { addrange(cv, alphaRangeTable[i].start, alphaRangeTable[i].end); } - for (i=0 ; (size_t)i<NUM_DIGIT_RANGE ; i++) { + for (i=0 ; i<NUM_DIGIT_RANGE ; i++) { addrange(cv, digitRangeTable[i].start, digitRangeTable[i].end); } @@ -1083,11 +1082,11 @@ cclass( case CC_ALPHA: cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE); if (cv) { - for (i=0 ; (size_t)i<NUM_ALPHA_RANGE ; i++) { + for (i=0 ; i<NUM_ALPHA_RANGE ; i++) { addrange(cv, alphaRangeTable[i].start, alphaRangeTable[i].end); } - for (i=0 ; (size_t)i<NUM_ALPHA_CHAR ; i++) { + for (i=0 ; i<NUM_ALPHA_CHAR ; i++) { addchr(cv, alphaCharTable[i]); } } @@ -1104,13 +1103,14 @@ cclass( addchr(cv, ' '); break; case CC_CNTRL: - cv = getcvec(v, NUM_CONTROL_CHAR, NUM_CONTROL_RANGE); + cv = getcvec(v, NUM_CONTROL_CHAR, NUM_CONTROL_RANGE+1); if (cv) { - for (i=0 ; (size_t)i<NUM_CONTROL_RANGE ; i++) { + addrange(cv, 0xE000, 0xF8FF); /* private */ + for (i=0 ; i<NUM_CONTROL_RANGE ; i++) { addrange(cv, controlRangeTable[i].start, controlRangeTable[i].end); } - for (i=0 ; (size_t)i<NUM_CONTROL_CHAR ; i++) { + for (i=0 ; i<NUM_CONTROL_CHAR ; i++) { addchr(cv, controlCharTable[i]); } } @@ -1118,7 +1118,7 @@ cclass( case CC_DIGIT: cv = getcvec(v, 0, NUM_DIGIT_RANGE); if (cv) { - for (i=0 ; (size_t)i<NUM_DIGIT_RANGE ; i++) { + for (i=0 ; i<NUM_DIGIT_RANGE ; i++) { addrange(cv, digitRangeTable[i].start, digitRangeTable[i].end); } @@ -1127,11 +1127,11 @@ cclass( case CC_PUNCT: cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE); if (cv) { - for (i=0 ; (size_t)i<NUM_PUNCT_RANGE ; i++) { + for (i=0 ; i<NUM_PUNCT_RANGE ; i++) { addrange(cv, punctRangeTable[i].start, punctRangeTable[i].end); } - for (i=0 ; 
(size_t)i<NUM_PUNCT_CHAR ; i++) { + for (i=0 ; i<NUM_PUNCT_CHAR ; i++) { addchr(cv, punctCharTable[i]); } } @@ -1156,11 +1156,11 @@ cclass( case CC_SPACE: cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE); if (cv) { - for (i=0 ; (size_t)i<NUM_SPACE_RANGE ; i++) { + for (i=0 ; i<NUM_SPACE_RANGE ; i++) { addrange(cv, spaceRangeTable[i].start, spaceRangeTable[i].end); } - for (i=0 ; (size_t)i<NUM_SPACE_CHAR ; i++) { + for (i=0 ; i<NUM_SPACE_CHAR ; i++) { addchr(cv, spaceCharTable[i]); } } @@ -1168,11 +1168,11 @@ cclass( case CC_LOWER: cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE); if (cv) { - for (i=0 ; (size_t)i<NUM_LOWER_RANGE ; i++) { + for (i=0 ; i<NUM_LOWER_RANGE ; i++) { addrange(cv, lowerRangeTable[i].start, lowerRangeTable[i].end); } - for (i=0 ; (size_t)i<NUM_LOWER_CHAR ; i++) { + for (i=0 ; i<NUM_LOWER_CHAR ; i++) { addchr(cv, lowerCharTable[i]); } } @@ -1180,11 +1180,11 @@ cclass( case CC_UPPER: cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE); if (cv) { - for (i=0 ; (size_t)i<NUM_UPPER_RANGE ; i++) { + for (i=0 ; i<NUM_UPPER_RANGE ; i++) { addrange(cv, upperRangeTable[i].start, upperRangeTable[i].end); } - for (i=0 ; (size_t)i<NUM_UPPER_CHAR ; i++) { + for (i=0 ; i<NUM_UPPER_CHAR ; i++) { addchr(cv, upperCharTable[i]); } } @@ -1192,18 +1192,18 @@ cclass( case CC_PRINT: cv = getcvec(v, NUM_SPACE_CHAR + NUM_GRAPH_CHAR, NUM_SPACE_RANGE + NUM_GRAPH_RANGE - 1); if (cv) { - for (i=1 ; (size_t)i<NUM_SPACE_RANGE ; i++) { + for (i=1 ; i<NUM_SPACE_RANGE ; i++) { addrange(cv, spaceRangeTable[i].start, spaceRangeTable[i].end); } - for (i=0 ; (size_t)i<NUM_SPACE_CHAR ; i++) { + for (i=0 ; i<NUM_SPACE_CHAR ; i++) { addchr(cv, spaceCharTable[i]); } - for (i=0 ; (size_t)i<NUM_GRAPH_RANGE ; i++) { + for (i=0 ; i<NUM_GRAPH_RANGE ; i++) { addrange(cv, graphRangeTable[i].start, graphRangeTable[i].end); } - for (i=0 ; (size_t)i<NUM_GRAPH_CHAR ; i++) { + for (i=0 ; i<NUM_GRAPH_CHAR ; i++) { addchr(cv, graphCharTable[i]); } } @@ -1211,11 +1211,11 @@ cclass( case CC_GRAPH: cv = 
getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE); if (cv) { - for (i=0 ; (size_t)i<NUM_GRAPH_RANGE ; i++) { + for (i=0 ; i<NUM_GRAPH_RANGE ; i++) { addrange(cv, graphRangeTable[i].start, graphRangeTable[i].end); } - for (i=0 ; (size_t)i<NUM_GRAPH_CHAR ; i++) { + for (i=0 ; i<NUM_GRAPH_CHAR ; i++) { addchr(cv, graphCharTable[i]); } } diff --git a/tools/uniClass.tcl b/tools/uniClass.tcl index 39fa28d..a4a58c2 100644 --- a/tools/uniClass.tcl +++ b/tools/uniClass.tcl @@ -63,11 +63,15 @@ proc genTable {type} { set extchars 0 set extranges 0 - for {set i 0} {$i <= 0x10FFFF} {incr i} { + for {set i 0} {$i <= 0xEFFFF} {incr i} { if {$i == 0xD800} { # Skip surrogates set i 0xE000 } + if {$i == 0xE000} { + # Skip private + set i 0xF900 + } if {[string is $type [format %c $i]]} { if {$i == ($last + 1)} { set last $i @@ -92,13 +96,13 @@ proc genTable {type} { } if {$ranges ne ""} { puts "static const crange ${type}RangeTable\[\] = {\n$ranges\n};\n" - puts "#define NUM_[string toupper $type]_RANGE (sizeof(${type}RangeTable)/sizeof(crange))\n" + puts "#define NUM_[string toupper $type]_RANGE ((int)(sizeof(${type}RangeTable)/sizeof(crange)))\n" } else { puts "/* no contiguous ranges of $type characters */\n" } if {$chars ne ""} { puts "static const chr ${type}CharTable\[\] = {\n$chars\n};\n" - puts "#define NUM_[string toupper $type]_CHAR (sizeof(${type}CharTable)/sizeof(chr))\n" + puts "#define NUM_[string toupper $type]_CHAR ((int)(sizeof(${type}CharTable)/sizeof(chr)))\n" } else { puts "/*\n * no singletons of $type characters.\n */\n" } |
