summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog5
-rw-r--r--generic/regc_locale.c92
-rw-r--r--tests/utf.test4
3 files changed, 14 insertions, 87 deletions
diff --git a/ChangeLog b/ChangeLog
index 7b91049..410f689 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2011-12-11 Jan Nijtmans <nijtmans@users.sf.net>
+
+ * generic/regc_locale.c: [Bug 3457031]: Some Unicode 6.0 chars not
+ * tests/utf.test: in [:print:] class
+
2011-12-07 Jan Nijtmans <nijtmans@users.sf.net>
* tools/uniParse.tcl: [Bug 3444754] string tolower \u01c5 is wrong
diff --git a/generic/regc_locale.c b/generic/regc_locale.c
index 124dff4..f393820 100644
--- a/generic/regc_locale.c
+++ b/generic/regc_locale.c
@@ -630,80 +630,6 @@ static const chr graphCharTable[] = {
#define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr))
/*
- * Unicode: unicode print characters including space, i.e. all Letters (class
- * L*), Numbers (N*), Punctuation (P*), Symbols (S*) and Spaces (Zs).
- */
-
-static const crange printRangeTable[] = {
- {0x0020, 0x007E}, {0x00A0, 0x01F5}, {0x01FA, 0x0217}, {0x0250, 0x02A8},
- {0x02B0, 0x02DE}, {0x02E0, 0x02E9}, {0x0374, 0x0375}, {0x0384, 0x038A},
- {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3},
- {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0482},
- {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB},
- {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0559, 0x055F},
- {0x0561, 0x0587}, {0x05D0, 0x05EA}, {0x05F0, 0x05F4}, {0x0621, 0x063A},
- {0x0640, 0x064A}, {0x0660, 0x066D}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
- {0x06C0, 0x06CE}, {0x06D0, 0x06D5}, {0x06E5, 0x06E6}, {0x06F0, 0x06F9},
- {0x0905, 0x0939}, {0x0958, 0x0961}, {0x0964, 0x0970}, {0x0985, 0x098C},
- {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9},
- {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09E6, 0x09FA}, {0x0A05, 0x0A0A},
- {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30}, {0x0A32, 0x0A33},
- {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C}, {0x0A66, 0x0A6F},
- {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
- {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0AE6, 0x0AEF},
- {0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30},
- {0x0B32, 0x0B33}, {0x0B36, 0x0B39}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
- {0x0B66, 0x0B70}, {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95},
- {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
- {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0BE7, 0x0BF2}, {0x0C05, 0x0C0C},
- {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39},
- {0x0C60, 0x0C61}, {0x0C66, 0x0C6F}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
- {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CE0, 0x0CE1},
- {0x0CE6, 0x0CEF}, {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28},
- {0x0D2A, 0x0D39}, {0x0D60, 0x0D61}, {0x0D66, 0x0D6F}, {0x0E3F, 0x0E46},
- {0x0E4F, 0x0E5B}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB},
- {0x0EAD, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EC0, 0x0EC4}, {0x0ED0, 0x0ED9},
- {0x0EDC, 0x0EDD}, {0x0F00, 0x0F17}, {0x0F1A, 0x0F34}, {0x0F3A, 0x0F3D},
- {0x0F40, 0x0F47}, {0x0F49, 0x0F69}, {0x0F88, 0x0F8B}, {0x10A0, 0x10C5},
- {0x10D0, 0x10F6}, {0x1100, 0x1159}, {0x115F, 0x11A2}, {0x11A8, 0x11F9},
- {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x1F00, 0x1F15}, {0x1F18, 0x1F1D},
- {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F5F, 0x1F7D},
- {0x1F80, 0x1FB4}, {0x1FB6, 0x1FC4}, {0x1FC6, 0x1FD3}, {0x1FD6, 0x1FDB},
- {0x1FDD, 0x1FEF}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFE}, {0x2000, 0x200B},
- {0x2010, 0x2027}, {0x2030, 0x2046}, {0x2074, 0x208E}, {0x20A0, 0x20AC},
- {0x2100, 0x2138}, {0x2153, 0x2182}, {0x2190, 0x21EA}, {0x2200, 0x22F1},
- {0x2302, 0x237A}, {0x2400, 0x2424}, {0x2440, 0x244A}, {0x2460, 0x24EA},
- {0x2500, 0x2595}, {0x25A0, 0x25EF}, {0x2600, 0x2613}, {0x261A, 0x266F},
- {0x2701, 0x2704}, {0x2706, 0x2709}, {0x270C, 0x2727}, {0x2729, 0x274B},
- {0x274F, 0x2752}, {0x2758, 0x275E}, {0x2761, 0x2767}, {0x2776, 0x2794},
- {0x2798, 0x27AF}, {0x27B1, 0x27BE}, {0x3000, 0x3029}, {0x3030, 0x3037},
- {0x3041, 0x3094}, {0x309B, 0x309E}, {0x30A1, 0x30FE}, {0x3105, 0x312C},
- {0x3131, 0x318E}, {0x3190, 0x319F}, {0x3200, 0x321C}, {0x3220, 0x3243},
- {0x3260, 0x327B}, {0x327F, 0x32B0}, {0x32C0, 0x32CB}, {0x32D0, 0x32FE},
- {0x3300, 0x3376}, {0x337B, 0x33DD}, {0x33E0, 0x33FE}, {0x4E00, 0x9FA5},
- {0xAC00, 0xD7A3}, {0xF900, 0xFA2D}, {0xFB00, 0xFB06}, {0xFB13, 0xFB17},
- {0xFB1F, 0xFB36}, {0xFB38, 0xFB3C}, {0xFB40, 0xFB41}, {0xFB43, 0xFB44},
- {0xFB46, 0xFBB1}, {0xFBD3, 0xFD3F}, {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7},
- {0xFDF0, 0xFDFB}, {0xFE30, 0xFE44}, {0xFE49, 0xFE52}, {0xFE54, 0xFE66},
- {0xFE68, 0xFE6B}, {0xFE70, 0xFE72}, {0xFE76, 0xFEFC}, {0xFF01, 0xFF5E},
- {0xFF61, 0xFFBE}, {0xFFC2, 0xFFC7}, {0xFFCA, 0xFFCF}, {0xFFD2, 0xFFD7},
- {0xFFDA, 0xFFDC}, {0xFFE0, 0xFFE6}, {0xFFE8, 0xFFEE}, {0xFFFC, 0xFFFD}
-};
-
-#define NUM_PRINT_RANGE (sizeof(printRangeTable)/sizeof(crange))
-
-static const chr printCharTable[] = {
- 0x037A, 0x037E, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0589, 0x05BE,
- 0x05C0, 0x05C3, 0x060C, 0x061B, 0x061F, 0x06E9, 0x093D, 0x0950, 0x09B2,
- 0x0A5E, 0x0A8D, 0x0ABD, 0x0AD0, 0x0AE0, 0x0B3D, 0x0B9C, 0x0CDE, 0x0E01,
- 0x0E32, 0x0E81, 0x0E84, 0x0E87, 0x0E8A, 0x0E8D, 0x0E94, 0x0EA5, 0x0EA7,
- 0x0EBD, 0x0EC6, 0x0F36, 0x0F38, 0x0F85, 0x10FB, 0x1F59, 0x1F5B, 0x1F5D,
- 0x2070, 0x2300, 0x274D, 0x2756, 0x303F, 0xFB3E, 0xFE74
-};
-
-#define NUM_PRINT_CHAR (sizeof(printCharTable)/sizeof(chr))
-
-/*
* End of auto-generated Unicode character ranges declarations.
*/
@@ -946,18 +872,6 @@ cclass(
*/
switch((enum classes) index) {
- case CC_PRINT:
- cv = getcvec(v, NUM_PRINT_CHAR, NUM_PRINT_RANGE);
- if (cv) {
- for (i=0 ; (size_t)i<NUM_PRINT_CHAR ; i++) {
- addchr(cv, printCharTable[i]);
- }
- for (i=0 ; (size_t)i<NUM_PRINT_RANGE ; i++) {
- addrange(cv, printRangeTable[i].start,
- printRangeTable[i].end);
- }
- }
- break;
case CC_ALNUM:
cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE);
if (cv) {
@@ -1076,10 +990,14 @@ cclass(
}
}
break;
+ case CC_PRINT:
case CC_GRAPH:
cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE);
if (cv) {
- for (i=0 ; (size_t)i<NUM_GRAPH_RANGE ; i++) {
+ /* For CC_PRINT, include space as well */
+ addrange(cv, graphRangeTable[0].start - (index == CC_PRINT),
+ graphRangeTable[0].end);
+ for (i=1 ; (size_t)i<NUM_GRAPH_RANGE ; i++) {
addrange(cv, graphRangeTable[i].start,
graphRangeTable[i].end);
}
diff --git a/tests/utf.test b/tests/utf.test
index 88e8275..60e3d52 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -282,6 +282,10 @@ test utf-21.2 {unicode alnum char in regc_locale.c} {
# this returns 1 with Unicode 6 compliance
list [regexp {^[[:alnum:]]+$} \u1040\u021f\u0220] [regexp {^\w+$} \u1040\u021f\u0220]
} {1 1}
+test utf-21.3 {unicode print char in regc_locale.c} {
+ # this returns 1 with Unicode 6 compliance
+ regexp {^[[:print:]]+$} \ufbc1
+} 1
test utf-22.1 {TclUniCharIsWordChar} {
string wordend "xyz123_bar fg" 0