diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2011-12-11 09:13:34 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2011-12-11 09:13:34 (GMT) |
commit | 7cc01f77560c27ac24cdc87a1171c1c75853a7cf (patch) | |
tree | d35ce3e727a50dbab248f6b112ee1c1065aa9a22 | |
parent | 2bb69f505c4c33c902681ef4459bdd3ef0556c3b (diff) | |
download | tcl-7cc01f77560c27ac24cdc87a1171c1c75853a7cf.zip tcl-7cc01f77560c27ac24cdc87a1171c1c75853a7cf.tar.gz tcl-7cc01f77560c27ac24cdc87a1171c1c75853a7cf.tar.bz2 |
[Bug 3457031]: Some Unicode 6.0 chars not in [:print:] class
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | generic/regc_locale.c | 92 | ||||
-rw-r--r-- | tests/utf.test | 4 |
3 files changed, 14 insertions, 87 deletions
@@ -1,3 +1,8 @@ +2011-12-11 Jan Nijtmans <nijtmans@users.sf.net> + + * generic/regc_locale.c: [Bug 3457031]: Some Unicode 6.0 chars not + * tests/utf.test: in [:print:] class + 2011-12-07 Jan Nijtmans <nijtmans@users.sf.net> * tools/uniParse.tcl: [Bug 3444754] string tolower \u01c5 is wrong diff --git a/generic/regc_locale.c b/generic/regc_locale.c index 124dff4..f393820 100644 --- a/generic/regc_locale.c +++ b/generic/regc_locale.c @@ -630,80 +630,6 @@ static const chr graphCharTable[] = { #define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr)) /* - * Unicode: unicode print characters including space, i.e. all Letters (class - * L*), Numbers (N*), Punctuation (P*), Symbols (S*) and Spaces (Zs). - */ - -static const crange printRangeTable[] = { - {0x0020, 0x007E}, {0x00A0, 0x01F5}, {0x01FA, 0x0217}, {0x0250, 0x02A8}, - {0x02B0, 0x02DE}, {0x02E0, 0x02E9}, {0x0374, 0x0375}, {0x0384, 0x038A}, - {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3}, - {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0482}, - {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, - {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0559, 0x055F}, - {0x0561, 0x0587}, {0x05D0, 0x05EA}, {0x05F0, 0x05F4}, {0x0621, 0x063A}, - {0x0640, 0x064A}, {0x0660, 0x066D}, {0x0671, 0x06B7}, {0x06BA, 0x06BE}, - {0x06C0, 0x06CE}, {0x06D0, 0x06D5}, {0x06E5, 0x06E6}, {0x06F0, 0x06F9}, - {0x0905, 0x0939}, {0x0958, 0x0961}, {0x0964, 0x0970}, {0x0985, 0x098C}, - {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9}, - {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09E6, 0x09FA}, {0x0A05, 0x0A0A}, - {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, - {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C}, {0x0A66, 0x0A6F}, - {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, - {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0AE6, 0x0AEF}, - {0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, - {0x0B32, 0x0B33}, {0x0B36, 0x0B39}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, - {0x0B66, 0x0B70}, {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, - {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, - {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0BE7, 0x0BF2}, {0x0C05, 0x0C0C}, - {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, - {0x0C60, 0x0C61}, {0x0C66, 0x0C6F}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, - {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CE0, 0x0CE1}, - {0x0CE6, 0x0CEF}, {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, - {0x0D2A, 0x0D39}, {0x0D60, 0x0D61}, {0x0D66, 0x0D6F}, {0x0E3F, 0x0E46}, - {0x0E4F, 0x0E5B}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB}, - {0x0EAD, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EC0, 0x0EC4}, {0x0ED0, 0x0ED9}, - {0x0EDC, 0x0EDD}, {0x0F00, 0x0F17}, {0x0F1A, 0x0F34}, {0x0F3A, 0x0F3D}, - {0x0F40, 0x0F47}, {0x0F49, 0x0F69}, {0x0F88, 0x0F8B}, {0x10A0, 0x10C5}, - {0x10D0, 0x10F6}, {0x1100, 0x1159}, {0x115F, 0x11A2}, {0x11A8, 0x11F9}, - {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, - {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F5F, 0x1F7D}, - {0x1F80, 0x1FB4}, {0x1FB6, 0x1FC4}, {0x1FC6, 0x1FD3}, {0x1FD6, 0x1FDB}, - {0x1FDD, 0x1FEF}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFE}, {0x2000, 0x200B}, - {0x2010, 0x2027}, {0x2030, 0x2046}, {0x2074, 0x208E}, {0x20A0, 0x20AC}, - {0x2100, 0x2138}, {0x2153, 0x2182}, {0x2190, 0x21EA}, {0x2200, 0x22F1}, - {0x2302, 0x237A}, {0x2400, 0x2424}, {0x2440, 0x244A}, {0x2460, 0x24EA}, - {0x2500, 0x2595}, {0x25A0, 0x25EF}, {0x2600, 0x2613}, {0x261A, 0x266F}, - {0x2701, 0x2704}, {0x2706, 0x2709}, {0x270C, 0x2727}, {0x2729, 0x274B}, - {0x274F, 0x2752}, {0x2758, 0x275E}, {0x2761, 0x2767}, {0x2776, 0x2794}, - {0x2798, 0x27AF}, {0x27B1, 0x27BE}, {0x3000, 0x3029}, {0x3030, 0x3037}, - {0x3041, 0x3094}, {0x309B, 0x309E}, {0x30A1, 0x30FE}, {0x3105, 0x312C}, - {0x3131, 0x318E}, {0x3190, 0x319F}, {0x3200, 0x321C}, {0x3220, 0x3243}, - {0x3260, 0x327B}, {0x327F, 0x32B0}, {0x32C0, 0x32CB}, {0x32D0, 0x32FE}, - {0x3300, 0x3376}, {0x337B, 0x33DD}, {0x33E0, 0x33FE}, {0x4E00, 0x9FA5}, - {0xAC00, 0xD7A3}, {0xF900, 0xFA2D}, {0xFB00, 0xFB06}, {0xFB13, 0xFB17}, - {0xFB1F, 0xFB36}, {0xFB38, 0xFB3C}, {0xFB40, 0xFB41}, {0xFB43, 0xFB44}, - {0xFB46, 0xFBB1}, {0xFBD3, 0xFD3F}, {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7}, - {0xFDF0, 0xFDFB}, {0xFE30, 0xFE44}, {0xFE49, 0xFE52}, {0xFE54, 0xFE66}, - {0xFE68, 0xFE6B}, {0xFE70, 0xFE72}, {0xFE76, 0xFEFC}, {0xFF01, 0xFF5E}, - {0xFF61, 0xFFBE}, {0xFFC2, 0xFFC7}, {0xFFCA, 0xFFCF}, {0xFFD2, 0xFFD7}, - {0xFFDA, 0xFFDC}, {0xFFE0, 0xFFE6}, {0xFFE8, 0xFFEE}, {0xFFFC, 0xFFFD} -}; - -#define NUM_PRINT_RANGE (sizeof(printRangeTable)/sizeof(crange)) - -static const chr printCharTable[] = { - 0x037A, 0x037E, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0589, 0x05BE, - 0x05C0, 0x05C3, 0x060C, 0x061B, 0x061F, 0x06E9, 0x093D, 0x0950, 0x09B2, - 0x0A5E, 0x0A8D, 0x0ABD, 0x0AD0, 0x0AE0, 0x0B3D, 0x0B9C, 0x0CDE, 0x0E01, - 0x0E32, 0x0E81, 0x0E84, 0x0E87, 0x0E8A, 0x0E8D, 0x0E94, 0x0EA5, 0x0EA7, - 0x0EBD, 0x0EC6, 0x0F36, 0x0F38, 0x0F85, 0x10FB, 0x1F59, 0x1F5B, 0x1F5D, - 0x2070, 0x2300, 0x274D, 0x2756, 0x303F, 0xFB3E, 0xFE74 -}; - -#define NUM_PRINT_CHAR (sizeof(printCharTable)/sizeof(chr)) - -/* * End of auto-generated Unicode character ranges declarations. */ @@ -946,18 +872,6 @@ cclass( */ switch((enum classes) index) { - case CC_PRINT: - cv = getcvec(v, NUM_PRINT_CHAR, NUM_PRINT_RANGE); - if (cv) { - for (i=0 ; (size_t)i<NUM_PRINT_CHAR ; i++) { - addchr(cv, printCharTable[i]); - } - for (i=0 ; (size_t)i<NUM_PRINT_RANGE ; i++) { - addrange(cv, printRangeTable[i].start, - printRangeTable[i].end); - } - } - break; case CC_ALNUM: cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE); if (cv) { @@ -1076,10 +990,14 @@ cclass( } } break; + case CC_PRINT: case CC_GRAPH: cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE); if (cv) { - for (i=0 ; (size_t)i<NUM_GRAPH_RANGE ; i++) { + /* For CC_PRINT, include space as well */ + addrange(cv, graphRangeTable[0].start - (index == CC_PRINT), + graphRangeTable[0].end); + for (i=1 ; (size_t)i<NUM_GRAPH_RANGE ; i++) { addrange(cv, graphRangeTable[i].start, graphRangeTable[i].end); } diff --git a/tests/utf.test b/tests/utf.test index 88e8275..60e3d52 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -282,6 +282,10 @@ test utf-21.2 {unicode alnum char in regc_locale.c} { # this returns 1 with Unicode 6 compliance list [regexp {^[[:alnum:]]+$} \u1040\u021f\u0220] [regexp {^\w+$} \u1040\u021f\u0220] } {1 1} +test utf-21.3 {unicode print char in regc_locale.c} { + # this returns 1 with Unicode 6 compliance + regexp {^[[:print:]]+$} \ufbc1 +} 1 test utf-22.1 {TclUniCharIsWordChar} { string wordend "xyz123_bar fg" 0 |