diff options
-rw-r--r-- | generic/regc_locale.c | 47 | ||||
-rw-r--r-- | generic/tclUniData.c | 40 | ||||
-rw-r--r-- | generic/tclUtf.c | 98 | ||||
-rw-r--r-- | tools/uniClass.tcl | 6 | ||||
-rw-r--r-- | tools/uniParse.tcl | 17 |
5 files changed, 138 insertions, 70 deletions
diff --git a/generic/regc_locale.c b/generic/regc_locale.c index 32adee8..4435557 100644 --- a/generic/regc_locale.c +++ b/generic/regc_locale.c @@ -193,17 +193,13 @@ static const crange alphaRangeTable[] = { {0xaab9, 0xaabd}, {0xaadb, 0xaadd}, {0xaae0, 0xaaea}, {0xaaf2, 0xaaf4}, {0xab01, 0xab06}, {0xab09, 0xab0e}, {0xab11, 0xab16}, {0xab20, 0xab26}, {0xab28, 0xab2e}, {0xab30, 0xab5a}, {0xab5c, 0xab65}, {0xab70, 0xabe2}, - {0xac00, 0xd7a3}, {0xd7b0, 0xd7c6}, {0xd7cb, 0xd7fb}, {0xdc00, 0xdc3e}, - {0xdc40, 0xdc7e}, {0xdc80, 0xdcbe}, {0xdcc0, 0xdcfe}, {0xdd00, 0xdd3e}, - {0xdd40, 0xdd7e}, {0xdd80, 0xddbe}, {0xddc0, 0xddfe}, {0xde00, 0xde3e}, - {0xde40, 0xde7e}, {0xde80, 0xdebe}, {0xdec0, 0xdefe}, {0xdf00, 0xdf3e}, - {0xdf40, 0xdf7e}, {0xdf80, 0xdfbe}, {0xdfc0, 0xdffe}, {0xf900, 0xfa6d}, + {0xac00, 0xd7a3}, {0xd7b0, 0xd7c6}, {0xd7cb, 0xd7fb}, {0xf900, 0xfa6d}, {0xfa70, 0xfad9}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1f, 0xfb28}, {0xfb2a, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfd3d}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, {0xfe70, 0xfe74}, {0xfe76, 0xfefc}, {0xff21, 0xff3a}, {0xff41, 0xff5a}, {0xff66, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc} -#if TCL_UTF_MAX > 4 +#if CHRBITS > 16 ,{0x10000, 0x1000b}, {0x1000d, 0x10026}, {0x10028, 0x1003a}, {0x1003f, 0x1004d}, {0x10050, 0x1005d}, {0x10080, 0x100fa}, {0x10280, 0x1029c}, {0x102a0, 0x102d0}, {0x10300, 0x1031f}, {0x1032d, 0x10340}, {0x10342, 0x10349}, {0x10350, 0x10375}, @@ -266,7 +262,7 @@ static const chr alphaCharTable[] = { 0x2cf2, 0x2cf3, 0x2d27, 0x2d2d, 0x2d6f, 0x2e2f, 0x3005, 0x3006, 0x303b, 0x303c, 0xa62a, 0xa62b, 0xa8fb, 0xa8fd, 0xa9cf, 0xaa7a, 0xaab1, 0xaab5, 0xaab6, 0xaac0, 0xaac2, 0xfb1d, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44 -#if TCL_UTF_MAX > 4 +#if CHRBITS > 16 ,0x1003c, 0x1003d, 0x10808, 0x10837, 0x10838, 0x1083c, 0x108f4, 0x108f5, 0x109be, 0x109bf, 0x10a00, 0x11176, 0x111da, 0x111dc, 0x11288, 0x1130f, 0x11310, 0x11332, 0x11333, 0x1133d, 0x11350, 0x114c4, 0x114c5, 0x114c7, 0x11644, 0x118ff, 0x11a00, @@ -288,7 +284,7 @@ static const crange controlRangeTable[] = { {0x0, 0x1f}, {0x7f, 0x9f}, {0x600, 0x605}, {0x200b, 0x200f}, {0x202a, 0x202e}, {0x2060, 0x2064}, {0x2066, 0x206f}, {0xe000, 0xf8ff}, {0xfff9, 0xfffb} -#if TCL_UTF_MAX > 4 +#if CHRBITS > 16 ,{0x1bca0, 0x1bca3}, {0x1d173, 0x1d17a}, {0xe0020, 0xe007f}, {0xf0000, 0xffffd}, {0x100000, 0x10fffd} #endif @@ -298,7 +294,7 @@ static const crange controlRangeTable[] = { static const chr controlCharTable[] = { 0xad, 0x61c, 0x6dd, 0x70f, 0x8e2, 0x180e, 0xfeff -#if TCL_UTF_MAX > 4 +#if CHRBITS > 16 ,0x110bd, 0xe0001 #endif }; @@ -320,7 +316,7 @@ static const crange digitRangeTable[] = { {0x1c50, 0x1c59}, {0xa620, 0xa629}, {0xa8d0, 0xa8d9}, {0xa900, 0xa909}, {0xa9d0, 0xa9d9}, {0xa9f0, 0xa9f9}, {0xaa50, 0xaa59}, {0xabf0, 0xabf9}, {0xff10, 0xff19} -#if TCL_UTF_MAX > 4 +#if CHRBITS > 16 ,{0x104a0, 0x104a9}, {0x11066, 0x1106f}, {0x110f0, 0x110f9}, {0x11136, 0x1113f}, {0x111d0, 0x111d9}, {0x112f0, 0x112f9}, {0x11450, 0x11459}, {0x114d0, 0x114d9}, {0x11650, 0x11659}, {0x116c0, 0x116c9}, {0x11730, 0x11739}, {0x118e0, 0x118e9}, @@ -354,7 +350,7 @@ static const crange punctRangeTable[] = { {0xaa5c, 0xaa5f}, {0xfe10, 0xfe19}, {0xfe30, 0xfe52}, {0xfe54, 0xfe61}, {0xff01, 0xff03}, {0xff05, 0xff0a}, {0xff0c, 0xff0f}, {0xff3b, 0xff3d}, {0xff5f, 0xff65} -#if TCL_UTF_MAX > 4 +#if CHRBITS > 16 ,{0x10100, 0x10102}, {0x10a50, 0x10a58}, {0x10af0, 0x10af6}, {0x10b39, 0x10b3f}, {0x10b99, 0x10b9c}, {0x11047, 0x1104d}, {0x110be, 0x110c1}, {0x11140, 0x11143}, {0x111c5, 0x111c9}, {0x111dd, 0x111df}, {0x11238, 0x1123d}, {0x1144b, 0x1144f}, @@ -379,7 +375,7 @@ static const chr punctCharTable[] = { 0xa67e, 0xa8ce, 0xa8cf, 0xa8fc, 0xa92e, 0xa92f, 0xa95f, 0xa9de, 0xa9df, 0xaade, 0xaadf, 0xaaf0, 0xaaf1, 0xabeb, 0xfd3e, 0xfd3f, 0xfe63, 0xfe68, 0xfe6a, 0xfe6b, 0xff1a, 0xff1b, 0xff1f, 0xff20, 0xff3f, 0xff5b, 0xff5d -#if TCL_UTF_MAX > 4 +#if CHRBITS > 16 ,0x1039f, 0x103d0, 0x1056f, 0x10857, 0x1091f, 0x1093f, 0x10a7f, 0x110bb, 0x110bc, 0x11174, 0x11175, 0x111cd, 0x111db, 0x112a9, 0x1145b, 0x1145d, 0x114c6, 0x11c70, 0x11c71, 0x16a6e, 0x16a6f, 0x16af5, 0x16b44, 0x1bc9f, 0x1e95e, 0x1e95f @@ -423,7 +419,7 @@ static const crange lowerRangeTable[] = { {0x2d00, 0x2d25}, {0xa72f, 0xa731}, {0xa771, 0xa778}, {0xa793, 0xa795}, {0xab30, 0xab5a}, {0xab60, 0xab65}, {0xab70, 0xabbf}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xff41, 0xff5a} -#if TCL_UTF_MAX > 4 +#if CHRBITS > 16 ,{0x10428, 0x1044f}, {0x104d8, 0x104fb}, {0x10cc0, 0x10cf2}, {0x118c0, 0x118df}, {0x1d41a, 0x1d433}, {0x1d44e, 0x1d454}, {0x1d456, 0x1d467}, {0x1d482, 0x1d49b}, {0x1d4b6, 0x1d4b9}, {0x1d4bd, 0x1d4c3}, {0x1d4c5, 0x1d4cf}, {0x1d4ea, 0x1d503}, @@ -502,7 +498,7 @@ static const chr lowerCharTable[] = { 0xa77f, 0xa781, 0xa783, 0xa785, 0xa787, 0xa78c, 0xa78e, 0xa791, 0xa797, 0xa799, 0xa79b, 0xa79d, 0xa79f, 0xa7a1, 0xa7a3, 0xa7a5, 0xa7a7, 0xa7a9, 0xa7b5, 0xa7b7, 0xa7fa -#if TCL_UTF_MAX > 4 +#if CHRBITS > 16 ,0x1d4bb, 0x1d7cb #endif }; @@ -524,7 +520,7 @@ static const crange upperRangeTable[] = { {0x210b, 0x210d}, {0x2110, 0x2112}, {0x2119, 0x211d}, {0x212a, 0x212d}, {0x2130, 0x2133}, {0x2c00, 0x2c2e}, {0x2c62, 0x2c64}, {0x2c6d, 0x2c70}, {0x2c7e, 0x2c80}, {0xa7aa, 0xa7ae}, {0xa7b0, 0xa7b4}, {0xff21, 0xff3a} -#if TCL_UTF_MAX > 4 +#if CHRBITS > 16 ,{0x10400, 0x10427}, {0x104b0, 0x104d3}, {0x10c80, 0x10cb2}, {0x118a0, 0x118bf}, {0x1d400, 0x1d419}, {0x1d434, 0x1d44d}, {0x1d468, 0x1d481}, {0x1d4a9, 0x1d4ac}, {0x1d4ae, 0x1d4b5}, {0x1d4d0, 0x1d4e9}, {0x1d507, 0x1d50a}, {0x1d50d, 0x1d514}, @@ -601,7 +597,7 @@ static const chr upperCharTable[] = { 0xa768, 0xa76a, 0xa76c, 0xa76e, 0xa779, 0xa77b, 0xa77d, 0xa77e, 0xa780, 0xa782, 0xa784, 0xa786, 0xa78b, 0xa78d, 0xa790, 0xa792, 0xa796, 0xa798, 0xa79a, 0xa79c, 0xa79e, 0xa7a0, 0xa7a2, 0xa7a4, 0xa7a6, 0xa7a8, 0xa7b6 -#if TCL_UTF_MAX > 4 +#if CHRBITS > 16 ,0x1d49c, 0x1d49e, 0x1d49f, 0x1d4a2, 0x1d4a5, 0x1d4a6, 0x1d504, 0x1d505, 0x1d538, 0x1d539, 0x1d546, 0x1d7ca #endif @@ -684,17 +680,13 @@ static const crange graphRangeTable[] = { {0xaadb, 0xaaf6}, {0xab01, 0xab06}, {0xab09, 0xab0e}, {0xab11, 0xab16}, {0xab20, 0xab26}, {0xab28, 0xab2e}, {0xab30, 0xab65}, {0xab70, 0xabed}, {0xabf0, 0xabf9}, {0xac00, 0xd7a3}, {0xd7b0, 0xd7c6}, {0xd7cb, 0xd7fb}, - {0xdc00, 0xdc3e}, {0xdc40, 0xdc7e}, {0xdc80, 0xdcbe}, {0xdcc0, 0xdcfe}, - {0xdd00, 0xdd3e}, {0xdd40, 0xdd7e}, {0xdd80, 0xddbe}, {0xddc0, 0xddfe}, - {0xde00, 0xde3e}, {0xde40, 0xde7e}, {0xde80, 0xdebe}, {0xdec0, 0xdefe}, - {0xdf00, 0xdf3e}, {0xdf40, 0xdf7e}, {0xdf80, 0xdfbe}, {0xdfc0, 0xdffe}, {0xf900, 0xfa6d}, {0xfa70, 0xfad9}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1d, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbc1}, {0xfbd3, 0xfd3f}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfd}, {0xfe00, 0xfe19}, {0xfe20, 0xfe52}, {0xfe54, 0xfe66}, {0xfe68, 0xfe6b}, {0xfe70, 0xfe74}, {0xfe76, 0xfefc}, {0xff01, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc}, {0xffe0, 0xffe6}, {0xffe8, 0xffee} -#if TCL_UTF_MAX > 4 +#if CHRBITS > 16 ,{0x10000, 0x1000b}, {0x1000d, 0x10026}, {0x10028, 0x1003a}, {0x1003f, 0x1004d}, {0x10050, 0x1005d}, {0x10080, 0x100fa}, {0x10100, 0x10102}, {0x10107, 0x10133}, {0x10137, 0x1018e}, {0x10190, 0x1019b}, {0x101d0, 0x101fd}, {0x10280, 0x1029c}, @@ -747,12 +739,13 @@ static const crange graphRangeTable[] = { {0x1f030, 0x1f093}, {0x1f0a0, 0x1f0ae}, {0x1f0b1, 0x1f0bf}, {0x1f0c1, 0x1f0cf}, {0x1f0d1, 0x1f0f5}, {0x1f100, 0x1f10c}, {0x1f110, 0x1f12e}, {0x1f130, 0x1f16b}, {0x1f170, 0x1f1ac}, {0x1f1e6, 0x1f202}, {0x1f210, 0x1f23b}, {0x1f240, 0x1f248}, - {0x1f260, 0x1f265}, {0x1f300, 0x1f6d4}, {0x1f6e0, 0x1f6ec}, {0x1f6f0, 0x1f6f8}, + {0x1f260, 0x1f265}, {0x1f300, 0x1f6d4}, {0x1f6e0, 0x1f6ec}, {0x1f6f0, 0x1f6f9}, {0x1f700, 0x1f773}, {0x1f780, 0x1f7d4}, {0x1f800, 0x1f80b}, {0x1f810, 0x1f847}, {0x1f850, 0x1f859}, {0x1f860, 0x1f887}, {0x1f890, 0x1f8ad}, {0x1f900, 0x1f90b}, - {0x1f910, 0x1f93e}, {0x1f940, 0x1f94c}, {0x1f950, 0x1f96b}, {0x1f980, 0x1f997}, - {0x1f9d0, 0x1f9e6}, {0x20000, 0x2a6d6}, {0x2a700, 0x2b734}, {0x2b740, 0x2b81d}, - {0x2b820, 0x2cea1}, {0x2ceb0, 0x2ebe0}, {0x2f800, 0x2fa1d}, {0xe0100, 0xe01ef} + {0x1f910, 0x1f93e}, {0x1f940, 0x1f970}, {0x1f973, 0x1f976}, {0x1f97c, 0x1f9a2}, + {0x1f9b0, 0x1f9b9}, {0x1f9c0, 0x1f9c2}, {0x1f9d0, 0x1f9ff}, {0x1fa60, 0x1fa6d}, + {0x20000, 0x2a6d6}, {0x2a700, 0x2b734}, {0x2b740, 0x2b81d}, {0x2b820, 0x2cea1}, + {0x2ceb0, 0x2ebe0}, {0x2f800, 0x2fa1d}, {0xe0100, 0xe01ef} #endif }; @@ -770,7 +763,7 @@ static const chr graphCharTable[] = { 0xec6, 0x10c7, 0x10cd, 0x1258, 0x12c0, 0x1772, 0x1773, 0x1940, 0x1f59, 0x1f5b, 0x1f5d, 0x2070, 0x2071, 0x2d27, 0x2d2d, 0x2d6f, 0x2d70, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfffc, 0xfffd -#if TCL_UTF_MAX > 4 +#if CHRBITS > 16 ,0x1003c, 0x1003d, 0x101a0, 0x1056f, 0x10808, 0x10837, 0x10838, 0x1083c, 0x108f4, 0x108f5, 0x1093f, 0x10a05, 0x10a06, 0x11288, 0x1130f, 0x11310, 0x11332, 0x11333, 0x11347, 0x11348, 0x11350, 0x11357, 0x1145b, 0x1145d, 0x118ff, 0x11d08, 0x11d09, @@ -778,7 +771,7 @@ static const chr graphCharTable[] = { 0x1d4a2, 0x1d4a5, 0x1d4a6, 0x1d4bb, 0x1d546, 0x1e023, 0x1e024, 0x1e95e, 0x1e95f, 0x1ee21, 0x1ee22, 0x1ee24, 0x1ee27, 0x1ee39, 0x1ee3b, 0x1ee42, 0x1ee47, 0x1ee49, 0x1ee4b, 0x1ee51, 0x1ee52, 0x1ee54, 0x1ee57, 0x1ee59, 0x1ee5b, 0x1ee5d, 0x1ee5f, - 0x1ee61, 0x1ee62, 0x1ee64, 0x1ee7e, 0x1eef0, 0x1eef1, 0x1f250, 0x1f251, 0x1f9c0 + 0x1ee61, 0x1ee62, 0x1ee64, 0x1ee7e, 0x1eef0, 0x1eef1, 0x1f250, 0x1f251, 0x1f97a #endif }; diff --git a/generic/tclUniData.c b/generic/tclUniData.c index 9f05230..834fcc4 100644 --- a/generic/tclUniData.c +++ b/generic/tclUniData.c @@ -367,11 +367,11 @@ static const unsigned short pageMap[] = { 4928, 4928, 4928, 4928, 4928, 4928, 4928, 4928, 4928, 4928, 4928, 4928, 4928, 4928, 4928, 4928, 4928, 4928, 16864, 16896, 4928, 4928, 4928, 8032, 4928, 4928, 16864, 1824, 16480, 4928, 16928, 4928, 16960, 16992, - 1824, 1824, 16480, 8416, 17024, 17056, 17088, 1824, 17120, 6784, 1824, + 1824, 1824, 16480, 8416, 4928, 17024, 4928, 17056, 16704, 4928, 1824, + 1824, 1824, 16992, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, - 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, @@ -483,7 +483,7 @@ static const unsigned short pageMap[] = { 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, - 1344, 7840, 1824, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 7840, 1824, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, @@ -494,7 +494,7 @@ static const unsigned short pageMap[] = { 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, - 17152, 1344, 1344, 1344, 1344, 1344, 1344, 11360, 1344, 1344, 1344, + 1344, 17088, 1344, 1344, 1344, 1344, 1344, 1344, 11360, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, @@ -509,7 +509,7 @@ static const unsigned short pageMap[] = { 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, - 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 17184, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 17120, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, @@ -529,7 +529,7 @@ static const unsigned short pageMap[] = { 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, - 1344, 1344, 1344, 1344, 17216, 1824, 1824, 1824, 1824, 1824, 1824, + 1344, 1344, 1344, 1344, 1344, 17152, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, @@ -537,8 +537,9 @@ static const unsigned short pageMap[] = { 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, - 1824, 1824, 1824, 1824, 1824, 1824, 1344, 1344, 1344, 1344, 1344, 1344, - 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 11360 + 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 11360 + #endif /* TCL_UTF_MAX > 3 */ }; @@ -1502,24 +1503,20 @@ static const unsigned char groupMap[] = { 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, + 14, 14, 14, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0 + 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 14, 14, 14, 14, 0, 0, 0, 14, + 0, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 #endif /* TCL_UTF_MAX > 3 */ }; @@ -1535,6 +1532,7 @@ static const unsigned char groupMap[] = { * 100 = subtract delta for title/upper * 101 = sub delta for upper, sub 1 for title * 110 = sub delta for upper, add delta for lower + * 111 = subtract delta for upper * * Bits 8-31 Case delta: delta for case conversions. This should be the * highest field so we can easily sign extend. diff --git a/generic/tclUtf.c b/generic/tclUtf.c index a405367..656a6f0 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -158,15 +158,22 @@ Tcl_UniCharToUtf( if ((ch & 0xF800) == 0xD800) { if (ch & 0x0400) { /* Low surrogate */ - buf[3] = (char) ((ch | 0x80) & 0xBF); - buf[2] |= (char) (((ch >> 6) | 0x80) & 0x8F); - return 4; + if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80) + && ((buf[2] & 0xCF) == 0)) { + /* Previous Tcl_UniChar was a High surrogate, so combine */ + buf[3] = (char) ((ch & 0x3F) | 0x80); + buf[2] |= (char) (((ch >> 6) & 0x0F) | 0x80); + return 4; + } + /* Previous Tcl_UniChar was not a High surrogate, so just output */ } else { /* High surrogate */ ch += 0x40; - buf[2] = (char) (((ch << 4) | 0x80) & 0xB0); - buf[1] = (char) (((ch >> 2) | 0x80) & 0xBF); - buf[0] = (char) (((ch >> 8) | 0xF0) & 0xF7); + /* Fill buffer with specific 3-byte (invalid) byte combination, + so following Low surrogate can recognize it and combine */ + buf[2] = (char) ((ch << 4) & 0x30); + buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80); + buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0); return 0; } } @@ -259,6 +266,15 @@ Tcl_UniCharToUtfDString( * Tcl_UtfCharComplete() before calling this routine to ensure that * enough bytes remain in the string. * + * If TCL_UTF_MAX == 4, special handling of Surrogate pairs is done: + * For any UTF-8 string containing a character outside of the BMP, the + * first call to this function will fill *chPtr with the high surrogate + * and generate a return value of 0. Calling Tcl_UtfToUniChar again + * will produce the low surrogate and a return value of 4. Because *chPtr + * is used to remember whether the high surrogate is already produced, it + * is recommended to initialize the variable it points to as 0 before + * the first call to Tcl_UtfToUniChar is done. + * * Results: * *chPtr is filled with the Tcl_UniChar, and the return value is the * number of bytes from the UTF-8 string that were consumed. @@ -315,7 +331,7 @@ Tcl_UtfToUniChar( *chPtr = (Tcl_UniChar) (((byte & 0x0F) << 12) | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F)); - if (*chPtr > 0x7ff) { + if (*chPtr > 0x7FF) { return 3; } } @@ -331,12 +347,30 @@ Tcl_UtfToUniChar( /* * Four-byte-character lead byte followed by three trail bytes. */ - +#if TCL_UTF_MAX == 4 + Tcl_UniChar surrogate; + + byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) + | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000; + surrogate = (Tcl_UniChar) (0xD800 + (byte >> 10)); + if (byte & 0x100000) { + /* out of range, < 0x10000 or > 0x10ffff */ + } else if (*chPtr != surrogate) { + /* produce high surrogate, but don't advance source pointer */ + *chPtr = surrogate; + return 0; + } else { + /* produce low surrogate, and advance source pointer */ + *chPtr = (Tcl_UniChar) (0xDC00 | (byte & 0x3FF)); + return 4; + } +#else *chPtr = (Tcl_UniChar) (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); if ((unsigned)(*chPtr - 0x10000) <= 0xFFFFF) { return 4; } +#endif } /* @@ -433,10 +467,7 @@ Tcl_UtfCharComplete( * a complete UTF-8 character. */ int length) /* Length of above string in bytes. */ { - int ch; - - ch = *((unsigned char *) src); - return length >= totalBytes[ch]; + return length >= totalBytes[(unsigned char)*src]; } /* @@ -928,7 +959,11 @@ Tcl_UtfToTitle( } while (*src) { bytes = TclUtfToUniChar(src, &ch); - lowChar = Tcl_UniCharToLower(ch); + lowChar = ch; + /* Special exception for Georgian Asomtavruli chars, no titlecase. */ + if ((unsigned)(lowChar - 0x1C90) >= 0x30) { + lowChar = Tcl_UniCharToLower(lowChar); + } if (bytes < UtfCount(lowChar)) { memcpy(dst, src, (size_t) bytes); @@ -1030,6 +1065,16 @@ Tcl_UtfNcmp( cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { +#if TCL_UTF_MAX == 4 + /* Surrogates always report higher than non-surrogates */ + if (((ch1 & 0xFC00) == 0xD800)) { + if ((ch2 & 0xFC00) != 0xD800) { + return ch1; + } + } else if ((ch2 & 0xFC00) == 0xD800) { + return -ch2; + } +#endif return (ch1 - ch2); } } @@ -1070,6 +1115,16 @@ Tcl_UtfNcasecmp( cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { +#if TCL_UTF_MAX == 4 + /* Surrogates always report higher than non-surrogates */ + if (((ch1 & 0xFC00) == 0xD800)) { + if ((ch2 & 0xFC00) != 0xD800) { + return ch1; + } + } else if ((ch2 & 0xFC00) == 0xD800) { + return -ch2; + } +#endif ch1 = Tcl_UniCharToLower(ch1); ch2 = Tcl_UniCharToLower(ch2); if (ch1 != ch2) { @@ -1109,6 +1164,16 @@ TclUtfCasecmp( cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { +#if TCL_UTF_MAX == 4 + /* Surrogates always report higher than non-surrogates */ + if (((ch1 & 0xFC00) == 0xD800)) { + if ((ch2 & 0xFC00) != 0xD800) { + return ch1; + } + } else if ((ch2 & 0xFC00) == 0xD800) { + return -ch2; + } +#endif ch1 = Tcl_UniCharToLower(ch1); ch2 = Tcl_UniCharToLower(ch2); if (ch1 != ch2) { @@ -1169,8 +1234,9 @@ Tcl_UniCharToLower( int ch) /* Unicode character to convert. */ { int info = GetUniCharInfo(ch); + int mode = GetCaseType(info); - if (GetCaseType(info) & 0x02) { + if ((mode & 0x02) && (mode != 0x7)) { ch += GetDelta(info); } return (Tcl_UniChar) ch; @@ -1204,7 +1270,9 @@ Tcl_UniCharToTitle( * Subtract or add one depending on the original case. */ - ch += ((mode & 0x4) ? -1 : 1); + if (mode != 0x7) { + ch += ((mode & 0x4) ? -1 : 1); + } } else if (mode == 0x4) { ch -= GetDelta(info); } diff --git a/tools/uniClass.tcl b/tools/uniClass.tcl index 9b4819d..86ec931 100644 --- a/tools/uniClass.tcl +++ b/tools/uniClass.tcl @@ -20,7 +20,7 @@ proc emitRange {first last} { set extranges 1 set numranges 0 set ranges [string trimright $ranges " \n\r\t,"] - append ranges "\n#if TCL_UTF_MAX > 4\n ," + append ranges "\n#if CHRBITS > 16\n ," } append ranges [format "{0x%x, 0x%x}, " \ $first $last] @@ -33,7 +33,7 @@ proc emitRange {first last} { set extchars 1 set numchars 0 set chars [string trimright $chars " \n\r\t,"] - append chars "\n#if TCL_UTF_MAX > 4\n ," + append chars "\n#if CHRBITS > 16\n ," } append chars [format "0x%x, " $first] incr numchars @@ -66,7 +66,7 @@ proc genTable {type} { for {set i 0} {$i <= 0x10ffff} {incr i} { if {$i == 0xd800} { # Skip surrogates - set i 0xdc00 + set i 0xe000 } if {[string is $type [format %c $i]]} { if {$i == ($last + 1)} { diff --git a/tools/uniParse.tcl b/tools/uniParse.tcl index e33b3c7..c712e62 100644 --- a/tools/uniParse.tcl +++ b/tools/uniParse.tcl @@ -272,6 +272,7 @@ static const unsigned char groupMap\[\] = {" * 100 = subtract delta for title/upper * 101 = sub delta for upper, sub 1 for title * 110 = sub delta for upper, add delta for lower + * 111 = subtract delta for upper * * Bits 8-31 Case delta: delta for case conversions. This should be the * highest field so we can easily sign extend. @@ -309,10 +310,14 @@ static const int groups\[\] = {" } } } elseif {$toupper} { - # subtract delta for upper, add delta for lower - set case 6 set delta $toupper - if {$tolower != $toupper} { + if {$tolower == $toupper} { + # subtract delta for upper, add delta for lower + set case 6 + } elseif {!$tolower} { + # subtract delta for upper + set case 7 + } else { error "New case conversion type needed: $toupper $tolower $totitle" } } elseif {$tolower} { @@ -396,7 +401,11 @@ enum { * Unicode character tables. */ -#define GetUniCharInfo(ch) (groups\[groupMap\[pageMap\[((ch) & 0xffff) >> OFFSET_BITS\] | ((ch) & ((1 << OFFSET_BITS)-1))\]\]) +#if TCL_UTF_MAX > 3 +# define GetUniCharInfo(ch) (groups\[groupMap\[pageMap\[((ch) & 0x1fffff) >> OFFSET_BITS\] | ((ch) & ((1 << OFFSET_BITS)-1))\]\]) +#else +# define GetUniCharInfo(ch) (groups\[groupMap\[pageMap\[((ch) & 0xffff) >> OFFSET_BITS\] | ((ch) & ((1 << OFFSET_BITS)-1))\]\]) +#endif " close $f |