summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--generic/regc_locale.c47
-rw-r--r--generic/tclUniData.c40
-rw-r--r--generic/tclUtf.c98
-rw-r--r--tools/uniClass.tcl6
-rw-r--r--tools/uniParse.tcl17
5 files changed, 138 insertions, 70 deletions
diff --git a/generic/regc_locale.c b/generic/regc_locale.c
index 32adee8..4435557 100644
--- a/generic/regc_locale.c
+++ b/generic/regc_locale.c
@@ -193,17 +193,13 @@ static const crange alphaRangeTable[] = {
{0xaab9, 0xaabd}, {0xaadb, 0xaadd}, {0xaae0, 0xaaea}, {0xaaf2, 0xaaf4},
{0xab01, 0xab06}, {0xab09, 0xab0e}, {0xab11, 0xab16}, {0xab20, 0xab26},
{0xab28, 0xab2e}, {0xab30, 0xab5a}, {0xab5c, 0xab65}, {0xab70, 0xabe2},
- {0xac00, 0xd7a3}, {0xd7b0, 0xd7c6}, {0xd7cb, 0xd7fb}, {0xdc00, 0xdc3e},
- {0xdc40, 0xdc7e}, {0xdc80, 0xdcbe}, {0xdcc0, 0xdcfe}, {0xdd00, 0xdd3e},
- {0xdd40, 0xdd7e}, {0xdd80, 0xddbe}, {0xddc0, 0xddfe}, {0xde00, 0xde3e},
- {0xde40, 0xde7e}, {0xde80, 0xdebe}, {0xdec0, 0xdefe}, {0xdf00, 0xdf3e},
- {0xdf40, 0xdf7e}, {0xdf80, 0xdfbe}, {0xdfc0, 0xdffe}, {0xf900, 0xfa6d},
+ {0xac00, 0xd7a3}, {0xd7b0, 0xd7c6}, {0xd7cb, 0xd7fb}, {0xf900, 0xfa6d},
{0xfa70, 0xfad9}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1f, 0xfb28},
{0xfb2a, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfd3d},
{0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, {0xfe70, 0xfe74},
{0xfe76, 0xfefc}, {0xff21, 0xff3a}, {0xff41, 0xff5a}, {0xff66, 0xffbe},
{0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc}
-#if TCL_UTF_MAX > 4
+#if CHRBITS > 16
,{0x10000, 0x1000b}, {0x1000d, 0x10026}, {0x10028, 0x1003a}, {0x1003f, 0x1004d},
{0x10050, 0x1005d}, {0x10080, 0x100fa}, {0x10280, 0x1029c}, {0x102a0, 0x102d0},
{0x10300, 0x1031f}, {0x1032d, 0x10340}, {0x10342, 0x10349}, {0x10350, 0x10375},
@@ -266,7 +262,7 @@ static const chr alphaCharTable[] = {
0x2cf2, 0x2cf3, 0x2d27, 0x2d2d, 0x2d6f, 0x2e2f, 0x3005, 0x3006, 0x303b,
0x303c, 0xa62a, 0xa62b, 0xa8fb, 0xa8fd, 0xa9cf, 0xaa7a, 0xaab1, 0xaab5,
0xaab6, 0xaac0, 0xaac2, 0xfb1d, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44
-#if TCL_UTF_MAX > 4
+#if CHRBITS > 16
,0x1003c, 0x1003d, 0x10808, 0x10837, 0x10838, 0x1083c, 0x108f4, 0x108f5, 0x109be,
0x109bf, 0x10a00, 0x11176, 0x111da, 0x111dc, 0x11288, 0x1130f, 0x11310, 0x11332,
0x11333, 0x1133d, 0x11350, 0x114c4, 0x114c5, 0x114c7, 0x11644, 0x118ff, 0x11a00,
@@ -288,7 +284,7 @@ static const crange controlRangeTable[] = {
{0x0, 0x1f}, {0x7f, 0x9f}, {0x600, 0x605}, {0x200b, 0x200f},
{0x202a, 0x202e}, {0x2060, 0x2064}, {0x2066, 0x206f}, {0xe000, 0xf8ff},
{0xfff9, 0xfffb}
-#if TCL_UTF_MAX > 4
+#if CHRBITS > 16
,{0x1bca0, 0x1bca3}, {0x1d173, 0x1d17a}, {0xe0020, 0xe007f}, {0xf0000, 0xffffd},
{0x100000, 0x10fffd}
#endif
@@ -298,7 +294,7 @@ static const crange controlRangeTable[] = {
static const chr controlCharTable[] = {
0xad, 0x61c, 0x6dd, 0x70f, 0x8e2, 0x180e, 0xfeff
-#if TCL_UTF_MAX > 4
+#if CHRBITS > 16
,0x110bd, 0xe0001
#endif
};
@@ -320,7 +316,7 @@ static const crange digitRangeTable[] = {
{0x1c50, 0x1c59}, {0xa620, 0xa629}, {0xa8d0, 0xa8d9}, {0xa900, 0xa909},
{0xa9d0, 0xa9d9}, {0xa9f0, 0xa9f9}, {0xaa50, 0xaa59}, {0xabf0, 0xabf9},
{0xff10, 0xff19}
-#if TCL_UTF_MAX > 4
+#if CHRBITS > 16
,{0x104a0, 0x104a9}, {0x11066, 0x1106f}, {0x110f0, 0x110f9}, {0x11136, 0x1113f},
{0x111d0, 0x111d9}, {0x112f0, 0x112f9}, {0x11450, 0x11459}, {0x114d0, 0x114d9},
{0x11650, 0x11659}, {0x116c0, 0x116c9}, {0x11730, 0x11739}, {0x118e0, 0x118e9},
@@ -354,7 +350,7 @@ static const crange punctRangeTable[] = {
{0xaa5c, 0xaa5f}, {0xfe10, 0xfe19}, {0xfe30, 0xfe52}, {0xfe54, 0xfe61},
{0xff01, 0xff03}, {0xff05, 0xff0a}, {0xff0c, 0xff0f}, {0xff3b, 0xff3d},
{0xff5f, 0xff65}
-#if TCL_UTF_MAX > 4
+#if CHRBITS > 16
,{0x10100, 0x10102}, {0x10a50, 0x10a58}, {0x10af0, 0x10af6}, {0x10b39, 0x10b3f},
{0x10b99, 0x10b9c}, {0x11047, 0x1104d}, {0x110be, 0x110c1}, {0x11140, 0x11143},
{0x111c5, 0x111c9}, {0x111dd, 0x111df}, {0x11238, 0x1123d}, {0x1144b, 0x1144f},
@@ -379,7 +375,7 @@ static const chr punctCharTable[] = {
0xa67e, 0xa8ce, 0xa8cf, 0xa8fc, 0xa92e, 0xa92f, 0xa95f, 0xa9de, 0xa9df,
0xaade, 0xaadf, 0xaaf0, 0xaaf1, 0xabeb, 0xfd3e, 0xfd3f, 0xfe63, 0xfe68,
0xfe6a, 0xfe6b, 0xff1a, 0xff1b, 0xff1f, 0xff20, 0xff3f, 0xff5b, 0xff5d
-#if TCL_UTF_MAX > 4
+#if CHRBITS > 16
,0x1039f, 0x103d0, 0x1056f, 0x10857, 0x1091f, 0x1093f, 0x10a7f, 0x110bb, 0x110bc,
0x11174, 0x11175, 0x111cd, 0x111db, 0x112a9, 0x1145b, 0x1145d, 0x114c6, 0x11c70,
0x11c71, 0x16a6e, 0x16a6f, 0x16af5, 0x16b44, 0x1bc9f, 0x1e95e, 0x1e95f
@@ -423,7 +419,7 @@ static const crange lowerRangeTable[] = {
{0x2d00, 0x2d25}, {0xa72f, 0xa731}, {0xa771, 0xa778}, {0xa793, 0xa795},
{0xab30, 0xab5a}, {0xab60, 0xab65}, {0xab70, 0xabbf}, {0xfb00, 0xfb06},
{0xfb13, 0xfb17}, {0xff41, 0xff5a}
-#if TCL_UTF_MAX > 4
+#if CHRBITS > 16
,{0x10428, 0x1044f}, {0x104d8, 0x104fb}, {0x10cc0, 0x10cf2}, {0x118c0, 0x118df},
{0x1d41a, 0x1d433}, {0x1d44e, 0x1d454}, {0x1d456, 0x1d467}, {0x1d482, 0x1d49b},
{0x1d4b6, 0x1d4b9}, {0x1d4bd, 0x1d4c3}, {0x1d4c5, 0x1d4cf}, {0x1d4ea, 0x1d503},
@@ -502,7 +498,7 @@ static const chr lowerCharTable[] = {
0xa77f, 0xa781, 0xa783, 0xa785, 0xa787, 0xa78c, 0xa78e, 0xa791, 0xa797,
0xa799, 0xa79b, 0xa79d, 0xa79f, 0xa7a1, 0xa7a3, 0xa7a5, 0xa7a7, 0xa7a9,
0xa7b5, 0xa7b7, 0xa7fa
-#if TCL_UTF_MAX > 4
+#if CHRBITS > 16
,0x1d4bb, 0x1d7cb
#endif
};
@@ -524,7 +520,7 @@ static const crange upperRangeTable[] = {
{0x210b, 0x210d}, {0x2110, 0x2112}, {0x2119, 0x211d}, {0x212a, 0x212d},
{0x2130, 0x2133}, {0x2c00, 0x2c2e}, {0x2c62, 0x2c64}, {0x2c6d, 0x2c70},
{0x2c7e, 0x2c80}, {0xa7aa, 0xa7ae}, {0xa7b0, 0xa7b4}, {0xff21, 0xff3a}
-#if TCL_UTF_MAX > 4
+#if CHRBITS > 16
,{0x10400, 0x10427}, {0x104b0, 0x104d3}, {0x10c80, 0x10cb2}, {0x118a0, 0x118bf},
{0x1d400, 0x1d419}, {0x1d434, 0x1d44d}, {0x1d468, 0x1d481}, {0x1d4a9, 0x1d4ac},
{0x1d4ae, 0x1d4b5}, {0x1d4d0, 0x1d4e9}, {0x1d507, 0x1d50a}, {0x1d50d, 0x1d514},
@@ -601,7 +597,7 @@ static const chr upperCharTable[] = {
0xa768, 0xa76a, 0xa76c, 0xa76e, 0xa779, 0xa77b, 0xa77d, 0xa77e, 0xa780,
0xa782, 0xa784, 0xa786, 0xa78b, 0xa78d, 0xa790, 0xa792, 0xa796, 0xa798,
0xa79a, 0xa79c, 0xa79e, 0xa7a0, 0xa7a2, 0xa7a4, 0xa7a6, 0xa7a8, 0xa7b6
-#if TCL_UTF_MAX > 4
+#if CHRBITS > 16
,0x1d49c, 0x1d49e, 0x1d49f, 0x1d4a2, 0x1d4a5, 0x1d4a6, 0x1d504, 0x1d505, 0x1d538,
0x1d539, 0x1d546, 0x1d7ca
#endif
@@ -684,17 +680,13 @@ static const crange graphRangeTable[] = {
{0xaadb, 0xaaf6}, {0xab01, 0xab06}, {0xab09, 0xab0e}, {0xab11, 0xab16},
{0xab20, 0xab26}, {0xab28, 0xab2e}, {0xab30, 0xab65}, {0xab70, 0xabed},
{0xabf0, 0xabf9}, {0xac00, 0xd7a3}, {0xd7b0, 0xd7c6}, {0xd7cb, 0xd7fb},
- {0xdc00, 0xdc3e}, {0xdc40, 0xdc7e}, {0xdc80, 0xdcbe}, {0xdcc0, 0xdcfe},
- {0xdd00, 0xdd3e}, {0xdd40, 0xdd7e}, {0xdd80, 0xddbe}, {0xddc0, 0xddfe},
- {0xde00, 0xde3e}, {0xde40, 0xde7e}, {0xde80, 0xdebe}, {0xdec0, 0xdefe},
- {0xdf00, 0xdf3e}, {0xdf40, 0xdf7e}, {0xdf80, 0xdfbe}, {0xdfc0, 0xdffe},
{0xf900, 0xfa6d}, {0xfa70, 0xfad9}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17},
{0xfb1d, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbc1}, {0xfbd3, 0xfd3f},
{0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfd}, {0xfe00, 0xfe19},
{0xfe20, 0xfe52}, {0xfe54, 0xfe66}, {0xfe68, 0xfe6b}, {0xfe70, 0xfe74},
{0xfe76, 0xfefc}, {0xff01, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf},
{0xffd2, 0xffd7}, {0xffda, 0xffdc}, {0xffe0, 0xffe6}, {0xffe8, 0xffee}
-#if TCL_UTF_MAX > 4
+#if CHRBITS > 16
,{0x10000, 0x1000b}, {0x1000d, 0x10026}, {0x10028, 0x1003a}, {0x1003f, 0x1004d},
{0x10050, 0x1005d}, {0x10080, 0x100fa}, {0x10100, 0x10102}, {0x10107, 0x10133},
{0x10137, 0x1018e}, {0x10190, 0x1019b}, {0x101d0, 0x101fd}, {0x10280, 0x1029c},
@@ -747,12 +739,13 @@ static const crange graphRangeTable[] = {
{0x1f030, 0x1f093}, {0x1f0a0, 0x1f0ae}, {0x1f0b1, 0x1f0bf}, {0x1f0c1, 0x1f0cf},
{0x1f0d1, 0x1f0f5}, {0x1f100, 0x1f10c}, {0x1f110, 0x1f12e}, {0x1f130, 0x1f16b},
{0x1f170, 0x1f1ac}, {0x1f1e6, 0x1f202}, {0x1f210, 0x1f23b}, {0x1f240, 0x1f248},
- {0x1f260, 0x1f265}, {0x1f300, 0x1f6d4}, {0x1f6e0, 0x1f6ec}, {0x1f6f0, 0x1f6f8},
+ {0x1f260, 0x1f265}, {0x1f300, 0x1f6d4}, {0x1f6e0, 0x1f6ec}, {0x1f6f0, 0x1f6f9},
{0x1f700, 0x1f773}, {0x1f780, 0x1f7d4}, {0x1f800, 0x1f80b}, {0x1f810, 0x1f847},
{0x1f850, 0x1f859}, {0x1f860, 0x1f887}, {0x1f890, 0x1f8ad}, {0x1f900, 0x1f90b},
- {0x1f910, 0x1f93e}, {0x1f940, 0x1f94c}, {0x1f950, 0x1f96b}, {0x1f980, 0x1f997},
- {0x1f9d0, 0x1f9e6}, {0x20000, 0x2a6d6}, {0x2a700, 0x2b734}, {0x2b740, 0x2b81d},
- {0x2b820, 0x2cea1}, {0x2ceb0, 0x2ebe0}, {0x2f800, 0x2fa1d}, {0xe0100, 0xe01ef}
+ {0x1f910, 0x1f93e}, {0x1f940, 0x1f970}, {0x1f973, 0x1f976}, {0x1f97c, 0x1f9a2},
+ {0x1f9b0, 0x1f9b9}, {0x1f9c0, 0x1f9c2}, {0x1f9d0, 0x1f9ff}, {0x1fa60, 0x1fa6d},
+ {0x20000, 0x2a6d6}, {0x2a700, 0x2b734}, {0x2b740, 0x2b81d}, {0x2b820, 0x2cea1},
+ {0x2ceb0, 0x2ebe0}, {0x2f800, 0x2fa1d}, {0xe0100, 0xe01ef}
#endif
};
@@ -770,7 +763,7 @@ static const chr graphCharTable[] = {
0xec6, 0x10c7, 0x10cd, 0x1258, 0x12c0, 0x1772, 0x1773, 0x1940, 0x1f59,
0x1f5b, 0x1f5d, 0x2070, 0x2071, 0x2d27, 0x2d2d, 0x2d6f, 0x2d70, 0xfb3e,
0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfffc, 0xfffd
-#if TCL_UTF_MAX > 4
+#if CHRBITS > 16
,0x1003c, 0x1003d, 0x101a0, 0x1056f, 0x10808, 0x10837, 0x10838, 0x1083c, 0x108f4,
0x108f5, 0x1093f, 0x10a05, 0x10a06, 0x11288, 0x1130f, 0x11310, 0x11332, 0x11333,
0x11347, 0x11348, 0x11350, 0x11357, 0x1145b, 0x1145d, 0x118ff, 0x11d08, 0x11d09,
@@ -778,7 +771,7 @@ static const chr graphCharTable[] = {
0x1d4a2, 0x1d4a5, 0x1d4a6, 0x1d4bb, 0x1d546, 0x1e023, 0x1e024, 0x1e95e, 0x1e95f,
0x1ee21, 0x1ee22, 0x1ee24, 0x1ee27, 0x1ee39, 0x1ee3b, 0x1ee42, 0x1ee47, 0x1ee49,
0x1ee4b, 0x1ee51, 0x1ee52, 0x1ee54, 0x1ee57, 0x1ee59, 0x1ee5b, 0x1ee5d, 0x1ee5f,
- 0x1ee61, 0x1ee62, 0x1ee64, 0x1ee7e, 0x1eef0, 0x1eef1, 0x1f250, 0x1f251, 0x1f9c0
+ 0x1ee61, 0x1ee62, 0x1ee64, 0x1ee7e, 0x1eef0, 0x1eef1, 0x1f250, 0x1f251, 0x1f97a
#endif
};
diff --git a/generic/tclUniData.c b/generic/tclUniData.c
index 9f05230..834fcc4 100644
--- a/generic/tclUniData.c
+++ b/generic/tclUniData.c
@@ -367,11 +367,11 @@ static const unsigned short pageMap[] = {
4928, 4928, 4928, 4928, 4928, 4928, 4928, 4928, 4928, 4928, 4928, 4928,
4928, 4928, 4928, 4928, 4928, 4928, 16864, 16896, 4928, 4928, 4928,
8032, 4928, 4928, 16864, 1824, 16480, 4928, 16928, 4928, 16960, 16992,
- 1824, 1824, 16480, 8416, 17024, 17056, 17088, 1824, 17120, 6784, 1824,
+ 1824, 1824, 16480, 8416, 4928, 17024, 4928, 17056, 16704, 4928, 1824,
+ 1824, 1824, 16992, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824,
1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824,
1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824,
1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824,
- 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
@@ -483,7 +483,7 @@ static const unsigned short pageMap[] = {
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
- 1344, 7840, 1824, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
+ 1344, 1344, 7840, 1824, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
@@ -494,7 +494,7 @@ static const unsigned short pageMap[] = {
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
- 17152, 1344, 1344, 1344, 1344, 1344, 1344, 11360, 1344, 1344, 1344,
+ 1344, 17088, 1344, 1344, 1344, 1344, 1344, 1344, 11360, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
@@ -509,7 +509,7 @@ static const unsigned short pageMap[] = {
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
- 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 17184, 1344,
+ 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 17120,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
@@ -529,7 +529,7 @@ static const unsigned short pageMap[] = {
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344,
- 1344, 1344, 1344, 1344, 17216, 1824, 1824, 1824, 1824, 1824, 1824,
+ 1344, 1344, 1344, 1344, 1344, 17152, 1824, 1824, 1824, 1824, 1824,
1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824,
1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824,
1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824,
@@ -537,8 +537,9 @@ static const unsigned short pageMap[] = {
1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824,
1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824,
1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1824,
- 1824, 1824, 1824, 1824, 1824, 1824, 1344, 1344, 1344, 1344, 1344, 1344,
- 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 11360
+ 1824, 1824, 1824, 1824, 1824, 1824, 1824, 1344, 1344, 1344, 1344, 1344,
+ 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 11360
+
#endif /* TCL_UTF_MAX > 3 */
};
@@ -1502,24 +1503,20 @@ static const unsigned char groupMap[] = {
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0,
+ 14, 14, 14, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0,
0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0,
0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15,
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0
+ 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 14, 14, 14, 14, 0, 0, 0, 14,
+ 0, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* TCL_UTF_MAX > 3 */
};
@@ -1535,6 +1532,7 @@ static const unsigned char groupMap[] = {
* 100 = subtract delta for title/upper
* 101 = sub delta for upper, sub 1 for title
* 110 = sub delta for upper, add delta for lower
+ * 111 = subtract delta for upper
*
* Bits 8-31 Case delta: delta for case conversions. This should be the
* highest field so we can easily sign extend.
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index a405367..656a6f0 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -158,15 +158,22 @@ Tcl_UniCharToUtf(
if ((ch & 0xF800) == 0xD800) {
if (ch & 0x0400) {
/* Low surrogate */
- buf[3] = (char) ((ch | 0x80) & 0xBF);
- buf[2] |= (char) (((ch >> 6) | 0x80) & 0x8F);
- return 4;
+ if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80)
+ && ((buf[2] & 0xCF) == 0)) {
+ /* Previous Tcl_UniChar was a High surrogate, so combine */
+ buf[3] = (char) ((ch & 0x3F) | 0x80);
+ buf[2] |= (char) (((ch >> 6) & 0x0F) | 0x80);
+ return 4;
+ }
+ /* Previous Tcl_UniChar was not a High surrogate, so just output */
} else {
/* High surrogate */
ch += 0x40;
- buf[2] = (char) (((ch << 4) | 0x80) & 0xB0);
- buf[1] = (char) (((ch >> 2) | 0x80) & 0xBF);
- buf[0] = (char) (((ch >> 8) | 0xF0) & 0xF7);
+ /* Fill buffer with specific 3-byte (invalid) byte combination,
+ so following Low surrogate can recognize it and combine */
+ buf[2] = (char) ((ch << 4) & 0x30);
+ buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80);
+ buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0);
return 0;
}
}
@@ -259,6 +266,15 @@ Tcl_UniCharToUtfDString(
* Tcl_UtfCharComplete() before calling this routine to ensure that
* enough bytes remain in the string.
*
+ * If TCL_UTF_MAX == 4, special handling of Surrogate pairs is done:
+ * For any UTF-8 string containing a character outside of the BMP, the
+ * first call to this function will fill *chPtr with the high surrogate
+ * and generate a return value of 0. Calling Tcl_UtfToUniChar again
+ * will produce the low surrogate and a return value of 4. Because *chPtr
+ * is used to remember whether the high surrogate is already produced, it
+ * is recommended to initialize the variable it points to as 0 before
+ * the first call to Tcl_UtfToUniChar is done.
+ *
* Results:
* *chPtr is filled with the Tcl_UniChar, and the return value is the
* number of bytes from the UTF-8 string that were consumed.
@@ -315,7 +331,7 @@ Tcl_UtfToUniChar(
*chPtr = (Tcl_UniChar) (((byte & 0x0F) << 12)
| ((src[1] & 0x3F) << 6) | (src[2] & 0x3F));
- if (*chPtr > 0x7ff) {
+ if (*chPtr > 0x7FF) {
return 3;
}
}
@@ -331,12 +347,30 @@ Tcl_UtfToUniChar(
/*
* Four-byte-character lead byte followed by three trail bytes.
*/
-
+#if TCL_UTF_MAX == 4
+ Tcl_UniChar surrogate;
+
+ byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
+ | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000;
+ surrogate = (Tcl_UniChar) (0xD800 + (byte >> 10));
+ if (byte & 0x100000) {
+ /* out of range, < 0x10000 or > 0x10ffff */
+ } else if (*chPtr != surrogate) {
+ /* produce high surrogate, but don't advance source pointer */
+ *chPtr = surrogate;
+ return 0;
+ } else {
+ /* produce low surrogate, and advance source pointer */
+ *chPtr = (Tcl_UniChar) (0xDC00 | (byte & 0x3FF));
+ return 4;
+ }
+#else
*chPtr = (Tcl_UniChar) (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
| ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
if ((unsigned)(*chPtr - 0x10000) <= 0xFFFFF) {
return 4;
}
+#endif
}
/*
@@ -433,10 +467,7 @@ Tcl_UtfCharComplete(
* a complete UTF-8 character. */
int length) /* Length of above string in bytes. */
{
- int ch;
-
- ch = *((unsigned char *) src);
- return length >= totalBytes[ch];
+ return length >= totalBytes[(unsigned char)*src];
}
/*
@@ -928,7 +959,11 @@ Tcl_UtfToTitle(
}
while (*src) {
bytes = TclUtfToUniChar(src, &ch);
- lowChar = Tcl_UniCharToLower(ch);
+ lowChar = ch;
+ /* Special exception for Georgian Asomtavruli chars, no titlecase. */
+ if ((unsigned)(lowChar - 0x1C90) >= 0x30) {
+ lowChar = Tcl_UniCharToLower(lowChar);
+ }
if (bytes < UtfCount(lowChar)) {
memcpy(dst, src, (size_t) bytes);
@@ -1030,6 +1065,16 @@ Tcl_UtfNcmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
if (ch1 != ch2) {
+#if TCL_UTF_MAX == 4
+ /* Surrogates always report higher than non-surrogates */
+ if (((ch1 & 0xFC00) == 0xD800)) {
+ if ((ch2 & 0xFC00) != 0xD800) {
+ return ch1;
+ }
+ } else if ((ch2 & 0xFC00) == 0xD800) {
+ return -ch2;
+ }
+#endif
return (ch1 - ch2);
}
}
@@ -1070,6 +1115,16 @@ Tcl_UtfNcasecmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
if (ch1 != ch2) {
+#if TCL_UTF_MAX == 4
+ /* Surrogates always report higher than non-surrogates */
+ if (((ch1 & 0xFC00) == 0xD800)) {
+ if ((ch2 & 0xFC00) != 0xD800) {
+ return ch1;
+ }
+ } else if ((ch2 & 0xFC00) == 0xD800) {
+ return -ch2;
+ }
+#endif
ch1 = Tcl_UniCharToLower(ch1);
ch2 = Tcl_UniCharToLower(ch2);
if (ch1 != ch2) {
@@ -1109,6 +1164,16 @@ TclUtfCasecmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
if (ch1 != ch2) {
+#if TCL_UTF_MAX == 4
+ /* Surrogates always report higher than non-surrogates */
+ if (((ch1 & 0xFC00) == 0xD800)) {
+ if ((ch2 & 0xFC00) != 0xD800) {
+ return ch1;
+ }
+ } else if ((ch2 & 0xFC00) == 0xD800) {
+ return -ch2;
+ }
+#endif
ch1 = Tcl_UniCharToLower(ch1);
ch2 = Tcl_UniCharToLower(ch2);
if (ch1 != ch2) {
@@ -1169,8 +1234,9 @@ Tcl_UniCharToLower(
int ch) /* Unicode character to convert. */
{
int info = GetUniCharInfo(ch);
+ int mode = GetCaseType(info);
- if (GetCaseType(info) & 0x02) {
+ if ((mode & 0x02) && (mode != 0x7)) {
ch += GetDelta(info);
}
return (Tcl_UniChar) ch;
@@ -1204,7 +1270,9 @@ Tcl_UniCharToTitle(
* Subtract or add one depending on the original case.
*/
- ch += ((mode & 0x4) ? -1 : 1);
+ if (mode != 0x7) {
+ ch += ((mode & 0x4) ? -1 : 1);
+ }
} else if (mode == 0x4) {
ch -= GetDelta(info);
}
diff --git a/tools/uniClass.tcl b/tools/uniClass.tcl
index 9b4819d..86ec931 100644
--- a/tools/uniClass.tcl
+++ b/tools/uniClass.tcl
@@ -20,7 +20,7 @@ proc emitRange {first last} {
set extranges 1
set numranges 0
set ranges [string trimright $ranges " \n\r\t,"]
- append ranges "\n#if TCL_UTF_MAX > 4\n ,"
+ append ranges "\n#if CHRBITS > 16\n ,"
}
append ranges [format "{0x%x, 0x%x}, " \
$first $last]
@@ -33,7 +33,7 @@ proc emitRange {first last} {
set extchars 1
set numchars 0
set chars [string trimright $chars " \n\r\t,"]
- append chars "\n#if TCL_UTF_MAX > 4\n ,"
+ append chars "\n#if CHRBITS > 16\n ,"
}
append chars [format "0x%x, " $first]
incr numchars
@@ -66,7 +66,7 @@ proc genTable {type} {
for {set i 0} {$i <= 0x10ffff} {incr i} {
if {$i == 0xd800} {
# Skip surrogates
- set i 0xdc00
+ set i 0xe000
}
if {[string is $type [format %c $i]]} {
if {$i == ($last + 1)} {
diff --git a/tools/uniParse.tcl b/tools/uniParse.tcl
index e33b3c7..c712e62 100644
--- a/tools/uniParse.tcl
+++ b/tools/uniParse.tcl
@@ -272,6 +272,7 @@ static const unsigned char groupMap\[\] = {"
* 100 = subtract delta for title/upper
* 101 = sub delta for upper, sub 1 for title
* 110 = sub delta for upper, add delta for lower
+ * 111 = subtract delta for upper
*
* Bits 8-31 Case delta: delta for case conversions. This should be the
* highest field so we can easily sign extend.
@@ -309,10 +310,14 @@ static const int groups\[\] = {"
}
}
} elseif {$toupper} {
- # subtract delta for upper, add delta for lower
- set case 6
set delta $toupper
- if {$tolower != $toupper} {
+ if {$tolower == $toupper} {
+ # subtract delta for upper, add delta for lower
+ set case 6
+ } elseif {!$tolower} {
+ # subtract delta for upper
+ set case 7
+ } else {
error "New case conversion type needed: $toupper $tolower $totitle"
}
} elseif {$tolower} {
@@ -396,7 +401,11 @@ enum {
* Unicode character tables.
*/
-#define GetUniCharInfo(ch) (groups\[groupMap\[pageMap\[((ch) & 0xffff) >> OFFSET_BITS\] | ((ch) & ((1 << OFFSET_BITS)-1))\]\])
+#if TCL_UTF_MAX > 3
+# define GetUniCharInfo(ch) (groups\[groupMap\[pageMap\[((ch) & 0x1fffff) >> OFFSET_BITS\] | ((ch) & ((1 << OFFSET_BITS)-1))\]\])
+#else
+# define GetUniCharInfo(ch) (groups\[groupMap\[pageMap\[((ch) & 0xffff) >> OFFSET_BITS\] | ((ch) & ((1 << OFFSET_BITS)-1))\]\])
+#endif
"
close $f