From a7072a53147e689d2c265a66e87a2f6f74c86014 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 14 Jan 2012 09:30:32 +0000 Subject: rfe-3473670: Various Unicode-related speedups/robustness --- generic/tclUniData.c | 351 ++++++++++++++++++++++++++++----------------------- generic/tclUtf.c | 17 +-- tools/uniParse.tcl | 38 +++--- 3 files changed, 219 insertions(+), 187 deletions(-) diff --git a/generic/tclUniData.c b/generic/tclUniData.c index 5b735a4..bad556f 100644 --- a/generic/tclUniData.c +++ b/generic/tclUniData.c @@ -24,134 +24,177 @@ */ static CONST unsigned short pageMap[] = { - 0, 1, 2, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 7, 15, 16, 17, - 18, 19, 20, 21, 22, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 7, 32, - 7, 33, 7, 7, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 42, 42, 45, - 46, 47, 48, 49, 42, 42, 50, 51, 52, 53, 54, 55, 56, 56, 56, 56, 56, - 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 61, - 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 84, 88, - 89, 90, 91, 92, 93, 94, 95, 96, 97, 56, 98, 99, 100, 56, 101, 102, - 103, 104, 105, 106, 107, 56, 42, 108, 109, 110, 111, 112, 113, 114, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 115, 42, 116, 117, 118, 42, - 119, 42, 120, 121, 122, 42, 42, 123, 124, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 125, 126, 42, 42, 127, - 128, 129, 130, 131, 42, 132, 133, 134, 135, 42, 136, 137, 42, 138, - 42, 139, 140, 141, 142, 143, 42, 144, 145, 146, 147, 42, 148, 149, - 150, 151, 56, 56, 152, 153, 154, 155, 156, 157, 42, 158, 42, 159, 160, - 161, 56, 56, 162, 163, 164, 165, 166, 167, 168, 166, 22, 169, 7, 7, - 7, 7, 170, 7, 7, 7, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, - 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, - 195, 195, 195, 195, 195, 195, 195, 195, 196, 197, 146, 198, 199, 200, - 201, 202, 146, 203, 204, 205, 206, 207, 208, 209, 146, 146, 146, 146, - 146, 210, 211, 212, 146, 146, 146, 213, 146, 146, 146, 146, 214, 146, - 146, 215, 216, 146, 217, 218, 146, 146, 146, 146, 146, 146, 146, 146, - 195, 195, 195, 195, 219, 195, 220, 221, 195, 195, 195, 195, 195, 195, - 195, 195, 146, 222, 223, 56, 56, 56, 56, 56, 224, 225, 226, 227, 7, - 7, 7, 228, 229, 230, 42, 231, 232, 233, 233, 22, 234, 235, 56, 56, - 236, 146, 146, 237, 146, 146, 146, 146, 146, 146, 238, 239, 240, 241, - 95, 42, 242, 124, 42, 243, 244, 245, 42, 42, 246, 247, 146, 248, 249, - 250, 251, 146, 250, 251, 146, 249, 146, 146, 146, 146, 146, 146, 146, - 146, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 139, 146, 146, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 252, 56, 253, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 254, 146, 255, 161, 42, 42, 42, 42, 42, 42, 42, 42, 256, 257, 7, - 258, 259, 42, 42, 260, 261, 262, 7, 263, 264, 265, 56, 266, 267, 268, - 42, 269, 270, 271, 272, 273, 51, 274, 275, 140, 57, 276, 277, 56, 42, - 278, 279, 280, 42, 281, 282, 56, 283, 284, 56, 56, 56, 56, 42, 285, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 286, 287, 288, 289, 289, 289, 289, - 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, - 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, - 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, - 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, 289, - 289, 289, 289, 289, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, - 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, - 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, - 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, - 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, - 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, - 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, - 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, - 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, - 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, - 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, - 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, - 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, - 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, 290, - 290, 290, 290, 290, 290, 290, 290, 290, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 291, 42, 291, 42, 42, 292, 56, 293, 294, 295, 42, 42, 296, - 297, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 298, 299, 42, 300, 42, - 301, 302, 303, 304, 305, 306, 42, 42, 42, 307, 308, 2, 309, 310, 311, - 312, 313, 314 + 0, 32, 64, 96, 0, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, + 448, 224, 480, 512, 544, 576, 608, 640, 672, 704, 704, 736, 768, 800, + 832, 864, 896, 928, 960, 992, 224, 1024, 224, 1056, 224, 224, 1088, + 1120, 1152, 1184, 1216, 1248, 1280, 1312, 1344, 1376, 1408, 1344, 1344, + 1440, 1472, 1504, 1536, 1568, 1344, 1344, 1600, 1632, 1664, 1696, 1728, + 1760, 1792, 1792, 1792, 1792, 1792, 1824, 1856, 1888, 1920, 1952, 1984, + 2016, 2048, 2080, 2112, 2144, 2176, 2208, 2240, 2272, 2304, 1952, 2336, + 2368, 2400, 2432, 2464, 2496, 2528, 2560, 2592, 2624, 2656, 2688, 2720, + 2752, 2784, 2688, 2816, 2848, 2880, 2912, 2944, 2976, 3008, 3040, 3072, + 3104, 1792, 3136, 3168, 3200, 1792, 3232, 3264, 3296, 3328, 3360, 3392, + 3424, 1792, 1344, 3456, 3488, 3520, 3552, 3584, 3616, 3648, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 3680, 1344, 3712, 3744, + 3776, 1344, 3808, 1344, 3840, 3872, 3904, 1344, 1344, 3936, 3968, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 4000, 4032, 1344, 1344, 4064, 4096, 4128, + 4160, 4192, 1344, 4224, 4256, 4288, 4320, 1344, 4352, 4384, 1344, 4416, + 1344, 4448, 4480, 4512, 4544, 4576, 1344, 4608, 4640, 4672, 4704, 1344, + 4736, 4768, 4800, 4832, 1792, 1792, 4864, 4896, 4928, 4960, 4992, 5024, + 1344, 5056, 1344, 5088, 5120, 5152, 1792, 1792, 5184, 5216, 5248, 5280, + 5312, 5344, 5376, 5312, 704, 5408, 224, 224, 224, 224, 5440, 224, 224, + 224, 5472, 5504, 5536, 5568, 5600, 5632, 5664, 5696, 5728, 5760, 5792, + 5824, 5856, 5888, 5920, 5952, 5984, 6016, 6048, 6080, 6112, 6144, 6176, + 6208, 6240, 6240, 6240, 6240, 6240, 6240, 6240, 6240, 6272, 6304, 4672, + 6336, 6368, 6400, 6432, 6464, 4672, 6496, 6528, 6560, 6592, 6624, 6656, + 6688, 4672, 4672, 4672, 4672, 4672, 6720, 6752, 6784, 4672, 4672, 4672, + 6816, 4672, 4672, 4672, 4672, 6848, 4672, 4672, 6880, 6912, 4672, 6944, + 6976, 4672, 4672, 4672, 4672, 4672, 4672, 4672, 4672, 6240, 6240, 6240, + 6240, 7008, 6240, 7040, 7072, 6240, 6240, 6240, 6240, 6240, 6240, 6240, + 6240, 4672, 7104, 7136, 1792, 1792, 1792, 1792, 1792, 7168, 7200, 7232, + 7264, 224, 224, 224, 7296, 7328, 7360, 1344, 7392, 7424, 7456, 7456, + 704, 7488, 7520, 1792, 1792, 7552, 4672, 4672, 7584, 4672, 4672, 4672, + 4672, 4672, 4672, 7616, 7648, 7680, 7712, 3040, 1344, 7744, 3968, 1344, + 7776, 7808, 7840, 1344, 1344, 7872, 7904, 4672, 7936, 7968, 8000, 8032, + 4672, 8000, 8032, 4672, 7968, 4672, 4672, 4672, 4672, 4672, 4672, 4672, + 4672, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 4448, 4672, 4672, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 8064, + 1792, 8096, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 8128, 4672, 8160, 5152, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 8192, 8224, 224, 8256, 8288, 1344, 1344, 8320, 8352, 8384, 224, + 8416, 8448, 8480, 1792, 8512, 8544, 8576, 1344, 8608, 8640, 8672, 8704, + 8736, 1632, 8768, 8800, 4480, 1824, 8832, 8864, 1792, 1344, 8896, 8928, + 8960, 1344, 8992, 9024, 1792, 9056, 9088, 1792, 1792, 1792, 1792, 1344, + 9120, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 9152, 9184, 9216, 9248, 9248, 9248, 9248, 9248, 9248, 9248, + 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, + 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, + 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, + 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, + 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9248, 9280, 9280, 9280, + 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, + 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, + 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, + 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, + 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, + 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, + 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, + 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, + 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, + 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, + 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, + 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, + 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, + 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, + 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, + 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, 9280, + 9280, 9280, 9280, 9280, 9280, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 9312, 1344, 9312, 1344, 1344, 9344, 1792, 9376, 9408, 9440, + 1344, 1344, 9472, 9504, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, + 1344, 1344, 9536, 9568, 1344, 9600, 1344, 9632, 9664, 9696, 9728, 9760, + 9792, 1344, 1344, 1344, 9824, 9856, 64, 9888, 9920, 9952, 9984, 10016, + 10048 }; /* @@ -739,35 +782,29 @@ static CONST unsigned char groupMap[] = { * 101 = sub delta for upper, sub 1 for title * 110 = sub delta for upper, add delta for lower * - * Bits 8-14 Reserved for future use. - * - * Bits 15-31 Case delta: delta for case conversions. This should be the + * Bits 8-31 Case delta: delta for case conversions. This should be the * highest field so we can easily sign extend. */ static CONST int groups[] = { - 0, 15, 12, 25, 27, 21, 22, 26, 20, 9, 1048641, 28, 19, 1048706, - 29, 2, 23, 16, 11, -24346494, 24, -3964798, 32833, 32898, -6520767, - 7602306, -3964863, 9830530, -6389630, 6881345, 6750273, 6717505, - 2588737, 6619201, 6651969, 6783041, -3178366, 6914113, 6848577, - -5341054, 6979649, -4259710, 7012417, 7143489, 7110721, 7176257, - 5, -1834878, 65633, 32963, 65698, 2588802, -3178431, -1834943, - -4259775, 353730625, -5341119, 353632321, -354385790, -6389695, - 2261057, 2326593, -353337214, -353238910, -353304446, 6881410, - 6750338, 6717570, 6619266, 6652034, 6783106, -1385430910, 6848642, - 6914178, -352026494, -352223102, 6979714, 7012482, -351502206, - 7143554, 2261122, 7110786, 2326658, 7176322, 4, 6, -2752378, 1245249, - 1212481, 2097217, 2064449, 1245314, 1212546, 1015938, 2097282, - 2064514, 262209, 2031746, 1867906, 1, 1540226, 1769602, 262274, - 2818178, 2621570, -229246, -1966015, 3145858, -229311, 2621505, - 7, 491585, 491650, 1572929, 1572994, 8, 238026817, 10, -1157758846, - -124977022, 1933442, -249528255, -262014, -262079, -2424702, -2817918, - -3276670, -4194174, -3669886, -4128638, -262077, -294782, -2424767, - -294845, 236093570, -2817983, -3276735, -3669951, -4194239, -4128703, - 13, 14, -246316991, -274694079, -270729151, 917569, 917634, 524362, - 524426, 852061, 852125, -352026559, -124977087, -351502271, 353730690, - 353632386, -353238975, -352223167, -353337279, -353304511, -354385855, - 238026882, -1157758911, -1385430975, 18, 17 + 0, 15, 12, 25, 27, 21, 22, 26, 20, 9, 8257, 28, 19, 8322, 29, + 2, 23, 16, 11, -190078, 24, -30846, 321, 386, -50879, 59522, -30911, + 76930, -49790, 53825, 52801, 52545, 20289, 51777, 52033, 53057, + -24702, 54081, 53569, -41598, 54593, -33150, 54849, 55873, 55617, + 56129, 5, -14206, 609, 451, 674, 20354, -24767, -14271, -33215, + 2763585, -41663, 2762817, -2768510, -49855, 17729, 18241, -2760318, + -2759550, -2760062, 53890, 52866, 52610, 51842, 52098, 53122, + -10823550, 53634, 54146, -2750078, -2751614, 54658, 54914, -2745982, + 55938, 17794, 55682, 18306, 56194, 4, 6, -21370, 9793, 9537, 16449, + 16193, 9858, 9602, 8066, 16514, 16258, 2113, 16002, 14722, 1, + 12162, 13954, 2178, 22146, 20610, -1662, -15295, 24706, -1727, + 20545, 7, 3905, 3970, 12353, 12418, 8, 1859649, 10, -9044862, + -976254, 15234, -1949375, -1918, -1983, -18814, -21886, -25470, + -32638, -28542, -32126, -1981, -2174, -18879, -2237, 1844610, + -21951, -25535, -28607, -32703, -32191, 13, 14, -1924287, -2145983, + -2115007, 7233, 7298, 4170, 4234, 6749, 6813, -2750143, -976319, + -2746047, 2763650, 2762882, -2759615, -2751679, -2760383, -2760127, + -2768575, 1859714, -9044927, -10823615, 18, 17 }; /* @@ -775,9 +812,6 @@ static CONST int groups[] = { * Unicode character. */ -#define UNICODE_CATEGORY_MASK 0x1f -#define UNICODE_OUT_OF_RANGE 0x10000u - enum { UNASSIGNED, UPPERCASE_LETTER, @@ -819,12 +853,11 @@ enum { #define GetCaseType(info) (((info) & 0xe0) >> 5) #define GetCategory(ch) (GetUniCharInfo(ch) & 0x1f) -#define GetDelta(info) (((info) > 0) ? ((info) >> 15) : (~(~((info)) >> 15))) +#define GetDelta(info) ((info) >> 8) /* * This macro extracts the information about a character from the * Unicode character tables. */ -#define GetUniCharInfo(ch) (groups[groupMap[(pageMap[(((int)(ch)) & 0xffff) >> OFFSET_BITS] << OFFSET_BITS) | ((ch) & ((1 << OFFSET_BITS)-1))]]) - +#define GetUniCharInfo(ch) (groups[groupMap[pageMap[((ch) & 0xffff) >> OFFSET_BITS] | ((ch) & ((1 << OFFSET_BITS)-1))]]) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 6b5e2e8..b6da7c3 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -1122,10 +1122,9 @@ Tcl_UniCharToUpper(ch) int info = GetUniCharInfo(ch); if (GetCaseType(info) & 0x04) { - return (Tcl_UniChar) (ch - GetDelta(info)); - } else { - return ch; + ch -= GetDelta(info); } + return (Tcl_UniChar) ch; } /* @@ -1151,10 +1150,9 @@ Tcl_UniCharToLower(ch) int info = GetUniCharInfo(ch); if (GetCaseType(info) & 0x02) { - return (Tcl_UniChar) (ch + GetDelta(info)); - } else { - return ch; + ch += GetDelta(info); } + return (Tcl_UniChar) ch; } /* @@ -1185,12 +1183,11 @@ Tcl_UniCharToTitle(ch) * Subtract or add one depending on the original case. */ - return (Tcl_UniChar) (ch + ((mode & 0x4) ? -1 : 1)); + ch += ((mode & 0x4) ? -1 : 1); } else if (mode == 0x4) { - return (Tcl_UniChar) (ch - GetDelta(info)); - } else { - return ch; + ch -= GetDelta(info); } + return (Tcl_UniChar) ch; } /* diff --git a/tools/uniParse.tcl b/tools/uniParse.tcl index af71eeb..bddee3e 100644 --- a/tools/uniParse.tcl +++ b/tools/uniParse.tcl @@ -58,8 +58,7 @@ proc uni::getValue {items index} { set categoryIndex [lsearch -exact $categories $category] if {$categoryIndex < 0} { - puts "Unexpected character category: $index($category)" - set categoryIndex 0 + error "Unexpected character category: $index($category)" } return [list $categoryIndex $toupper $tolower $totitle] @@ -79,13 +78,14 @@ proc uni::getGroup {value} { proc uni::addPage {info} { variable pMap variable pages + variable shift set pIndex [lsearch -exact $pages $info] if {$pIndex == -1} { set pIndex [llength $pages] lappend pages $info } - lappend pMap $pIndex + lappend pMap [expr {$pIndex << $shift}] return } @@ -141,15 +141,11 @@ proc uni::buildTables {data} { # Enter all assigned characters up to the current character for {set i $next} {$i <= $index} {incr i} { - # Split character index into offset and page number - set offset [expr {$i & $mask}] - set page [expr {($i >> $shift)}] - # Add the group index to the info for the current page lappend info $gIndex # If this is the last entry in the page, add the page - if {$offset == $mask} { + if {($i & $mask) == $mask} { addPage $info set info {} } @@ -262,9 +258,7 @@ static CONST unsigned char groupMap\[\] = {" * 101 = sub delta for upper, sub 1 for title * 110 = sub delta for upper, add delta for lower * - * Bits 8-14 Reserved for future use. - * - * Bits 15-31 Case delta: delta for case conversions. This should be the + * Bits 8-31 Case delta: delta for case conversions. This should be the * highest field so we can easily sign extend. */ @@ -281,19 +275,31 @@ static CONST int groups\[\] = {" # subtract delta for title or upper set case 4 set delta $toupper + if {$tolower} { + error "New case conversion type needed: $toupper $tolower $totitle" + } } elseif {$toupper} { # subtract delta for upper, subtract 1 for title set case 5 set delta $toupper + if {($totitle != 1) || $tolower} { + error "New case conversion type needed: $toupper $tolower $totitle" + } } else { # add delta for lower, add 1 for title set case 3 set delta $tolower + if {$totitle != -1} { + error "New case conversion type needed: $toupper $tolower $totitle" + } } } elseif {$toupper} { # subtract delta for upper, add delta for lower set case 6 set delta $toupper + if {$tolower != $toupper} { + error "New case conversion type needed: $toupper $tolower $totitle" + } } elseif {$tolower} { # add delta for lower set case 2 @@ -304,7 +310,7 @@ static CONST int groups\[\] = {" set delta 0 } - append line [expr {($delta << 15) | ($case << 5) | $type}] + append line [expr {($delta << 8) | ($case << 5) | $type}] if {$i != $last} { append line ", " } @@ -321,10 +327,6 @@ static CONST int groups\[\] = {" * Unicode character. */ -#define UNICODE_CATEGORY_MASK 0x1f -#define UNICODE_OUT_OF_RANGE " - puts $f [format 0x%xu $next] - puts $f " enum { UNASSIGNED, UPPERCASE_LETTER, @@ -366,14 +368,14 @@ enum { #define GetCaseType(info) (((info) & 0xe0) >> 5) #define GetCategory(ch) (GetUniCharInfo(ch) & 0x1f) -#define GetDelta(info) (((info) > 0) ? ((info) >> 15) : (~(~((info)) >> 15))) +#define GetDelta(info) ((info) >> 8) /* * This macro extracts the information about a character from the * Unicode character tables. */ -#define GetUniCharInfo(ch) (groups\[groupMap\[(pageMap\[(((int)(ch)) & 0xffff) >> OFFSET_BITS\] << OFFSET_BITS) | ((ch) & ((1 << OFFSET_BITS)-1))\]\]) +#define GetUniCharInfo(ch) (groups\[groupMap\[pageMap\[((ch) & 0xffff) >> OFFSET_BITS\] | ((ch) & ((1 << OFFSET_BITS)-1))\]\]) " close $f -- cgit v0.12