summary | refs | log | tree | commit | diff | stats
path: root/tools/uniParse.tcl
diff options
context:
space:
mode:
author jan.nijtmans <nijtmans@users.sourceforge.net> 2012-01-14 09:30:32 (GMT)
committer jan.nijtmans <nijtmans@users.sourceforge.net> 2012-01-14 09:30:32 (GMT)
commit a7072a53147e689d2c265a66e87a2f6f74c86014 (patch)
tree bec1f7fe2bc73a31209794bcb26c2a8972268b71 /tools/uniParse.tcl
parent dd2ad07b7f17937585812360682cef3aed8ceb86 (diff)
download tcl-a7072a53147e689d2c265a66e87a2f6f74c86014.zip
tcl-a7072a53147e689d2c265a66e87a2f6f74c86014.tar.gz
tcl-a7072a53147e689d2c265a66e87a2f6f74c86014.tar.bz2
rfe-3473670: Various Unicode-related speedups/robustness
Diffstat (limited to 'tools/uniParse.tcl')
-rw-r--r-- tools/uniParse.tcl 38
1 files changed, 20 insertions, 18 deletions
diff --git a/tools/uniParse.tcl b/tools/uniParse.tcl
index af71eeb..bddee3e 100644
--- a/tools/uniParse.tcl
+++ b/tools/uniParse.tcl
@@ -58,8 +58,7 @@ proc uni::getValue {items index} {
set categoryIndex [lsearch -exact $categories $category]
if {$categoryIndex < 0} {
- puts "Unexpected character category: $index($category)"
- set categoryIndex 0
+ error "Unexpected character category: $index($category)"
}
return [list $categoryIndex $toupper $tolower $totitle]
@@ -79,13 +78,14 @@ proc uni::getGroup {value} {
proc uni::addPage {info} {
variable pMap
variable pages
+ variable shift
set pIndex [lsearch -exact $pages $info]
if {$pIndex == -1} {
set pIndex [llength $pages]
lappend pages $info
}
- lappend pMap $pIndex
+ lappend pMap [expr {$pIndex << $shift}]
return
}
@@ -141,15 +141,11 @@ proc uni::buildTables {data} {
# Enter all assigned characters up to the current character
for {set i $next} {$i <= $index} {incr i} {
- # Split character index into offset and page number
- set offset [expr {$i & $mask}]
- set page [expr {($i >> $shift)}]
-
# Add the group index to the info for the current page
lappend info $gIndex
# If this is the last entry in the page, add the page
- if {$offset == $mask} {
+ if {($i & $mask) == $mask} {
addPage $info
set info {}
}
@@ -262,9 +258,7 @@ static CONST unsigned char groupMap\[\] = {"
* 101 = sub delta for upper, sub 1 for title
* 110 = sub delta for upper, add delta for lower
*
- * Bits 8-14 Reserved for future use.
- *
- * Bits 15-31 Case delta: delta for case conversions. This should be the
+ * Bits 8-31 Case delta: delta for case conversions. This should be the
* highest field so we can easily sign extend.
*/
@@ -281,19 +275,31 @@ static CONST int groups\[\] = {"
# subtract delta for title or upper
set case 4
set delta $toupper
+ if {$tolower} {
+ error "New case conversion type needed: $toupper $tolower $totitle"
+ }
} elseif {$toupper} {
# subtract delta for upper, subtract 1 for title
set case 5
set delta $toupper
+ if {($totitle != 1) || $tolower} {
+ error "New case conversion type needed: $toupper $tolower $totitle"
+ }
} else {
# add delta for lower, add 1 for title
set case 3
set delta $tolower
+ if {$totitle != -1} {
+ error "New case conversion type needed: $toupper $tolower $totitle"
+ }
}
} elseif {$toupper} {
# subtract delta for upper, add delta for lower
set case 6
set delta $toupper
+ if {$tolower != $toupper} {
+ error "New case conversion type needed: $toupper $tolower $totitle"
+ }
} elseif {$tolower} {
# add delta for lower
set case 2
@@ -304,7 +310,7 @@ static CONST int groups\[\] = {"
set delta 0
}
- append line [expr {($delta << 15) | ($case << 5) | $type}]
+ append line [expr {($delta << 8) | ($case << 5) | $type}]
if {$i != $last} {
append line ", "
}
@@ -321,10 +327,6 @@ static CONST int groups\[\] = {"
* Unicode character.
*/
-#define UNICODE_CATEGORY_MASK 0x1f
-#define UNICODE_OUT_OF_RANGE "
- puts $f [format 0x%xu $next]
- puts $f "
enum {
UNASSIGNED,
UPPERCASE_LETTER,
@@ -366,14 +368,14 @@ enum {
#define GetCaseType(info) (((info) & 0xe0) >> 5)
#define GetCategory(ch) (GetUniCharInfo(ch) & 0x1f)
-#define GetDelta(info) (((info) > 0) ? ((info) >> 15) : (~(~((info)) >> 15)))
+#define GetDelta(info) ((info) >> 8)
/*
* This macro extracts the information about a character from the
* Unicode character tables.
*/
-#define GetUniCharInfo(ch) (groups\[groupMap\[(pageMap\[(((int)(ch)) & 0xffff) >> OFFSET_BITS\] << OFFSET_BITS) | ((ch) & ((1 << OFFSET_BITS)-1))\]\])
+#define GetUniCharInfo(ch) (groups\[groupMap\[pageMap\[((ch) & 0xffff) >> OFFSET_BITS\] | ((ch) & ((1 << OFFSET_BITS)-1))\]\])
"
close $f