diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2012-01-14 09:30:32 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2012-01-14 09:30:32 (GMT) |
commit | a7072a53147e689d2c265a66e87a2f6f74c86014 (patch) | |
tree | bec1f7fe2bc73a31209794bcb26c2a8972268b71 /tools/uniParse.tcl | |
parent | dd2ad07b7f17937585812360682cef3aed8ceb86 (diff) | |
download | tcl-a7072a53147e689d2c265a66e87a2f6f74c86014.zip tcl-a7072a53147e689d2c265a66e87a2f6f74c86014.tar.gz tcl-a7072a53147e689d2c265a66e87a2f6f74c86014.tar.bz2 |
rfe-3473670: Various Unicode-related speedups/robustness
Diffstat (limited to 'tools/uniParse.tcl')
-rw-r--r-- | tools/uniParse.tcl | 38 |
1 files changed, 20 insertions, 18 deletions
diff --git a/tools/uniParse.tcl b/tools/uniParse.tcl index af71eeb..bddee3e 100644 --- a/tools/uniParse.tcl +++ b/tools/uniParse.tcl @@ -58,8 +58,7 @@ proc uni::getValue {items index} { set categoryIndex [lsearch -exact $categories $category] if {$categoryIndex < 0} { - puts "Unexpected character category: $index($category)" - set categoryIndex 0 + error "Unexpected character category: $index($category)" } return [list $categoryIndex $toupper $tolower $totitle] @@ -79,13 +78,14 @@ proc uni::getGroup {value} { proc uni::addPage {info} { variable pMap variable pages + variable shift set pIndex [lsearch -exact $pages $info] if {$pIndex == -1} { set pIndex [llength $pages] lappend pages $info } - lappend pMap $pIndex + lappend pMap [expr {$pIndex << $shift}] return } @@ -141,15 +141,11 @@ proc uni::buildTables {data} { # Enter all assigned characters up to the current character for {set i $next} {$i <= $index} {incr i} { - # Split character index into offset and page number - set offset [expr {$i & $mask}] - set page [expr {($i >> $shift)}] - # Add the group index to the info for the current page lappend info $gIndex # If this is the last entry in the page, add the page - if {$offset == $mask} { + if {($i & $mask) == $mask} { addPage $info set info {} } @@ -262,9 +258,7 @@ static CONST unsigned char groupMap\[\] = {" * 101 = sub delta for upper, sub 1 for title * 110 = sub delta for upper, add delta for lower * - * Bits 8-14 Reserved for future use. - * - * Bits 15-31 Case delta: delta for case conversions. This should be the + * Bits 8-31 Case delta: delta for case conversions. This should be the * highest field so we can easily sign extend. */ @@ -281,19 +275,31 @@ static CONST int groups\[\] = {" # subtract delta for title or upper set case 4 set delta $toupper + if {$tolower} { + error "New case conversion type needed: $toupper $tolower $totitle" + } } elseif {$toupper} { # subtract delta for upper, subtract 1 for title set case 5 set delta $toupper + if {($totitle != 1) || $tolower} { + error "New case conversion type needed: $toupper $tolower $totitle" + } } else { # add delta for lower, add 1 for title set case 3 set delta $tolower + if {$totitle != -1} { + error "New case conversion type needed: $toupper $tolower $totitle" + } } } elseif {$toupper} { # subtract delta for upper, add delta for lower set case 6 set delta $toupper + if {$tolower != $toupper} { + error "New case conversion type needed: $toupper $tolower $totitle" + } } elseif {$tolower} { # add delta for lower set case 2 @@ -304,7 +310,7 @@ static CONST int groups\[\] = {" set delta 0 } - append line [expr {($delta << 15) | ($case << 5) | $type}] + append line [expr {($delta << 8) | ($case << 5) | $type}] if {$i != $last} { append line ", " } @@ -321,10 +327,6 @@ static CONST int groups\[\] = {" * Unicode character. */ -#define UNICODE_CATEGORY_MASK 0x1f -#define UNICODE_OUT_OF_RANGE " - puts $f [format 0x%xu $next] - puts $f " enum { UNASSIGNED, UPPERCASE_LETTER, @@ -366,14 +368,14 @@ enum { #define GetCaseType(info) (((info) & 0xe0) >> 5) #define GetCategory(ch) (GetUniCharInfo(ch) & 0x1f) -#define GetDelta(info) (((info) > 0) ? ((info) >> 15) : (~(~((info)) >> 15))) +#define GetDelta(info) ((info) >> 8) /* * This macro extracts the information about a character from the * Unicode character tables. */ -#define GetUniCharInfo(ch) (groups\[groupMap\[(pageMap\[(((int)(ch)) & 0xffff) >> OFFSET_BITS\] << OFFSET_BITS) | ((ch) & ((1 << OFFSET_BITS)-1))\]\]) +#define GetUniCharInfo(ch) (groups\[groupMap\[pageMap\[((ch) & 0xffff) >> OFFSET_BITS\] | ((ch) & ((1 << OFFSET_BITS)-1))\]\]) " close $f |