diff options
Diffstat (limited to 'tools/uniClass.tcl')
| -rw-r--r-- | tools/uniClass.tcl | 95 |
1 files changed, 82 insertions, 13 deletions
diff --git a/tools/uniClass.tcl b/tools/uniClass.tcl index 2820ba4..9b4819d 100644 --- a/tools/uniClass.tcl +++ b/tools/uniClass.tcl @@ -1,20 +1,48 @@ +#!/bin/sh +# The next line is executed by /bin/sh, but not tcl \ +exec tclsh "$0" ${1+"$@"} + +# +# uniClass.tcl -- +# +# Generates the character ranges and singletons that are used in +# generic/regc_locale.c for translation of character classes. +# This file must be generated using a tclsh that contains the +# correct corresponding tclUniData.c file (generated by uniParse.tcl) +# in order for the class ranges to match. +# + proc emitRange {first last} { - global ranges numranges chars numchars + global ranges numranges chars numchars extchars extranges if {$first < ($last-1)} { - append ranges [format "{0x%04x, 0x%04x}, " \ + if {!$extranges && ($first) > 0xffff} { + set extranges 1 + set numranges 0 + set ranges [string trimright $ranges " \n\r\t,"] + append ranges "\n#if TCL_UTF_MAX > 4\n ," + } + append ranges [format "{0x%x, 0x%x}, " \ $first $last] if {[incr numranges] % 4 == 0} { + set ranges [string trimright $ranges] append ranges "\n " } } else { - append chars [format "0x%04x, " $first] + if {!$extchars && ($first) > 0xffff} { + set extchars 1 + set numchars 0 + set chars [string trimright $chars " \n\r\t,"] + append chars "\n#if TCL_UTF_MAX > 4\n ," + } + append chars [format "0x%x, " $first] incr numchars if {$numchars % 9 == 0} { + set chars [string trimright $chars] append chars "\n " } if {$first != $last} { - append chars [format "0x%04x, " $last] + append chars [format "0x%x, " $last] incr numchars if {$numchars % 9 == 0} { append chars "\n " @@ -24,7 +52,7 @@ proc emitRange {first last} { } proc genTable {type} { - global first last ranges numranges chars numchars + global first last ranges numranges chars numchars extchars extranges set first -2 set last -2 @@ -32,13 +60,19 @@ proc genTable {type} { set numranges 0 set chars " " set numchars 0 + set extchars 0 + set extranges 0 - for {set i 0} {$i < 0x10000} {incr i} { + for {set i 0} {$i <= 0x10ffff} {incr i} { + if {$i == 0xd800} { + # Skip surrogates + set i 0xdc00 + } if {[string is $type [format %c $i]]} { if {$i == ($last + 1)} { set last $i } else { - if {$first > 0} { + if {$first >= 0} { emitRange $first $last } set first $i @@ -47,15 +81,50 @@ proc genTable {type} { } } emitRange $first $last - - puts "static crange ${type}RangeTable\[\] = {\n$ranges\n};\n" - puts "#define NUM_[string toupper $type]_RANGE (sizeof(${type}RangeTable)/sizeof(crange))\n" - puts "static chr ${type}CharTable\[\] = {\n$chars\n};\n" - puts "#define NUM_[string toupper $type]_CHAR (sizeof(${type}CharTable)/sizeof(chr))\n" + + set ranges [string trimright $ranges "\t\n ,"] + if {$extranges} { + append ranges "\n#endif" + } + set chars [string trimright $chars "\t\n ,"] + if {$extchars} { + append chars "\n#endif" + } + if {$ranges ne ""} { + puts "static const crange ${type}RangeTable\[\] = {\n$ranges\n};\n" + puts "#define NUM_[string toupper $type]_RANGE (sizeof(${type}RangeTable)/sizeof(crange))\n" + } else { + puts "/* no contiguous ranges of $type characters */\n" + } + if {$chars ne ""} { + puts "static const chr ${type}CharTable\[\] = {\n$chars\n};\n" + puts "#define NUM_[string toupper $type]_CHAR (sizeof(${type}CharTable)/sizeof(chr))\n" + } else { + puts "/*\n * no singletons of $type characters.\n */\n" + } } +puts "/* + * Declarations of Unicode character ranges. This code + * is automatically generated by the tools/uniClass.tcl script + * and used in generic/regc_locale.c. Do not modify by hand. + */ +" -foreach type {alpha digit punct space lower upper graph } { +foreach {type desc} { + alpha "alphabetic characters" + control "control characters" + digit "decimal digit characters" + punct "punctuation characters" + space "white space characters" + lower "lowercase characters" + upper "uppercase characters" + graph "unicode print characters excluding space" +} { + puts "/*\n * Unicode: $desc.\n */\n" genTable $type } +puts "/* + * End of auto-generated Unicode character ranges declarations. + */" |
