diff options
-rw-r--r-- | tools/uniClass.tcl | 56 | ||||
-rw-r--r-- | tools/uniParse.tcl | 6 |
2 files changed, 52 insertions, 10 deletions
diff --git a/tools/uniClass.tcl b/tools/uniClass.tcl index 2820ba4..442fc2a 100644 --- a/tools/uniClass.tcl +++ b/tools/uniClass.tcl @@ -1,3 +1,17 @@ +#!/bin/sh +# The next line is executed by /bin/sh, but not tcl \ +exec tclsh "$0" ${1+"$@"} + +# +# uniClass.tcl -- +# +# Generates the character ranges and singletons that are used in +# generic/regc_locale.c for translation of character classes. +# This file must be generated using a tclsh that contains the +# correct corresponding tclUniData.c file (generated by uniParse.tcl) +# in order for the class ranges to match. +# + proc emitRange {first last} { global ranges numranges chars numchars @@ -33,7 +47,7 @@ proc genTable {type} { set chars " " set numchars 0 - for {set i 0} {$i < 0x10000} {incr i} { + for {set i 0} {$i <= 0xFFFF} {incr i} { if {[string is $type [format %c $i]]} { if {$i == ($last + 1)} { set last $i @@ -47,15 +61,43 @@ proc genTable {type} { } } emitRange $first $last - - puts "static crange ${type}RangeTable\[\] = {\n$ranges\n};\n" - puts "#define NUM_[string toupper $type]_RANGE (sizeof(${type}RangeTable)/sizeof(crange))\n" - puts "static chr ${type}CharTable\[\] = {\n$chars\n};\n" - puts "#define NUM_[string toupper $type]_CHAR (sizeof(${type}CharTable)/sizeof(chr))\n" + + set ranges [string trimright $ranges "\t\n ,"] + set chars [string trimright $chars "\t\n ,"] + if {$ranges != ""} { + puts "static crange ${type}RangeTable\[\] = {\n$ranges\n};\n" + puts "#define NUM_[string toupper $type]_RANGE (sizeof(${type}RangeTable)/sizeof(crange))\n" + } else { + puts "/* no contiguous ranges of $type characters */\n" + } + if {$chars != ""} { + puts "static chr ${type}CharTable\[\] = {\n$chars\n};\n" + puts "#define NUM_[string toupper $type]_CHAR (sizeof(${type}CharTable)/sizeof(chr))\n" + } else { + puts "/* no singletons of $type characters */\n" + } } +puts "/* + * Declarations of Unicode character ranges. This code + * is automatically generated by the tools/uniClass.tcl script + * and used in generic/regc_locale.c. Do not modify by hand. + */ +" -foreach type {alpha digit punct space lower upper graph } { +foreach {type desc} { + alpha "alphabetic characters" + digit "decimal digit characters" + punct "punctuation characters" + space "white space characters" + lower "lowercase characters" + upper "uppercase characters" + graph "unicode print characters excluding space" +} { + puts "/* Unicode: $desc */\n" genTable $type } +puts "/* + * End of auto-generated Unicode character ranges declarations. + */" diff --git a/tools/uniParse.tcl b/tools/uniParse.tcl index cd22762..3fe38d2 100644 --- a/tools/uniParse.tcl +++ b/tools/uniParse.tcl @@ -9,7 +9,7 @@ # Copyright (c) 1998-1999 by Scriptics Corporation. # All rights reserved. # -# RCS: @(#) $Id: uniParse.tcl,v 1.3 1999/06/24 03:27:58 stanton Exp $ +# RCS: @(#) $Id: uniParse.tcl,v 1.4 2001/05/28 04:37:57 hobbs Exp $ namespace eval uni { @@ -183,7 +183,7 @@ proc uni::main {} { set f [open [file join [lindex $argv 1] tclUniData.c] w] fconfigure $f -translation lf puts $f "/* - * tclUtfData.c -- + * tclUniData.c -- * * Declarations of Unicode character information tables. This file is * automatically generated by the tools/uniParse.tcl script. Do not @@ -368,7 +368,7 @@ enum { #define GetCaseType(info) (((info) & 0xE0) >> 5) #define GetCategory(info) ((info) & 0x1F) -#define GetDelta(infO) (((info) > 0) ? ((info) >> 22) : (~(~((info)) >> 22))) +#define GetDelta(info) (((info) > 0) ? ((info) >> 22) : (~(~((info)) >> 22))) /* * This macro extracts the information about a character from the |