diff options
author | stanton <stanton> | 1999-06-24 03:27:56 (GMT) |
---|---|---|
committer | stanton <stanton> | 1999-06-24 03:27:56 (GMT) |
commit | 79d36b8166d773a6f2740a59820c30748c102226 (patch) | |
tree | 255e71bdabf736a3697e79ffdd09a91136ea7b33 /tools | |
parent | 3497f9531e33550bfb1cec92c30b535497e86289 (diff) | |
download | tcl-79d36b8166d773a6f2740a59820c30748c102226.zip tcl-79d36b8166d773a6f2740a59820c30748c102226.tar.gz tcl-79d36b8166d773a6f2740a59820c30748c102226.tar.bz2 |
* unix/Makefile.in: Changed install-doc to install-man.
* tools/uniParse.tcl:
* tools/uniClass.tcl:
* tools/README:
* tests/string.test:
* generic/regc_locale.c:
* generic/tclUniData.c:
* generic/tclUtf.c:
* doc/string.n: Updated Unicode character tables to reflect latest
Unicode 2.1 data. Also rationalized "regexp" and "string is"
definitions of character classes.
Diffstat (limited to 'tools')
-rw-r--r-- | tools/README | 10 | ||||
-rw-r--r-- | tools/uniClass.tcl | 61 | ||||
-rw-r--r-- | tools/uniParse.tcl | 59 |
3 files changed, 107 insertions, 23 deletions
diff --git a/tools/README b/tools/README index 9161214..e0de597 100644 --- a/tools/README +++ b/tools/README @@ -1,6 +1,12 @@ +This directory contains unsupported tools used to build parts of Tcl +for distribution. - This directory contains unsupported tools that are used - during the release engineering process. + +uniParse.tcl -- Script for converting the Unicode character database + into a compact table stored in generic/tclUniData.c. + +uniClass.tcl -- Script for generating regexp class tables from the Tcl + "string is" classes Generating HTML files. The tcl8.1-tk8.1-man-html.tcl script from Robert Critchlow diff --git a/tools/uniClass.tcl b/tools/uniClass.tcl new file mode 100644 index 0000000..2820ba4 --- /dev/null +++ b/tools/uniClass.tcl @@ -0,0 +1,61 @@ +proc emitRange {first last} { + global ranges numranges chars numchars + + if {$first < ($last-1)} { + append ranges [format "{0x%04x, 0x%04x}, " \ + $first $last] + if {[incr numranges] % 4 == 0} { + append ranges "\n " + } + } else { + append chars [format "0x%04x, " $first] + incr numchars + if {$numchars % 9 == 0} { + append chars "\n " + } + if {$first != $last} { + append chars [format "0x%04x, " $last] + incr numchars + if {$numchars % 9 == 0} { + append chars "\n " + } + } + } +} + +proc genTable {type} { + global first last ranges numranges chars numchars + set first -2 + set last -2 + + set ranges " " + set numranges 0 + set chars " " + set numchars 0 + + for {set i 0} {$i < 0x10000} {incr i} { + if {[string is $type [format %c $i]]} { + if {$i == ($last + 1)} { + set last $i + } else { + if {$first > 0} { + emitRange $first $last + } + set first $i + set last $i + } + } + } + emitRange $first $last + + puts "static crange ${type}RangeTable\[\] = {\n$ranges\n};\n" + puts "#define NUM_[string toupper $type]_RANGE (sizeof(${type}RangeTable)/sizeof(crange))\n" + puts "static chr ${type}CharTable\[\] = {\n$chars\n};\n" + puts "#define NUM_[string toupper $type]_CHAR (sizeof(${type}CharTable)/sizeof(chr))\n" +} + + +foreach type {alpha digit punct space lower upper graph } { + genTable $type +} + diff --git a/tools/uniParse.tcl b/tools/uniParse.tcl index f92275f..cd22762 100644 --- a/tools/uniParse.tcl +++ b/tools/uniParse.tcl @@ -1,25 +1,42 @@ +# uniParse.tcl -- +# +# This program parses the UnicodeData file and generates the +# corresponding tclUniData.c file with compressed character +# data tables. The input to this program should be the latest +# UnicodeData file from: +# ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData-Latest.txt +# +# Copyright (c) 1998-1999 by Scriptics Corporation. +# All rights reserved. +# +# RCS: @(#) $Id: uniParse.tcl,v 1.3 1999/06/24 03:27:58 stanton Exp $ + + namespace eval uni { - set shift 9 ;# number of bits of data within a page - variable pMap ;# map from page to page index, each entry is - # an index into the pages table, indexed by - # page number - variable pages ;# map from page index to page info, each - # entry is a list of indices into the groups - # table, the list is indexed by the offset - variable groups ;# list of character info values, indexed by - # group number, initialized with the - # unassigned character group + set shift 5; # number of bits of data within a page + # This value can be adjusted to find the + # best split to minimize table size + + variable pMap; # map from page to page index, each entry is + # an index into the pages table, indexed by + # page number + variable pages; # map from page index to page info, each + # entry is a list of indices into the groups + # table, the list is indexed by the offset + variable groups; # list of character info values, indexed by + # group number, initialized with the + # unassigned character group variable categories { Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp Cc Cf Co Cs Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So - } ;# Ordered list of character categories, must - # match the enumeration in the header file. + }; # Ordered list of character categories, must + # match the enumeration in the header file. - variable titleCount 0 ;# Count of the number of title case - # characters. This value is used in the - # regular expression code to allocate enough - # space for the title case variants. + variable titleCount 0; # Count of the number of title case + # characters. This value is used in the + # regular expression code to allocate enough + # space for the title case variants. } proc uni::getValue {items index} { @@ -149,15 +166,14 @@ proc uni::main {} { variable shift variable titleCount - if {$argc != 2 && $argc != 3} { - puts stderr "\nusage: $argv0 <datafile> <outdir> ?optimize?\n" + if {$argc != 2} { + puts stderr "\nusage: $argv0 <datafile> <outdir>\n" exit 1 } set f [open [lindex $argv 0] r] set data [read $f] close $f - set shift 6 buildTables $data puts "X = [llength $pMap] Y= [llength $pages] A= [llength $groups]" set size [expr {[llength $pMap] + [llength $pages]*(1<<$shift)}] @@ -165,6 +181,7 @@ proc uni::main {} { puts "title case count = $titleCount" set f [open [file join [lindex $argv 1] tclUniData.c] w] + fconfigure $f -translation lf puts $f "/* * tclUtfData.c -- * @@ -192,7 +209,7 @@ proc uni::main {} { * to the same alternate page number. */ -static char pageMap\[\] = {" +static unsigned char pageMap\[\] = {" set line " " set last [expr {[llength $pMap] - 1}] for {set i 0} {$i <= $last} {incr i} { @@ -214,7 +231,7 @@ static char pageMap\[\] = {" * set of character attributes. */ -static char groupMap\[\] = {" +static unsigned char groupMap\[\] = {" set line " " set lasti [expr {[llength $pages] - 1}] for {set i 0} {$i <= $lasti} {incr i} { |