summary | refs | log | tree | commit | diff | stats
path: root/tools
diff options
context:
space:
mode:
author: stanton <stanton> 1999-06-24 03:27:56 (GMT)
committer: stanton <stanton> 1999-06-24 03:27:56 (GMT)
commit: 79d36b8166d773a6f2740a59820c30748c102226 (patch)
tree: 255e71bdabf736a3697e79ffdd09a91136ea7b33 /tools
parent: 3497f9531e33550bfb1cec92c30b535497e86289 (diff)
download: tcl-79d36b8166d773a6f2740a59820c30748c102226.zip
tcl-79d36b8166d773a6f2740a59820c30748c102226.tar.gz
tcl-79d36b8166d773a6f2740a59820c30748c102226.tar.bz2
* unix/Makefile.in: Changed install-doc to install-man.
* tools/uniParse.tcl:
* tools/uniClass.tcl:
* tools/README:
* tests/string.test:
* generic/regc_locale.c:
* generic/tclUniData.c:
* generic/tclUtf.c:
* doc/string.n: Updated Unicode character tables to reflect the latest Unicode 2.1 data. Also rationalized the "regexp" and "string is" definitions of character classes.
Diffstat (limited to 'tools')
-rw-r--r-- tools/README 10
-rw-r--r-- tools/uniClass.tcl 61
-rw-r--r-- tools/uniParse.tcl 59
3 files changed, 107 insertions, 23 deletions
diff --git a/tools/README b/tools/README
index 9161214..e0de597 100644
--- a/tools/README
+++ b/tools/README
@@ -1,6 +1,12 @@
+This directory contains unsupported tools used to build parts of Tcl
+for distribution.
- This directory contains unsupported tools that are used
- during the release engineering process.
+
+uniParse.tcl -- Script for converting the Unicode character database
+ into a compact table stored in generic/tclUniData.c.
+
+uniClass.tcl -- Script for generating regexp class tables from the Tcl
+ "string is" classes
Generating HTML files.
The tcl8.1-tk8.1-man-html.tcl script from Robert Critchlow
diff --git a/tools/uniClass.tcl b/tools/uniClass.tcl
new file mode 100644
index 0000000..2820ba4
--- /dev/null
+++ b/tools/uniClass.tcl
@@ -0,0 +1,61 @@
+proc emitRange {first last} {
+ global ranges numranges chars numchars
+
+ if {$first < ($last-1)} {
+ append ranges [format "{0x%04x, 0x%04x}, " \
+ $first $last]
+ if {[incr numranges] % 4 == 0} {
+ append ranges "\n "
+ }
+ } else {
+ append chars [format "0x%04x, " $first]
+ incr numchars
+ if {$numchars % 9 == 0} {
+ append chars "\n "
+ }
+ if {$first != $last} {
+ append chars [format "0x%04x, " $last]
+ incr numchars
+ if {$numchars % 9 == 0} {
+ append chars "\n "
+ }
+ }
+ }
+}
+
+proc genTable {type} {
+ global first last ranges numranges chars numchars
+ set first -2
+ set last -2
+
+ set ranges " "
+ set numranges 0
+ set chars " "
+ set numchars 0
+
+ for {set i 0} {$i < 0x10000} {incr i} {
+ if {[string is $type [format %c $i]]} {
+ if {$i == ($last + 1)} {
+ set last $i
+ } else {
+ if {$first > 0} {
+ emitRange $first $last
+ }
+ set first $i
+ set last $i
+ }
+ }
+ }
+ emitRange $first $last
+
+ puts "static crange ${type}RangeTable\[\] = {\n$ranges\n};\n"
+ puts "#define NUM_[string toupper $type]_RANGE (sizeof(${type}RangeTable)/sizeof(crange))\n"
+ puts "static chr ${type}CharTable\[\] = {\n$chars\n};\n"
+ puts "#define NUM_[string toupper $type]_CHAR (sizeof(${type}CharTable)/sizeof(chr))\n"
+}
+
+
+foreach type {alpha digit punct space lower upper graph } {
+ genTable $type
+}
+
diff --git a/tools/uniParse.tcl b/tools/uniParse.tcl
index f92275f..cd22762 100644
--- a/tools/uniParse.tcl
+++ b/tools/uniParse.tcl
@@ -1,25 +1,42 @@
+# uniParse.tcl --
+#
+# This program parses the UnicodeData file and generates the
+# corresponding tclUniData.c file with compressed character
+# data tables. The input to this program should be the latest
+# UnicodeData file from:
+# ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData-Latest.txt
+#
+# Copyright (c) 1998-1999 by Scriptics Corporation.
+# All rights reserved.
+#
+# RCS: @(#) $Id: uniParse.tcl,v 1.3 1999/06/24 03:27:58 stanton Exp $
+
+
namespace eval uni {
- set shift 9 ;# number of bits of data within a page
- variable pMap ;# map from page to page index, each entry is
- # an index into the pages table, indexed by
- # page number
- variable pages ;# map from page index to page info, each
- # entry is a list of indices into the groups
- # table, the list is indexed by the offset
- variable groups ;# list of character info values, indexed by
- # group number, initialized with the
- # unassigned character group
+ set shift 5; # number of bits of data within a page
+ # This value can be adjusted to find the
+ # best split to minimize table size
+
+ variable pMap; # map from page to page index, each entry is
+ # an index into the pages table, indexed by
+ # page number
+ variable pages; # map from page index to page info, each
+ # entry is a list of indices into the groups
+ # table, the list is indexed by the offset
+ variable groups; # list of character info values, indexed by
+ # group number, initialized with the
+ # unassigned character group
variable categories {
Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp
Cc Cf Co Cs Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So
- } ;# Ordered list of character categories, must
- # match the enumeration in the header file.
+ }; # Ordered list of character categories, must
+ # match the enumeration in the header file.
- variable titleCount 0 ;# Count of the number of title case
- # characters. This value is used in the
- # regular expression code to allocate enough
- # space for the title case variants.
+ variable titleCount 0; # Count of the number of title case
+ # characters. This value is used in the
+ # regular expression code to allocate enough
+ # space for the title case variants.
}
proc uni::getValue {items index} {
@@ -149,15 +166,14 @@ proc uni::main {} {
variable shift
variable titleCount
- if {$argc != 2 && $argc != 3} {
- puts stderr "\nusage: $argv0 <datafile> <outdir> ?optimize?\n"
+ if {$argc != 2} {
+ puts stderr "\nusage: $argv0 <datafile> <outdir>\n"
exit 1
}
set f [open [lindex $argv 0] r]
set data [read $f]
close $f
- set shift 6
buildTables $data
puts "X = [llength $pMap] Y= [llength $pages] A= [llength $groups]"
set size [expr {[llength $pMap] + [llength $pages]*(1<<$shift)}]
@@ -165,6 +181,7 @@ proc uni::main {} {
puts "title case count = $titleCount"
set f [open [file join [lindex $argv 1] tclUniData.c] w]
+ fconfigure $f -translation lf
puts $f "/*
* tclUtfData.c --
*
@@ -192,7 +209,7 @@ proc uni::main {} {
* to the same alternate page number.
*/
-static char pageMap\[\] = {"
+static unsigned char pageMap\[\] = {"
set line " "
set last [expr {[llength $pMap] - 1}]
for {set i 0} {$i <= $last} {incr i} {
@@ -214,7 +231,7 @@ static char pageMap\[\] = {"
* set of character attributes.
*/
-static char groupMap\[\] = {"
+static unsigned char groupMap\[\] = {"
set line " "
set lasti [expr {[llength $pages] - 1}]
for {set i 0} {$i <= $lasti} {incr i} {