1 files changed, 247 insertions, 0 deletions
diff --git a/tcllib/modules/stringprep/tools/gen_unicode_test.tcl b/tcllib/modules/stringprep/tools/gen_unicode_test.tcl
new file mode 100644
index 0000000..ec9b2c3
--- /dev/null
+++ b/tcllib/modules/stringprep/tools/gen_unicode_test.tcl
@@ -0,0 +1,247 @@
+#!/usr/bin/tclsh
+
+# gen_unicode_test.tcl --
+#
+#	This program parses the RFC 3454 file and generates the
+#	corresponding unicode.test file with unicode package tests.
+#	The input to this program should be NormalizationTest.txt.
+#	It can be downloaded from:
+#	ftp://ftp.unicode.org/Public/UNIDATA/NormalizationTest.txt
+#	Short test suite is generated by default. If you want to generate
+#	all tests (more than 300000 test cases) add suffix 'full' as the
+#	third argument.
+#
+# Usage: gen_unicode_test.tcl infile outdir ?full?
+# 
+# RCS: @(#) $Id: gen_unicode_test.tcl,v 1.1 2008/01/29 02:18:10 patthoyts Exp $
+
+package require struct::list
+
+set short_test_list [list \
+    "LATIN CAPITAL LETTER D, COMBINING DOT ABOVE, COMBINING DOT BELOW" \
+    "NO-BREAK SPACE" \
+    "VULGAR FRACTION ONE HALF" \
+    "ORIYA LETTER RRA" \
+    "KANNADA VOWEL SIGN EE" \
+    "TIBETAN LETTER GHA" \
+    "MODIFIER LETTER CAPITAL A" \
+    "GREEK SMALL LETTER EPSILON WITH PSILI AND OXIA" \
+    "KANGXI RADICAL SPROUT" \
+    "HIRAGANA LETTER DE" \
+    "KATAKANA LETTER PA" \
+    "HANGUL LETTER SIOS-PIEUP" \
+    "HANGUL SYLLABLE GYANG" \
+    "CJK COMPATIBILITY IDEOGRAPH-F98E" \
+    "ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM" \
+    "ARABIC LIGATURE AIN WITH JEEM ISOLATED FORM" \
+    "FULLWIDTH DIGIT THREE" \
+    "LATIN SMALL LETTER A, COMBINING CYRILLIC TITLO, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B" \
+    "LATIN SMALL LETTER A, DEVANAGARI SIGN NUKTA, COMBINING TILDE OVERLAY, COMBINING RING OVERLAY, LATIN SMALL LETTER B" \
+    "HANGUL SYLLABLE BYO, COMBINING TILDE OVERLAY, HANGUL JONGSEONG TIKEUT"]
+
+set fd [open [lindex $argv 0]]
+
+set all_tests {}
+set n 0
+while {[gets $fd line] >= 0} {
+    set line [string trim $line]
+    if {![regexp \
+	      {^([ [:xdigit:]]+);([ [:xdigit:]]+);([ [:xdigit:]]+);([ [:xdigit:]]+);([ [:xdigit:]]+);.*\) (.*)} \
+	      $line -> c(1) c(2) c(3) c(4) c(5) title]} continue
+
+    set q 1
+    foreach i {1 2 3 4 5} {
+	set s($i) {}
+	set us($i) ""
+	foreach xnum $c($i) {
+	    set uc [scan $xnum %x]
+	    if {$uc > 0xffff} {
+		set q 0
+	    }
+	    lappend s($i) $uc
+	    append us($i) \\u$xnum
+	}
+    }
+    if {!$q} {
+	# Test case contains character which is greater than 0xFFFF and can't
+	# be represented in Tcl
+	continue
+    }
+    set test($n) [list $s(1) $s(2) $s(3) $s(4) $s(5) $title]
+    set test1($n) [list $us(1) $us(2) $us(3) $us(4) $us(5) $title]
+    if {[lsearch $short_test_list $title] >= 0} {
+	lappend all_tests $n
+    }
+    incr n
+}
+
+close $fd
+
+if {[string equal [lindex $argv 2] full]} {
+    set all_tests [struct::list iota $n]
+}
+
+set f [open [file join [lindex $argv 1] unicode.test] w]
+fconfigure $f -translation lf
+puts $f \
+"# unicode.test
+#
+# Tests for the unicode package. This file is automatically generated by
+# the gen_unicode_test.tcl script. Do not modify this file by hands.
+#
+# RCS: @(#) \$Id\$
+
+# -------------------------------------------------------------------------
+
+source \[file join \\
+	\[file dirname \[file dirname \[file join \[pwd\] \[info script\]\]\]\] \\
+	devtools testutilities.tcl\]
+
+testsNeedTcl     8.3
+testsNeedTcltest 1.0
+
+testing {
+    useLocalFile unicode_data.tcl
+    useLocalFile unicode.tcl
+}
+
+# -------------------------------------------------------------------------
+"
+
+set j 0
+foreach i $all_tests {
+    puts $f \
+"
+test unicode-1.[incr j] {normalizeS D: [lindex $test1($i) 5]} {
+    unicode::normalizeS D \"[lindex $test1($i) 0]\"
+} \"[lindex $test1($i) 2]\"
+
+test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} {
+    unicode::normalize D [list [lindex $test($i) 1]]
+} {[lindex $test($i) 2]}
+
+test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} {
+    unicode::normalize D [list [lindex $test($i) 2]]
+} {[lindex $test($i) 2]}
+
+test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} {
+    unicode::normalize D [list [lindex $test($i) 3]]
+} {[lindex $test($i) 4]}
+
+test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} {
+    unicode::normalize D [list [lindex $test($i) 4]]
+} {[lindex $test($i) 4]}
+"
+}
+
+set j 0
+foreach i $all_tests {
+    puts $f \
+"
+test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} {
+    unicode::normalize C [list [lindex $test($i) 0]]
+} {[lindex $test($i) 1]}
+
+test unicode-2.[incr j] {normalizeS C: [lindex $test1($i) 5]} {
+    unicode::normalizeS C \"[lindex $test1($i) 1]\"
+} \"[lindex $test1($i) 1]\"
+
+test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} {
+    unicode::normalize C [list [lindex $test($i) 2]]
+} {[lindex $test($i) 1]}
+
+test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} {
+    unicode::normalize C [list [lindex $test($i) 3]]
+} {[lindex $test($i) 3]}
+
+test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} {
+    unicode::normalize C [list [lindex $test($i) 4]]
+} {[lindex $test($i) 3]}
+"
+}
+
+set j 0
+foreach i $all_tests {
+    puts $f \
+"
+test unicode-3.[incr j] {normalize KD: [lindex $test($i) 5]} {
+    unicode::normalize KD [list [lindex $test($i) 0]]
+} {[lindex $test($i) 4]}
+
+test unicode-3.[incr j] {normalize KD: [lindex $test($i) 5]} {
+    unicode::normalize KD [list [lindex $test($i) 1]]
+} {[lindex $test($i) 4]}
+
+test unicode-3.[incr j] {normalizeS KD: [lindex $test1($i) 5]} {
+    unicode::normalizeS KD \"[lindex $test1($i) 2]\"
+} \"[lindex $test1($i) 4]\"
+
+test unicode-3.[incr j] {normalize KD: [lindex $test($i) 5]} {
+    unicode::normalize KD [list [lindex $test($i) 3]]
+} {[lindex $test($i) 4]}
+
+test unicode-1.[incr j] {normalize KD: [lindex $test($i) 5]} {
+    unicode::normalize KD [list [lindex $test($i) 4]]
+} {[lindex $test($i) 4]}
+"
+}
+
+set j 0
+foreach i $all_tests {
+    puts $f \
+"
+test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} {
+    unicode::normalize KC [list [lindex $test($i) 0]]
+} {[lindex $test($i) 3]}
+
+test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} {
+    unicode::normalize KC [list [lindex $test($i) 1]]
+} {[lindex $test($i) 3]}
+
+test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} {
+    unicode::normalize KC [list [lindex $test($i) 2]]
+} {[lindex $test($i) 3]}
+
+test unicode-4.[incr j] {normalizeS KC: [lindex $test1($i) 5]} {
+    unicode::normalizeS KC \"[lindex $test1($i) 3]\"
+} \"[lindex $test1($i) 3]\"
+
+test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} {
+    unicode::normalize KC [list [lindex $test($i) 4]]
+} {[lindex $test($i) 3]}
+"
+}
+
+puts $f \
+"
+test unicode-5.1 {fromstring} {
+    unicode::fromstring \"\\u0403\\u0405\\u0406\\u041f\\u0034\"
+} {1027 1029 1030 1055 52}
+
+test unicode-5.2 {fromstring} {
+    unicode::fromstring \"\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\u0008\\u0009\\u000a\\u000b\\u000c\\u000d\"
+} {1 2 3 4 5 6 7 8 9 10 11 12 13}
+
+test unicode-6.1 {tostring} {
+    unicode::tostring {16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1}
+} \"\\u0010\\u000f\\u000e\\u000d\\u000c\\u000b\\u000a\\u0009\\u0008\\u0007\\u0006\\u0005\\u0004\\u0003\\u0002\\u0001\"
+
+test unicode-6.2 {tostring} {
+    unicode::tostring {12345 12346 12347 12348 12349 12350 12351}
+} \"\\u3039\\u303a\\u303b\\u303c\\u303d\\u303e\\u303f\"
+
+test unicode-7.1 {normalize bad form} {
+    catch {unicode::normalize S \"\"} result
+    set result
+} \"::unicode::normalize: Only D, C, KD and KC forms are allowed\"
+
+test unicode-8.1 {normalizeS bad form} {
+    catch {unicode::normalizeS S \"\"} result
+    set result
+} \"::unicode::normalizeS: Only D, C, KD and KC forms are allowed\"
+
+::tcltest::cleanupTests
+"
+
+close $f
+