summaryrefslogtreecommitdiffstats
path: root/tcllib/modules/stringprep/tools/gen_unicode_test.tcl
diff options
context:
space:
mode:
Diffstat (limited to 'tcllib/modules/stringprep/tools/gen_unicode_test.tcl')
-rw-r--r--tcllib/modules/stringprep/tools/gen_unicode_test.tcl247
1 files changed, 247 insertions, 0 deletions
diff --git a/tcllib/modules/stringprep/tools/gen_unicode_test.tcl b/tcllib/modules/stringprep/tools/gen_unicode_test.tcl
new file mode 100644
index 0000000..ec9b2c3
--- /dev/null
+++ b/tcllib/modules/stringprep/tools/gen_unicode_test.tcl
@@ -0,0 +1,247 @@
+#!/usr/bin/tclsh
+
+# gen_unicode_test.tcl --
+#
+# This program parses the RFC 3454 file and generates the
+# corresponding unicode.test file with unicode package tests.
+# The input to this program should be NormalizationTest.txt.
+# It can be downloaded from:
+# ftp://ftp.unicode.org/Public/UNIDATA/NormalizationTest.txt
+# Short test suite is generated by default. If you want to generate
+# all tests (more than 300000 test cases) add suffix 'full' as the
+# third argument.
+#
+# Usage: gen_unicode_test.tcl infile outdir ?full?
+#
+# RCS: @(#) $Id: gen_unicode_test.tcl,v 1.1 2008/01/29 02:18:10 patthoyts Exp $
+
+package require struct::list
+
+set short_test_list [list \
+ "LATIN CAPITAL LETTER D, COMBINING DOT ABOVE, COMBINING DOT BELOW" \
+ "NO-BREAK SPACE" \
+ "VULGAR FRACTION ONE HALF" \
+ "ORIYA LETTER RRA" \
+ "KANNADA VOWEL SIGN EE" \
+ "TIBETAN LETTER GHA" \
+ "MODIFIER LETTER CAPITAL A" \
+ "GREEK SMALL LETTER EPSILON WITH PSILI AND OXIA" \
+ "KANGXI RADICAL SPROUT" \
+ "HIRAGANA LETTER DE" \
+ "KATAKANA LETTER PA" \
+ "HANGUL LETTER SIOS-PIEUP" \
+ "HANGUL SYLLABLE GYANG" \
+ "CJK COMPATIBILITY IDEOGRAPH-F98E" \
+ "ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM" \
+ "ARABIC LIGATURE AIN WITH JEEM ISOLATED FORM" \
+ "FULLWIDTH DIGIT THREE" \
+ "LATIN SMALL LETTER A, COMBINING CYRILLIC TITLO, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B" \
+ "LATIN SMALL LETTER A, DEVANAGARI SIGN NUKTA, COMBINING TILDE OVERLAY, COMBINING RING OVERLAY, LATIN SMALL LETTER B" \
+ "HANGUL SYLLABLE BYO, COMBINING TILDE OVERLAY, HANGUL JONGSEONG TIKEUT"]
+
+set fd [open [lindex $argv 0]]
+
+set all_tests {}
+set n 0
+while {[gets $fd line] >= 0} {
+ set line [string trim $line]
+ if {![regexp \
+ {^([ [:xdigit:]]+);([ [:xdigit:]]+);([ [:xdigit:]]+);([ [:xdigit:]]+);([ [:xdigit:]]+);.*\) (.*)} \
+ $line -> c(1) c(2) c(3) c(4) c(5) title]} continue
+
+ set q 1
+ foreach i {1 2 3 4 5} {
+ set s($i) {}
+ set us($i) ""
+ foreach xnum $c($i) {
+ set uc [scan $xnum %x]
+ if {$uc > 0xffff} {
+ set q 0
+ }
+ lappend s($i) $uc
+ append us($i) \\u$xnum
+ }
+ }
+ if {!$q} {
+ # Test case contains character which is greater than 0xFFFF and can't
+ # be represented in Tcl
+ continue
+ }
+ set test($n) [list $s(1) $s(2) $s(3) $s(4) $s(5) $title]
+ set test1($n) [list $us(1) $us(2) $us(3) $us(4) $us(5) $title]
+ if {[lsearch $short_test_list $title] >= 0} {
+ lappend all_tests $n
+ }
+ incr n
+}
+
+close $fd
+
+if {[string equal [lindex $argv 2] full]} {
+ set all_tests [struct::list iota $n]
+}
+
+set f [open [file join [lindex $argv 1] unicode.test] w]
+fconfigure $f -translation lf
+puts $f \
+"# unicode.test
+#
+# Tests for the unicode package. This file is automatically generated by
+# the gen_unicode_test.tcl script. Do not modify this file by hands.
+#
+# RCS: @(#) \$Id\$
+
+# -------------------------------------------------------------------------
+
+source \[file join \\
+ \[file dirname \[file dirname \[file join \[pwd\] \[info script\]\]\]\] \\
+ devtools testutilities.tcl\]
+
+testsNeedTcl 8.3
+testsNeedTcltest 1.0
+
+testing {
+ useLocalFile unicode_data.tcl
+ useLocalFile unicode.tcl
+}
+
+# -------------------------------------------------------------------------
+"
+
+set j 0
+foreach i $all_tests {
+ puts $f \
+"
+test unicode-1.[incr j] {normalizeS D: [lindex $test1($i) 5]} {
+ unicode::normalizeS D \"[lindex $test1($i) 0]\"
+} \"[lindex $test1($i) 2]\"
+
+test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} {
+ unicode::normalize D [list [lindex $test($i) 1]]
+} {[lindex $test($i) 2]}
+
+test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} {
+ unicode::normalize D [list [lindex $test($i) 2]]
+} {[lindex $test($i) 2]}
+
+test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} {
+ unicode::normalize D [list [lindex $test($i) 3]]
+} {[lindex $test($i) 4]}
+
+test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} {
+ unicode::normalize D [list [lindex $test($i) 4]]
+} {[lindex $test($i) 4]}
+"
+}
+
+set j 0
+foreach i $all_tests {
+ puts $f \
+"
+test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} {
+ unicode::normalize C [list [lindex $test($i) 0]]
+} {[lindex $test($i) 1]}
+
+test unicode-2.[incr j] {normalizeS C: [lindex $test1($i) 5]} {
+ unicode::normalizeS C \"[lindex $test1($i) 1]\"
+} \"[lindex $test1($i) 1]\"
+
+test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} {
+ unicode::normalize C [list [lindex $test($i) 2]]
+} {[lindex $test($i) 1]}
+
+test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} {
+ unicode::normalize C [list [lindex $test($i) 3]]
+} {[lindex $test($i) 3]}
+
+test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} {
+ unicode::normalize C [list [lindex $test($i) 4]]
+} {[lindex $test($i) 3]}
+"
+}
+
+set j 0
+foreach i $all_tests {
+ puts $f \
+"
+test unicode-3.[incr j] {normalize KD: [lindex $test($i) 5]} {
+ unicode::normalize KD [list [lindex $test($i) 0]]
+} {[lindex $test($i) 4]}
+
+test unicode-3.[incr j] {normalize KD: [lindex $test($i) 5]} {
+ unicode::normalize KD [list [lindex $test($i) 1]]
+} {[lindex $test($i) 4]}
+
+test unicode-3.[incr j] {normalizeS KD: [lindex $test1($i) 5]} {
+ unicode::normalizeS KD \"[lindex $test1($i) 2]\"
+} \"[lindex $test1($i) 4]\"
+
+test unicode-3.[incr j] {normalize KD: [lindex $test($i) 5]} {
+ unicode::normalize KD [list [lindex $test($i) 3]]
+} {[lindex $test($i) 4]}
+
+test unicode-1.[incr j] {normalize KD: [lindex $test($i) 5]} {
+ unicode::normalize KD [list [lindex $test($i) 4]]
+} {[lindex $test($i) 4]}
+"
+}
+
+set j 0
+foreach i $all_tests {
+ puts $f \
+"
+test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} {
+ unicode::normalize KC [list [lindex $test($i) 0]]
+} {[lindex $test($i) 3]}
+
+test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} {
+ unicode::normalize KC [list [lindex $test($i) 1]]
+} {[lindex $test($i) 3]}
+
+test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} {
+ unicode::normalize KC [list [lindex $test($i) 2]]
+} {[lindex $test($i) 3]}
+
+test unicode-4.[incr j] {normalizeS KC: [lindex $test1($i) 5]} {
+ unicode::normalizeS KC \"[lindex $test1($i) 3]\"
+} \"[lindex $test1($i) 3]\"
+
+test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} {
+ unicode::normalize KC [list [lindex $test($i) 4]]
+} {[lindex $test($i) 3]}
+"
+}
+
+puts $f \
+"
+test unicode-5.1 {fromstring} {
+ unicode::fromstring \"\\u0403\\u0405\\u0406\\u041f\\u0034\"
+} {1027 1029 1030 1055 52}
+
+test unicode-5.2 {fromstring} {
+ unicode::fromstring \"\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\u0008\\u0009\\u000a\\u000b\\u000c\\u000d\"
+} {1 2 3 4 5 6 7 8 9 10 11 12 13}
+
+test unicode-6.1 {tostring} {
+ unicode::tostring {16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1}
+} \"\\u0010\\u000f\\u000e\\u000d\\u000c\\u000b\\u000a\\u0009\\u0008\\u0007\\u0006\\u0005\\u0004\\u0003\\u0002\\u0001\"
+
+test unicode-6.2 {tostring} {
+ unicode::tostring {12345 12346 12347 12348 12349 12350 12351}
+} \"\\u3039\\u303a\\u303b\\u303c\\u303d\\u303e\\u303f\"
+
+test unicode-7.1 {normalize bad form} {
+ catch {unicode::normalize S \"\"} result
+ set result
+} \"::unicode::normalize: Only D, C, KD and KC forms are allowed\"
+
+test unicode-8.1 {normalizeS bad form} {
+ catch {unicode::normalizeS S \"\"} result
+ set result
+} \"::unicode::normalizeS: Only D, C, KD and KC forms are allowed\"
+
+::tcltest::cleanupTests
+"
+
+close $f
+