diff options
Diffstat (limited to 'tcllib/modules/stringprep/tools/gen_unicode_test.tcl')
-rw-r--r-- | tcllib/modules/stringprep/tools/gen_unicode_test.tcl | 247 |
1 files changed, 247 insertions, 0 deletions
diff --git a/tcllib/modules/stringprep/tools/gen_unicode_test.tcl b/tcllib/modules/stringprep/tools/gen_unicode_test.tcl new file mode 100644 index 0000000..ec9b2c3 --- /dev/null +++ b/tcllib/modules/stringprep/tools/gen_unicode_test.tcl @@ -0,0 +1,247 @@ +#!/usr/bin/tclsh + +# gen_unicode_test.tcl -- +# +# This program parses the RFC 3454 file and generates the +# corresponding unicode.test file with unicode package tests. +# The input to this program should be NormalizationTest.txt. +# It can be downloaded from: +# ftp://ftp.unicode.org/Public/UNIDATA/NormalizationTest.txt +# Short test suite is generated by default. If you want to generate +# all tests (more than 300000 test cases) add suffix 'full' as the +# third argument. +# +# Usage: gen_unicode_test.tcl infile outdir ?full? +# +# RCS: @(#) $Id: gen_unicode_test.tcl,v 1.1 2008/01/29 02:18:10 patthoyts Exp $ + +package require struct::list + +set short_test_list [list \ + "LATIN CAPITAL LETTER D, COMBINING DOT ABOVE, COMBINING DOT BELOW" \ + "NO-BREAK SPACE" \ + "VULGAR FRACTION ONE HALF" \ + "ORIYA LETTER RRA" \ + "KANNADA VOWEL SIGN EE" \ + "TIBETAN LETTER GHA" \ + "MODIFIER LETTER CAPITAL A" \ + "GREEK SMALL LETTER EPSILON WITH PSILI AND OXIA" \ + "KANGXI RADICAL SPROUT" \ + "HIRAGANA LETTER DE" \ + "KATAKANA LETTER PA" \ + "HANGUL LETTER SIOS-PIEUP" \ + "HANGUL SYLLABLE GYANG" \ + "CJK COMPATIBILITY IDEOGRAPH-F98E" \ + "ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM" \ + "ARABIC LIGATURE AIN WITH JEEM ISOLATED FORM" \ + "FULLWIDTH DIGIT THREE" \ + "LATIN SMALL LETTER A, COMBINING CYRILLIC TITLO, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B" \ + "LATIN SMALL LETTER A, DEVANAGARI SIGN NUKTA, COMBINING TILDE OVERLAY, COMBINING RING OVERLAY, LATIN SMALL LETTER B" \ + "HANGUL SYLLABLE BYO, COMBINING TILDE OVERLAY, HANGUL JONGSEONG TIKEUT"] + +set fd [open [lindex $argv 0]] + +set all_tests {} +set n 0 +while {[gets $fd line] >= 0} { + set line [string trim $line] + if {![regexp \ + {^([ [:xdigit:]]+);([ [:xdigit:]]+);([ [:xdigit:]]+);([ [:xdigit:]]+);([ [:xdigit:]]+);.*\) (.*)} \ + $line -> c(1) c(2) c(3) c(4) c(5) title]} continue + + set q 1 + foreach i {1 2 3 4 5} { + set s($i) {} + set us($i) "" + foreach xnum $c($i) { + set uc [scan $xnum %x] + if {$uc > 0xffff} { + set q 0 + } + lappend s($i) $uc + append us($i) \\u$xnum + } + } + if {!$q} { + # Test case contains character which is greater than 0xFFFF and can't + # be represented in Tcl + continue + } + set test($n) [list $s(1) $s(2) $s(3) $s(4) $s(5) $title] + set test1($n) [list $us(1) $us(2) $us(3) $us(4) $us(5) $title] + if {[lsearch $short_test_list $title] >= 0} { + lappend all_tests $n + } + incr n +} + +close $fd + +if {[string equal [lindex $argv 2] full]} { + set all_tests [struct::list iota $n] +} + +set f [open [file join [lindex $argv 1] unicode.test] w] +fconfigure $f -translation lf +puts $f \ +"# unicode.test +# +# Tests for the unicode package. This file is automatically generated by +# the gen_unicode_test.tcl script. Do not modify this file by hands. +# +# RCS: @(#) \$Id\$ + +# ------------------------------------------------------------------------- + +source \[file join \\ + \[file dirname \[file dirname \[file join \[pwd\] \[info script\]\]\]\] \\ + devtools testutilities.tcl\] + +testsNeedTcl 8.3 +testsNeedTcltest 1.0 + +testing { + useLocalFile unicode_data.tcl + useLocalFile unicode.tcl +} + +# ------------------------------------------------------------------------- +" + +set j 0 +foreach i $all_tests { + puts $f \ +" +test unicode-1.[incr j] {normalizeS D: [lindex $test1($i) 5]} { + unicode::normalizeS D \"[lindex $test1($i) 0]\" +} \"[lindex $test1($i) 2]\" + +test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} { + unicode::normalize D [list [lindex $test($i) 1]] +} {[lindex $test($i) 2]} + +test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} { + unicode::normalize D [list [lindex $test($i) 2]] +} {[lindex $test($i) 2]} + +test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} { + unicode::normalize D [list [lindex $test($i) 3]] +} {[lindex $test($i) 4]} + +test unicode-1.[incr j] {normalize D: [lindex $test($i) 5]} { + unicode::normalize D [list [lindex $test($i) 4]] +} {[lindex $test($i) 4]} +" +} + +set j 0 +foreach i $all_tests { + puts $f \ +" +test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} { + unicode::normalize C [list [lindex $test($i) 0]] +} {[lindex $test($i) 1]} + +test unicode-2.[incr j] {normalizeS C: [lindex $test1($i) 5]} { + unicode::normalizeS C \"[lindex $test1($i) 1]\" +} \"[lindex $test1($i) 1]\" + +test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} { + unicode::normalize C [list [lindex $test($i) 2]] +} {[lindex $test($i) 1]} + +test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} { + unicode::normalize C [list [lindex $test($i) 3]] +} {[lindex $test($i) 3]} + +test unicode-2.[incr j] {normalize C: [lindex $test($i) 5]} { + unicode::normalize C [list [lindex $test($i) 4]] +} {[lindex $test($i) 3]} +" +} + +set j 0 +foreach i $all_tests { + puts $f \ +" +test unicode-3.[incr j] {normalize KD: [lindex $test($i) 5]} { + unicode::normalize KD [list [lindex $test($i) 0]] +} {[lindex $test($i) 4]} + +test unicode-3.[incr j] {normalize KD: [lindex $test($i) 5]} { + unicode::normalize KD [list [lindex $test($i) 1]] +} {[lindex $test($i) 4]} + +test unicode-3.[incr j] {normalizeS KD: [lindex $test1($i) 5]} { + unicode::normalizeS KD \"[lindex $test1($i) 2]\" +} \"[lindex $test1($i) 4]\" + +test unicode-3.[incr j] {normalize KD: [lindex $test($i) 5]} { + unicode::normalize KD [list [lindex $test($i) 3]] +} {[lindex $test($i) 4]} + +test unicode-1.[incr j] {normalize KD: [lindex $test($i) 5]} { + unicode::normalize KD [list [lindex $test($i) 4]] +} {[lindex $test($i) 4]} +" +} + +set j 0 +foreach i $all_tests { + puts $f \ +" +test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} { + unicode::normalize KC [list [lindex $test($i) 0]] +} {[lindex $test($i) 3]} + +test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} { + unicode::normalize KC [list [lindex $test($i) 1]] +} {[lindex $test($i) 3]} + +test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} { + unicode::normalize KC [list [lindex $test($i) 2]] +} {[lindex $test($i) 3]} + +test unicode-4.[incr j] {normalizeS KC: [lindex $test1($i) 5]} { + unicode::normalizeS KC \"[lindex $test1($i) 3]\" +} \"[lindex $test1($i) 3]\" + +test unicode-4.[incr j] {normalize KC: [lindex $test($i) 5]} { + unicode::normalize KC [list [lindex $test($i) 4]] +} {[lindex $test($i) 3]} +" +} + +puts $f \ +" +test unicode-5.1 {fromstring} { + unicode::fromstring \"\\u0403\\u0405\\u0406\\u041f\\u0034\" +} {1027 1029 1030 1055 52} + +test unicode-5.2 {fromstring} { + unicode::fromstring \"\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\u0008\\u0009\\u000a\\u000b\\u000c\\u000d\" +} {1 2 3 4 5 6 7 8 9 10 11 12 13} + +test unicode-6.1 {tostring} { + unicode::tostring {16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1} +} \"\\u0010\\u000f\\u000e\\u000d\\u000c\\u000b\\u000a\\u0009\\u0008\\u0007\\u0006\\u0005\\u0004\\u0003\\u0002\\u0001\" + +test unicode-6.2 {tostring} { + unicode::tostring {12345 12346 12347 12348 12349 12350 12351} +} \"\\u3039\\u303a\\u303b\\u303c\\u303d\\u303e\\u303f\" + +test unicode-7.1 {normalize bad form} { + catch {unicode::normalize S \"\"} result + set result +} \"::unicode::normalize: Only D, C, KD and KC forms are allowed\" + +test unicode-8.1 {normalizeS bad form} { + catch {unicode::normalizeS S \"\"} result + set result +} \"::unicode::normalizeS: Only D, C, KD and KC forms are allowed\" + +::tcltest::cleanupTests +" + +close $f + |