diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2024-06-27 21:50:59 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2024-06-27 21:50:59 (GMT) |
commit | 811b044eb5691553abaa86cf5b70f0f6f6ad20c5 (patch) | |
tree | d50ee069b257173bdc04fb85ae14a1abe24ae7cc /tools | |
parent | 0d26404eeee85995537532be98ec07a77fbba0e6 (diff) | |
parent | b99f44e2b60aa470e6f3b2c7f119b3f1a77f8606 (diff) | |
download | tcl-811b044eb5691553abaa86cf5b70f0f6f6ad20c5.zip tcl-811b044eb5691553abaa86cf5b70f0f6f6ad20c5.tar.gz tcl-811b044eb5691553abaa86cf5b70f0f6f6ad20c5.tar.bz2 |
Merge 8.7. Random indent fixes
Diffstat (limited to 'tools')
-rw-r--r-- | tools/ucm2tests.tcl | 384 |
1 files changed, 192 insertions, 192 deletions
diff --git a/tools/ucm2tests.tcl b/tools/ucm2tests.tcl index dc878ef..3c1b83f 100644 --- a/tools/ucm2tests.tcl +++ b/tools/ucm2tests.tcl @@ -16,31 +16,31 @@ namespace eval ucm { # Map Tcl encoding name to ICU UCM file name variable encNameMap array set encNameMap { - cp1250 glibc-CP1250-2.1.2 - cp1251 glibc-CP1251-2.1.2 - cp1252 glibc-CP1252-2.1.2 - cp1253 glibc-CP1253-2.1.2 - cp1254 glibc-CP1254-2.1.2 - cp1255 glibc-CP1255-2.1.2 - cp1256 glibc-CP1256-2.1.2 - cp1257 glibc-CP1257-2.1.2 - cp1258 glibc-CP1258-2.1.2 - gb1988 glibc-GB_1988_80-2.3.3 - iso8859-1 glibc-ISO_8859_1-2.1.2 - iso8859-2 glibc-ISO_8859_2-2.1.2 - iso8859-3 glibc-ISO_8859_3-2.1.2 - iso8859-4 glibc-ISO_8859_4-2.1.2 - iso8859-5 glibc-ISO_8859_5-2.1.2 - iso8859-6 glibc-ISO_8859_6-2.1.2 - iso8859-7 glibc-ISO_8859_7-2.3.3 - iso8859-8 glibc-ISO_8859_8-2.3.3 - iso8859-9 glibc-ISO_8859_9-2.1.2 - iso8859-10 glibc-ISO_8859_10-2.1.2 - iso8859-11 glibc-ISO_8859_11-2.1.2 - iso8859-13 glibc-ISO_8859_13-2.3.3 - iso8859-14 glibc-ISO_8859_14-2.1.2 - iso8859-15 glibc-ISO_8859_15-2.1.2 - iso8859-16 glibc-ISO_8859_16-2.3.3 + cp1250 glibc-CP1250-2.1.2 + cp1251 glibc-CP1251-2.1.2 + cp1252 glibc-CP1252-2.1.2 + cp1253 glibc-CP1253-2.1.2 + cp1254 glibc-CP1254-2.1.2 + cp1255 glibc-CP1255-2.1.2 + cp1256 glibc-CP1256-2.1.2 + cp1257 glibc-CP1257-2.1.2 + cp1258 glibc-CP1258-2.1.2 + gb1988 glibc-GB_1988_80-2.3.3 + iso8859-1 glibc-ISO_8859_1-2.1.2 + iso8859-2 glibc-ISO_8859_2-2.1.2 + iso8859-3 glibc-ISO_8859_3-2.1.2 + iso8859-4 glibc-ISO_8859_4-2.1.2 + iso8859-5 glibc-ISO_8859_5-2.1.2 + iso8859-6 glibc-ISO_8859_6-2.1.2 + iso8859-7 glibc-ISO_8859_7-2.3.3 + iso8859-8 glibc-ISO_8859_8-2.3.3 + iso8859-9 glibc-ISO_8859_9-2.1.2 + iso8859-10 glibc-ISO_8859_10-2.1.2 + iso8859-11 glibc-ISO_8859_11-2.1.2 + iso8859-13 glibc-ISO_8859_13-2.3.3 + iso8859-14 glibc-ISO_8859_14-2.1.2 + iso8859-15 glibc-ISO_8859_15-2.1.2 + iso8859-16 glibc-ISO_8859_16-2.3.3 } # Array keyed by Tcl encoding name. Each element contains mapping of @@ -72,8 +72,8 @@ proc ucm::warn {msg} { proc ucm::log {msg} { variable verbose if {$verbose} { - variable errorChan - puts $errorChan $msg + variable errorChan + puts $errorChan $msg } } proc ucm::print {s} { @@ -88,21 +88,21 @@ proc ucm::parse_SBCS {encName fd} { set result {} while {[gets $fd line] >= 0} { - if {[string match #* $line]} { - continue - } - if {[string equal "END CHARMAP" [string trim $line]]} { - break - } - if {![regexp {^\s*<U([[:xdigit:]]{4})>\s*((\\x[[:xdigit:]]{2})+)\s*(\|(0|1|2|3|4))} $line -> unichar bytes - - precision]} { - error "Unexpected line parsing SBCS: $line" - } - set bytes [string map {\\x {}} $bytes]; # \xNN -> NN - if {$precision eq "" || $precision eq "0"} { - lappend result $unichar $bytes - } else { - # It is a fallback mapping - ignore - } + if {[string match #* $line]} { + continue + } + if {[string equal "END CHARMAP" [string trim $line]]} { + break + } + if {![regexp {^\s*<U([[:xdigit:]]{4})>\s*((\\x[[:xdigit:]]{2})+)\s*(\|(0|1|2|3|4))} $line -> unichar bytes - - precision]} { + error "Unexpected line parsing SBCS: $line" + } + set bytes [string map {\\x {}} $bytes]; # \xNN -> NN + if {$precision eq "" || $precision eq "0"} { + lappend result $unichar $bytes + } else { + # It is a fallback mapping - ignore + } } set charMap($encName) $result @@ -110,33 +110,33 @@ proc ucm::parse_SBCS {encName fd} { set valid {} set mapped {} foreach {unich bytes} $result { - lappend mapped $unich - lappend valid $bytes + lappend mapped $unich + lappend valid $bytes } set invalidCodeSequences($encName) {} for {set i 0} {$i <= 255} {incr i} { - set hex [format %.2X $i] - if {[lsearch -exact $valid $hex] < 0} { - lappend invalidCodeSequences($encName) $hex - } + set hex [format %.2X $i] + if {[lsearch -exact $valid $hex] < 0} { + lappend invalidCodeSequences($encName) $hex + } } set unmappedCodePoints($encName) {} for {set i 0} {$i <= 65535} {incr i} { - set hex [format %.4X $i] - if {[lsearch -exact $mapped $hex] < 0} { - lappend unmappedCodePoints($encName) $hex - # Only look for (at most) one below 256 and one above 1024 - if {$i < 255} { - # Found one so jump past 8 bits - set i 255 - } else { - break - } - } - if {$i == 255} { - set i 1023 - } + set hex [format %.4X $i] + if {[lsearch -exact $mapped $hex] < 0} { + lappend unmappedCodePoints($encName) $hex + # Only look for (at most) one below 256 and one above 1024 + if {$i < 255} { + # Found one so jump past 8 bits + set i 255 + } else { + break + } + } + if {$i == 255} { + set i 1023 + } } lappend unmappedCodePoints($encName) D800 DC00 10000 10FFFF } @@ -153,41 +153,41 @@ proc ucm::generate_boilerplate {} { proc ucmConvertfromMismatches {enc map} { set mismatches {} foreach {unihex hex} $map { - set unihex [string range 00000000$unihex end-7 end]; # Make 8 digits - set unich [subst "\\U$unihex"] - if {[encoding convertfrom -profile strict $enc [binary decode hex $hex]] ne $unich} { - lappend mismatches "<[printable $unich],$hex>" - } + set unihex [string range 00000000$unihex end-7 end]; # Make 8 digits + set unich [subst "\\U$unihex"] + if {[encoding convertfrom -profile strict $enc [binary decode hex $hex]] ne $unich} { + lappend mismatches "<[printable $unich],$hex>" + } } return $mismatches } proc ucmConverttoMismatches {enc map} { set mismatches {} foreach {unihex hex} $map { - set unihex [string range 00000000$unihex end-7 end]; # Make 8 digits - set unich [subst "\\U$unihex"] - if {[encoding convertto -profile strict $enc $unich] ne [binary decode hex $hex]} { - lappend mismatches "<[printable $unich],$hex>" - } + set unihex [string range 00000000$unihex end-7 end]; # Make 8 digits + set unich [subst "\\U$unihex"] + if {[encoding convertto -profile strict $enc $unich] ne [binary decode hex $hex]} { + lappend mismatches "<[printable $unich],$hex>" + } } return $mismatches } if {[info commands printable] eq ""} { proc printable {s} { - set print "" - foreach c [split $s ""] { - set i [scan $c %c] - if {[string is print $c] && ($i <= 127)} { - append print $c - } elseif {$i <= 0xff} { - append print \\x[format %02X $i] - } elseif {$i <= 0xffff} { - append print \\u[format %04X $i] - } else { - append print \\U[format %08X $i] - } - } - return $print + set print "" + foreach c [split $s ""] { + set i [scan $c %c] + if {[string is print $c] && ($i <= 127)} { + append print $c + } elseif {$i <= 0xff} { + append print \\x[format %02X $i] + } elseif {$i <= 0xffff} { + append print \\u[format %04X $i] + } else { + append print \\U[format %08X $i] + } + } + return $print } } } @@ -203,79 +203,79 @@ proc ucm::generate_tests {} { variable encSubchar if {[info exists outputPath]} { - set outputChan [open $outputPath w] - fconfigure $outputChan -translation lf + set outputChan [open $outputPath w] + fconfigure $outputChan -translation lf } else { - set outputChan stdout + set outputChan stdout } array set tclNames {} foreach encName [encoding names] { - set tclNames($encName) "" + set tclNames($encName) "" } generate_boilerplate foreach encName [lsort -dictionary [array names encNameMap]] { - if {![info exists charMap($encName)]} { - warn "No character map read for $encName" - continue - } - unset tclNames($encName) + if {![info exists charMap($encName)]} { + warn "No character map read for $encName" + continue + } + unset tclNames($encName) - # Print the valid tests - print "\n#\n# $encName (generated from $encNameMap($encName))" - print "\ntest encoding-convertfrom-ucmCompare-$encName {Compare against ICU UCM} -body \{" - print " ucmConvertfromMismatches $encName {$charMap($encName)}" - print "\} -result {}" - print "\ntest encoding-convertto-ucmCompare-$encName {Compare against ICU UCM} -body \{" - print " ucmConverttoMismatches $encName {$charMap($encName)}" - print "\} -result {}" - if {0} { - # This will generate individual tests for every char - # and test in lead, tail, middle, solo configurations - # but takes considerable time - print "lappend encValidStrings \{*\}\{" - foreach {unich hex} $charMap($encName) { - print " $encName \\u$unich $hex {} {}" - } - print "\}; # $encName" - } + # Print the valid tests + print "\n#\n# $encName (generated from $encNameMap($encName))" + print "\ntest encoding-convertfrom-ucmCompare-$encName {Compare against ICU UCM} -body \{" + print " ucmConvertfromMismatches $encName {$charMap($encName)}" + print "\} -result {}" + print "\ntest encoding-convertto-ucmCompare-$encName {Compare against ICU UCM} -body \{" + print " ucmConverttoMismatches $encName {$charMap($encName)}" + print "\} -result {}" + if {0} { + # This will generate individual tests for every char + # and test in lead, tail, middle, solo configurations + # but takes considerable time + print "lappend encValidStrings \{*\}\{" + foreach {unich hex} $charMap($encName) { + print " $encName \\u$unich $hex {} {}" + } + print "\}; # $encName" + } - # Generate the invalidity checks - print "\n# $encName - invalid byte sequences" - print "lappend encInvalidBytes \{*\}\{" - foreach hex $invalidCodeSequences($encName) { - # Map XXXX... to \xXX\xXX... - set uhex [regsub -all .. $hex {\\x\0}] - set uhex \\U[string range 00000000$hex end-7 end] - print " $encName $hex tcl8 $uhex -1 {} {}" - print " $encName $hex replace \\uFFFD -1 {} {}" - print " $encName $hex strict {} 0 {} {}" - } - print "\}; # $encName" + # Generate the invalidity checks + print "\n# $encName - invalid byte sequences" + print "lappend encInvalidBytes \{*\}\{" + foreach hex $invalidCodeSequences($encName) { + # Map XXXX... to \xXX\xXX... + set uhex [regsub -all .. $hex {\\x\0}] + set uhex \\U[string range 00000000$hex end-7 end] + print " $encName $hex tcl8 $uhex -1 {} {}" + print " $encName $hex replace \\uFFFD -1 {} {}" + print " $encName $hex strict {} 0 {} {}" + } + print "\}; # $encName" - print "\n# $encName - invalid byte sequences" - print "lappend encUnencodableStrings \{*\}\{" - if {[info exists encSubchar($encName)]} { - set subchar $encSubchar($encName) - } else { - set subchar "3F"; # Tcl uses ? by default - } - foreach hex $unmappedCodePoints($encName) { - set uhex \\U[string range 00000000$hex end-7 end] - print " $encName $uhex tcl8 $subchar -1 {} {}" - print " $encName $uhex replace $subchar -1 {} {}" - print " $encName $uhex strict {} 0 {} {}" - } - print "\}; # $encName" + print "\n# $encName - invalid byte sequences" + print "lappend encUnencodableStrings \{*\}\{" + if {[info exists encSubchar($encName)]} { + set subchar $encSubchar($encName) + } else { + set subchar "3F"; # Tcl uses ? by default + } + foreach hex $unmappedCodePoints($encName) { + set uhex \\U[string range 00000000$hex end-7 end] + print " $encName $uhex tcl8 $subchar -1 {} {}" + print " $encName $uhex replace $subchar -1 {} {}" + print " $encName $uhex strict {} 0 {} {}" + } + print "\}; # $encName" } if {[array size tclNames]} { - warn "Missing encoding: [lsort [array names tclNames]]" + warn "Missing encoding: [lsort [array names tclNames]]" } if {[info exists outputPath]} { - close $outputChan - unset outputChan + close $outputChan + unset outputChan } } @@ -285,51 +285,51 @@ proc ucm::parse_file {encName ucmPath} { set fd [open $ucmPath] try { - # Parse the metadata - unset -nocomplain state - while {[gets $fd line] >= 0} { - if {[regexp {<(code_set_name|mb_cur_max|mb_cur_min|uconv_class|subchar)>\s+(\S+)} $line -> key val]} { - set state($key) $val - } elseif {[regexp {^\s*CHARMAP\s*$} $line]} { - set state(charmap) "" - break - } else { - # Skip all else - } - } - if {![info exists state(charmap)]} { - abort "Error: $ucmPath has No CHARMAP line." - } - foreach key {code_set_name uconv_class} { - if {[info exists state($key)]} { - set state($key) [string trim $state($key) {"}] - } - } - if {[info exists charMap($encName)]} { - abort "Duplicate file for $encName ($path)" - } - if {![info exists state(uconv_class)]} { - abort "Error: $ucmPath has no uconv_class definition." - } - if {[info exists state(subchar)]} { - # \xNN\xNN.. -> NNNN.. - set encSubchar($encName) [string map {\\x {}} $state(subchar)] - } - switch -exact -- $state(uconv_class) { - SBCS { - if {[catch { - parse_SBCS $encName $fd - } result]} { - abort "Could not process $ucmPath. $result" - } - } - default { - log "Skipping $ucmPath -- not SBCS encoding." - return - } - } + # Parse the metadata + unset -nocomplain state + while {[gets $fd line] >= 0} { + if {[regexp {<(code_set_name|mb_cur_max|mb_cur_min|uconv_class|subchar)>\s+(\S+)} $line -> key val]} { + set state($key) $val + } elseif {[regexp {^\s*CHARMAP\s*$} $line]} { + set state(charmap) "" + break + } else { + # Skip all else + } + } + if {![info exists state(charmap)]} { + abort "Error: $ucmPath has No CHARMAP line." + } + foreach key {code_set_name uconv_class} { + if {[info exists state($key)]} { + set state($key) [string trim $state($key) {"}] + } + } + if {[info exists charMap($encName)]} { + abort "Duplicate file for $encName ($path)" + } + if {![info exists state(uconv_class)]} { + abort "Error: $ucmPath has no uconv_class definition." + } + if {[info exists state(subchar)]} { + # \xNN\xNN.. -> NNNN.. + set encSubchar($encName) [string map {\\x {}} $state(subchar)] + } + switch -exact -- $state(uconv_class) { + SBCS { + if {[catch { + parse_SBCS $encName $fd + } result]} { + abort "Could not process $ucmPath. $result" + } + } + default { + log "Skipping $ucmPath -- not SBCS encoding." + return + } + } } finally { - close $fd + close $fd } } @@ -337,14 +337,14 @@ proc ucm::run {} { variable encNameMap variable outputPath switch [llength $::argv] { - 2 {set outputPath [lindex $::argv 1]} - 1 {} - default { - abort "Usage: [info nameofexecutable] $::argv0 path/to/icu/ucm/data ?outputfile?" - } + 2 {set outputPath [lindex $::argv 1]} + 1 {} + default { + abort "Usage: [info nameofexecutable] $::argv0 path/to/icu/ucm/data ?outputfile?" + } } foreach {encName fname} [array get encNameMap] { - ucm::parse_file $encName [file join [lindex $::argv 0] ${fname}.ucm] + ucm::parse_file $encName [file join [lindex $::argv 0] ${fname}.ucm] } generate_tests } |