summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2024-06-27 21:50:59 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2024-06-27 21:50:59 (GMT)
commit811b044eb5691553abaa86cf5b70f0f6f6ad20c5 (patch)
treed50ee069b257173bdc04fb85ae14a1abe24ae7cc /tools
parent0d26404eeee85995537532be98ec07a77fbba0e6 (diff)
parentb99f44e2b60aa470e6f3b2c7f119b3f1a77f8606 (diff)
downloadtcl-811b044eb5691553abaa86cf5b70f0f6f6ad20c5.zip
tcl-811b044eb5691553abaa86cf5b70f0f6f6ad20c5.tar.gz
tcl-811b044eb5691553abaa86cf5b70f0f6f6ad20c5.tar.bz2
Merge 8.7. Random indent fixes
Diffstat (limited to 'tools')
-rw-r--r--tools/ucm2tests.tcl384
1 files changed, 192 insertions, 192 deletions
diff --git a/tools/ucm2tests.tcl b/tools/ucm2tests.tcl
index dc878ef..3c1b83f 100644
--- a/tools/ucm2tests.tcl
+++ b/tools/ucm2tests.tcl
@@ -16,31 +16,31 @@ namespace eval ucm {
# Map Tcl encoding name to ICU UCM file name
variable encNameMap
array set encNameMap {
- cp1250 glibc-CP1250-2.1.2
- cp1251 glibc-CP1251-2.1.2
- cp1252 glibc-CP1252-2.1.2
- cp1253 glibc-CP1253-2.1.2
- cp1254 glibc-CP1254-2.1.2
- cp1255 glibc-CP1255-2.1.2
- cp1256 glibc-CP1256-2.1.2
- cp1257 glibc-CP1257-2.1.2
- cp1258 glibc-CP1258-2.1.2
- gb1988 glibc-GB_1988_80-2.3.3
- iso8859-1 glibc-ISO_8859_1-2.1.2
- iso8859-2 glibc-ISO_8859_2-2.1.2
- iso8859-3 glibc-ISO_8859_3-2.1.2
- iso8859-4 glibc-ISO_8859_4-2.1.2
- iso8859-5 glibc-ISO_8859_5-2.1.2
- iso8859-6 glibc-ISO_8859_6-2.1.2
- iso8859-7 glibc-ISO_8859_7-2.3.3
- iso8859-8 glibc-ISO_8859_8-2.3.3
- iso8859-9 glibc-ISO_8859_9-2.1.2
- iso8859-10 glibc-ISO_8859_10-2.1.2
- iso8859-11 glibc-ISO_8859_11-2.1.2
- iso8859-13 glibc-ISO_8859_13-2.3.3
- iso8859-14 glibc-ISO_8859_14-2.1.2
- iso8859-15 glibc-ISO_8859_15-2.1.2
- iso8859-16 glibc-ISO_8859_16-2.3.3
+ cp1250 glibc-CP1250-2.1.2
+ cp1251 glibc-CP1251-2.1.2
+ cp1252 glibc-CP1252-2.1.2
+ cp1253 glibc-CP1253-2.1.2
+ cp1254 glibc-CP1254-2.1.2
+ cp1255 glibc-CP1255-2.1.2
+ cp1256 glibc-CP1256-2.1.2
+ cp1257 glibc-CP1257-2.1.2
+ cp1258 glibc-CP1258-2.1.2
+ gb1988 glibc-GB_1988_80-2.3.3
+ iso8859-1 glibc-ISO_8859_1-2.1.2
+ iso8859-2 glibc-ISO_8859_2-2.1.2
+ iso8859-3 glibc-ISO_8859_3-2.1.2
+ iso8859-4 glibc-ISO_8859_4-2.1.2
+ iso8859-5 glibc-ISO_8859_5-2.1.2
+ iso8859-6 glibc-ISO_8859_6-2.1.2
+ iso8859-7 glibc-ISO_8859_7-2.3.3
+ iso8859-8 glibc-ISO_8859_8-2.3.3
+ iso8859-9 glibc-ISO_8859_9-2.1.2
+ iso8859-10 glibc-ISO_8859_10-2.1.2
+ iso8859-11 glibc-ISO_8859_11-2.1.2
+ iso8859-13 glibc-ISO_8859_13-2.3.3
+ iso8859-14 glibc-ISO_8859_14-2.1.2
+ iso8859-15 glibc-ISO_8859_15-2.1.2
+ iso8859-16 glibc-ISO_8859_16-2.3.3
}
# Array keyed by Tcl encoding name. Each element contains mapping of
@@ -72,8 +72,8 @@ proc ucm::warn {msg} {
proc ucm::log {msg} {
variable verbose
if {$verbose} {
- variable errorChan
- puts $errorChan $msg
+ variable errorChan
+ puts $errorChan $msg
}
}
proc ucm::print {s} {
@@ -88,21 +88,21 @@ proc ucm::parse_SBCS {encName fd} {
set result {}
while {[gets $fd line] >= 0} {
- if {[string match #* $line]} {
- continue
- }
- if {[string equal "END CHARMAP" [string trim $line]]} {
- break
- }
- if {![regexp {^\s*<U([[:xdigit:]]{4})>\s*((\\x[[:xdigit:]]{2})+)\s*(\|(0|1|2|3|4))} $line -> unichar bytes - - precision]} {
- error "Unexpected line parsing SBCS: $line"
- }
- set bytes [string map {\\x {}} $bytes]; # \xNN -> NN
- if {$precision eq "" || $precision eq "0"} {
- lappend result $unichar $bytes
- } else {
- # It is a fallback mapping - ignore
- }
+ if {[string match #* $line]} {
+ continue
+ }
+ if {[string equal "END CHARMAP" [string trim $line]]} {
+ break
+ }
+ if {![regexp {^\s*<U([[:xdigit:]]{4})>\s*((\\x[[:xdigit:]]{2})+)\s*(\|(0|1|2|3|4))} $line -> unichar bytes - - precision]} {
+ error "Unexpected line parsing SBCS: $line"
+ }
+ set bytes [string map {\\x {}} $bytes]; # \xNN -> NN
+ if {$precision eq "" || $precision eq "0"} {
+ lappend result $unichar $bytes
+ } else {
+ # It is a fallback mapping - ignore
+ }
}
set charMap($encName) $result
@@ -110,33 +110,33 @@ proc ucm::parse_SBCS {encName fd} {
set valid {}
set mapped {}
foreach {unich bytes} $result {
- lappend mapped $unich
- lappend valid $bytes
+ lappend mapped $unich
+ lappend valid $bytes
}
set invalidCodeSequences($encName) {}
for {set i 0} {$i <= 255} {incr i} {
- set hex [format %.2X $i]
- if {[lsearch -exact $valid $hex] < 0} {
- lappend invalidCodeSequences($encName) $hex
- }
+ set hex [format %.2X $i]
+ if {[lsearch -exact $valid $hex] < 0} {
+ lappend invalidCodeSequences($encName) $hex
+ }
}
set unmappedCodePoints($encName) {}
for {set i 0} {$i <= 65535} {incr i} {
- set hex [format %.4X $i]
- if {[lsearch -exact $mapped $hex] < 0} {
- lappend unmappedCodePoints($encName) $hex
- # Only look for (at most) one below 256 and one above 1024
- if {$i < 255} {
- # Found one so jump past 8 bits
- set i 255
- } else {
- break
- }
- }
- if {$i == 255} {
- set i 1023
- }
+ set hex [format %.4X $i]
+ if {[lsearch -exact $mapped $hex] < 0} {
+ lappend unmappedCodePoints($encName) $hex
+ # Only look for (at most) one below 256 and one above 1024
+ if {$i < 255} {
+ # Found one so jump past 8 bits
+ set i 255
+ } else {
+ break
+ }
+ }
+ if {$i == 255} {
+ set i 1023
+ }
}
lappend unmappedCodePoints($encName) D800 DC00 10000 10FFFF
}
@@ -153,41 +153,41 @@ proc ucm::generate_boilerplate {} {
proc ucmConvertfromMismatches {enc map} {
set mismatches {}
foreach {unihex hex} $map {
- set unihex [string range 00000000$unihex end-7 end]; # Make 8 digits
- set unich [subst "\\U$unihex"]
- if {[encoding convertfrom -profile strict $enc [binary decode hex $hex]] ne $unich} {
- lappend mismatches "<[printable $unich],$hex>"
- }
+ set unihex [string range 00000000$unihex end-7 end]; # Make 8 digits
+ set unich [subst "\\U$unihex"]
+ if {[encoding convertfrom -profile strict $enc [binary decode hex $hex]] ne $unich} {
+ lappend mismatches "<[printable $unich],$hex>"
+ }
}
return $mismatches
}
proc ucmConverttoMismatches {enc map} {
set mismatches {}
foreach {unihex hex} $map {
- set unihex [string range 00000000$unihex end-7 end]; # Make 8 digits
- set unich [subst "\\U$unihex"]
- if {[encoding convertto -profile strict $enc $unich] ne [binary decode hex $hex]} {
- lappend mismatches "<[printable $unich],$hex>"
- }
+ set unihex [string range 00000000$unihex end-7 end]; # Make 8 digits
+ set unich [subst "\\U$unihex"]
+ if {[encoding convertto -profile strict $enc $unich] ne [binary decode hex $hex]} {
+ lappend mismatches "<[printable $unich],$hex>"
+ }
}
return $mismatches
}
if {[info commands printable] eq ""} {
proc printable {s} {
- set print ""
- foreach c [split $s ""] {
- set i [scan $c %c]
- if {[string is print $c] && ($i <= 127)} {
- append print $c
- } elseif {$i <= 0xff} {
- append print \\x[format %02X $i]
- } elseif {$i <= 0xffff} {
- append print \\u[format %04X $i]
- } else {
- append print \\U[format %08X $i]
- }
- }
- return $print
+ set print ""
+ foreach c [split $s ""] {
+ set i [scan $c %c]
+ if {[string is print $c] && ($i <= 127)} {
+ append print $c
+ } elseif {$i <= 0xff} {
+ append print \\x[format %02X $i]
+ } elseif {$i <= 0xffff} {
+ append print \\u[format %04X $i]
+ } else {
+ append print \\U[format %08X $i]
+ }
+ }
+ return $print
}
}
}
@@ -203,79 +203,79 @@ proc ucm::generate_tests {} {
variable encSubchar
if {[info exists outputPath]} {
- set outputChan [open $outputPath w]
- fconfigure $outputChan -translation lf
+ set outputChan [open $outputPath w]
+ fconfigure $outputChan -translation lf
} else {
- set outputChan stdout
+ set outputChan stdout
}
array set tclNames {}
foreach encName [encoding names] {
- set tclNames($encName) ""
+ set tclNames($encName) ""
}
generate_boilerplate
foreach encName [lsort -dictionary [array names encNameMap]] {
- if {![info exists charMap($encName)]} {
- warn "No character map read for $encName"
- continue
- }
- unset tclNames($encName)
+ if {![info exists charMap($encName)]} {
+ warn "No character map read for $encName"
+ continue
+ }
+ unset tclNames($encName)
- # Print the valid tests
- print "\n#\n# $encName (generated from $encNameMap($encName))"
- print "\ntest encoding-convertfrom-ucmCompare-$encName {Compare against ICU UCM} -body \{"
- print " ucmConvertfromMismatches $encName {$charMap($encName)}"
- print "\} -result {}"
- print "\ntest encoding-convertto-ucmCompare-$encName {Compare against ICU UCM} -body \{"
- print " ucmConverttoMismatches $encName {$charMap($encName)}"
- print "\} -result {}"
- if {0} {
- # This will generate individual tests for every char
- # and test in lead, tail, middle, solo configurations
- # but takes considerable time
- print "lappend encValidStrings \{*\}\{"
- foreach {unich hex} $charMap($encName) {
- print " $encName \\u$unich $hex {} {}"
- }
- print "\}; # $encName"
- }
+ # Print the valid tests
+ print "\n#\n# $encName (generated from $encNameMap($encName))"
+ print "\ntest encoding-convertfrom-ucmCompare-$encName {Compare against ICU UCM} -body \{"
+ print " ucmConvertfromMismatches $encName {$charMap($encName)}"
+ print "\} -result {}"
+ print "\ntest encoding-convertto-ucmCompare-$encName {Compare against ICU UCM} -body \{"
+ print " ucmConverttoMismatches $encName {$charMap($encName)}"
+ print "\} -result {}"
+ if {0} {
+ # This will generate individual tests for every char
+ # and test in lead, tail, middle, solo configurations
+ # but takes considerable time
+ print "lappend encValidStrings \{*\}\{"
+ foreach {unich hex} $charMap($encName) {
+ print " $encName \\u$unich $hex {} {}"
+ }
+ print "\}; # $encName"
+ }
- # Generate the invalidity checks
- print "\n# $encName - invalid byte sequences"
- print "lappend encInvalidBytes \{*\}\{"
- foreach hex $invalidCodeSequences($encName) {
- # Map XXXX... to \xXX\xXX...
- set uhex [regsub -all .. $hex {\\x\0}]
- set uhex \\U[string range 00000000$hex end-7 end]
- print " $encName $hex tcl8 $uhex -1 {} {}"
- print " $encName $hex replace \\uFFFD -1 {} {}"
- print " $encName $hex strict {} 0 {} {}"
- }
- print "\}; # $encName"
+ # Generate the invalidity checks
+ print "\n# $encName - invalid byte sequences"
+ print "lappend encInvalidBytes \{*\}\{"
+ foreach hex $invalidCodeSequences($encName) {
+ # Map XXXX... to \xXX\xXX...
+ set uhex [regsub -all .. $hex {\\x\0}]
+ set uhex \\U[string range 00000000$hex end-7 end]
+ print " $encName $hex tcl8 $uhex -1 {} {}"
+ print " $encName $hex replace \\uFFFD -1 {} {}"
+ print " $encName $hex strict {} 0 {} {}"
+ }
+ print "\}; # $encName"
- print "\n# $encName - invalid byte sequences"
- print "lappend encUnencodableStrings \{*\}\{"
- if {[info exists encSubchar($encName)]} {
- set subchar $encSubchar($encName)
- } else {
- set subchar "3F"; # Tcl uses ? by default
- }
- foreach hex $unmappedCodePoints($encName) {
- set uhex \\U[string range 00000000$hex end-7 end]
- print " $encName $uhex tcl8 $subchar -1 {} {}"
- print " $encName $uhex replace $subchar -1 {} {}"
- print " $encName $uhex strict {} 0 {} {}"
- }
- print "\}; # $encName"
+ print "\n# $encName - invalid byte sequences"
+ print "lappend encUnencodableStrings \{*\}\{"
+ if {[info exists encSubchar($encName)]} {
+ set subchar $encSubchar($encName)
+ } else {
+ set subchar "3F"; # Tcl uses ? by default
+ }
+ foreach hex $unmappedCodePoints($encName) {
+ set uhex \\U[string range 00000000$hex end-7 end]
+ print " $encName $uhex tcl8 $subchar -1 {} {}"
+ print " $encName $uhex replace $subchar -1 {} {}"
+ print " $encName $uhex strict {} 0 {} {}"
+ }
+ print "\}; # $encName"
}
if {[array size tclNames]} {
- warn "Missing encoding: [lsort [array names tclNames]]"
+ warn "Missing encoding: [lsort [array names tclNames]]"
}
if {[info exists outputPath]} {
- close $outputChan
- unset outputChan
+ close $outputChan
+ unset outputChan
}
}
@@ -285,51 +285,51 @@ proc ucm::parse_file {encName ucmPath} {
set fd [open $ucmPath]
try {
- # Parse the metadata
- unset -nocomplain state
- while {[gets $fd line] >= 0} {
- if {[regexp {<(code_set_name|mb_cur_max|mb_cur_min|uconv_class|subchar)>\s+(\S+)} $line -> key val]} {
- set state($key) $val
- } elseif {[regexp {^\s*CHARMAP\s*$} $line]} {
- set state(charmap) ""
- break
- } else {
- # Skip all else
- }
- }
- if {![info exists state(charmap)]} {
- abort "Error: $ucmPath has No CHARMAP line."
- }
- foreach key {code_set_name uconv_class} {
- if {[info exists state($key)]} {
- set state($key) [string trim $state($key) {"}]
- }
- }
- if {[info exists charMap($encName)]} {
- abort "Duplicate file for $encName ($path)"
- }
- if {![info exists state(uconv_class)]} {
- abort "Error: $ucmPath has no uconv_class definition."
- }
- if {[info exists state(subchar)]} {
- # \xNN\xNN.. -> NNNN..
- set encSubchar($encName) [string map {\\x {}} $state(subchar)]
- }
- switch -exact -- $state(uconv_class) {
- SBCS {
- if {[catch {
- parse_SBCS $encName $fd
- } result]} {
- abort "Could not process $ucmPath. $result"
- }
- }
- default {
- log "Skipping $ucmPath -- not SBCS encoding."
- return
- }
- }
+ # Parse the metadata
+ unset -nocomplain state
+ while {[gets $fd line] >= 0} {
+ if {[regexp {<(code_set_name|mb_cur_max|mb_cur_min|uconv_class|subchar)>\s+(\S+)} $line -> key val]} {
+ set state($key) $val
+ } elseif {[regexp {^\s*CHARMAP\s*$} $line]} {
+ set state(charmap) ""
+ break
+ } else {
+ # Skip all else
+ }
+ }
+ if {![info exists state(charmap)]} {
+ abort "Error: $ucmPath has No CHARMAP line."
+ }
+ foreach key {code_set_name uconv_class} {
+ if {[info exists state($key)]} {
+ set state($key) [string trim $state($key) {"}]
+ }
+ }
+ if {[info exists charMap($encName)]} {
+ abort "Duplicate file for $encName ($path)"
+ }
+ if {![info exists state(uconv_class)]} {
+ abort "Error: $ucmPath has no uconv_class definition."
+ }
+ if {[info exists state(subchar)]} {
+ # \xNN\xNN.. -> NNNN..
+ set encSubchar($encName) [string map {\\x {}} $state(subchar)]
+ }
+ switch -exact -- $state(uconv_class) {
+ SBCS {
+ if {[catch {
+ parse_SBCS $encName $fd
+ } result]} {
+ abort "Could not process $ucmPath. $result"
+ }
+ }
+ default {
+ log "Skipping $ucmPath -- not SBCS encoding."
+ return
+ }
+ }
} finally {
- close $fd
+ close $fd
}
}
@@ -337,14 +337,14 @@ proc ucm::run {} {
variable encNameMap
variable outputPath
switch [llength $::argv] {
- 2 {set outputPath [lindex $::argv 1]}
- 1 {}
- default {
- abort "Usage: [info nameofexecutable] $::argv0 path/to/icu/ucm/data ?outputfile?"
- }
+ 2 {set outputPath [lindex $::argv 1]}
+ 1 {}
+ default {
+ abort "Usage: [info nameofexecutable] $::argv0 path/to/icu/ucm/data ?outputfile?"
+ }
}
foreach {encName fname} [array get encNameMap] {
- ucm::parse_file $encName [file join [lindex $::argv 0] ${fname}.ucm]
+ ucm::parse_file $encName [file join [lindex $::argv 0] ${fname}.ucm]
}
generate_tests
}