diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2023-05-01 13:22:24 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2023-05-01 13:22:24 (GMT) |
commit | 530e4b3bb9e1b9acd0231c4da1a29d73180a20d5 (patch) | |
tree | d32275c10833e1cca203e0c05f52340d30682262 /tools | |
parent | 528b7b71686a0bb1993b1cce1166b5b70d511d31 (diff) | |
parent | 6c4b78cfa8c06ea5963591778902da74850d1985 (diff) | |
download | tcl-530e4b3bb9e1b9acd0231c4da1a29d73180a20d5.zip tcl-530e4b3bb9e1b9acd0231c4da1a29d73180a20d5.tar.gz tcl-530e4b3bb9e1b9acd0231c4da1a29d73180a20d5.tar.bz2 |
Merge 8.7
Diffstat (limited to 'tools')
-rw-r--r-- | tools/encoding/Makefile | 5 | ||||
-rw-r--r-- | tools/encoding/txt2enc.c | 8 | ||||
-rw-r--r-- | tools/genStubs.tcl | 2 | ||||
-rw-r--r-- | tools/mkdepend.tcl | 4 | ||||
-rw-r--r-- | tools/regexpTestLib.tcl | 2 | ||||
-rw-r--r-- | tools/tclOOScript.tcl | 4 | ||||
-rwxr-xr-x | tools/tcltk-man2html.tcl | 4 | ||||
-rw-r--r-- | tools/ucm2tests.tcl | 352 | ||||
-rw-r--r-- | tools/valgrind_check_success | 30 | ||||
-rw-r--r-- | tools/valgrind_suppress | 137 |
10 files changed, 530 insertions, 18 deletions
diff --git a/tools/encoding/Makefile b/tools/encoding/Makefile index 361239e..ff19492 100644 --- a/tools/encoding/Makefile +++ b/tools/encoding/Makefile @@ -71,11 +71,6 @@ encodings: clean txt2enc $(EUC_ENCODINGS) done @echo @echo Compiling special versions of encoding files. - @for p in ascii.txt; do \ - enc=`echo $$p | sed 's/\..*$$/\.enc/'`; \ - echo $$enc; \ - ./txt2enc -m $$p > $$enc; \ - done @for p in jis0208.txt; do \ enc=`echo $$p | sed 's/\..*$$/\.enc/'`; \ echo $$enc; \ diff --git a/tools/encoding/txt2enc.c b/tools/encoding/txt2enc.c index 7ee797b..32c7344 100644 --- a/tools/encoding/txt2enc.c +++ b/tools/encoding/txt2enc.c @@ -106,7 +106,7 @@ main(int argc, char **argv) fputs(" -f\tfallback character (default: QUESTION MARK)\n", stderr); fputs(" -t\toverride implicit type with single, double, or multi\n", stderr); fputs(" -s\tsymbol+ascii encoding\n", stderr); - fputs(" -m\tdon't implicitly include range 0080 to 00FF\n", stderr); + fputs(" -m\tdon't implicitly include 007F\n", stderr); return 1; } @@ -208,10 +208,8 @@ main(int argc, char **argv) toUnicode[0][i] = i; } if (fixmissing) { - for (i = 0x7F; i < 0xA0; i++) { - if (toUnicode[i] == NULL && toUnicode[0][i] == 0) { - toUnicode[0][i] = i; - } + if (toUnicode[0x7F] == NULL && toUnicode[0][0x7F] == 0) { + toUnicode[0][0x7F] = 0x7F; } } } diff --git a/tools/genStubs.tcl b/tools/genStubs.tcl index 282abcc..89e4ccc 100644 --- a/tools/genStubs.tcl +++ b/tools/genStubs.tcl @@ -257,7 +257,7 @@ proc genStubs::rewriteFile {file text} { return } set in [open ${file} r] - fconfigure $in -eofchar "\032 {}" -encoding utf-8 + fconfigure $in -eofchar "\x1A {}" -encoding utf-8 set out [open ${file}.new w] fconfigure $out -translation lf -encoding utf-8 diff --git a/tools/mkdepend.tcl b/tools/mkdepend.tcl index 6e3d6ed..b02e396 100644 --- a/tools/mkdepend.tcl +++ b/tools/mkdepend.tcl @@ -252,7 +252,7 @@ proc compressDeps {depends} { # Adds a new set of path and replacement string to the global list. # # Arguments: -# newPathInfo comma seperated path and replacement string +# newPathInfo comma separated path and replacement string # # Results: # None. @@ -296,7 +296,7 @@ proc readInputListFile {objectListFile} { set fl [read $f] close $f - # fix native path seperator so it isn't treated as an escape. + # fix native path separator so it isn't treated as an escape. regsub -all {\\} $fl {/} fl # Treat the string as a list so filenames between double quotes are diff --git a/tools/regexpTestLib.tcl b/tools/regexpTestLib.tcl index 454a4e8..71dc909 100644 --- a/tools/regexpTestLib.tcl +++ b/tools/regexpTestLib.tcl @@ -42,7 +42,7 @@ proc readInputFile {} { # # strings with embedded @'s are truncated -# unpreceeded @'s are replaced by {} +# unpreceded @'s are replaced by {} # proc removeAts {ls} { set len [llength $ls] diff --git a/tools/tclOOScript.tcl b/tools/tclOOScript.tcl index de953f0..4591a1b 100644 --- a/tools/tclOOScript.tcl +++ b/tools/tclOOScript.tcl @@ -195,7 +195,7 @@ # # ---------------------------------------------------------------------- - proc define::classmethod {name {args {}} {body {}}} { + proc define::classmethod {name args} { # Create the method on the class if the caller gave arguments and body ::set argc [::llength [::info level 0]] ::if {$argc == 3} { @@ -205,7 +205,7 @@ } ::set cls [::uplevel 1 self] ::if {$argc == 4} { - ::oo::define [::oo::DelegateName $cls] method $name $args $body + ::oo::define [::oo::DelegateName $cls] method $name {*}$args } # Make the connection by forwarding ::tailcall forward $name myclass $name diff --git a/tools/tcltk-man2html.tcl b/tools/tcltk-man2html.tcl index e6d9375..5f211f3 100755 --- a/tools/tcltk-man2html.tcl +++ b/tools/tcltk-man2html.tcl @@ -293,14 +293,14 @@ proc css-stylesheet {} { font-size: 11px; } css-style ".keylist dt" ".arguments dt" { - width: 20em; + width: 25em; float: left; padding: 2px; border-top: 1px solid #999999; } css-style ".keylist dt" { font-weight: bold; } css-style ".keylist dd" ".arguments dd" { - margin-left: 20em; + margin-left: 25em; padding: 2px; border-top: 1px solid #999999; } diff --git a/tools/ucm2tests.tcl b/tools/ucm2tests.tcl new file mode 100644 index 0000000..dc878ef --- /dev/null +++ b/tools/ucm2tests.tcl @@ -0,0 +1,352 @@ +# ucm2tests.tcl +# +# Parses given ucm files (from ICU) to generate test data +# for encodings. +# +# tclsh ucm2tests.tcl PATH_TO_ICU_UCM_DIRECTORY ?OUTPUTPATH? +# + +namespace eval ucm { + # No means to change these currently but ... + variable outputPath + variable outputChan + variable errorChan stderr + variable verbose 0 + + # Map Tcl encoding name to ICU UCM file name + variable encNameMap + array set encNameMap { + cp1250 glibc-CP1250-2.1.2 + cp1251 glibc-CP1251-2.1.2 + cp1252 glibc-CP1252-2.1.2 + cp1253 glibc-CP1253-2.1.2 + cp1254 glibc-CP1254-2.1.2 + cp1255 glibc-CP1255-2.1.2 + cp1256 glibc-CP1256-2.1.2 + cp1257 glibc-CP1257-2.1.2 + cp1258 glibc-CP1258-2.1.2 + gb1988 glibc-GB_1988_80-2.3.3 + iso8859-1 glibc-ISO_8859_1-2.1.2 + iso8859-2 glibc-ISO_8859_2-2.1.2 + iso8859-3 glibc-ISO_8859_3-2.1.2 + iso8859-4 glibc-ISO_8859_4-2.1.2 + iso8859-5 glibc-ISO_8859_5-2.1.2 + iso8859-6 glibc-ISO_8859_6-2.1.2 + iso8859-7 glibc-ISO_8859_7-2.3.3 + iso8859-8 glibc-ISO_8859_8-2.3.3 + iso8859-9 glibc-ISO_8859_9-2.1.2 + iso8859-10 glibc-ISO_8859_10-2.1.2 + iso8859-11 glibc-ISO_8859_11-2.1.2 + iso8859-13 glibc-ISO_8859_13-2.3.3 + iso8859-14 glibc-ISO_8859_14-2.1.2 + iso8859-15 glibc-ISO_8859_15-2.1.2 + iso8859-16 glibc-ISO_8859_16-2.3.3 + } + + # Array keyed by Tcl encoding name. Each element contains mapping of + # Unicode code point -> byte sequence for that encoding as a flat list + # (or dictionary). Both are stored as hex strings + variable charMap + + # Array keyed by Tcl encoding name. List of invalid code sequences + # each being a hex string. + variable invalidCodeSequences + + # Array keyed by Tcl encoding name. List of unicode code points that are + # not mapped, each being a hex string. + variable unmappedCodePoints + + # The fallback character per encoding + variable encSubchar +} + +proc ucm::abort {msg} { + variable errorChan + puts $errorChan $msg + exit 1 +} +proc ucm::warn {msg} { + variable errorChan + puts $errorChan $msg +} +proc ucm::log {msg} { + variable verbose + if {$verbose} { + variable errorChan + puts $errorChan $msg + } +} +proc ucm::print {s} { + variable outputChan + puts $outputChan $s +} + +proc ucm::parse_SBCS {encName fd} { + variable charMap + variable invalidCodeSequences + variable unmappedCodePoints + + set result {} + while {[gets $fd line] >= 0} { + if {[string match #* $line]} { + continue + } + if {[string equal "END CHARMAP" [string trim $line]]} { + break + } + if {![regexp {^\s*<U([[:xdigit:]]{4})>\s*((\\x[[:xdigit:]]{2})+)\s*(\|(0|1|2|3|4))} $line -> unichar bytes - - precision]} { + error "Unexpected line parsing SBCS: $line" + } + set bytes [string map {\\x {}} $bytes]; # \xNN -> NN + if {$precision eq "" || $precision eq "0"} { + lappend result $unichar $bytes + } else { + # It is a fallback mapping - ignore + } + } + set charMap($encName) $result + + # Find out invalid code sequences and unicode code points that are not mapped + set valid {} + set mapped {} + foreach {unich bytes} $result { + lappend mapped $unich + lappend valid $bytes + } + set invalidCodeSequences($encName) {} + for {set i 0} {$i <= 255} {incr i} { + set hex [format %.2X $i] + if {[lsearch -exact $valid $hex] < 0} { + lappend invalidCodeSequences($encName) $hex + } + } + + set unmappedCodePoints($encName) {} + for {set i 0} {$i <= 65535} {incr i} { + set hex [format %.4X $i] + if {[lsearch -exact $mapped $hex] < 0} { + lappend unmappedCodePoints($encName) $hex + # Only look for (at most) one below 256 and one above 1024 + if {$i < 255} { + # Found one so jump past 8 bits + set i 255 + } else { + break + } + } + if {$i == 255} { + set i 1023 + } + } + lappend unmappedCodePoints($encName) D800 DC00 10000 10FFFF +} + +proc ucm::generate_boilerplate {} { + # Common procedures + print { +# This file is automatically generated by ucm2tests.tcl. +# Edits will be overwritten on next generation. +# +# Generates tests comparing Tcl encodings to ICU. +# The generated file is NOT standalone. It should be sourced into a test script. + +proc ucmConvertfromMismatches {enc map} { + set mismatches {} + foreach {unihex hex} $map { + set unihex [string range 00000000$unihex end-7 end]; # Make 8 digits + set unich [subst "\\U$unihex"] + if {[encoding convertfrom -profile strict $enc [binary decode hex $hex]] ne $unich} { + lappend mismatches "<[printable $unich],$hex>" + } + } + return $mismatches +} +proc ucmConverttoMismatches {enc map} { + set mismatches {} + foreach {unihex hex} $map { + set unihex [string range 00000000$unihex end-7 end]; # Make 8 digits + set unich [subst "\\U$unihex"] + if {[encoding convertto -profile strict $enc $unich] ne [binary decode hex $hex]} { + lappend mismatches "<[printable $unich],$hex>" + } + } + return $mismatches +} +if {[info commands printable] eq ""} { + proc printable {s} { + set print "" + foreach c [split $s ""] { + set i [scan $c %c] + if {[string is print $c] && ($i <= 127)} { + append print $c + } elseif {$i <= 0xff} { + append print \\x[format %02X $i] + } elseif {$i <= 0xffff} { + append print \\u[format %04X $i] + } else { + append print \\U[format %08X $i] + } + } + return $print + } +} + } +} ; # generate_boilerplate + +proc ucm::generate_tests {} { + variable encNameMap + variable charMap + variable invalidCodeSequences + variable unmappedCodePoints + variable outputPath + variable outputChan + variable encSubchar + + if {[info exists outputPath]} { + set outputChan [open $outputPath w] + fconfigure $outputChan -translation lf + } else { + set outputChan stdout + } + + array set tclNames {} + foreach encName [encoding names] { + set tclNames($encName) "" + } + + generate_boilerplate + foreach encName [lsort -dictionary [array names encNameMap]] { + if {![info exists charMap($encName)]} { + warn "No character map read for $encName" + continue + } + unset tclNames($encName) + + # Print the valid tests + print "\n#\n# $encName (generated from $encNameMap($encName))" + print "\ntest encoding-convertfrom-ucmCompare-$encName {Compare against ICU UCM} -body \{" + print " ucmConvertfromMismatches $encName {$charMap($encName)}" + print "\} -result {}" + print "\ntest encoding-convertto-ucmCompare-$encName {Compare against ICU UCM} -body \{" + print " ucmConverttoMismatches $encName {$charMap($encName)}" + print "\} -result {}" + if {0} { + # This will generate individual tests for every char + # and test in lead, tail, middle, solo configurations + # but takes considerable time + print "lappend encValidStrings \{*\}\{" + foreach {unich hex} $charMap($encName) { + print " $encName \\u$unich $hex {} {}" + } + print "\}; # $encName" + } + + # Generate the invalidity checks + print "\n# $encName - invalid byte sequences" + print "lappend encInvalidBytes \{*\}\{" + foreach hex $invalidCodeSequences($encName) { + # Map XXXX... to \xXX\xXX... + set uhex [regsub -all .. $hex {\\x\0}] + set uhex \\U[string range 00000000$hex end-7 end] + print " $encName $hex tcl8 $uhex -1 {} {}" + print " $encName $hex replace \\uFFFD -1 {} {}" + print " $encName $hex strict {} 0 {} {}" + } + print "\}; # $encName" + + print "\n# $encName - invalid byte sequences" + print "lappend encUnencodableStrings \{*\}\{" + if {[info exists encSubchar($encName)]} { + set subchar $encSubchar($encName) + } else { + set subchar "3F"; # Tcl uses ? by default + } + foreach hex $unmappedCodePoints($encName) { + set uhex \\U[string range 00000000$hex end-7 end] + print " $encName $uhex tcl8 $subchar -1 {} {}" + print " $encName $uhex replace $subchar -1 {} {}" + print " $encName $uhex strict {} 0 {} {}" + } + print "\}; # $encName" + } + + if {[array size tclNames]} { + warn "Missing encoding: [lsort [array names tclNames]]" + } + if {[info exists outputPath]} { + close $outputChan + unset outputChan + } +} + +proc ucm::parse_file {encName ucmPath} { + variable charMap + variable encSubchar + + set fd [open $ucmPath] + try { + # Parse the metadata + unset -nocomplain state + while {[gets $fd line] >= 0} { + if {[regexp {<(code_set_name|mb_cur_max|mb_cur_min|uconv_class|subchar)>\s+(\S+)} $line -> key val]} { + set state($key) $val + } elseif {[regexp {^\s*CHARMAP\s*$} $line]} { + set state(charmap) "" + break + } else { + # Skip all else + } + } + if {![info exists state(charmap)]} { + abort "Error: $ucmPath has No CHARMAP line." + } + foreach key {code_set_name uconv_class} { + if {[info exists state($key)]} { + set state($key) [string trim $state($key) {"}] + } + } + if {[info exists charMap($encName)]} { + abort "Duplicate file for $encName ($path)" + } + if {![info exists state(uconv_class)]} { + abort "Error: $ucmPath has no uconv_class definition." + } + if {[info exists state(subchar)]} { + # \xNN\xNN.. -> NNNN.. + set encSubchar($encName) [string map {\\x {}} $state(subchar)] + } + switch -exact -- $state(uconv_class) { + SBCS { + if {[catch { + parse_SBCS $encName $fd + } result]} { + abort "Could not process $ucmPath. $result" + } + } + default { + log "Skipping $ucmPath -- not SBCS encoding." + return + } + } + } finally { + close $fd + } +} + +proc ucm::run {} { + variable encNameMap + variable outputPath + switch [llength $::argv] { + 2 {set outputPath [lindex $::argv 1]} + 1 {} + default { + abort "Usage: [info nameofexecutable] $::argv0 path/to/icu/ucm/data ?outputfile?" + } + } + foreach {encName fname} [array get encNameMap] { + ucm::parse_file $encName [file join [lindex $::argv 0] ${fname}.ucm] + } + generate_tests +} + +ucm::run diff --git a/tools/valgrind_check_success b/tools/valgrind_check_success new file mode 100644 index 0000000..24830d5 --- /dev/null +++ b/tools/valgrind_check_success @@ -0,0 +1,30 @@ +#! /usr/bin/env tclsh + + +proc main {sourcetype source} { + switch $sourcetype { + file { + set chan [open $source] + try { + set data [read $chan] + } finally { + close $chan + } + } + string { + set data $source + } + default { + error [list {wrong # args}] + } + } + set found [regexp -inline -all {blocks are\ + (?:(?:(?:definitely|indirectly|possibly) lost)|still reachable)} $data] + if {[llength $found]} { + puts 0 + } else { + puts 1 + } + flush stdout +} +main {*}$argv diff --git a/tools/valgrind_suppress b/tools/valgrind_suppress index fb7f173..11ca880 100644 --- a/tools/valgrind_suppress +++ b/tools/valgrind_suppress @@ -1,3 +1,17 @@ +#{ +# Tcl_GetChannelOption/TcpGetOptionProc/TcphostPortList/getnameinfo/gethostbyaddr_r +# Memcheck:Leak +# match-leak-kinds: reachable +# fun:malloc +# fun:strdup +# ... +# fun:module_load +# ... +# fun:getnameinfo +# ... +# fun:Tcl_GetChannelOption +#} + { TclCreatesocketAddress/getaddrinfo/calloc Memcheck:Leak @@ -11,6 +25,16 @@ { TclCreatesocketAddress/getaddrinfo/malloc Memcheck:Leak + match-leak-kinds: definite + fun:malloc + ... + fun:getaddrinfo + fun:TclCreateSocketAddress +} + +{ + TclCreatesocketAddress/getaddrinfo/malloc + Memcheck:Leak match-leak-kinds: reachable fun:malloc ... @@ -19,6 +43,18 @@ } { + TclpDlopen/decompose_rpath + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:decompose_rpath + ... + fun:dlopen_doit + ... + fun:TclpDlopen +} + +{ TclpDlopen/load Memcheck:Leak match-leak-kinds: reachable @@ -72,6 +108,46 @@ } { + TclpGeHostByName/gethostbyname_r/strdup/malloc + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:strdup + ... + fun:dl_open_worker + ... + fun:do_dlopen + ... + fun:TclpGetHostByName +} + +{ + TclpGeHostByName/gethostbyname_r/calloc + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + ... + fun:dl_open_worker + ... + fun:do_dlopen + ... + fun:TclpGetHostByName +} + +{ + TclpGeHostByName/gethostbyname_r/malloc + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + ... + fun:dl_open_worker + ... + fun:do_dlopen + ... + fun:TclpGetHostByName +} + +{ TclpGetPwNam/getpwname_r/__nss_next2/calloc Memcheck:Leak match-leak-kinds: reachable @@ -105,6 +181,57 @@ } { + TclpGetGrGid/getgrgid_r/module_load + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + ... + fun:module_load + ... + fun:TclpGetGrGid +} + +{ + TclpGetGrGid/getgrgid_r/module_load + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + ... + fun:module_load + ... + fun:TclpGetGrGid +} + +{ + TcphostPortList/getnameinfo/module_load/calloc + Memcheck:Leak + match-leak-kinds: definite,reachable + fun:calloc + ... + fun:dl_open_worker_begin + ... + fun:module_load + ... + fun:getnameinfo + fun:TcpHostPortList +} + +{ + # see sourceware glibc Bug 14984 - getnameinfo() might be leaking memory + TcphostPortList/getnameinfo/module_load/mallco + Memcheck:Leak + match-leak-kinds: definite,reachable + fun:malloc + ... + fun:dl_open_worker_begin + ... + fun:module_load + ... + fun:getnameinfo + fun:TcpHostPortList +} + +{ TclpThreadExit/pthread_exit/calloc Memcheck:Leak match-leak-kinds: reachable @@ -124,3 +251,13 @@ fun:TclpThreadExit } +{ + TclpThreadExit/pthread_exit/malloc + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + ... + fun:pthread_exit + fun:TclpThreadExit +} + |